summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/cso_cache/Makefile1
-rw-r--r--src/gallium/auxiliary/cso_cache/SConscript1
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_cache.c5
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_cache.h16
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.c354
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.h85
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_hash.c7
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_hash.h10
-rw-r--r--src/gallium/auxiliary/draw/SConscript1
-rw-r--r--src/gallium/auxiliary/draw/draw_aaline.c57
-rw-r--r--src/gallium/auxiliary/draw/draw_aapoint.c3
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c15
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h5
-rw-r--r--src/gallium/auxiliary/draw/draw_passthrough.c222
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h29
-rw-r--r--src/gallium/auxiliary/draw/draw_pstipple.c73
-rw-r--r--src/gallium/auxiliary/draw/draw_unfilled.c2
-rw-r--r--src/gallium/auxiliary/draw/draw_vbuf.h6
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex_cache.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex_fetch.c44
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex_shader.c17
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c42
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_llvm.c48
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_sse.c42
-rw-r--r--src/gallium/auxiliary/gallivm/Makefile7
-rw-r--r--src/gallium/auxiliary/gallivm/gallivm.cpp10
-rw-r--r--src/gallium/auxiliary/gallivm/gallivm.h10
-rw-r--r--src/gallium/auxiliary/gallivm/instructions.cpp10
-rw-r--r--src/gallium/auxiliary/gallivm/instructionssoa.cpp185
-rw-r--r--src/gallium/auxiliary/gallivm/instructionssoa.h26
-rw-r--r--src/gallium/auxiliary/gallivm/soabuiltins.c72
-rw-r--r--src/gallium/auxiliary/gallivm/storagesoa.cpp28
-rw-r--r--src/gallium/auxiliary/gallivm/storagesoa.h15
-rw-r--r--src/gallium/auxiliary/gallivm/tgsitollvm.cpp39
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c47
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h16
-rw-r--r--src/gallium/auxiliary/sct/Makefile12
-rw-r--r--src/gallium/auxiliary/sct/SConscript9
-rw-r--r--src/gallium/auxiliary/sct/sct.c453
-rw-r--r--src/gallium/auxiliary/sct/sct.h123
-rw-r--r--src/gallium/auxiliary/sct/usage.c61
-rw-r--r--src/gallium/auxiliary/tgsi/exec/tgsi_exec.c46
-rw-r--r--src/gallium/auxiliary/tgsi/util/tgsi_build.c5
-rw-r--r--src/gallium/auxiliary/tgsi/util/tgsi_build.h1
-rw-r--r--src/gallium/auxiliary/tgsi/util/tgsi_dump.c10
-rw-r--r--src/gallium/auxiliary/util/Makefile4
-rw-r--r--src/gallium/auxiliary/util/SConscript2
-rw-r--r--src/gallium/auxiliary/util/p_debug.c107
-rw-r--r--src/gallium/auxiliary/util/p_tile.h18
-rw-r--r--src/gallium/auxiliary/util/u_cpu_detect.c506
-rw-r--r--src/gallium/auxiliary/util/u_cpu_detect.h78
-rw-r--r--src/gallium/auxiliary/util/u_handle_table.c207
-rw-r--r--src/gallium/auxiliary/util/u_handle_table.h96
-rw-r--r--src/gallium/auxiliary/util/u_snprintf.c1478
54 files changed, 4511 insertions, 261 deletions
diff --git a/src/gallium/auxiliary/cso_cache/Makefile b/src/gallium/auxiliary/cso_cache/Makefile
index 3e49266163..6bd6602088 100644
--- a/src/gallium/auxiliary/cso_cache/Makefile
+++ b/src/gallium/auxiliary/cso_cache/Makefile
@@ -4,6 +4,7 @@ include $(TOP)/configs/current
LIBNAME = cso_cache
C_SOURCES = \
+ cso_context.c \
cso_cache.c \
cso_hash.c
diff --git a/src/gallium/auxiliary/cso_cache/SConscript b/src/gallium/auxiliary/cso_cache/SConscript
index 9751881613..651e68a191 100644
--- a/src/gallium/auxiliary/cso_cache/SConscript
+++ b/src/gallium/auxiliary/cso_cache/SConscript
@@ -3,6 +3,7 @@ Import('*')
cso_cache = env.ConvenienceLibrary(
target = 'cso_cache',
source = [
+ 'cso_context.c',
'cso_cache.c',
'cso_hash.c',
])
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c
index b427b509f8..a2764b4265 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.c
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.c
@@ -207,8 +207,11 @@ static INLINE void sanitize_hash(struct cso_hash *hash, enum cso_cache_type type
{
/* if we're approach the maximum size, remove fourth of the entries
* otherwise every subsequent call will go through the same */
- int max_entries = (max_size > cso_hash_size(hash)) ? max_size : cso_hash_size(hash);
+ int hash_size = cso_hash_size(hash);
+ int max_entries = (max_size > hash_size) ? max_size : hash_size;
int to_remove = (max_size < max_entries) * max_entries/4;
+ if (hash_size > max_size)
+ to_remove += hash_size - max_size;
while (to_remove) {
/*remove elements until we're good */
/*fixme: currently we pick the nodes to remove at random*/
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h
index 44ee128a4a..e5edbbb556 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.h
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.h
@@ -84,47 +84,49 @@
extern "C" {
#endif
+typedef void (*cso_state_callback)(void *ctx, void *obj);
+
struct cso_cache;
struct cso_blend {
struct pipe_blend_state state;
void *data;
- void (*delete_state)(struct pipe_context *, void *);
+ cso_state_callback delete_state;
struct pipe_context *context;
};
struct cso_depth_stencil_alpha {
struct pipe_depth_stencil_alpha_state state;
void *data;
- void (*delete_state)(struct pipe_context *, void *);
+ cso_state_callback delete_state;
struct pipe_context *context;
};
struct cso_rasterizer {
struct pipe_rasterizer_state state;
void *data;
- void (*delete_state)(struct pipe_context *, void *);
+ cso_state_callback delete_state;
struct pipe_context *context;
};
struct cso_fragment_shader {
struct pipe_shader_state state;
void *data;
- void (*delete_state)(struct pipe_context *, void *);
+ cso_state_callback delete_state;
struct pipe_context *context;
};
struct cso_vertex_shader {
struct pipe_shader_state state;
void *data;
- void (*delete_state)(struct pipe_context *, void *);
+ cso_state_callback delete_state;
struct pipe_context *context;
};
struct cso_sampler {
struct pipe_sampler_state state;
void *data;
- void (*delete_state)(struct pipe_context *, void *);
+ cso_state_callback delete_state;
struct pipe_context *context;
};
@@ -138,8 +140,6 @@ enum cso_cache_type {
CSO_VERTEX_SHADER
};
-typedef void (*cso_state_callback)(void *, void *);
-
unsigned cso_construct_key(void *item, int item_size);
struct cso_cache *cso_cache_create(void);
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
new file mode 100644
index 0000000000..f7f4aebb16
--- /dev/null
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -0,0 +1,354 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /* Wrap the cso cache & hash mechanisms in a simplified
+ * pipe-driver-specific interface.
+ *
+ * Authors:
+ * Zack Rusin <zack@tungstengraphics.com>
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_state.h"
+#include "pipe/p_util.h"
+
+#include "cso_cache/cso_context.h"
+#include "cso_cache/cso_cache.h"
+#include "cso_cache/cso_hash.h"
+
+/* Front end to the CSO cache for one pipe context.  Besides the cache
+ * itself it remembers the handles bound most recently through the
+ * cso_set_*() functions so that redundant bind calls can be skipped.
+ */
+struct cso_context {
+ struct pipe_context *pipe;
+ struct cso_cache *cache;
+
+ /* Copy of the sampler set most recently passed to
+  * pipe->bind_sampler_states() by cso_single_sampler_done().
+  */
+ struct {
+ void *samplers[PIPE_MAX_SAMPLERS];
+ unsigned nr_samplers;
+ } hw;
+
+ /* Working sampler set: slots are filled by cso_single_sampler(), the
+  * count is recomputed in cso_single_sampler_done().
+  */
+ void *samplers[PIPE_MAX_SAMPLERS];
+ unsigned nr_samplers;
+
+ /* Driver handles last bound by the matching cso_set_*() call. */
+ void *blend;
+ void *depth_stencil;
+ void *rasterizer;
+ void *fragment_shader;
+ void *vertex_shader;
+};
+
+
+/**
+ * Allocate a cso_context wrapping \p pipe and give it a fresh CSO cache.
+ *
+ * Returns the new context, or NULL if either the context or its cache
+ * could not be allocated.  On failure whatever was allocated so far is
+ * handed to cso_destroy_context(), which accepts a NULL pointer.
+ */
+struct cso_context *cso_create_context( struct pipe_context *pipe )
+{
+   struct cso_context *ctx = CALLOC_STRUCT(cso_context);
+
+   if (ctx != NULL) {
+      ctx->cache = cso_cache_create();
+
+      if (ctx->cache != NULL) {
+         ctx->pipe = pipe;
+
+         /* Enable for testing: */
+         if (0) cso_set_maximum_cache_size( ctx->cache, 4 );
+
+         return ctx;
+      }
+   }
+
+   /* Failure: ctx->pipe has not been assigned at this point. */
+   cso_destroy_context( ctx );
+   return NULL;
+}
+
+/* Unbind every state category from the pipe context (when one is set)
+ * and then delete the cache, clearing ctx->cache afterwards.
+ */
+static void cso_release_all( struct cso_context *ctx )
+{
+   struct pipe_context *pipe = ctx->pipe;
+
+   if (pipe != NULL) {
+      pipe->bind_blend_state( pipe, NULL );
+      pipe->bind_rasterizer_state( pipe, NULL );
+      pipe->bind_sampler_states( pipe, 0, NULL );
+      pipe->bind_depth_stencil_alpha_state( pipe, NULL );
+      pipe->bind_fs_state( pipe, NULL );
+      pipe->bind_vs_state( pipe, NULL );
+   }
+
+   if (ctx->cache != NULL) {
+      cso_cache_delete( ctx->cache );
+      ctx->cache = NULL;
+   }
+}
+
+
+/**
+ * Release everything held by \p ctx and free it.
+ *
+ * A NULL \p ctx is accepted: the release step is skipped and only the
+ * debug message and FREE() are executed.
+ */
+void cso_destroy_context( struct cso_context *ctx )
+{
+   debug_printf("%s\n", __FUNCTION__);
+
+   if (ctx != NULL) {
+      cso_release_all( ctx );
+   }
+
+   FREE( ctx );
+}
+
+
+/* These functions will either find the state of the given template
+ * in the cache or they will create a new state from the given
+ * template, insert it in the cache and return it.
+ */
+
+/*
+ * If the driver returns 0 from the create method then they will assign
+ * the data member of the cso to be the template itself.
+ */
+
+/**
+ * Bind the blend state described by \p templ.
+ *
+ * The template is hashed and looked up in ctx->cache.  On a miss a new
+ * cso_blend entry is allocated, filled with the result of the driver's
+ * create_blend_state() and inserted into the cache.  The driver's
+ * bind_blend_state() is only called when the resulting handle differs
+ * from the one recorded in ctx->blend.
+ *
+ * If the cache entry cannot be allocated the function returns without
+ * changing the current binding.
+ */
+void cso_set_blend(struct cso_context *ctx,
+                   const struct pipe_blend_state *templ)
+{
+   unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_blend_state));
+   struct cso_hash_iter iter = cso_find_state_template(ctx->cache,
+                                                       hash_key, CSO_BLEND,
+                                                       (void*)templ);
+   void *handle;
+
+   if (cso_hash_iter_is_null(iter)) {
+      struct cso_blend *cso = MALLOC(sizeof(struct cso_blend));
+
+      /* Out of memory: bail out instead of dereferencing NULL below. */
+      if (cso == NULL)
+         return;
+
+      cso->state = *templ;
+      cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state);
+      cso->delete_state = (cso_state_callback)ctx->pipe->delete_blend_state;
+      cso->context = ctx->pipe;
+
+      iter = cso_insert_state(ctx->cache, hash_key, CSO_BLEND, cso);
+      handle = cso->data;
+   }
+   else {
+      handle = ((struct cso_blend *)cso_hash_iter_data(iter))->data;
+   }
+
+   /* Skip the driver call when this handle is already bound. */
+   if (ctx->blend != handle) {
+      ctx->blend = handle;
+      ctx->pipe->bind_blend_state(ctx->pipe, handle);
+   }
+}
+
+/**
+ * Set one slot of the working sampler array.
+ *
+ * \param idx    sampler slot to update; values outside
+ *               [0, PIPE_MAX_SAMPLERS) are ignored
+ * \param templ  sampler template, or NULL to clear the slot
+ *
+ * A non-NULL template is looked up in (or added to) the cache and the
+ * driver handle is stored in ctx->samplers[idx].  Nothing is bound
+ * here; cso_single_sampler_done() pushes the array to the driver.
+ *
+ * If a new cache entry cannot be allocated the slot is left unchanged.
+ */
+void cso_single_sampler(struct cso_context *ctx,
+                        unsigned idx,
+                        const struct pipe_sampler_state *templ)
+{
+   void *handle = NULL;
+
+   /* ctx->samplers has PIPE_MAX_SAMPLERS entries; never write past it. */
+   if (idx >= PIPE_MAX_SAMPLERS)
+      return;
+
+   if (templ != NULL) {
+      unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_sampler_state));
+      struct cso_hash_iter iter = cso_find_state_template(ctx->cache,
+                                                          hash_key, CSO_SAMPLER,
+                                                          (void*)templ);
+
+      if (cso_hash_iter_is_null(iter)) {
+         struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler));
+
+         /* Out of memory: bail out instead of dereferencing NULL below. */
+         if (cso == NULL)
+            return;
+
+         cso->state = *templ;
+         cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state);
+         cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state;
+         cso->context = ctx->pipe;
+
+         iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso);
+         handle = cso->data;
+      }
+      else {
+         handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data;
+      }
+   }
+
+   ctx->samplers[idx] = handle;
+}
+
+/**
+ * Push the working sampler array to the driver if it changed.
+ *
+ * The sampler count is the number of leading non-NULL entries in
+ * ctx->samplers.  The driver's bind_sampler_states() is called only when
+ * that count or the handles differ from the copy kept in ctx->hw, which
+ * is updated at the same time.
+ */
+void cso_single_sampler_done( struct cso_context *ctx )
+{
+   unsigned i;
+
+   /* Scan the full array instead of a hard-coded 8 entries so the bound
+    * always matches the declared size of ctx->samplers.
+    */
+   for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
+      if (ctx->samplers[i] == NULL)
+         break;
+
+   ctx->nr_samplers = i;
+
+   if (ctx->hw.nr_samplers != ctx->nr_samplers ||
+       memcmp(ctx->hw.samplers,
+              ctx->samplers,
+              ctx->nr_samplers * sizeof(void *)) != 0)
+   {
+      memcpy(ctx->hw.samplers, ctx->samplers, ctx->nr_samplers * sizeof(void *));
+      ctx->hw.nr_samplers = ctx->nr_samplers;
+
+      ctx->pipe->bind_sampler_states(ctx->pipe, ctx->nr_samplers, ctx->samplers);
+   }
+}
+
+/**
+ * Replace the whole sampler set in one call.
+ *
+ * The first \p nr slots are set from \p templates, every remaining slot
+ * below the previous sampler count is cleared, and the result is then
+ * pushed to the driver through cso_single_sampler_done().
+ */
+void cso_set_samplers( struct cso_context *ctx,
+                       unsigned nr,
+                       const struct pipe_sampler_state **templates )
+{
+   unsigned slot = 0;
+
+   /* TODO: fastpath
+    */
+
+   while (slot < nr) {
+      cso_single_sampler( ctx, slot, templates[slot] );
+      slot++;
+   }
+
+   /* Clear slots that were in use before but are not covered by nr. */
+   while (slot < ctx->nr_samplers) {
+      cso_single_sampler( ctx, slot, NULL );
+      slot++;
+   }
+
+   cso_single_sampler_done( ctx );
+}
+
+/**
+ * Bind the depth/stencil/alpha state described by \p templ.
+ *
+ * The template is hashed and looked up in ctx->cache.  On a miss a new
+ * cso_depth_stencil_alpha entry is allocated, filled with the result of
+ * the driver's create_depth_stencil_alpha_state() and inserted into the
+ * cache.  The driver's bind function is only called when the resulting
+ * handle differs from the one recorded in ctx->depth_stencil.
+ *
+ * If the cache entry cannot be allocated the function returns without
+ * changing the current binding.
+ */
+void cso_set_depth_stencil_alpha(struct cso_context *ctx,
+                                 const struct pipe_depth_stencil_alpha_state *templ)
+{
+   unsigned hash_key = cso_construct_key((void*)templ,
+                                         sizeof(struct pipe_depth_stencil_alpha_state));
+   struct cso_hash_iter iter = cso_find_state_template(ctx->cache,
+                                                       hash_key,
+                                                       CSO_DEPTH_STENCIL_ALPHA,
+                                                       (void*)templ);
+   void *handle;
+
+   if (cso_hash_iter_is_null(iter)) {
+      struct cso_depth_stencil_alpha *cso = MALLOC(sizeof(struct cso_depth_stencil_alpha));
+
+      /* Out of memory: bail out instead of dereferencing NULL below. */
+      if (cso == NULL)
+         return;
+
+      cso->state = *templ;
+      cso->data = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &cso->state);
+      cso->delete_state = (cso_state_callback)ctx->pipe->delete_depth_stencil_alpha_state;
+      cso->context = ctx->pipe;
+
+      cso_insert_state(ctx->cache, hash_key, CSO_DEPTH_STENCIL_ALPHA, cso);
+      handle = cso->data;
+   }
+   else {
+      handle = ((struct cso_depth_stencil_alpha *)cso_hash_iter_data(iter))->data;
+   }
+
+   /* Skip the driver call when this handle is already bound. */
+   if (ctx->depth_stencil != handle) {
+      ctx->depth_stencil = handle;
+      ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, handle);
+   }
+}
+
+
+
+/**
+ * Bind the rasterizer state described by \p templ.
+ *
+ * The template is hashed and looked up in ctx->cache.  On a miss a new
+ * cso_rasterizer entry is allocated, filled with the result of the
+ * driver's create_rasterizer_state() and inserted into the cache.  The
+ * driver's bind_rasterizer_state() is only called when the resulting
+ * handle differs from the one recorded in ctx->rasterizer.
+ *
+ * If the cache entry cannot be allocated the function returns without
+ * changing the current binding.
+ */
+void cso_set_rasterizer(struct cso_context *ctx,
+                        const struct pipe_rasterizer_state *templ)
+{
+   unsigned hash_key = cso_construct_key((void*)templ,
+                                         sizeof(struct pipe_rasterizer_state));
+   struct cso_hash_iter iter = cso_find_state_template(ctx->cache,
+                                                       hash_key, CSO_RASTERIZER,
+                                                       (void*)templ);
+   void *handle = NULL;
+
+   if (cso_hash_iter_is_null(iter)) {
+      struct cso_rasterizer *cso = MALLOC(sizeof(struct cso_rasterizer));
+
+      /* Out of memory: bail out instead of dereferencing NULL below. */
+      if (cso == NULL)
+         return;
+
+      cso->state = *templ;
+      cso->data = ctx->pipe->create_rasterizer_state(ctx->pipe, &cso->state);
+      cso->delete_state = (cso_state_callback)ctx->pipe->delete_rasterizer_state;
+      cso->context = ctx->pipe;
+
+      cso_insert_state(ctx->cache, hash_key, CSO_RASTERIZER, cso);
+      handle = cso->data;
+   }
+   else {
+      handle = ((struct cso_rasterizer *)cso_hash_iter_data(iter))->data;
+   }
+
+   /* Skip the driver call when this handle is already bound. */
+   if (ctx->rasterizer != handle) {
+      ctx->rasterizer = handle;
+      ctx->pipe->bind_rasterizer_state(ctx->pipe, handle);
+   }
+}
+
+
+
+
+
+/**
+ * Bind the fragment shader described by \p templ.
+ *
+ * The template is hashed and looked up in ctx->cache.  On a miss a new
+ * cso_fragment_shader entry is allocated, filled with the result of the
+ * driver's create_fs_state() and inserted into the cache.  The driver's
+ * bind_fs_state() is only called when the resulting handle differs from
+ * the one recorded in ctx->fragment_shader.
+ *
+ * If the cache entry cannot be allocated the function returns without
+ * changing the current binding.
+ */
+void cso_set_fragment_shader(struct cso_context *ctx,
+                             const struct pipe_shader_state *templ)
+{
+   unsigned hash_key = cso_construct_key((void*)templ,
+                                         sizeof(struct pipe_shader_state));
+   struct cso_hash_iter iter = cso_find_state_template(ctx->cache,
+                                                       hash_key, CSO_FRAGMENT_SHADER,
+                                                       (void*)templ);
+   void *handle = NULL;
+
+   if (cso_hash_iter_is_null(iter)) {
+      struct cso_fragment_shader *cso = MALLOC(sizeof(struct cso_fragment_shader));
+
+      /* Out of memory: bail out instead of dereferencing NULL below. */
+      if (cso == NULL)
+         return;
+
+      cso->state = *templ;
+      cso->data = ctx->pipe->create_fs_state(ctx->pipe, &cso->state);
+      cso->delete_state = (cso_state_callback)ctx->pipe->delete_fs_state;
+      cso->context = ctx->pipe;
+
+      iter = cso_insert_state(ctx->cache, hash_key, CSO_FRAGMENT_SHADER, cso);
+      handle = cso->data;
+   }
+   else {
+      handle = ((struct cso_fragment_shader *)cso_hash_iter_data(iter))->data;
+   }
+
+   /* Skip the driver call when this handle is already bound. */
+   if (ctx->fragment_shader != handle) {
+      ctx->fragment_shader = handle;
+      ctx->pipe->bind_fs_state(ctx->pipe, handle);
+   }
+}
+
+/**
+ * Bind the vertex shader described by \p templ.
+ *
+ * The template is hashed and looked up in ctx->cache.  On a miss a new
+ * cso_vertex_shader entry is allocated, filled with the result of the
+ * driver's create_vs_state() and inserted into the cache.  The driver's
+ * bind_vs_state() is only called when the resulting handle differs from
+ * the one recorded in ctx->vertex_shader.
+ *
+ * If the cache entry cannot be allocated the function returns without
+ * changing the current binding.
+ */
+void cso_set_vertex_shader(struct cso_context *ctx,
+                           const struct pipe_shader_state *templ)
+{
+   unsigned hash_key = cso_construct_key((void*)templ,
+                                         sizeof(struct pipe_shader_state));
+   struct cso_hash_iter iter = cso_find_state_template(ctx->cache,
+                                                       hash_key, CSO_VERTEX_SHADER,
+                                                       (void*)templ);
+   void *handle = NULL;
+
+   if (cso_hash_iter_is_null(iter)) {
+      struct cso_vertex_shader *cso = MALLOC(sizeof(struct cso_vertex_shader));
+
+      /* Out of memory: bail out instead of dereferencing NULL below. */
+      if (cso == NULL)
+         return;
+
+      cso->state = *templ;
+      cso->data = ctx->pipe->create_vs_state(ctx->pipe, &cso->state);
+      cso->delete_state = (cso_state_callback)ctx->pipe->delete_vs_state;
+      cso->context = ctx->pipe;
+
+      iter = cso_insert_state(ctx->cache, hash_key, CSO_VERTEX_SHADER, cso);
+      handle = cso->data;
+   }
+   else {
+      handle = ((struct cso_vertex_shader *)cso_hash_iter_data(iter))->data;
+   }
+
+   /* Skip the driver call when this handle is already bound. */
+   if (ctx->vertex_shader != handle) {
+      ctx->vertex_shader = handle;
+      ctx->pipe->bind_vs_state(ctx->pipe, handle);
+   }
+}
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h
new file mode 100644
index 0000000000..1f2a630804
--- /dev/null
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -0,0 +1,85 @@
+/**************************************************************************
+ *
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef CSO_CONTEXT_H
+#define CSO_CONTEXT_H
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Opaque handle; the structure is defined in cso_context.c. */
+struct cso_context;
+
+/* Create a context wrapping the given pipe context.  Returns NULL on
+ * allocation failure.
+ */
+struct cso_context *cso_create_context( struct pipe_context *pipe );
+
+/* Each cso_set_*() call below looks the template up in the cache,
+ * creates the driver object on a miss, and binds the resulting handle
+ * if it differs from the one bound last.
+ */
+void cso_set_blend( struct cso_context *cso,
+ const struct pipe_blend_state *blend );
+
+void cso_set_depth_stencil_alpha( struct cso_context *cso,
+ const struct pipe_depth_stencil_alpha_state *dsa );
+
+void cso_set_rasterizer( struct cso_context *cso,
+ const struct pipe_rasterizer_state *rasterizer );
+
+/* Set the first `count` sampler slots from `states` and clear any
+ * previously used slots beyond that.
+ */
+void cso_set_samplers( struct cso_context *cso,
+ unsigned count,
+ const struct pipe_sampler_state **states );
+
+/* Alternate interface to support state trackers that like to modify
+ * samplers one at a time:
+ *
+ * `nr` is the index of the sampler slot to change and `states` points
+ * to a single template (NULL clears the slot).  Nothing is bound until
+ * cso_single_sampler_done() is called.
+ */
+void cso_single_sampler( struct cso_context *cso,
+ unsigned nr,
+ const struct pipe_sampler_state *states );
+
+void cso_single_sampler_done( struct cso_context *cso );
+
+
+/* These aren't really sensible -- most of the time the api provides
+ * object semantics for shaders anyway, and in the cases where it doesn't
+ * (eg mesa's internally-generated texenv programs), it will be up to
+ * the state tracker to implement its own specialized caching.
+ */
+void cso_set_fragment_shader( struct cso_context *cso,
+ const struct pipe_shader_state *shader );
+
+void cso_set_vertex_shader( struct cso_context *cso,
+ const struct pipe_shader_state *shader );
+
+/* Unbind all state, delete the cache and free the context.  A NULL
+ * pointer is accepted.
+ */
+void cso_destroy_context( struct cso_context *cso );
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c
index b3b4d667d2..5cad5d3be7 100644
--- a/src/gallium/auxiliary/cso_cache/cso_hash.c
+++ b/src/gallium/auxiliary/cso_cache/cso_hash.c
@@ -101,13 +101,6 @@ static void *cso_data_allocate_node(struct cso_hash_data *hash)
static void cso_data_free_node(struct cso_node *node)
{
- /* XXX still a leak here.
- * Need to cast value ptr to original cso type, then free the
- * driver-specific data hanging off of it. For example:
- struct cso_sampler *csamp = (struct cso_sampler *) node->value;
- FREE(csamp->data);
- */
- FREE(node->value);
FREE(node);
}
diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h
index d5bca9d591..84b45a5963 100644
--- a/src/gallium/auxiliary/cso_cache/cso_hash.h
+++ b/src/gallium/auxiliary/cso_cache/cso_hash.h
@@ -25,6 +25,16 @@
*
**************************************************************************/
+/**
+ This file provides a hash implementation that is capable of dealing
+ with collisions. It stores colliding entries in a linked list. All
+ functions operating on the hash return an iterator. The iterator
+ itself points to the collision list. If there wasn't any collision
+ the list will have just one entry; otherwise client code should
+ iterate over the entries to find the exact entry among the ones that
+ had the same key (e.g. memcmp could be used on the data to check
+ that).
+*/
/*
* Authors:
* Zack Rusin <zack@tungstengraphics.com>
diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript
index c18dcb2927..5cb7664c85 100644
--- a/src/gallium/auxiliary/draw/SConscript
+++ b/src/gallium/auxiliary/draw/SConscript
@@ -16,6 +16,7 @@ draw = env.ConvenienceLibrary(
'draw_offset.c',
'draw_prim.c',
'draw_pstipple.c',
+ 'draw_passthrough.c',
'draw_stipple.c',
'draw_twoside.c',
'draw_unfilled.c',
diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c
index 7660e56fe6..6b1e640ae9 100644
--- a/src/gallium/auxiliary/draw/draw_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_aaline.c
@@ -78,7 +78,8 @@ struct aaline_stage
void *sampler_cso;
struct pipe_texture *texture;
- uint sampler_unit;
+ uint num_samplers;
+ uint num_textures;
/*
@@ -98,11 +99,10 @@ struct aaline_stage
void (*driver_bind_fs_state)(struct pipe_context *, void *);
void (*driver_delete_fs_state)(struct pipe_context *, void *);
- void (*driver_bind_sampler_state)(struct pipe_context *, unsigned, void *);
-
- void (*driver_set_sampler_texture)(struct pipe_context *,
- unsigned sampler,
- struct pipe_texture *);
+ void (*driver_bind_sampler_states)(struct pipe_context *, unsigned,
+ void **);
+ void (*driver_set_sampler_textures)(struct pipe_context *, unsigned,
+ struct pipe_texture **);
struct pipe_context *pipe;
};
@@ -607,6 +607,7 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
auto struct aaline_stage *aaline = aaline_stage(stage);
struct draw_context *draw = stage->draw;
struct pipe_context *pipe = aaline->pipe;
+ uint num = MAX2(aaline->num_textures, aaline->num_samplers);
assert(draw->rasterizer->line_smooth);
@@ -624,8 +625,11 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
*/
bind_aaline_fragment_shader(aaline);
- aaline->driver_bind_sampler_state(pipe, aaline->sampler_unit, aaline->sampler_cso);
- aaline->driver_set_sampler_texture(pipe, aaline->sampler_unit, aaline->texture);
+ aaline->state.sampler[num] = aaline->sampler_cso;
+ aaline->state.texture[num] = aaline->texture;
+
+ aaline->driver_bind_sampler_states(pipe, num + 1, aaline->state.sampler);
+ aaline->driver_set_sampler_textures(pipe, num + 1, aaline->state.texture);
/* now really draw first line */
stage->line = aaline_line;
@@ -647,10 +651,10 @@ aaline_flush(struct draw_stage *stage, unsigned flags)
aaline->driver_bind_fs_state(pipe, aaline->fs->driver_fs);
/* XXX restore original texture, sampler state */
- aaline->driver_bind_sampler_state(pipe, aaline->sampler_unit,
- aaline->state.sampler[aaline->sampler_unit]);
- aaline->driver_set_sampler_texture(pipe, aaline->sampler_unit,
- aaline->state.texture[aaline->sampler_unit]);
+ aaline->driver_bind_sampler_states(pipe, aaline->num_samplers,
+ aaline->state.sampler);
+ aaline->driver_set_sampler_textures(pipe, aaline->num_textures,
+ aaline->state.texture);
draw->extra_vp_outputs.slot = 0;
}
@@ -729,7 +733,8 @@ aaline_bind_fs_state(struct pipe_context *pipe, void *fs)
/* save current */
aaline->fs = aafs;
/* pass-through */
- aaline->driver_bind_fs_state(aaline->pipe, aafs->driver_fs);
+ aaline->driver_bind_fs_state(aaline->pipe,
+ (aafs ? aafs->driver_fs : NULL));
}
@@ -745,26 +750,28 @@ aaline_delete_fs_state(struct pipe_context *pipe, void *fs)
static void
-aaline_bind_sampler_state(struct pipe_context *pipe,
- unsigned unit, void *sampler)
+aaline_bind_sampler_states(struct pipe_context *pipe,
+ unsigned num, void **sampler)
{
struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
/* save current */
- aaline->state.sampler[unit] = sampler;
+ memcpy(aaline->state.sampler, sampler, num * sizeof(void *));
+ aaline->num_samplers = num;
/* pass-through */
- aaline->driver_bind_sampler_state(aaline->pipe, unit, sampler);
+ aaline->driver_bind_sampler_states(aaline->pipe, num, sampler);
}
static void
-aaline_set_sampler_texture(struct pipe_context *pipe,
- unsigned sampler, struct pipe_texture *texture)
+aaline_set_sampler_textures(struct pipe_context *pipe,
+ unsigned num, struct pipe_texture **texture)
{
struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
/* save current */
- aaline->state.texture[sampler] = texture;
+ memcpy(aaline->state.texture, texture, num * sizeof(struct pipe_texture *));
+ aaline->num_textures = num;
/* pass-through */
- aaline->driver_set_sampler_texture(aaline->pipe, sampler, texture);
+ aaline->driver_set_sampler_textures(aaline->pipe, num, texture);
}
@@ -798,14 +805,14 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe)
aaline->driver_bind_fs_state = pipe->bind_fs_state;
aaline->driver_delete_fs_state = pipe->delete_fs_state;
- aaline->driver_bind_sampler_state = pipe->bind_sampler_state;
- aaline->driver_set_sampler_texture = pipe->set_sampler_texture;
+ aaline->driver_bind_sampler_states = pipe->bind_sampler_states;
+ aaline->driver_set_sampler_textures = pipe->set_sampler_textures;
/* override the driver's functions */
pipe->create_fs_state = aaline_create_fs_state;
pipe->bind_fs_state = aaline_bind_fs_state;
pipe->delete_fs_state = aaline_delete_fs_state;
- pipe->bind_sampler_state = aaline_bind_sampler_state;
- pipe->set_sampler_texture = aaline_set_sampler_texture;
+ pipe->bind_sampler_states = aaline_bind_sampler_states;
+ pipe->set_sampler_textures = aaline_set_sampler_textures;
}
diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c
index 70f696475f..99e9e9fe34 100644
--- a/src/gallium/auxiliary/draw/draw_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_aapoint.c
@@ -800,7 +800,8 @@ aapoint_bind_fs_state(struct pipe_context *pipe, void *fs)
/* save current */
aapoint->fs = aafs;
/* pass-through */
- aapoint->driver_bind_fs_state(aapoint->pipe, aafs->driver_fs);
+ aapoint->driver_bind_fs_state(aapoint->pipe,
+ (aafs ? aafs->driver_fs : NULL));
}
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 428b6209e0..fed2b6e759 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -34,6 +34,7 @@
#include "pipe/p_util.h"
#include "draw_context.h"
#include "draw_private.h"
+#include "draw_vbuf.h"
@@ -114,6 +115,13 @@ void draw_destroy( struct draw_context *draw )
draw->pipeline.rasterize->destroy( draw->pipeline.rasterize );
tgsi_exec_machine_free_data(&draw->machine);
align_free( draw->vs.queue[0].vertex ); /* Frees all the vertices. */
+
+ /* Not so fast -- we're just borrowing this at the moment.
+ *
+ if (draw->render)
+ draw->render->destroy( draw->render );
+ */
+
FREE( draw );
}
@@ -349,3 +357,10 @@ void draw_reset_vertex_ids(struct draw_context *draw)
draw_vertex_cache_reset_vertex_ids(draw);
}
+
+
+/**
+ * Store the given vbuf_render backend in draw->render.
+ *
+ * NOTE(review): draw_destroy() has its draw->render->destroy() call
+ * commented out ("we're just borrowing this"), so the caller presumably
+ * keeps ownership of the object -- confirm.
+ */
+void draw_set_render( struct draw_context *draw,
+ struct vbuf_render *render )
+{
+ draw->render = render;
+}
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index ab87b4127c..df63e91a22 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -168,4 +168,9 @@ unsigned draw_trim_prim( unsigned mode, unsigned count );
+
+struct vbuf_render;
+void draw_set_render( struct draw_context *draw,
+ struct vbuf_render *render );
+
#endif /* DRAW_CONTEXT_H */
diff --git a/src/gallium/auxiliary/draw/draw_passthrough.c b/src/gallium/auxiliary/draw/draw_passthrough.c
new file mode 100644
index 0000000000..a51fa0ab23
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_passthrough.c
@@ -0,0 +1,222 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+/* This code is a prototype of what a passthrough vertex shader might
+ * look like.
+ *
+ * Probably the best approach for us is to do:
+ * - vertex fetch
+ * - vertex shader
+ * - cliptest / viewport transform
+ *
+ * in one step, then examine the clipOrMask & choose between two paths:
+ *
+ * Either:
+ * - build primitive headers
+ * - clip and the primitive path
+ * - build clipped vertex buffers,
+ * - vertex-emit to vbuf buffers
+ *
+ * Or, if no clipping:
+ * - vertex-emit directly to vbuf buffers
+ *
+ * But when bypass clipping is enabled, we just take the latter
+ * choice. If (some new) passthrough-vertex-shader flag is also set,
+ * the pipeline degenerates to:
+ *
+ * - vertex fetch
+ * - vertex emit to vbuf buffers
+ *
+ * Which is what is prototyped here.
+ */
+#include "pipe/p_util.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+
+
+
+/* Example of a fetch/emit passthrough shader which could be
+ * generated when bypass_clipping is enabled on a passthrough vertex
+ * shader.
+ */
+/**
+ * Hard-wired fetch/emit routine for one fixed vertex layout.
+ *
+ * For each of 'count' vertices, beginning at source vertex 'start',
+ * copies four floats from attribute 0, four floats from attribute 1
+ * and two floats from attribute 2 into 'out', i.e. ten consecutive
+ * floats per output vertex.  Source addresses come from
+ * draw->vertex_fetch.src_ptr[] and are stepped by
+ * draw->vertex_fetch.pitch[].
+ */
+static void fetch_xyz_rgb_st( struct draw_context *draw,
+                              float *out,
+                              unsigned start,
+                              unsigned count )
+{
+   const ubyte **base = draw->vertex_fetch.src_ptr;
+   const unsigned *stride = draw->vertex_fetch.pitch;
+
+   /* One read cursor per attribute, positioned at vertex 'start'. */
+   const ubyte *pos_cursor = base[0] + start * stride[0];
+   const ubyte *col_cursor = base[1] + start * stride[1];
+   const ubyte *tex_cursor = base[2] + start * stride[2];
+
+   float *dst = out;
+   unsigned v;
+
+   for (v = 0; v < count; v++) {
+      const float *attr;
+
+      /* attribute 0: float[4] in, float[4] out (slots 0..3) */
+      attr = (const float *) pos_cursor;
+      pos_cursor += stride[0];
+      dst[0] = attr[0];
+      dst[1] = attr[1];
+      dst[2] = attr[2];
+      dst[3] = attr[3];
+
+      /* attribute 1: float[4] in, float[4] out (slots 4..7) */
+      attr = (const float *) col_cursor;
+      col_cursor += stride[1];
+      dst[4] = attr[0];
+      dst[5] = attr[1];
+      dst[6] = attr[2];
+      dst[7] = attr[3];
+
+      /* attribute 2: float[2] in, float[2] out (slots 8..9) */
+      attr = (const float *) tex_cursor;
+      tex_cursor += stride[2];
+      dst[8] = attr[0];
+      dst[9] = attr[1];
+
+      dst += 10;
+   }
+}
+
+
+/**
+ * Refresh the passthrough fetch state from the currently bound vertex
+ * elements and vertex buffers, then try to pick a specialized
+ * pt_fetch routine.
+ *
+ * Returns TRUE only when draw->vertex_fetch.pt_fetch has been set.
+ * As written, the "&& 0" in the final test disables the only
+ * candidate, so this function always returns FALSE.
+ */
+static boolean update_shader( struct draw_context *draw )
+{
+ const struct vertex_info *vinfo = draw->render->get_vertex_info(draw->render);
+
+ unsigned nr_attrs = vinfo->num_attribs;
+ unsigned i;
+
+ /* Resolve, per attribute, the address of its first element and the
+ * step between consecutive vertices.
+ */
+ for (i = 0; i < nr_attrs; i++) {
+ unsigned buf = draw->vertex_element[i].vertex_buffer_index;
+
+ draw->vertex_fetch.src_ptr[i] = (const ubyte *) draw->user.vbuffer[buf] +
+ draw->vertex_buffer[buf].buffer_offset +
+ draw->vertex_element[i].src_offset;
+
+ draw->vertex_fetch.pitch[i] = draw->vertex_buffer[buf].pitch;
+ draw->vertex_fetch.fetch[i] = NULL;
+ }
+
+ draw->vertex_fetch.nr_attrs = nr_attrs;
+ draw->vertex_fetch.fetch_func = NULL;
+ draw->vertex_fetch.pt_fetch = NULL;
+
+ /* NOTE(review): presumably vinfo->size counts 4-byte units, making
+ * this a size in bytes -- confirm against struct vertex_info.
+ */
+ draw->pt.hw_vertex_size = vinfo->size * 4;
+
+ /* Just trying to figure out how this would work:
+ */
+ if (nr_attrs == 3 &&
+ 0 /* some other tests */)
+ {
+ draw->vertex_fetch.pt_fetch = fetch_xyz_rgb_st;
+ assert(vinfo->size == 10);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+
+
+/**
+ * Record the primitive type for the passthrough path and forward it
+ * to the render backend.
+ *
+ * Returns FALSE, without touching the backend, for line loops, quads
+ * and quad strips; returns TRUE for every other primitive after
+ * calling render->set_primitive().
+ */
+static boolean set_prim( struct draw_context *draw,
+                         unsigned prim )
+{
+   assert(!draw->user.elts);
+
+   draw->pt.prim = prim;
+
+   /* Primitive types this path rejects. */
+   if (prim == PIPE_PRIM_LINE_LOOP ||
+       prim == PIPE_PRIM_QUADS ||
+       prim == PIPE_PRIM_QUAD_STRIP) {
+      return FALSE;
+   }
+
+   draw->render->set_primitive( draw->render, prim );
+   return TRUE;
+}
+
+
+
+/**
+ * Prototype fast path: fetch vertices directly into vertex storage
+ * obtained from the vbuf_render backend and draw them, bypassing the
+ * vertex shader and the rest of the draw pipeline.
+ *
+ * Returns TRUE if the primitive was drawn here.  Returns FALSE when
+ * this path cannot handle the request (no render backend attached,
+ * primitive type rejected by set_prim(), no fetch routine selected by
+ * update_shader(), or vertex allocation failed); nothing has been
+ * drawn in that case.
+ */
+boolean
+draw_passthrough_arrays(struct draw_context *draw,
+                        unsigned prim,
+                        unsigned start,
+                        unsigned count)
+{
+   float *hw_verts;
+
+   /* draw->render is only assigned through draw_set_render().  Every
+    * step below dereferences it, so refuse the fast path when no
+    * backend has been attached.
+    */
+   if (!draw->render)
+      return FALSE;
+
+   if (!set_prim(draw, prim))
+      return FALSE;
+
+   if (!update_shader( draw ))
+      return FALSE;
+
+   hw_verts = draw->render->allocate_vertices( draw->render,
+                                               draw->pt.hw_vertex_size,
+                                               count );
+
+   if (!hw_verts)
+      return FALSE;
+
+   /* Single routine to fetch vertices, run shader and emit HW verts.
+    * Clipping and viewport transformation are done on hardware.
+    */
+   draw->vertex_fetch.pt_fetch( draw,
+                                hw_verts,
+                                start, count );
+
+   /* Draw arrays path to avoid re-emitting index list again and
+    * again.
+    *
+    * NOTE(review): hw_verts was filled from its first slot with
+    * 'count' vertices, yet 'start' is forwarded here -- confirm
+    * whether the backend's draw_arrays() expects an offset into the
+    * allocated vertices (which would be 0) or the source array start.
+    */
+   draw->render->draw_arrays( draw->render,
+                              start,
+                              count );
+
+   draw->render->release_vertices( draw->render,
+                                   hw_verts,
+                                   draw->pt.hw_vertex_size,
+                                   count );
+
+   return TRUE;
+}
+
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index c732d723a7..4147472d45 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -133,7 +133,7 @@ struct draw_vertex_shader {
/* This member will disappear shortly:
*/
- const struct pipe_shader_state *state;
+ struct pipe_shader_state state;
struct tgsi_shader_info info;
@@ -162,8 +162,14 @@ typedef void (*full_fetch_func)( struct draw_context *draw,
const unsigned *elts,
unsigned count );
+typedef void (*pt_fetch_func)( struct draw_context *draw,
+ float *out,
+ unsigned start,
+ unsigned count );
+struct vbuf_render;
+
/**
* Private context for the drawing module.
*/
@@ -191,6 +197,17 @@ struct draw_context
struct draw_stage *rasterize;
} pipeline;
+
+ struct vbuf_render *render;
+
+ /* Support prototype passthrough path:
+ */
+ struct {
+ unsigned prim;
+ unsigned hw_vertex_size;
+ } pt;
+
+
/* pipe state that we need: */
const struct pipe_rasterizer_state *rasterizer;
struct pipe_viewport_state viewport;
@@ -244,6 +261,7 @@ struct draw_context
fetch_func fetch[PIPE_ATTRIB_MAX];
unsigned nr_attrs;
full_fetch_func fetch_func;
+ pt_fetch_func pt_fetch;
} vertex_fetch;
/* Post-tnl vertex cache:
@@ -331,6 +349,15 @@ struct tgsi_exec_machine;
extern void draw_update_vertex_fetch( struct draw_context *draw );
+/* Prototype/hack
+ */
+boolean
+draw_passthrough_arrays(struct draw_context *draw,
+ unsigned prim,
+ unsigned start,
+ unsigned count);
+
+
#define DRAW_FLUSH_SHADER_QUEUE 0x1 /* sized not to overflow, never raised */
#define DRAW_FLUSH_PRIM_QUEUE 0x2
#define DRAW_FLUSH_VERTEX_CACHE 0x4
diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c
index 2cfeb813b3..8b3e84a9a0 100644
--- a/src/gallium/auxiliary/draw/draw_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pstipple.c
@@ -67,16 +67,18 @@ struct pstip_stage
struct draw_stage stage;
void *sampler_cso;
- struct pipe_texture *texture;
uint sampler_unit;
+ struct pipe_texture *texture;
+ uint num_samplers;
+ uint num_textures;
/*
* Currently bound state
*/
struct pstip_fragment_shader *fs;
struct {
- void *sampler[PIPE_MAX_SAMPLERS];
- struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+ void *samplers[PIPE_MAX_SAMPLERS];
+ struct pipe_texture *textures[PIPE_MAX_SAMPLERS];
const struct pipe_poly_stipple *stipple;
} state;
@@ -88,11 +90,10 @@ struct pstip_stage
void (*driver_bind_fs_state)(struct pipe_context *, void *);
void (*driver_delete_fs_state)(struct pipe_context *, void *);
- void (*driver_bind_sampler_state)(struct pipe_context *, unsigned, void *);
+ void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, void **);
- void (*driver_set_sampler_texture)(struct pipe_context *,
- unsigned sampler,
- struct pipe_texture *);
+ void (*driver_set_sampler_textures)(struct pipe_context *, unsigned,
+ struct pipe_texture **);
void (*driver_set_polygon_stipple)(struct pipe_context *,
const struct pipe_poly_stipple *);
@@ -484,18 +485,25 @@ static void
pstip_first_tri(struct draw_stage *stage, struct prim_header *header)
{
struct pstip_stage *pstip = pstip_stage(stage);
- struct draw_context *draw = stage->draw;
struct pipe_context *pipe = pstip->pipe;
+ uint num_samplers;
- assert(draw->rasterizer->poly_stipple_enable);
+ /* how many samplers? */
+ /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
+ num_samplers = MAX2(pstip->num_textures, pstip->num_samplers);
+ num_samplers = MAX2(num_samplers, pstip->sampler_unit + 1);
- /*
- * Bind our fragprog, sampler and texture
- */
+ assert(stage->draw->rasterizer->poly_stipple_enable);
+
+ /* bind our fragprog */
bind_pstip_fragment_shader(pstip);
- pstip->driver_bind_sampler_state(pipe, pstip->sampler_unit, pstip->sampler_cso);
- pstip->driver_set_sampler_texture(pipe, pstip->sampler_unit, pstip->texture);
+ /* plug in our sampler, texture */
+ pstip->state.samplers[pstip->sampler_unit] = pstip->sampler_cso;
+ pstip->state.textures[pstip->sampler_unit] = pstip->texture;
+
+ pstip->driver_bind_sampler_states(pipe, num_samplers, pstip->state.samplers);
+ pstip->driver_set_sampler_textures(pipe, num_samplers, pstip->state.textures);
/* now really draw first line */
stage->tri = passthrough_tri;
@@ -517,10 +525,10 @@ pstip_flush(struct draw_stage *stage, unsigned flags)
pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs);
/* XXX restore original texture, sampler state */
- pstip->driver_bind_sampler_state(pipe, pstip->sampler_unit,
- pstip->state.sampler[pstip->sampler_unit]);
- pstip->driver_set_sampler_texture(pipe, pstip->sampler_unit,
- pstip->state.texture[pstip->sampler_unit]);
+ pstip->driver_bind_sampler_states(pipe, pstip->num_samplers,
+ pstip->state.samplers);
+ pstip->driver_set_sampler_textures(pipe, pstip->num_textures,
+ pstip->state.textures);
}
@@ -597,7 +605,8 @@ pstip_bind_fs_state(struct pipe_context *pipe, void *fs)
/* save current */
pstip->fs = aafs;
/* pass-through */
- pstip->driver_bind_fs_state(pstip->pipe, aafs->driver_fs);
+ pstip->driver_bind_fs_state(pstip->pipe,
+ (aafs ? aafs->driver_fs : NULL));
}
@@ -613,26 +622,28 @@ pstip_delete_fs_state(struct pipe_context *pipe, void *fs)
static void
-pstip_bind_sampler_state(struct pipe_context *pipe,
- unsigned unit, void *sampler)
+pstip_bind_sampler_states(struct pipe_context *pipe,
+ unsigned num, void **sampler)
{
struct pstip_stage *pstip = pstip_stage_from_pipe(pipe);
/* save current */
- pstip->state.sampler[unit] = sampler;
+ memcpy(pstip->state.samplers, sampler, num * sizeof(void *));
+ pstip->num_samplers = num;
/* pass-through */
- pstip->driver_bind_sampler_state(pstip->pipe, unit, sampler);
+ pstip->driver_bind_sampler_states(pstip->pipe, num, sampler);
}
static void
-pstip_set_sampler_texture(struct pipe_context *pipe,
- unsigned sampler, struct pipe_texture *texture)
+pstip_set_sampler_textures(struct pipe_context *pipe,
+ unsigned num, struct pipe_texture **texture)
{
struct pstip_stage *pstip = pstip_stage_from_pipe(pipe);
/* save current */
- pstip->state.texture[sampler] = texture;
+ memcpy(pstip->state.textures, texture, num * sizeof(struct pipe_texture *));
+ pstip->num_textures = num;
/* pass-through */
- pstip->driver_set_sampler_texture(pstip->pipe, sampler, texture);
+ pstip->driver_set_sampler_textures(pstip->pipe, num, texture);
}
@@ -682,8 +693,8 @@ draw_install_pstipple_stage(struct draw_context *draw,
pstip->driver_bind_fs_state = pipe->bind_fs_state;
pstip->driver_delete_fs_state = pipe->delete_fs_state;
- pstip->driver_bind_sampler_state = pipe->bind_sampler_state;
- pstip->driver_set_sampler_texture = pipe->set_sampler_texture;
+ pstip->driver_bind_sampler_states = pipe->bind_sampler_states;
+ pstip->driver_set_sampler_textures = pipe->set_sampler_textures;
pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple;
/* override the driver's functions */
@@ -691,7 +702,7 @@ draw_install_pstipple_stage(struct draw_context *draw,
pipe->bind_fs_state = pstip_bind_fs_state;
pipe->delete_fs_state = pstip_delete_fs_state;
- pipe->bind_sampler_state = pstip_bind_sampler_state;
- pipe->set_sampler_texture = pstip_set_sampler_texture;
+ pipe->bind_sampler_states = pstip_bind_sampler_states;
+ pipe->set_sampler_textures = pstip_set_sampler_textures;
pipe->set_polygon_stipple = pstip_set_polygon_stipple;
}
diff --git a/src/gallium/auxiliary/draw/draw_unfilled.c b/src/gallium/auxiliary/draw/draw_unfilled.c
index 4d718d514c..b07860cd9e 100644
--- a/src/gallium/auxiliary/draw/draw_unfilled.c
+++ b/src/gallium/auxiliary/draw/draw_unfilled.c
@@ -129,7 +129,7 @@ static void unfilled_tri( struct draw_stage *stage,
points( stage, header );
break;
default:
- abort();
+ assert(0);
}
}
diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h
index cfd2b9820c..5e7de905c1 100644
--- a/src/gallium/auxiliary/draw/draw_vbuf.h
+++ b/src/gallium/auxiliary/draw/draw_vbuf.h
@@ -85,6 +85,12 @@ struct vbuf_render {
const ushort *indices,
uint nr_indices );
+ /* Draw Arrays path too.
+ */
+ void (*draw_arrays)( struct vbuf_render *,
+ unsigned start,
+ uint nr );
+
/**
* Called when vbuf is done with this set of vertices:
*/
diff --git a/src/gallium/auxiliary/draw/draw_vertex_cache.c b/src/gallium/auxiliary/draw/draw_vertex_cache.c
index 53f8bbec44..161b247d4e 100644
--- a/src/gallium/auxiliary/draw/draw_vertex_cache.c
+++ b/src/gallium/auxiliary/draw/draw_vertex_cache.c
@@ -41,7 +41,11 @@ void draw_vertex_cache_invalidate( struct draw_context *draw )
assert(draw->vs.queue_nr == 0);
assert(draw->vcache.referenced == 0);
-// memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx));
+ /* There's an error somewhere in the vcache code that requires this
+ * memset. The bug is exposed in q3demo demo001, but probably
+ * elsewhere as well. Will track it down later.
+ */
+ memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx));
}
diff --git a/src/gallium/auxiliary/draw/draw_vertex_fetch.c b/src/gallium/auxiliary/draw/draw_vertex_fetch.c
index cb8cdd04a3..b56d85396d 100644
--- a/src/gallium/auxiliary/draw/draw_vertex_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_vertex_fetch.c
@@ -54,7 +54,7 @@ fetch_##NAME(const void *ptr, float *attrib) \
int i; \
\
for (i = 0; i < SZ; i++) { \
- attrib[i] = CVT; \
+ attrib[i] = CVT(i); \
} \
\
for (; i < 4; i++) { \
@@ -62,24 +62,24 @@ fetch_##NAME(const void *ptr, float *attrib) \
} \
}
-#define CVT_64_FLOAT (float) ((double *) ptr)[i]
-#define CVT_32_FLOAT ((float *) ptr)[i]
+#define CVT_64_FLOAT(i) (float) ((double *) ptr)[i]
+#define CVT_32_FLOAT(i) ((float *) ptr)[i]
-#define CVT_8_USCALED (float) ((unsigned char *) ptr)[i]
-#define CVT_16_USCALED (float) ((unsigned short *) ptr)[i]
-#define CVT_32_USCALED (float) ((unsigned int *) ptr)[i]
+#define CVT_8_USCALED(i) (float) ((unsigned char *) ptr)[i]
+#define CVT_16_USCALED(i) (float) ((unsigned short *) ptr)[i]
+#define CVT_32_USCALED(i) (float) ((unsigned int *) ptr)[i]
-#define CVT_8_SSCALED (float) ((char *) ptr)[i]
-#define CVT_16_SSCALED (float) ((short *) ptr)[i]
-#define CVT_32_SSCALED (float) ((int *) ptr)[i]
+#define CVT_8_SSCALED(i) (float) ((char *) ptr)[i]
+#define CVT_16_SSCALED(i) (float) ((short *) ptr)[i]
+#define CVT_32_SSCALED(i) (float) ((int *) ptr)[i]
-#define CVT_8_UNORM (float) ((unsigned char *) ptr)[i] / 255.0f
-#define CVT_16_UNORM (float) ((unsigned short *) ptr)[i] / 65535.0f
-#define CVT_32_UNORM (float) ((unsigned int *) ptr)[i] / 4294967295.0f
+#define CVT_8_UNORM(i) (float) ((unsigned char *) ptr)[i] / 255.0f
+#define CVT_16_UNORM(i) (float) ((unsigned short *) ptr)[i] / 65535.0f
+#define CVT_32_UNORM(i) (float) ((unsigned int *) ptr)[i] / 4294967295.0f
-#define CVT_8_SNORM (float) ((char *) ptr)[i] / 127.0f
-#define CVT_16_SNORM (float) ((short *) ptr)[i] / 32767.0f
-#define CVT_32_SNORM (float) ((int *) ptr)[i] / 2147483647.0f
+#define CVT_8_SNORM(i) (float) ((char *) ptr)[i] / 127.0f
+#define CVT_16_SNORM(i) (float) ((short *) ptr)[i] / 32767.0f
+#define CVT_32_SNORM(i) (float) ((int *) ptr)[i] / 2147483647.0f
FETCH_ATTRIB( R64G64B64A64_FLOAT, 4, CVT_64_FLOAT )
FETCH_ATTRIB( R64G64B64_FLOAT, 3, CVT_64_FLOAT )
@@ -156,6 +156,16 @@ FETCH_ATTRIB( A8R8G8B8_UNORM, 4, CVT_8_UNORM )
+/**
+ * Fetch four 8-bit unsigned-normalized components with the first and
+ * third swapped: source bytes 0, 1, 2, 3 are written to attrib[2],
+ * attrib[1], attrib[0] and attrib[3] respectively.
+ */
+static void
+fetch_B8G8R8A8_UNORM(const void *ptr, float *attrib)
+{
+   attrib[0] = CVT_8_UNORM(2);
+   attrib[1] = CVT_8_UNORM(1);
+   attrib[2] = CVT_8_UNORM(0);
+   attrib[3] = CVT_8_UNORM(3);
+}
+
+
static fetch_func get_fetch_func( enum pipe_format format )
{
#if 0
@@ -296,6 +306,10 @@ static fetch_func get_fetch_func( enum pipe_format format )
case PIPE_FORMAT_A8R8G8B8_UNORM:
return fetch_A8R8G8B8_UNORM;
+
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ return fetch_B8G8R8A8_UNORM;
+
case 0:
return NULL; /* not sure why this is needed */
diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c
index 1e95355555..133418baca 100644
--- a/src/gallium/auxiliary/draw/draw_vertex_shader.c
+++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c
@@ -110,13 +110,20 @@ draw_bind_vertex_shader(struct draw_context *draw,
struct draw_vertex_shader *dvs)
{
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+
+ if (dvs)
+ {
+ draw->vertex_shader = dvs;
+ draw->num_vs_outputs = dvs->info.num_outputs;
- draw->vertex_shader = dvs;
- draw->num_vs_outputs = dvs->info.num_outputs;
+ tgsi_exec_machine_init(&draw->machine);
- tgsi_exec_machine_init(&draw->machine);
-
- dvs->prepare( dvs, draw );
+ dvs->prepare( dvs, draw );
+ }
+ else {
+ draw->vertex_shader = NULL;
+ draw->num_vs_outputs = 0;
+ }
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index 583812aadd..55bec14116 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -71,7 +71,7 @@ vs_exec_prepare( struct draw_vertex_shader *shader,
{
/* specify the vertex program to interpret/execute */
tgsi_exec_machine_bind_shader(&draw->machine,
- shader->state->tokens,
+ shader->state.tokens,
PIPE_MAX_SAMPLERS,
NULL /*samplers*/ );
@@ -132,20 +132,30 @@ vs_exec_run( struct draw_vertex_shader *shader,
z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j];
w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j];
- vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes);
- vOut[j]->edgeflag = 1;
-
- /* divide by w */
- w = 1.0f / w;
- x *= w;
- y *= w;
- z *= w;
-
- /* Viewport mapping */
- vOut[j]->data[0][0] = x * scale[0] + trans[0];
- vOut[j]->data[0][1] = y * scale[1] + trans[1];
- vOut[j]->data[0][2] = z * scale[2] + trans[2];
- vOut[j]->data[0][3] = w;
+ if (!draw->rasterizer->bypass_clipping) {
+ vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes);
+ vOut[j]->edgeflag = 1;
+
+ /* divide by w */
+ w = 1.0f / w;
+ x *= w;
+ y *= w;
+ z *= w;
+
+ /* Viewport mapping */
+ vOut[j]->data[0][0] = x * scale[0] + trans[0];
+ vOut[j]->data[0][1] = y * scale[1] + trans[1];
+ vOut[j]->data[0][2] = z * scale[2] + trans[2];
+ vOut[j]->data[0][3] = w;
+ }
+ else {
+ vOut[j]->clipmask = 0;
+ vOut[j]->edgeflag = 1;
+ vOut[j]->data[0][0] = x;
+ vOut[j]->data[0][1] = y;
+ vOut[j]->data[0][2] = z;
+ vOut[j]->data[0][3] = w;
+ }
/* Remaining attributes are packed into sequential post-transform
* vertex attrib slots.
@@ -177,7 +187,7 @@ draw_create_vs_exec(struct draw_context *draw,
if (vs == NULL)
return NULL;
- vs->state = state;
+ vs->state = *state;
vs->prepare = vs_exec_prepare;
vs->run = vs_exec_run;
vs->delete = vs_exec_delete;
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
index 0fd557d667..53c260be53 100644
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -135,25 +135,35 @@ vs_llvm_run( struct draw_vertex_shader *base,
unsigned slot;
float x, y, z, w;
- x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j];
- y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j];
- z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j];
- w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j];
-
- vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes);
- vOut[j]->edgeflag = 1;
-
- /* divide by w */
- w = 1.0f / w;
- x *= w;
- y *= w;
- z *= w;
-
- /* Viewport mapping */
- vOut[j]->data[0][0] = x * scale[0] + trans[0];
- vOut[j]->data[0][1] = y * scale[1] + trans[1];
- vOut[j]->data[0][2] = z * scale[2] + trans[2];
- vOut[j]->data[0][3] = w;
+      /* Fetch the clip-space position unconditionally.  The
+       * bypass_clipping branch below also reads x/y/z/w, so assigning
+       * them only on the clipping path would leave that branch using
+       * uninitialized values (and clip[] unwritten).  This matches
+       * vs_exec_run() and vs_sse_run().
+       */
+      x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j];
+      y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j];
+      z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j];
+      w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j];
+
+      if (!draw->rasterizer->bypass_clipping) {
+         vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes);
+         vOut[j]->edgeflag = 1;
+
+         /* divide by w */
+         w = 1.0f / w;
+         x *= w;
+         y *= w;
+         z *= w;
+
+         /* Viewport mapping */
+         vOut[j]->data[0][0] = x * scale[0] + trans[0];
+         vOut[j]->data[0][1] = y * scale[1] + trans[1];
+         vOut[j]->data[0][2] = z * scale[2] + trans[2];
+         vOut[j]->data[0][3] = w;
+      }
+      else {
+         /* No clip test, perspective divide or viewport mapping:
+          * pass the shader's position through unchanged.
+          */
+         vOut[j]->clipmask = 0;
+         vOut[j]->edgeflag = 1;
+         vOut[j]->data[0][0] = x;
+         vOut[j]->data[0][1] = y;
+         vOut[j]->data[0][2] = z;
+         vOut[j]->data[0][3] = w;
+      }
/* Remaining attributes are packed into sequential post-transform
* vertex attrib slots.
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 0b8bc2bf14..5ee2adb344 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -158,20 +158,30 @@ vs_sse_run( struct draw_vertex_shader *base,
z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j];
w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j];
- vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes);
- vOut[j]->edgeflag = 1;
-
- /* divide by w */
- w = 1.0f / w;
- x *= w;
- y *= w;
- z *= w;
-
- /* Viewport mapping */
- vOut[j]->data[0][0] = x * scale[0] + trans[0];
- vOut[j]->data[0][1] = y * scale[1] + trans[1];
- vOut[j]->data[0][2] = z * scale[2] + trans[2];
- vOut[j]->data[0][3] = w;
+ if (!draw->rasterizer->bypass_clipping) {
+ vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes);
+ vOut[j]->edgeflag = 1;
+
+ /* divide by w */
+ w = 1.0f / w;
+ x *= w;
+ y *= w;
+ z *= w;
+
+ /* Viewport mapping */
+ vOut[j]->data[0][0] = x * scale[0] + trans[0];
+ vOut[j]->data[0][1] = y * scale[1] + trans[1];
+ vOut[j]->data[0][2] = z * scale[2] + trans[2];
+ vOut[j]->data[0][3] = w;
+ }
+ else {
+ vOut[j]->clipmask = 0;
+ vOut[j]->edgeflag = 1;
+ vOut[j]->data[0][0] = x;
+ vOut[j]->data[0][1] = y;
+ vOut[j]->data[0][2] = z;
+ vOut[j]->data[0][3] = w;
+ }
/* Remaining attributes are packed into sequential post-transform
* vertex attrib slots.
@@ -211,14 +221,14 @@ draw_create_vs_sse(struct draw_context *draw,
if (vs == NULL)
return NULL;
- vs->base.state = templ;
+ vs->base.state = *templ;
vs->base.prepare = vs_sse_prepare;
vs->base.run = vs_sse_run;
vs->base.delete = vs_sse_delete;
x86_init_func( &vs->sse2_program );
- if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state->tokens,
+ if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
&vs->sse2_program ))
goto fail;
diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile
index 39fac6ea4a..c24e19e062 100644
--- a/src/gallium/auxiliary/gallivm/Makefile
+++ b/src/gallium/auxiliary/gallivm/Makefile
@@ -15,7 +15,7 @@ GALLIVM_SOURCES = \
storagesoa.cpp \
instructionssoa.cpp
-INC_SOURCES = gallivm_builtins.cpp
+INC_SOURCES = gallivm_builtins.cpp gallivmsoabuiltins.cpp
CPP_SOURCES = \
$(GALLIVM_SOURCES)
@@ -65,8 +65,10 @@ depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES)
gallivm_builtins.cpp: llvm_builtins.c
- clang --emit-llvm $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=$@ -f -for=shader -funcname=createGallivmBuiltins
+ clang --emit-llvm < $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=$@ -f -for=shader -funcname=createGallivmBuiltins
+gallivmsoabuiltins.cpp: soabuiltins.c
+ clang --emit-llvm < $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-module -o=$@ -f -for=shader -funcname=createSoaBuiltins
# Emacs tags
tags:
@@ -78,6 +80,7 @@ clean:
-rm -f *.o */*.o *~ *.so *~ server/*.o
-rm -f depend depend.bak
-rm -f gallivm_builtins.cpp
+ -rm -f gallivmsoabuiltins.cpp
symlinks:
diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp
index d14bb3b99a..b6f641a3f8 100644
--- a/src/gallium/auxiliary/gallivm/gallivm.cpp
+++ b/src/gallium/auxiliary/gallivm/gallivm.cpp
@@ -306,11 +306,19 @@ struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir)
{
struct gallivm_prog *prog =
(struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog));
+
+ std::cout << "Before optimizations:"<<std::endl;
+ ir->module->dump();
+ std::cout<<"-------------------------------"<<std::endl;
+
+ PassManager veri;
+ veri.add(createVerifierPass());
+ veri.run(*ir->module);
llvm::Module *mod = llvm::CloneModule(ir->module);
prog->num_consts = ir->num_consts;
memcpy(prog->interpolators, ir->interpolators, sizeof(prog->interpolators));
prog->num_interp = ir->num_interp;
-
+
/* Run optimization passes over it */
PassManager passes;
passes.add(new TargetData(mod));
diff --git a/src/gallium/auxiliary/gallivm/gallivm.h b/src/gallium/auxiliary/gallivm/gallivm.h
index 57912a952f..b4d6555d2f 100644
--- a/src/gallium/auxiliary/gallivm/gallivm.h
+++ b/src/gallium/auxiliary/gallivm/gallivm.h
@@ -33,6 +33,16 @@
#ifndef GALLIVM_H
#define GALLIVM_H
+/*
+ LLVM representation consists of two stages - layout independent
+ intermediate representation gallivm_ir and driver specific
+ gallivm_prog. TGSI is first being translated into gallivm_ir
+ after that driver can set number of options on gallivm_ir and
+ have it compiled into gallivm_prog. gallivm_prog can be either
+ executed (assuming there's LLVM JIT backend for the current
+ target) or machine code generation can be done (assuming there's
+ a LLVM code generator for the current target)
+ */
#if defined __cplusplus
extern "C" {
#endif
diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp
index 55d39fa5f1..8919491792 100644
--- a/src/gallium/auxiliary/gallivm/instructions.cpp
+++ b/src/gallium/auxiliary/gallivm/instructions.cpp
@@ -42,6 +42,7 @@
#include <llvm/InstrTypes.h>
#include <llvm/Instructions.h>
#include <llvm/ParameterAttributes.h>
+#include <llvm/ParamAttrsList.h>
#include <sstream>
#include <fstream>
@@ -51,6 +52,15 @@ using namespace llvm;
#include "gallivm_builtins.cpp"
+#if 0
+
+llvm::Value *arrayFromChannels(std::vector<llvm::Value*> &vals)
+{
+ VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
+ ArrayType *vectorArray = ArrayType::get(vectorType, 4);
+}
+#endif
+
static inline std::string createFuncName(int label)
{
std::ostringstream stream;
diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp
index a4d5046637..89d513afd0 100644
--- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp
+++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp
@@ -2,9 +2,28 @@
#include "storagesoa.h"
+#include "pipe/p_shader_tokens.h"
+
+#include <llvm/CallingConv.h>
#include <llvm/Constants.h>
+#include <llvm/Module.h>
+#include <llvm/Function.h>
+#include <llvm/Instructions.h>
+#include <llvm/Transforms/Utils/Cloning.h>
+#include <llvm/ParamAttrsList.h>
+
+#include <iostream>
+
+/* disable some warnings. this file is autogenerated */
+#if defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
using namespace llvm;
+#include "gallivmsoabuiltins.cpp"
+#if defined(__GNUC__)
+#pragma GCC diagnostic warning "-Wunused-variable"
+#endif
InstructionsSoa::InstructionsSoa(llvm::Module *mod, llvm::Function *func,
llvm::BasicBlock *block, StorageSoa *storage)
@@ -12,6 +31,8 @@ InstructionsSoa::InstructionsSoa(llvm::Module *mod, llvm::Function *func,
m_storage(storage),
m_idx(0)
{
+ createFunctionMap();
+ createBuiltins();
}
const char * InstructionsSoa::name(const char *prefix) const
@@ -119,3 +140,167 @@ std::vector<llvm::Value*> InstructionsSoa::extractVector(llvm::Value *vector)
return res;
}
+
+void InstructionsSoa::createFunctionMap()
+{
+ m_functionsMap[TGSI_OPCODE_DP3] = "dp3";
+ m_functionsMap[TGSI_OPCODE_DP4] = "dp4";
+}
+
+/**
+ * Return the builtin function implementing TGSI opcode 'op', cloned
+ * from the builtins module and appended to the module that owns the
+ * builder's current insert block.  Clones are cached per opcode, so
+ * each builtin is cloned at most once.
+ *
+ * Returns 0 when no builtin name is registered for 'op', when the
+ * builtins module does not define that name, or when there is no
+ * current module to add the clone to.
+ */
+llvm::Function * InstructionsSoa::function(int op)
+{
+   std::map<int, llvm::Function*>::iterator cached = m_functions.find(op);
+   if (cached != m_functions.end())
+      return cached->second;
+
+   /* Look the name up with find(): operator[] would silently insert
+    * an empty name for an opcode that was never registered.
+    */
+   std::map<int, std::string>::const_iterator entry = m_functionsMap.find(op);
+   if (entry == m_functionsMap.end())
+      return 0;
+
+   llvm::Function *originalFunc = m_builtins->getFunction(entry->second);
+   llvm::Module *module = currentModule();
+   if (!originalFunc || !module)
+      return 0;
+
+   llvm::Function *func = CloneFunction(originalFunc);
+   module->getFunctionList().push_back(func);
+   std::cout << "Func parent is "<<func->getParent()
+             <<", cur is "<<module <<std::endl;
+   func->dump();
+   m_functions[op] = func;
+   return func;
+}
+
+llvm::Module * InstructionsSoa::currentModule() const
+{
+ BasicBlock *block = m_builder.GetInsertBlock();
+ if (!block || !block->getParent())
+ return 0;
+
+ return block->getParent()->getParent();
+}
+
+void InstructionsSoa::createBuiltins()
+{
+ m_builtins = createSoaBuiltins();
+}
+
+std::vector<llvm::Value*> InstructionsSoa::dp3(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ llvm::Function *func = function(TGSI_OPCODE_DP3);
+ return callBuiltin(func, in1, in2);
+}
+
+llvm::Value * InstructionsSoa::allocaTemp()
+{
+ VectorType *vector = VectorType::get(Type::FloatTy, 4);
+ ArrayType *vecArray = ArrayType::get(vector, 4);
+ AllocaInst *alloca = new AllocaInst(vecArray, name("tmpRes"),
+ m_builder.GetInsertBlock());
+
+ std::vector<Value*> indices;
+ indices.push_back(m_storage->constantInt(0));
+ indices.push_back(m_storage->constantInt(0));
+ GetElementPtrInst *getElem = new GetElementPtrInst(alloca,
+ indices.begin(),
+ indices.end(),
+ name("allocaPtr"),
+ m_builder.GetInsertBlock());
+ return getElem;
+}
+
+std::vector<llvm::Value*> InstructionsSoa::allocaToResult(llvm::Value *allocaPtr)
+{
+ GetElementPtrInst *xElemPtr = new GetElementPtrInst(allocaPtr,
+ m_storage->constantInt(0),
+ name("xPtr"),
+ m_builder.GetInsertBlock());
+ GetElementPtrInst *yElemPtr = new GetElementPtrInst(allocaPtr,
+ m_storage->constantInt(1),
+ name("yPtr"),
+ m_builder.GetInsertBlock());
+ GetElementPtrInst *zElemPtr = new GetElementPtrInst(allocaPtr,
+ m_storage->constantInt(2),
+ name("zPtr"),
+ m_builder.GetInsertBlock());
+ GetElementPtrInst *wElemPtr = new GetElementPtrInst(allocaPtr,
+ m_storage->constantInt(3),
+ name("wPtr"),
+ m_builder.GetInsertBlock());
+
+ std::vector<llvm::Value*> res(4);
+ res[0] = new LoadInst(xElemPtr, name("xRes"), false, m_builder.GetInsertBlock());
+ res[1] = new LoadInst(yElemPtr, name("yRes"), false, m_builder.GetInsertBlock());
+ res[2] = new LoadInst(zElemPtr, name("zRes"), false, m_builder.GetInsertBlock());
+ res[3] = new LoadInst(wElemPtr, name("wRes"), false, m_builder.GetInsertBlock());
+
+ return res;
+}
+
+std::vector<llvm::Value*> InstructionsSoa::dp4(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ llvm::Function *func = function(TGSI_OPCODE_DP4);
+ return callBuiltin(func, in1, in2);
+}
+
+std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector<llvm::Value*> in1)
+{
+ std::vector<Value*> params;
+
+ llvm::Value *allocaPtr = allocaTemp();
+ params.push_back(allocaPtr);
+ params.push_back(in1[0]);
+ params.push_back(in1[1]);
+ params.push_back(in1[2]);
+ params.push_back(in1[3]);
+ CallInst *call = m_builder.CreateCall(func, params.begin(), params.end());
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(false);
+
+ return allocaToResult(allocaPtr);
+}
+
+std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ std::vector<Value*> params;
+
+ llvm::Value *allocaPtr = allocaTemp();
+ params.push_back(allocaPtr);
+ params.push_back(in1[0]);
+ params.push_back(in1[1]);
+ params.push_back(in1[2]);
+ params.push_back(in1[3]);
+ params.push_back(in2[0]);
+ params.push_back(in2[1]);
+ params.push_back(in2[2]);
+ params.push_back(in2[3]);
+ CallInst *call = m_builder.CreateCall(func, params.begin(), params.end());
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(false);
+
+ return allocaToResult(allocaPtr);
+}
+
+std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2,
+ const std::vector<llvm::Value*> in3)
+{
+ std::vector<Value*> params;
+
+ llvm::Value *allocaPtr = allocaTemp();
+ params.push_back(allocaPtr);
+ params.push_back(in1[0]);
+ params.push_back(in1[1]);
+ params.push_back(in1[2]);
+ params.push_back(in1[3]);
+ params.push_back(in2[0]);
+ params.push_back(in2[1]);
+ params.push_back(in2[2]);
+ params.push_back(in2[3]);
+ params.push_back(in3[0]);
+ params.push_back(in3[1]);
+ params.push_back(in3[2]);
+ params.push_back(in3[3]);
+ CallInst *call = m_builder.CreateCall(func, params.begin(), params.end());
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(false);
+
+ return allocaToResult(allocaPtr);
+}
diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h
index 4169dcbb2e..3ef51dcaff 100644
--- a/src/gallium/auxiliary/gallivm/instructionssoa.h
+++ b/src/gallium/auxiliary/gallivm/instructionssoa.h
@@ -30,6 +30,7 @@
#include <llvm/Support/LLVMBuilder.h>
+#include <map>
#include <vector>
namespace llvm {
@@ -47,9 +48,12 @@ public:
llvm::BasicBlock *block, StorageSoa *storage);
std::vector<llvm::Value*> arl(const std::vector<llvm::Value*> in);
-
std::vector<llvm::Value*> add(const std::vector<llvm::Value*> in1,
const std::vector<llvm::Value*> in2);
+ std::vector<llvm::Value*> dp3(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2);
+ std::vector<llvm::Value*> dp4(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2);
std::vector<llvm::Value*> madd(const std::vector<llvm::Value*> in1,
const std::vector<llvm::Value*> in2,
const std::vector<llvm::Value*> in3);
@@ -62,9 +66,29 @@ private:
const char * name(const char *prefix) const;
llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y,
llvm::Value *z, llvm::Value *w);
+ void createFunctionMap();
+ void createBuiltins();
+ llvm::Function *function(int);
+ llvm::Module *currentModule() const;
+ llvm::Value *allocaTemp();
+ std::vector<llvm::Value*> allocaToResult(llvm::Value *allocaPtr);
+ std::vector<llvm::Value*> callBuiltin(llvm::Function *func,
+ const std::vector<llvm::Value*> in1);
+ std::vector<llvm::Value*> callBuiltin(llvm::Function *func,
+ const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2);
+ std::vector<llvm::Value*> callBuiltin(llvm::Function *func,
+ const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2,
+ const std::vector<llvm::Value*> in3);
private:
llvm::LLVMFoldingBuilder m_builder;
StorageSoa *m_storage;
+
+ std::map<int, std::string> m_functionsMap;
+ std::map<int, llvm::Function*> m_functions;
+ llvm::Module *m_builtins;
+
private:
mutable int m_idx;
mutable char m_name[32];
diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c
new file mode 100644
index 0000000000..24c14e1b69
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/soabuiltins.c
@@ -0,0 +1,72 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * This file is compiled with clang into the LLVM bitcode
+ *
+ * Authors:
+ * Zack Rusin zack@tungstengraphics.com
+ */
+/* Four-element float vector type, declared through a compiler vector
+ * attribute (this file is built with clang, see the note above). */
+typedef __attribute__(( ocu_vector_type(4) )) float float4;
+
+/*
+ * Three-component dot product of operand 0 and operand 1.
+ *
+ * Only the x, y and z inputs take part; tmp0w and tmp1w are ignored.  The
+ * same value is written to res[0] through res[3].
+ */
+void dp3(float4 *res,
+         float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
+         float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
+{
+   const float4 sum = (tmp0x * tmp1x) + (tmp0y * tmp1y) +
+                      (tmp0z * tmp1z);
+   int chan;
+
+   /* replicate the scalar result into every output channel */
+   for (chan = 0; chan < 4; ++chan)
+      res[chan] = sum;
+}
+
+
+/*
+ * Four-component dot product of operand 0 and operand 1.
+ *
+ * The same value is written to res[0] through res[3].
+ */
+void dp4(float4 *res,
+         float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
+         float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
+{
+   const float4 sum = (tmp0x * tmp1x) + (tmp0y * tmp1y) +
+                      (tmp0z * tmp1z) + (tmp0w * tmp1w);
+   int chan;
+
+   /* replicate the scalar result into every output channel */
+   for (chan = 0; chan < 4; ++chan)
+      res[chan] = sum;
+}
+
+#if 0
+void yo(float4 *out, float4 *in)
+{
+ float4 res[4];
+
+ dp3(res, in[0], in[1], in[2], in[3],
+ in[4], in[5], in[6], in[7]);
+ out[1] = res[1];
+}
+#endif
diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp
index ed0674a96f..bb6fe3d7e1 100644
--- a/src/gallium/auxiliary/gallivm/storagesoa.cpp
+++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp
@@ -277,7 +277,7 @@ llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector<float> &v
return constVector;
}
-std::vector<llvm::Value*> StorageSoa::load(Argument type, int idx, int swizzle,
+std::vector<llvm::Value*> StorageSoa::load(enum tgsi_file_type type, int idx, int swizzle,
llvm::Value *indIdx)
{
std::vector<llvm::Value*> val(4);
@@ -292,25 +292,29 @@ std::vector<llvm::Value*> StorageSoa::load(Argument type, int idx, int swizzle,
debug_printf("XXXXXXXXX realIdx = %p, indIdx = %p\n", realIndex, indIdx);
switch(type) {
- case Input:
+ case TGSI_FILE_INPUT:
val = inputElement(realIndex);
break;
- case Output:
+ case TGSI_FILE_OUTPUT:
val = outputElement(realIndex);
break;
- case Temp:
+ case TGSI_FILE_TEMPORARY:
val = tempElement(realIndex);
break;
- case Const:
+ case TGSI_FILE_CONSTANT:
val = constElement(realIndex);
break;
- case Immediate:
+ case TGSI_FILE_IMMEDIATE:
val = immediateElement(realIndex);
break;
- case Address:
+ case TGSI_FILE_ADDRESS:
debug_printf("Address not handled in the load phase!\n");
assert(0);
break;
+ default:
+ debug_printf("Unknown load!\n");
+ assert(0);
+ break;
}
if (!gallivm_is_swizzle(swizzle))
return val;
@@ -324,21 +328,21 @@ std::vector<llvm::Value*> StorageSoa::load(Argument type, int idx, int swizzle,
return res;
}
-void StorageSoa::store(Argument type, int idx, const std::vector<llvm::Value*> &val,
+void StorageSoa::store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val,
int mask)
{
llvm::Value *out = 0;
switch(type) {
- case Output:
+ case TGSI_FILE_OUTPUT:
out = m_output;
break;
- case Temp:
+ case TGSI_FILE_TEMPORARY:
out = m_temps;
break;
- case Input:
+ case TGSI_FILE_INPUT:
out = m_input;
break;
- case Address: {
+ case TGSI_FILE_ADDRESS: {
llvm::Value *addr = m_addresses[idx];
if (!addr) {
addAddress(idx);
diff --git a/src/gallium/auxiliary/gallivm/storagesoa.h b/src/gallium/auxiliary/gallivm/storagesoa.h
index 6443351f27..ae2fc7c6ae 100644
--- a/src/gallium/auxiliary/gallivm/storagesoa.h
+++ b/src/gallium/auxiliary/gallivm/storagesoa.h
@@ -28,6 +28,8 @@
#ifndef STORAGESOA_H
#define STORAGESOA_H
+#include <pipe/p_shader_tokens.h>
+
#include <vector>
#include <list>
#include <map>
@@ -46,15 +48,6 @@ namespace llvm {
class StorageSoa
{
public:
- enum Argument {
- Input,
- Output,
- Temp,
- Const,
- Immediate,
- Address
- };
-public:
StorageSoa(llvm::BasicBlock *block,
llvm::Value *input,
llvm::Value *output,
@@ -62,9 +55,9 @@ public:
llvm::Value *temps);
- std::vector<llvm::Value*> load(Argument type, int idx, int swizzle,
+ std::vector<llvm::Value*> load(enum tgsi_file_type type, int idx, int swizzle,
llvm::Value *indIdx =0);
- void store(Argument type, int idx, const std::vector<llvm::Value*> &val,
+ void store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val,
int mask);
void addImmediate(float *vec);
diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
index 2cb4acce32..3f65865a5a 100644
--- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
@@ -708,25 +708,9 @@ translate_instructionir(llvm::Module *module,
if (src->SrcRegister.Indirect) {
indIdx = storage->addrElement(src->SrcRegisterInd.Index);
}
- if (src->SrcRegister.File == TGSI_FILE_CONSTANT) {
- val = storage->load(StorageSoa::Const,
- src->SrcRegister.Index, swizzle, indIdx);
- } else if (src->SrcRegister.File == TGSI_FILE_INPUT) {
- val = storage->load(StorageSoa::Input,
- src->SrcRegister.Index, swizzle, indIdx);
- } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) {
- val = storage->load(StorageSoa::Temp,
- src->SrcRegister.Index, swizzle, indIdx);
- } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) {
- val = storage->load(StorageSoa::Output,
- src->SrcRegister.Index, swizzle, indIdx);
- } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
- val = storage->load(StorageSoa::Immediate,
- src->SrcRegister.Index, swizzle, indIdx);
- } else {
- fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File);
- return;
- }
+
+ val = storage->load((enum tgsi_file_type)src->SrcRegister.File,
+ src->SrcRegister.Index, swizzle, indIdx);
inputs[i] = val;
}
@@ -763,9 +747,11 @@ translate_instructionir(llvm::Module *module,
}
break;
case TGSI_OPCODE_DP3: {
+ out = instr->dp3(inputs[0], inputs[1]);
}
break;
case TGSI_OPCODE_DP4: {
+ out = instr->dp4(inputs[0], inputs[1]);
}
break;
case TGSI_OPCODE_DST: {
@@ -1067,19 +1053,8 @@ translate_instructionir(llvm::Module *module,
for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
- if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
- storage->store(StorageSoa::Output,
- dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
- } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) {
- storage->store(StorageSoa::Temp,
- dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
- } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) {
- storage->store(StorageSoa::Address,
- dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
- } else {
- fprintf(stderr, "ERROR: unsupported LLVM destination!");
- assert(!"wrong destination");
- }
+ storage->store((enum tgsi_file_type)dst->DstRegister.File,
+ dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
}
}
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
index 95a2d6fcbb..a996218ce7 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
@@ -306,6 +306,11 @@ void spe_init_func(struct spe_function *p, unsigned code_size)
{
p->store = align_malloc(code_size, 16);
p->csr = p->store;
+
+ /* Conservatively treat R0 - R2 and R80 - R127 as non-volatile.
+ */
+ p->regs[0] = ~7;
+ p->regs[1] = (1U << (80 - 64)) - 1;
}
@@ -317,6 +322,48 @@ void spe_release_func(struct spe_function *p)
}
+/**
+ * Allocate the lowest-numbered register that is currently available.
+ *
+ * A set bit in p->regs marks an available register; the bit is cleared
+ * when the register is handed out.
+ *
+ * \return the register number (0 - 127), or -1 if no register is available.
+ */
+int spe_allocate_available_register(struct spe_function *p)
+{
+   unsigned i;
+   for (i = 0; i < 128; i++) {
+      /* regs[] is two 64-bit words: register i is bit (i % 64) of word
+       * (i / 64).  Shifting a 64-bit value by 64 or more is undefined.
+       */
+      const uint64_t mask = (1ULL << (i % 64));
+      const unsigned idx = i / 64;
+
+      if ((p->regs[idx] & mask) != 0) {
+         p->regs[idx] &= ~mask;
+         return i;
+      }
+   }
+
+   return -1;
+}
+
+
+/**
+ * Mark a specific register as allocated.
+ *
+ * The register must currently be available (its bit set in p->regs); this
+ * is checked with an assertion.
+ *
+ * \param reg  register number, 0 - 127
+ * \return \c reg
+ */
+int spe_allocate_register(struct spe_function *p, int reg)
+{
+   /* regs[] is two 64-bit words, so index and bit are taken modulo 64 */
+   const unsigned idx = reg / 64;
+   const unsigned bit = reg % 64;
+
+   assert(reg >= 0 && reg < 128);
+   assert((p->regs[idx] & (1ULL << bit)) != 0);
+
+   p->regs[idx] &= ~(1ULL << bit);
+   return reg;
+}
+
+
+/**
+ * Return a previously allocated register to the available set.
+ *
+ * The register must currently be allocated (its bit clear in p->regs);
+ * this is checked with an assertion.
+ *
+ * \param reg  register number, 0 - 127
+ */
+void spe_release_register(struct spe_function *p, int reg)
+{
+   /* regs[] is two 64-bit words, so index and bit are taken modulo 64 */
+   const unsigned idx = reg / 64;
+   const unsigned bit = reg % 64;
+
+   assert(reg >= 0 && reg < 128);
+   assert((p->regs[idx] & (1ULL << bit)) == 0);
+
+   p->regs[idx] |= (1ULL << bit);
+}
+
+
+
+
void spe_bi(struct spe_function *p, unsigned rA, int d, int e)
{
emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4));
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
index 10ce44b3a0..5a1eb1ed8d 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
@@ -39,11 +39,27 @@ struct spe_function {
uint32_t *store;
uint32_t *csr;
const char *fn;
+
+ /**
+ * Mask of used / unused registers
+ *
+ * Each set bit corresponds to an available register. Each cleared bit
+ * corresponds to an allocated register.
+ *
+ * \sa
+ * spe_allocate_register, spe_allocate_available_register,
+ * spe_release_register
+ */
+ uint64_t regs[2];
};
extern void spe_init_func(struct spe_function *p, unsigned code_size);
extern void spe_release_func(struct spe_function *p);
+extern int spe_allocate_available_register(struct spe_function *p);
+extern int spe_allocate_register(struct spe_function *p, int reg);
+extern void spe_release_register(struct spe_function *p, int reg);
+
#endif /* RTASM_PPC_SPE_H */
#ifndef EMIT_
diff --git a/src/gallium/auxiliary/sct/Makefile b/src/gallium/auxiliary/sct/Makefile
new file mode 100644
index 0000000000..516d1756cf
--- /dev/null
+++ b/src/gallium/auxiliary/sct/Makefile
@@ -0,0 +1,12 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = sct
+
+C_SOURCES = \
+ sct.c
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/auxiliary/sct/SConscript b/src/gallium/auxiliary/sct/SConscript
new file mode 100644
index 0000000000..76927d973f
--- /dev/null
+++ b/src/gallium/auxiliary/sct/SConscript
@@ -0,0 +1,9 @@
+Import('*')
+
+sct = env.ConvenienceLibrary(
+ target = 'sct',
+ source = [
+ 'sct.c'
+ ])
+
+auxiliaries.insert(0, sct)
diff --git a/src/gallium/auxiliary/sct/sct.c b/src/gallium/auxiliary/sct/sct.c
new file mode 100644
index 0000000000..97ee5882a1
--- /dev/null
+++ b/src/gallium/auxiliary/sct/sct.c
@@ -0,0 +1,453 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_util.h"
+#include "pipe/p_state.h"
+#include "pipe/p_inlines.h"
+#include "sct.h"
+
+
+/**
+ * Node of a singly-linked list of textures.
+ * Used for the per-context list of used textures (sct_context::textures_used).
+ */
+struct texture_list
+{
+ struct pipe_texture *texture;  /**< referenced via pipe_texture_reference() */
+ struct texture_list *next;     /**< next node, or NULL at the end of the list */
+};
+
+
+
+/** One slot per color buffer plus one more (presumably depth/stencil -- TODO confirm) */
+#define MAX_SURFACES ((PIPE_MAX_COLOR_BUFS) + 1)
+
+/**
+ * Per-context tracking record.  Records are kept in a singly-linked list
+ * headed by surface_context_tracker::contexts.
+ */
+struct sct_context
+{
+ /** the pipe context this record describes (used as the lookup key) */
+ const struct pipe_context *context;
+
+ /** surfaces the context is drawing into */
+ struct pipe_surface *surfaces[MAX_SURFACES];
+
+ /** currently bound textures */
+ struct pipe_texture *textures[PIPE_MAX_SAMPLERS];
+
+ /** previously bound textures, used but not flushed */
+ struct texture_list *textures_used;
+
+ /* NOTE(review): not read or written anywhere in sct.c as shown */
+ boolean needs_flush;
+
+ /** next record in the tracker's context list */
+ struct sct_context *next;
+};
+
+
+
+/**
+ * Per-surface tracking record.  Records are kept in a singly-linked list
+ * headed by surface_context_tracker::surfaces.
+ */
+struct sct_surface
+{
+ /** the pipe surface this record describes (used as the lookup key) */
+ const struct pipe_surface *surface;
+
+ /** list of contexts drawing to this surface */
+ struct sct_context_list *contexts;
+
+ /** next record in the tracker's surface list */
+ struct sct_surface *next;
+};
+
+
+
+/**
+ * Look up the tracking record for a pipe_surface.
+ * \return the matching sct_surface, or NULL if the surface is not tracked.
+ */
+static struct sct_surface *
+find_surface_info(struct surface_context_tracker *sct,
+                  const struct pipe_surface *surface)
+{
+   struct sct_surface *node = sct->surfaces;
+
+   /* walk the list until we hit the surface or run off the end */
+   while (node && node->surface != surface)
+      node = node->next;
+
+   return node;
+}
+
+
+/**
+ * Look up the tracking record for a pipe_surface, allocating and linking a
+ * new record if none exists yet.
+ * \return the record, or NULL if a new record could not be allocated.
+ */
+static struct sct_surface *
+find_create_surface_info(struct surface_context_tracker *sct,
+                         const struct pipe_surface *surface)
+{
+   struct sct_surface *entry = find_surface_info(sct, surface);
+
+   if (!entry) {
+      entry = CALLOC_STRUCT(sct_surface);
+      if (entry) {
+         entry->surface = surface;
+
+         /* new records go at the head of the tracker's surface list */
+         entry->next = sct->surfaces;
+         sct->surfaces = entry;
+      }
+   }
+
+   return entry;
+}
+
+
+/**
+ * Look up the tracking record for a pipe_context.
+ * \return the matching sct_context, or NULL if the context is not tracked.
+ */
+static struct sct_context *
+find_context_info(struct surface_context_tracker *sct,
+                  const struct pipe_context *context)
+{
+   struct sct_context *node = sct->contexts;
+
+   /* walk the list until we hit the context or run off the end */
+   while (node && node->context != context)
+      node = node->next;
+
+   return node;
+}
+
+
+/**
+ * Look up the tracking record for a pipe_context, allocating and linking a
+ * new record if none exists yet.
+ * \return the record, or NULL if a new record could not be allocated.
+ */
+static struct sct_context *
+find_create_context_info(struct surface_context_tracker *sct,
+                         const struct pipe_context *context)
+{
+   struct sct_context *entry = find_context_info(sct, context);
+
+   if (!entry) {
+      entry = CALLOC_STRUCT(sct_context);
+      if (entry) {
+         entry->context = context;
+
+         /* new records go at the head of the tracker's context list */
+         entry->next = sct->contexts;
+         sct->contexts = entry;
+      }
+   }
+
+   return entry;
+}
+
+
+/**
+ * Test whether a context appears in a surface's context list.
+ * \return TRUE if the context is listed, FALSE otherwise.
+ */
+static boolean
+find_surface_context(const struct sct_surface *si,
+                     const struct pipe_context *context)
+{
+   const struct sct_context_list *node = si->contexts;
+
+   while (node) {
+      if (node->context == context)
+         return TRUE;
+      node = node->next;
+   }
+
+   return FALSE;
+}
+
+
+/**
+ * Prepend a context to a surface's context list.
+ * Does nothing if the list node cannot be allocated.
+ */
+static void
+add_context_to_surface(struct sct_surface *si,
+                       const struct pipe_context *context)
+{
+   struct sct_context_list *node = CALLOC_STRUCT(sct_context_list);
+
+   if (!node)
+      return;
+
+   node->context = context;
+
+   /* link in front of the existing entries */
+   node->next = si->contexts;
+   si->contexts = node;
+}
+
+
+/**
+ * Remove a context from the list of contexts associated with a surface.
+ * Every node that refers to the context is unlinked and freed.
+ */
+static void
+remove_context_from_surface(struct sct_surface *si,
+                            const struct pipe_context *context)
+{
+   struct sct_context_list *prev = NULL, *curr, *next;
+
+   for (curr = si->contexts; curr; curr = next) {
+      /* Fetch the successor up front: curr may be freed below, and the
+       * loop needs a valid 'next' on the non-matching path as well.
+       */
+      next = curr->next;
+
+      if (curr->context == context) {
+         /* remove */
+         if (prev)
+            prev->next = next;
+         else
+            si->contexts = next;
+         FREE(curr);
+      }
+      else {
+         prev = curr;
+      }
+   }
+}
+
+
+/**
+ * Drop the association between a context and a surface.
+ * Does nothing if the surface is not tracked.
+ */
+static void
+unbind_context_surface(struct surface_context_tracker *sct,
+                       struct pipe_context *context,
+                       struct pipe_surface *surface)
+{
+   struct sct_surface *entry = find_surface_info(sct, surface);
+
+   if (!entry)
+      return;
+
+   remove_context_from_surface(entry, context);
+}
+
+
+/**
+ * Bind context to a set of surfaces (color + Z).
+ * Like MakeCurrent().
+ *
+ * A tracking record is created for the context and for each surface if
+ * one does not exist yet.  Surfaces whose record cannot be allocated are
+ * skipped.
+ *
+ * \param num_surf  number of entries in \p surfaces
+ * \param surfaces  surfaces to associate with the context
+ */
+void
+sct_bind_surfaces(struct surface_context_tracker *sct,
+                  struct pipe_context *context,
+                  uint num_surf,
+                  struct pipe_surface **surfaces)
+{
+   struct sct_context *ci = find_create_context_info(sct, context);
+   uint i;
+
+   if (!ci) {
+      return; /* out of memory */
+   }
+
+   /* unbind currently bound surfaces */
+   /* NOTE(review): nothing in this file stores into ci->surfaces[], so
+    * this loop looks like it never finds anything to unbind -- confirm
+    * whether the new surfaces were meant to be recorded below.
+    */
+   for (i = 0; i < MAX_SURFACES; i++) {
+      if (ci->surfaces[i]) {
+         unbind_context_surface(sct, context, ci->surfaces[i]);
+      }
+   }
+
+   /* bind new surfaces */
+   for (i = 0; i < num_surf; i++) {
+      struct sct_surface *si = find_create_surface_info(sct, surfaces[i]);
+      if (!si) {
+         continue; /* out of memory: cannot track this surface */
+      }
+      if (!find_surface_context(si, context)) {
+         add_context_to_surface(si, context);
+      }
+   }
+}
+
+
+/**
+ * Return list of contexts bound to a surface.
+ * \return head of the surface's context list, or NULL if the surface is
+ *         not tracked or has no contexts bound to it.
+ */
+const struct sct_context_list *
+sct_get_surface_contexts(struct surface_context_tracker *sct,
+                         const struct pipe_surface *surface)
+{
+   const struct sct_surface *si = find_surface_info(sct, surface);
+
+   /* an untracked surface has, by definition, no bound contexts */
+   if (!si) {
+      return NULL;
+   }
+   return si->contexts;
+}
+
+
+
+/**
+ * Test whether a texture is on the context's list of used textures.
+ * \return TRUE if the texture is listed, FALSE otherwise.
+ */
+static boolean
+find_texture(const struct sct_context *ci,
+             const struct pipe_texture *texture)
+{
+   const struct texture_list *node = ci->textures_used;
+
+   while (node) {
+      if (node->texture == texture)
+         return TRUE;
+      node = node->next;
+   }
+
+   return FALSE;
+}
+
+
+/**
+ * Add the given texture to the context's list of used textures.
+ *
+ * A reference is taken on the texture.  Nothing happens if the texture is
+ * NULL, is already on the list, or the list node cannot be allocated.
+ */
+static void
+add_texture_used(struct sct_context *ci,
+                 struct pipe_texture *texture)
+{
+   /* Callers pass empty sampler slots straight through; a NULL entry
+    * carries no information and would later be handed to
+    * pipe_texture_release() by sct_flush_textures().
+    */
+   if (!texture) {
+      return;
+   }
+
+   if (!find_texture(ci, texture)) {
+      /* add to list */
+      struct texture_list *tl = CALLOC_STRUCT(texture_list);
+      if (tl) {
+         pipe_texture_reference(&tl->texture, texture);
+         /* insert at head */
+         tl->next = ci->textures_used;
+         ci->textures_used = tl;
+      }
+   }
+}
+
+
+/**
+ * Bind a texture to a rendering context.
+ *
+ * If the texture differs from the one currently bound to the unit it is
+ * put on the context's 'used' list and becomes the unit's bound texture.
+ * A tracking record is created for the context if it has none yet; the
+ * call is a no-op if that allocation fails or if \p unit is out of range.
+ *
+ * \param unit  sampler unit, must be less than PIPE_MAX_SAMPLERS
+ */
+void
+sct_bind_texture(struct surface_context_tracker *sct,
+                 struct pipe_context *context,
+                 uint unit,
+                 struct pipe_texture *tex)
+{
+   /* the context may not have been seen by sct_bind_surfaces() yet */
+   struct sct_context *ci = find_create_context_info(sct, context);
+
+   if (!ci) {
+      return; /* out of memory */
+   }
+
+   assert(unit < PIPE_MAX_SAMPLERS);
+   if (unit >= PIPE_MAX_SAMPLERS) {
+      return; /* would index past ci->textures[] */
+   }
+
+   if (ci->textures[unit] != tex) {
+      /* put texture on the 'used' list */
+      add_texture_used(ci, tex);
+      /* bind new */
+      pipe_texture_reference(&ci->textures[unit], tex);
+   }
+}
+
+
+/**
+ * Check if the given texture has been used by the rendering context
+ * since the last call to sct_flush_textures().
+ *
+ * \return TRUE if the texture is on the context's 'used' list; FALSE
+ *         otherwise, including when the context is not tracked.
+ */
+boolean
+sct_is_texture_used(struct surface_context_tracker *sct,
+                    const struct pipe_context *context,
+                    const struct pipe_texture *texture)
+{
+   const struct sct_context *ci = find_context_info(sct, context);
+
+   /* an untracked context cannot have used any texture */
+   if (!ci) {
+      return FALSE;
+   }
+   return find_texture(ci, texture);
+}
+
+
+/**
+ * To be called when the image contents of a texture are changed, such
+ * as for gl[Copy]TexSubImage().
+ * XXX this may not be needed
+ *
+ * Currently an empty stub: the texture argument is not used.
+ */
+void
+sct_update_texture(struct pipe_texture *tex)
+{
+
+}
+
+
+/**
+ * When a scene is flushed/rendered we can release the list of
+ * used textures.
+ *
+ * Every entry on the context's 'used' list is released and freed, then
+ * the list is re-seeded with the textures that are still bound.  Does
+ * nothing if the context is not tracked.
+ */
+void
+sct_flush_textures(struct surface_context_tracker *sct,
+                   struct pipe_context *context)
+{
+   struct sct_context *ci = find_context_info(sct, context);
+   struct texture_list *tl, *next;
+   uint i;
+
+   if (!ci) {
+      return; /* context was never bound: nothing to flush */
+   }
+
+   for (tl = ci->textures_used; tl; tl = next) {
+      next = tl->next;
+      pipe_texture_release(&tl->texture);
+      FREE(tl);
+   }
+   ci->textures_used = NULL;
+
+   /* put the currently bound textures on the 'used' list */
+   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+      /* skip empty sampler slots so no NULL entry ends up on the list */
+      if (ci->textures[i]) {
+         add_texture_used(ci, ci->textures[i]);
+      }
+   }
+}
+
+
+
+/**
+ * Stop tracking a context.
+ *
+ * The context is removed from every surface's context list, and its
+ * tracking record is unlinked and freed together with the texture
+ * references the record holds.
+ */
+void
+sct_destroy_context(struct surface_context_tracker *sct,
+                    struct pipe_context *context)
+{
+   /* XXX should we require an unbinding first? */
+   {
+      struct sct_surface *si;
+      for (si = sct->surfaces; si; si = si->next) {
+         remove_context_from_surface(si, context);
+      }
+   }
+
+   /* remove context from context_info list */
+   {
+      struct sct_context *ci, *next, *prev = NULL;
+      for (ci = sct->contexts; ci; ci = next) {
+         next = ci->next;
+         if (ci->context == context) {
+            struct texture_list *tl, *tl_next;
+            uint i;
+
+            if (prev)
+               prev->next = ci->next;
+            else
+               sct->contexts = ci->next;
+
+            /* drop the references held by the 'used' list ... */
+            for (tl = ci->textures_used; tl; tl = tl_next) {
+               tl_next = tl->next;
+               if (tl->texture)
+                  pipe_texture_release(&tl->texture);
+               FREE(tl);
+            }
+
+            /* ... and those held by the bound-texture slots */
+            for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+               if (ci->textures[i])
+                  pipe_texture_release(&ci->textures[i]);
+            }
+
+            FREE(ci);
+         }
+         else {
+            prev = ci;
+         }
+      }
+   }
+
+}
+
+
+/**
+ * Stop tracking a surface.
+ *
+ * The surface's tracking record is unlinked and freed, along with any
+ * context-list nodes still attached to it.  In debug builds it is
+ * asserted that no context still lists the surface in its surfaces[].
+ */
+void
+sct_destroy_surface(struct surface_context_tracker *sct,
+                    struct pipe_surface *surface)
+{
+   if (1) {
+      /* debug/sanity: no context should be bound to surface */
+      struct sct_context *ci;
+      uint i;
+      for (ci = sct->contexts; ci; ci = ci->next) {
+         for (i = 0; i < MAX_SURFACES; i++) {
+            assert(ci->surfaces[i] != surface);
+         }
+      }
+   }
+
+   /* remove surface from sct_surface list */
+   {
+      struct sct_surface *si, *next, *prev = NULL;
+      for (si = sct->surfaces; si; si = next) {
+         next = si->next;
+         if (si->surface == surface) {
+            struct sct_context_list *cl, *cl_next;
+
+            /* unlink */
+            if (prev)
+               prev->next = si->next;
+            else
+               sct->surfaces = si->next;
+
+            /* free the context-list nodes owned by this record */
+            for (cl = si->contexts; cl; cl = cl_next) {
+               cl_next = cl->next;
+               FREE(cl);
+            }
+
+            FREE(si);
+         }
+         else {
+            prev = si;
+         }
+      }
+   }
+}
diff --git a/src/gallium/auxiliary/sct/sct.h b/src/gallium/auxiliary/sct/sct.h
new file mode 100644
index 0000000000..cf7c4d3bdf
--- /dev/null
+++ b/src/gallium/auxiliary/sct/sct.h
@@ -0,0 +1,123 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Surface/Context Tracking
+ *
+ * For some drivers, we need to monitor the binding between contexts and
+ * surfaces/textures.
+ * This code may evolve quite a bit...
+ */
+
+
+#ifndef SCT_H
+#define SCT_H
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Opaque types used only through pointers in this header.  pipe_texture
+ * must be declared here too: a struct tag first seen inside a prototype's
+ * parameter list is scoped to that prototype alone.
+ */
+struct pipe_context;
+struct pipe_surface;
+struct pipe_texture;
+
+struct sct_context;
+struct sct_surface;
+
+
+/**
+ * Per-device info, basically
+ *
+ * Holds the heads of two singly-linked lists; new records are inserted at
+ * the head of each list.
+ */
+struct surface_context_tracker
+{
+ struct sct_context *contexts;  /**< tracked contexts */
+ struct sct_surface *surfaces;  /**< tracked surfaces */
+};
+
+
+
+/**
+ * Simple linked list of contexts
+ *
+ * This is the node type returned by sct_get_surface_contexts().
+ */
+struct sct_context_list
+{
+ const struct pipe_context *context;  /**< context bound to the surface */
+ struct sct_context_list *next;       /**< next node, or NULL at the end */
+};
+
+
+
+/** Bind a context to a set of surfaces (color + Z), like MakeCurrent(). */
+extern void
+sct_bind_surfaces(struct surface_context_tracker *sct,
+ struct pipe_context *context,
+ uint num_surf,
+ struct pipe_surface **surfaces);
+
+
+/** Bind a texture to a sampler unit of a rendering context. */
+extern void
+sct_bind_texture(struct surface_context_tracker *sct,
+ struct pipe_context *context,
+ uint unit,
+ struct pipe_texture *texture);
+
+
+/** Notify that a texture's image contents changed (currently an empty stub). */
+extern void
+sct_update_texture(struct pipe_texture *tex);
+
+
+/**
+ * Check whether a texture has been used by the context since the last
+ * call to sct_flush_textures().
+ */
+extern boolean
+sct_is_texture_used(struct surface_context_tracker *sct,
+ const struct pipe_context *context,
+ const struct pipe_texture *texture);
+
+/** Release the context's list of used textures after a scene flush. */
+extern void
+sct_flush_textures(struct surface_context_tracker *sct,
+ struct pipe_context *context);
+
+
+/** Return the list of contexts bound to a surface. */
+extern const struct sct_context_list *
+sct_get_surface_contexts(struct surface_context_tracker *sct,
+ const struct pipe_surface *surf);
+
+
+/** Remove a context from all surfaces and free its tracking record. */
+extern void
+sct_destroy_context(struct surface_context_tracker *sct,
+ struct pipe_context *context);
+
+
+/** Free the tracking record of a surface. */
+extern void
+sct_destroy_surface(struct surface_context_tracker *sct,
+ struct pipe_surface *surface);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SCT_H */
diff --git a/src/gallium/auxiliary/sct/usage.c b/src/gallium/auxiliary/sct/usage.c
new file mode 100644
index 0000000000..6227f19962
--- /dev/null
+++ b/src/gallium/auxiliary/sct/usage.c
@@ -0,0 +1,61 @@
+/* surface / context tracking */
+
+
+/*
+
+context A:
+ render to texture T
+
+context B:
+ texture from T
+
+-----------------------
+
+flush surface:
+ which contexts are bound to the surface?
+
+-----------------------
+
+glTexSubImage():
+ which contexts need to be flushed?
+
+ */
+
+
+/*
+
+in MakeCurrent():
+
+ call sct_bind_surfaces(context, list of surfaces) to update the
+ dependencies between context and surfaces
+
+
+in SurfaceFlush(), or whatever it is in D3D:
+
+ call sct_get_surface_contexts(surface) to get a list of contexts
+ which are currently bound to the surface.
+
+
+
+in BindTexture():
+
+ call sct_bind_texture(context, texture) to indicate that the texture
+ is used in the scene.
+
+
+in glTexSubImage() or RenderToTexture():
+
+ call sct_is_texture_used(context, texture) to determine if the texture
+ has been used in the scene, but the scene's not flushed. If TRUE is
+ returned it means the scene has to be rendered/flushed before the contents
+ of the texture can be changed.
+
+
+in psb_scene_flush/terminate():
+
+ call sct_flush_textures(context) to tell the SCT that the textures which
+ were used in the scene can be released.
+
+
+
+*/
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index ac52441400..f2ed9e0353 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -1220,7 +1220,8 @@ fetch_texel( struct tgsi_sampler *sampler,
static void
exec_tex(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst,
- boolean biasLod)
+ boolean biasLod,
+ boolean projected)
{
const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
union tgsi_exec_channel r[8];
@@ -1234,17 +1235,9 @@ exec_tex(struct tgsi_exec_machine *mach,
FETCH(&r[0], 0, CHAN_X);
- switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
- case TGSI_EXTSWIZZLE_W:
+ if (projected) {
FETCH(&r[1], 0, CHAN_W);
micro_div( &r[0], &r[0], &r[1] );
- break;
-
- case TGSI_EXTSWIZZLE_ONE:
- break;
-
- default:
- assert (0);
}
if (biasLod) {
@@ -1266,19 +1259,11 @@ exec_tex(struct tgsi_exec_machine *mach,
FETCH(&r[1], 0, CHAN_Y);
FETCH(&r[2], 0, CHAN_Z);
- switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
- case TGSI_EXTSWIZZLE_W:
+ if (projected) {
FETCH(&r[3], 0, CHAN_W);
micro_div( &r[0], &r[0], &r[3] );
micro_div( &r[1], &r[1], &r[3] );
micro_div( &r[2], &r[2], &r[3] );
- break;
-
- case TGSI_EXTSWIZZLE_ONE:
- break;
-
- default:
- assert (0);
}
if (biasLod) {
@@ -1300,19 +1285,11 @@ exec_tex(struct tgsi_exec_machine *mach,
FETCH(&r[1], 0, CHAN_Y);
FETCH(&r[2], 0, CHAN_Z);
- switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
- case TGSI_EXTSWIZZLE_W:
+ if (projected) {
FETCH(&r[3], 0, CHAN_W);
micro_div( &r[0], &r[0], &r[3] );
micro_div( &r[1], &r[1], &r[3] );
micro_div( &r[2], &r[2], &r[3] );
- break;
-
- case TGSI_EXTSWIZZLE_ONE:
- break;
-
- default:
- assert (0);
}
if (biasLod) {
@@ -2007,14 +1984,14 @@ exec_instruction(
/* simple texture lookup */
/* src[0] = texcoord */
/* src[1] = sampler unit */
- exec_tex(mach, inst, FALSE);
+ exec_tex(mach, inst, FALSE, FALSE);
break;
case TGSI_OPCODE_TXB:
/* Texture lookup with lod bias */
/* src[0] = texcoord (src[0].w = LOD bias) */
/* src[1] = sampler unit */
- exec_tex(mach, inst, TRUE);
+ exec_tex(mach, inst, TRUE, FALSE);
break;
case TGSI_OPCODE_TXD:
@@ -2030,7 +2007,14 @@ exec_instruction(
/* Texture lookup with explit LOD */
/* src[0] = texcoord (src[0].w = LOD) */
/* src[1] = sampler unit */
- exec_tex(mach, inst, TRUE);
+ exec_tex(mach, inst, TRUE, FALSE);
+ break;
+
+ case TGSI_OPCODE_TXP:
+ /* Texture lookup with projection */
+ /* src[0] = texcoord (src[0].w = projection) */
+ /* src[1] = sampler unit */
+ exec_tex(mach, inst, FALSE, TRUE);
break;
case TGSI_OPCODE_UP2H:
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.c b/src/gallium/auxiliary/tgsi/util/tgsi_build.c
index a00ff1c2a5..9c883ab704 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.c
@@ -719,7 +719,6 @@ tgsi_build_full_instruction(
reg->SrcRegisterExtSwz.NegateY,
reg->SrcRegisterExtSwz.NegateZ,
reg->SrcRegisterExtSwz.NegateW,
- reg->SrcRegisterExtSwz.ExtDivide,
prev_token,
instruction,
header );
@@ -1057,7 +1056,6 @@ tgsi_default_src_register_ext_swz( void )
src_register_ext_swz.NegateY = 0;
src_register_ext_swz.NegateZ = 0;
src_register_ext_swz.NegateW = 0;
- src_register_ext_swz.ExtDivide = TGSI_EXTSWIZZLE_ONE;
src_register_ext_swz.Padding = 0;
src_register_ext_swz.Extended = 0;
@@ -1084,7 +1082,6 @@ tgsi_build_src_register_ext_swz(
unsigned negate_y,
unsigned negate_z,
unsigned negate_w,
- unsigned ext_divide,
struct tgsi_token *prev_token,
struct tgsi_instruction *instruction,
struct tgsi_header *header )
@@ -1099,7 +1096,6 @@ tgsi_build_src_register_ext_swz(
assert( negate_y <= 1 );
assert( negate_z <= 1 );
assert( negate_w <= 1 );
- assert( ext_divide <= TGSI_EXTSWIZZLE_ONE );
src_register_ext_swz = tgsi_default_src_register_ext_swz();
src_register_ext_swz.ExtSwizzleX = ext_swizzle_x;
@@ -1110,7 +1106,6 @@ tgsi_build_src_register_ext_swz(
src_register_ext_swz.NegateY = negate_y;
src_register_ext_swz.NegateZ = negate_z;
src_register_ext_swz.NegateW = negate_w;
- src_register_ext_swz.ExtDivide = ext_divide;
prev_token->Extended = 1;
instruction_grow( instruction, header );
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.h b/src/gallium/auxiliary/tgsi/util/tgsi_build.h
index 607860e7fc..80bffc4ae7 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.h
@@ -229,7 +229,6 @@ tgsi_build_src_register_ext_swz(
unsigned negate_y,
unsigned negate_z,
unsigned negate_w,
- unsigned ext_divide,
struct tgsi_token *prev_token,
struct tgsi_instruction *instruction,
struct tgsi_header *header );
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index 59be14a748..ceb407b884 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -459,7 +459,8 @@ static const char *TGSI_OPCODES[] =
"OPCODE_IFC",
"OPCODE_BREAKC",
"OPCODE_KIL",
- "OPCODE_END"
+ "OPCODE_END",
+ "OPCODE_TXP"
};
static const char *TGSI_OPCODES_SHORT[] =
@@ -597,7 +598,8 @@ static const char *TGSI_OPCODES_SHORT[] =
"IFC",
"BREAKC",
"KIL",
- "END"
+ "END",
+ "TXP"
};
static const char *TGSI_SATS[] =
@@ -1361,10 +1363,6 @@ dump_instruction_verbose(
TXT( "\nNegateW : " );
UID( src->SrcRegisterExtSwz.NegateW );
}
- if( deflt || fs->SrcRegisterExtSwz.ExtDivide != src->SrcRegisterExtSwz.ExtDivide ) {
- TXT( "\nExtDivide : " );
- ENM( src->SrcRegisterExtSwz.ExtDivide, TGSI_EXTSWIZZLES );
- }
if( ignored ) {
TXT( "\nPadding : " );
UIX( src->SrcRegisterExtSwz.Padding );
diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile
index 906a46d6b4..2abbe9500e 100644
--- a/src/gallium/auxiliary/util/Makefile
+++ b/src/gallium/auxiliary/util/Makefile
@@ -7,7 +7,9 @@ C_SOURCES = \
p_debug.c \
p_tile.c \
p_util.c \
- u_mm.c
+ u_handle_table.c \
+ u_mm.c \
+ u_snprintf.c
include ../../Makefile.template
diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript
index 4717941434..2030214aa7 100644
--- a/src/gallium/auxiliary/util/SConscript
+++ b/src/gallium/auxiliary/util/SConscript
@@ -6,7 +6,9 @@ util = env.ConvenienceLibrary(
'p_debug.c',
'p_tile.c',
'p_util.c',
+ 'u_handle_table.c',
'u_mm.c',
+ 'u_snprintf.c',
])
auxiliaries.insert(0, util)
diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c
index b9607a6ba7..09cabdae25 100644
--- a/src/gallium/auxiliary/util/p_debug.c
+++ b/src/gallium/auxiliary/util/p_debug.c
@@ -36,14 +36,37 @@
#include <stdlib.h>
#endif
-#include "pipe/p_debug.h"
#include "pipe/p_compiler.h"
+#include "pipe/p_util.h"
+#include "pipe/p_debug.h"
+
+
+#ifdef WIN32
+/**
+ * Variadic convenience wrapper around EngDebugPrint.
+ *
+ * Collects the variable arguments into a va_list and forwards them, together
+ * with the format string, using an empty prefix string.
+ */
+static INLINE void
+rpl_EngDebugPrint(const char *format, ...)
+{
+ va_list ap;
+ va_start(ap, format);
+ EngDebugPrint("", (PCHAR)format, ap);
+ va_end(ap);
+}
+
+int rpl_vsnprintf(char *, size_t, const char *, va_list);
+#endif
void debug_vprintf(const char *format, va_list ap)
{
#ifdef WIN32
- EngDebugPrint("Gallium3D: ", (PCHAR)format, ap);
+#ifndef WINCE
+ /* EngDebugPrint does not handle float point arguments, so we need to use
+ * our own vsnprintf implementation */
+ char buf[512 + 1];
+ rpl_vsnprintf(buf, sizeof(buf), format, ap);
+ rpl_EngDebugPrint("%s", buf);
+#else
+ /* TODO: Implement debug print for WINCE */
+#endif
#else
vfprintf(stderr, format, ap);
#endif
@@ -59,18 +82,92 @@ void debug_printf(const char *format, ...)
}
-static INLINE void debug_abort(void)
+/* TODO: implement a debug_abort that calls EngBugCheckEx on WIN32 */
+
+
+static INLINE void debug_break(void)
{
-#ifdef WIN32
+#if (defined(__i386__) || defined(__386__)) && defined(__GNUC__)
+ __asm("int3");
+#elif (defined(__i386__) || defined(__386__)) && defined(__MSC__)
+ _asm {int 3};
+#elif defined(WIN32) && !defined(WINCE)
EngDebugBreak();
#else
abort();
#endif
}
+#if defined(WIN32)
+ULONG_PTR debug_config_file = 0;
+void *mapped_config_file = 0;
+
+enum {
+   eAssertAbortEn = 0x1,
+};
+
+/**
+ * Check whether a failed assertion should break into the debugger.
+ *
+ * The setting is taken from the first byte of c:/gaDebug.cfg, which is
+ * mapped on first use and read as an ASCII digit whose value supplies the
+ * flag bits: a value of "0" clears all of them, a value of "1" sets the
+ * first bit (eAssertAbortEn) and a value of "2" sets the second bit.
+ *
+ * Returns non-zero when aborts are enabled.  When the configuration file
+ * cannot be mapped the result is 0 (aborts disabled) rather than a null
+ * pointer dereference in the middle of assertion handling.
+ */
+static unsigned abort_en(void)
+{
+   if (!mapped_config_file)
+   {
+      /* Open an 8 byte file for configuration data. */
+      mapped_config_file = EngMapFile(L"\\??\\c:\\gaDebug.cfg", 8, &debug_config_file);
+      if (!mapped_config_file)
+         return 0;
+   }
+
+   /* Convert the ASCII digit to its numeric value and test the abort bit. */
+   return ((((char *)mapped_config_file)[0]) - 0x30) & eAssertAbortEn;
+}
+#else /* WIN32 */
+/**
+ * Check whether a failed assertion should break into the debugger.
+ *
+ * Returns non-zero unless GALLIUM_ABORT_ON_ASSERT is present in the
+ * environment.
+ *
+ * NOTE(review): the variable name reads as an opt-in switch, yet setting it
+ * turns aborts off because of the negation -- confirm the intended polarity.
+ */
+static unsigned abort_en(void)
+{
+   return !GETENV("GALLIUM_ABORT_ON_ASSERT");
+}
+#endif
void debug_assert_fail(const char *expr, const char *file, unsigned line)
{
debug_printf("%s:%i: Assertion `%s' failed.\n", file, line, expr);
- debug_abort();
+ if (abort_en())
+ {
+ debug_break();
+ } else
+ {
+ debug_printf("continuing...\n");
+ }
+}
+
+
+#define DEBUG_MASK_TABLE_SIZE 256
+
+
+/**
+ * Mask hash table.
+ *
+ * For now we just take the lower bits of the key, and do no attempt to solve
+ * collisions. Use a proper hash table when we have dozens of drivers.
+ */
+static uint32_t debug_mask_table[DEBUG_MASK_TABLE_SIZE];
+
+
+/**
+ * Store the debug mask associated with a uuid.
+ *
+ * The slot is selected by the low bits of the uuid, so uuids that share
+ * those bits overwrite each other's mask.
+ */
+void debug_mask_set(uint32_t uuid, uint32_t mask)
+{
+   debug_mask_table[uuid & (DEBUG_MASK_TABLE_SIZE - 1)] = mask;
+}
+
+
+/**
+ * Fetch the debug mask stored in the slot selected by the low bits of the
+ * uuid.
+ */
+uint32_t debug_mask_get(uint32_t uuid)
+{
+   return debug_mask_table[uuid & (DEBUG_MASK_TABLE_SIZE - 1)];
+}
+
+
+/**
+ * Print a debug message only when at least one of the bits in `what` is set
+ * in the mask registered for `uuid`.
+ */
+void debug_mask_vprintf(uint32_t uuid, uint32_t what, const char *format, va_list ap)
+{
+   if(debug_mask_get(uuid) & what)
+      debug_vprintf(format, ap);
+}
diff --git a/src/gallium/auxiliary/util/p_tile.h b/src/gallium/auxiliary/util/p_tile.h
index 318b6d11a6..fdc80a13b3 100644
--- a/src/gallium/auxiliary/util/p_tile.h
+++ b/src/gallium/auxiliary/util/p_tile.h
@@ -52,44 +52,50 @@ pipe_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_surface *ps)
return FALSE;
}
+#ifdef __cplusplus
+extern "C" {
+#endif
-extern void
+void
pipe_get_tile_raw(struct pipe_context *pipe,
struct pipe_surface *ps,
uint x, uint y, uint w, uint h,
void *p, int dst_stride);
-extern void
+void
pipe_put_tile_raw(struct pipe_context *pipe,
struct pipe_surface *ps,
uint x, uint y, uint w, uint h,
const void *p, int src_stride);
-extern void
+void
pipe_get_tile_rgba(struct pipe_context *pipe,
struct pipe_surface *ps,
uint x, uint y, uint w, uint h,
float *p);
-extern void
+void
pipe_put_tile_rgba(struct pipe_context *pipe,
struct pipe_surface *ps,
uint x, uint y, uint w, uint h,
const float *p);
-extern void
+void
pipe_get_tile_z(struct pipe_context *pipe,
struct pipe_surface *ps,
uint x, uint y, uint w, uint h,
uint *z);
-extern void
+void
pipe_put_tile_z(struct pipe_context *pipe,
struct pipe_surface *ps,
uint x, uint y, uint w, uint h,
const uint *z);
+#ifdef __cplusplus
+}
+#endif
#endif
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
new file mode 100644
index 0000000000..d9f2f8fc28
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -0,0 +1,506 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Dennis Smit
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Based on the work of Eric Anholt <anholt@FreeBSD.org>
+ */
+
+/* FIXME: clean this entire file up */
+
+#include "u_cpu_detect.h"
+
+#ifdef __linux__
+#define OS_LINUX
+#endif
+#ifdef WIN32
+#define OS_WIN32
+#endif
+
+#if defined(ARCH_POWERPC)
+#if defined(OS_DARWIN)
+#include <sys/sysctl.h>
+#else
+#include <signal.h>
+#include <setjmp.h>
+#endif
+#endif
+
+#if defined(OS_NETBSD) || defined(OS_OPENBSD)
+#include <sys/param.h>
+#include <sys/sysctl.h>
+#include <machine/cpu.h>
+#endif
+
+#if defined(OS_FREEBSD)
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#endif
+
+#if defined(OS_LINUX)
+#include <signal.h>
+#endif
+
+#if defined(OS_WIN32)
+#include <windows.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+
+
+static struct cpu_detect_caps __cpu_detect_caps;
+static int __cpu_detect_initialized = 0;
+
+static int has_cpuid(void);
+static int cpuid(unsigned int ax, unsigned int *p);
+
+/* The sigill handlers */
+#if defined(ARCH_X86) /* x86 (linux katmai handler check thing) */
+#if defined(OS_LINUX) && defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC)
+/*
+ * SIGILL handler used while probing for SSE: steps over the faulting
+ * instruction and records that SSE is unusable.
+ */
+static void sigill_handler_sse(int signal, struct sigcontext sc)
+{
+ /* Both the "xorps %%xmm0,%%xmm0" and "divps %xmm0,%%xmm1"
+ * instructions are 3 bytes long. We must increment the instruction
+ * pointer manually to avoid repeated execution of the offending
+ * instruction.
+ *
+ * If the SIGILL is caused by a divide-by-zero when unmasked
+ * exceptions aren't supported, the SIMD FPU status and control
+ * word will be restored at the end of the test, so we don't need
+ * to worry about doing it here. Besides, we may not be able to...
+ */
+ /* NOTE(review): sc is received by value; this presumably relies on the
+ * kernel's signal frame layout for the change to take effect -- confirm. */
+ sc.eip += 3;
+
+ __cpu_detect_caps.hasSSE=0;
+}
+
+/*
+ * SIGFPE handler used while probing for SSE: sets bit 0x200 and clears bit
+ * 0x4 of the saved MXCSR when the extended FPU state is present.
+ */
+static void sigfpe_handler_sse(int signal, struct sigcontext sc)
+{
+ if (sc.fpstate->magic != 0xffff) {
+ /* Our signal context has the extended FPU state, so reset the
+ * divide-by-zero exception mask and clear the divide-by-zero
+ * exception bit.
+ */
+ sc.fpstate->mxcsr |= 0x00000200;
+ sc.fpstate->mxcsr &= 0xfffffffb;
+ } else {
+ /* If we ever get here, we're completely hosed.
+ */
+ }
+}
+#endif /* OS_LINUX && _POSIX_SOURCE && X86_FXSR_MAGIC */
+#endif /* ARCH_X86 */
+
+#if defined(OS_WIN32)
+/*
+ * Exception filter installed while probing for SSE on Win32.
+ *
+ * An illegal-instruction exception is taken to mean SSE is unusable: the
+ * 3-byte probe instruction is skipped, hasSSE is cleared and execution
+ * continues.  Any other exception is passed on to the next handler.
+ */
+LONG CALLBACK win32_sig_handler_sse(EXCEPTION_POINTERS* ep)
+{
+   if(ep->ExceptionRecord->ExceptionCode != EXCEPTION_ILLEGAL_INSTRUCTION)
+      return EXCEPTION_CONTINUE_SEARCH;
+
+   ep->ContextRecord->Eip += 3;
+   __cpu_detect_caps.hasSSE = 0;
+   return EXCEPTION_CONTINUE_EXECUTION;
+}
+#endif /* OS_WIN32 */
+
+
+#if defined(ARCH_POWERPC)
+#if !defined(OS_DARWIN)
+/* State shared between the AltiVec probe and its SIGILL handler. */
+static sigjmp_buf __lv_powerpc_jmpbuf;
+static volatile sig_atomic_t __lv_powerpc_canjump = 0;
+
+/*
+ * SIGILL handler for the AltiVec probe.
+ *
+ * While the probe is armed (canjump set) it jumps back to the probe's
+ * sigsetjmp point.  A SIGILL arriving at any other time is re-raised with
+ * the default disposition restored.
+ */
+static void sigill_handler (int sig)
+{
+   if (!__lv_powerpc_canjump) {
+      signal (sig, SIG_DFL);
+      raise (sig);
+   }
+
+   __lv_powerpc_canjump = 0;
+   siglongjmp(__lv_powerpc_jmpbuf, 1);
+}
+#endif /* !OS_DARWIN */
+
+/*
+ * Set hasAltiVec when the vector unit is usable.
+ *
+ * This function is defined for every PowerPC build, Darwin included, because
+ * cpu_detect_initialize calls it whenever ARCH_POWERPC is defined; only the
+ * signal-handler machinery above is limited to non-Darwin systems.
+ */
+static void check_os_altivec_support(void)
+{
+#if defined(OS_DARWIN)
+   int sels[2] = {CTL_HW, HW_VECTORUNIT};
+   int has_vu = 0;
+   size_t len = sizeof (has_vu);   /* sysctl takes a size_t length */
+   int err;
+
+   err = sysctl(sels, 2, &has_vu, &len, NULL, 0);
+
+   if (err == 0) {
+      if (has_vu != 0) {
+         __cpu_detect_caps.hasAltiVec = 1;
+      }
+   }
+#else /* !OS_DARWIN */
+   /* no Darwin, do it the brute-force way */
+   /* this is borrowed from the libmpeg2 library */
+   signal(SIGILL, sigill_handler);
+   if (sigsetjmp(__lv_powerpc_jmpbuf, 1)) {
+      /* The probe instructions raised SIGILL: no AltiVec. */
+      signal(SIGILL, SIG_DFL);
+   } else {
+      __lv_powerpc_canjump = 1;
+
+      __asm __volatile
+         ("mtspr 256, %0\n\t"
+          "vand %%v0, %%v0, %%v0"
+          :
+          : "r" (-1));
+
+      signal(SIGILL, SIG_DFL);
+      __cpu_detect_caps.hasAltiVec = 1;
+   }
+#endif
+}
+#endif /* ARCH_POWERPC */
+
+/* If we're running on a processor that can do SSE, let's see if we
+ * are allowed to or not. This will catch 2.4.0 or later kernels that
+ * haven't been configured for a Pentium III but are running on one,
+ * and RedHat patched 2.2 kernels that have broken exception handling
+ * support for user space apps that do SSE.
+ *
+ * The function only ever clears or overrides __cpu_detect_caps.hasSSE
+ * (and hasSSE2 on NetBSD/OpenBSD); it compiles to an empty body when
+ * ARCH_X86 is not defined.
+ */
+static void check_os_katmai_support(void)
+{
+#if defined(ARCH_X86)
+#if defined(OS_FREEBSD)
+ int has_sse=0, ret;
+ /* NOTE(review): sysctlbyname presumably expects a size_t length here -- confirm */
+ int len = sizeof (has_sse);
+
+ ret = sysctlbyname("hw.instruction_sse", &has_sse, &len, NULL, 0);
+ if (ret || !has_sse)
+ __cpu_detect_caps.hasSSE=0;
+
+#elif defined(OS_NETBSD) || defined(OS_OPENBSD)
+ int has_sse, has_sse2, ret, mib[2];
+ int varlen;
+
+ mib[0] = CTL_MACHDEP;
+ mib[1] = CPU_SSE;
+ varlen = sizeof (has_sse);
+
+ ret = sysctl(mib, 2, &has_sse, &varlen, NULL, 0);
+ if (ret < 0 || !has_sse) {
+ __cpu_detect_caps.hasSSE = 0;
+ } else {
+ __cpu_detect_caps.hasSSE = 1;
+ }
+
+ mib[1] = CPU_SSE2;
+ varlen = sizeof (has_sse2);
+ ret = sysctl(mib, 2, &has_sse2, &varlen, NULL, 0);
+ if (ret < 0 || !has_sse2) {
+ __cpu_detect_caps.hasSSE2 = 0;
+ } else {
+ __cpu_detect_caps.hasSSE2 = 1;
+ }
+ /* Unconditionally overrides the hasSSE result computed above. */
+ __cpu_detect_caps.hasSSE = 0; /* FIXME ?!?!? */
+
+#elif defined(OS_WIN32)
+ LPTOP_LEVEL_EXCEPTION_FILTER exc_fil;
+ if (__cpu_detect_caps.hasSSE) {
+ /* Run one SSE instruction with the probe filter installed, then put the
+ * previous filter back. */
+ exc_fil = SetUnhandledExceptionFilter(win32_sig_handler_sse);
+ __asm __volatile ("xorps %xmm0, %xmm0");
+ SetUnhandledExceptionFilter(exc_fil);
+ }
+#elif defined(OS_LINUX)
+ struct sigaction saved_sigill;
+ struct sigaction saved_sigfpe;
+
+ /* Save the original signal handlers.
+ */
+ sigaction(SIGILL, NULL, &saved_sigill);
+ sigaction(SIGFPE, NULL, &saved_sigfpe);
+
+ /* NOTE(review): both handlers are only defined when _POSIX_SOURCE and
+ * X86_FXSR_MAGIC are also defined, a condition this branch does not
+ * check -- confirm the build always provides them. */
+ signal(SIGILL, (void (*)(int))sigill_handler_sse);
+ signal(SIGFPE, (void (*)(int))sigfpe_handler_sse);
+
+ /* Emulate test for OSFXSR in CR4. The OS will set this bit if it
+ * supports the extended FPU save and restore required for SSE. If
+ * we execute an SSE instruction on a PIII and get a SIGILL, the OS
+ * doesn't support Streaming SIMD Exceptions, even if the processor
+ * does.
+ */
+ if (__cpu_detect_caps.hasSSE) {
+ __asm __volatile ("xorps %xmm1, %xmm0");
+ }
+
+ /* Emulate test for OSXMMEXCPT in CR4. The OS will set this bit if
+ * it supports unmasked SIMD FPU exceptions. If we unmask the
+ * exceptions, do a SIMD divide-by-zero and get a SIGILL, the OS
+ * doesn't support unmasked SIMD FPU exceptions. If we get a SIGFPE
+ * as expected, we're okay but we need to clean up after it.
+ *
+ * Are we being too stringent in our requirement that the OS support
+ * unmasked exceptions? Certain RedHat 2.2 kernels enable SSE by
+ * setting CR4.OSFXSR but don't support unmasked exceptions. Win98
+ * doesn't even support them. We at least know the user-space SSE
+ * support is good in kernels that do support unmasked exceptions,
+ * and therefore to be safe I'm going to leave this test in here.
+ */
+ /* The exception-support test itself is currently disabled, so this
+ * branch does nothing. */
+ if (__cpu_detect_caps.hasSSE) {
+ // test_os_katmai_exception_support();
+ }
+
+ /* Restore the original signal handlers.
+ */
+ sigaction(SIGILL, &saved_sigill, NULL);
+ sigaction(SIGFPE, &saved_sigfpe, NULL);
+
+#else
+ /* We can't use POSIX signal handling to test the availability of
+ * SSE, so we disable it by default.
+ */
+ __cpu_detect_caps.hasSSE = 0;
+#endif /* OS_FREEBSD / OS_NETBSD / OS_OPENBSD / OS_WIN32 / OS_LINUX */
+#endif /* ARCH_X86 */
+}
+
+
+/*
+ * Report whether the CPUID instruction is available.
+ *
+ * On x86 the flags register is read, bit 0x200000 is toggled and written
+ * back, and the flags are read again; the result is non-zero when the
+ * re-read value differs from the original, i.e. the bit could be changed.
+ * Always returns 0 when ARCH_X86 is not defined.
+ *
+ * NOTE(review): the popl/movl/xorl sequence is 32-bit only -- confirm this is
+ * never built for x86-64.
+ */
+static int has_cpuid(void)
+{
+#if defined(ARCH_X86)
+ int a, c;
+
+ __asm __volatile
+ ("pushf\n"
+ "popl %0\n"
+ "movl %0, %1\n"
+ "xorl $0x200000, %0\n"
+ "push %0\n"
+ "popf\n"
+ "pushf\n"
+ "popl %0\n"
+ : "=a" (a), "=c" (c)
+ :
+ : "cc");
+
+ return a != c;
+#else
+ return 0;
+#endif
+}
+
+/*
+ * Execute CPUID for leaf `ax`.
+ *
+ * On x86 the four result registers are stored as p[0] = eax, p[1] = ebx,
+ * p[2] = ecx, p[3] = edx and 0 is returned.  ebx is shuttled through esi
+ * around the instruction so that ebx itself is left intact.  On any other
+ * architecture p is not written and -1 is returned.
+ */
+static int cpuid(unsigned int ax, unsigned int *p)
+{
+#if defined(ARCH_X86)
+   __asm __volatile
+      ("movl %%ebx, %%esi\n\t"
+       "cpuid\n\t"
+       "xchgl %%ebx, %%esi"
+       : "=a" (p[0]), "=S" (p[1]),
+         "=c" (p[2]), "=d" (p[3])
+       : "0" (ax));
+
+   return 0;
+#else
+   return -1;
+#endif
+}
+
+/**
+ * Probe the host CPU and fill in the file-level capability record.
+ *
+ * The record is zeroed first, then the architecture type (from the ARCH_*
+ * macros), the number of CPUs and, on x86, the CPUID feature flags and cache
+ * line size are recorded.  SSE-family flags are kept only when the operating
+ * system check confirms SSE is usable; on platforms without such a check they
+ * are cleared.
+ */
+void cpu_detect_initialize(void)
+{
+   unsigned int regs[4];
+   unsigned int regs2[4];
+
+   int mib[2], ncpu;
+   size_t len;   /* sysctl takes a size_t length */
+
+   memset(&__cpu_detect_caps, 0, sizeof (struct cpu_detect_caps));
+
+   /* Check for arch type */
+#if defined(ARCH_MIPS)
+   __cpu_detect_caps.type = CPU_DETECT_TYPE_MIPS;
+#elif defined(ARCH_ALPHA)
+   __cpu_detect_caps.type = CPU_DETECT_TYPE_ALPHA;
+#elif defined(ARCH_SPARC)
+   __cpu_detect_caps.type = CPU_DETECT_TYPE_SPARC;
+#elif defined(ARCH_X86)
+   __cpu_detect_caps.type = CPU_DETECT_TYPE_X86;
+#elif defined(ARCH_POWERPC)
+   __cpu_detect_caps.type = CPU_DETECT_TYPE_POWERPC;
+#else
+   __cpu_detect_caps.type = CPU_DETECT_TYPE_OTHER;
+#endif
+
+   /* Count the number of CPUs in system */
+#if !defined(OS_WIN32) && !defined(OS_UNKNOWN) && defined(_SC_NPROCESSORS_ONLN)
+   __cpu_detect_caps.nrcpu = sysconf(_SC_NPROCESSORS_ONLN);
+   if (__cpu_detect_caps.nrcpu == -1)
+      __cpu_detect_caps.nrcpu = 1;
+
+#elif defined(OS_NETBSD) || defined(OS_FREEBSD) || defined(OS_OPENBSD)
+
+   mib[0] = CTL_HW;
+   mib[1] = HW_NCPU;
+
+   len = sizeof (ncpu);
+   sysctl(mib, 2, &ncpu, &len, NULL, 0);
+   __cpu_detect_caps.nrcpu = ncpu;
+
+#else
+   __cpu_detect_caps.nrcpu = 1;
+#endif
+
+#if defined(ARCH_X86)
+   /* No cpuid, old 486 or lower */
+   if (has_cpuid() == 0) {
+      /* Detection has still run; there is simply nothing more to query. */
+      __cpu_detect_initialized = 1;
+      return;
+   }
+
+   __cpu_detect_caps.cacheline = 32;
+
+   /* Get max cpuid level */
+   cpuid(0x00000000, regs);
+
+   if (regs[0] >= 0x00000001) {
+      unsigned int cacheline;
+
+      cpuid (0x00000001, regs2);
+
+      __cpu_detect_caps.x86cpuType = (regs2[0] >> 8) & 0xf;
+      if (__cpu_detect_caps.x86cpuType == 0xf)
+         __cpu_detect_caps.x86cpuType = 8 + ((regs2[0] >> 20) & 255); /* use extended family (P4, IA64) */
+
+      /* general feature flags (regs2[3] = edx, regs2[2] = ecx) */
+      __cpu_detect_caps.hasTSC   = (regs2[3] >>  4) & 1; /* 0x0000010 (bit 8 is CX8, not TSC) */
+      __cpu_detect_caps.hasMMX   = (regs2[3] >> 23) & 1; /* 0x0800000 */
+      __cpu_detect_caps.hasSSE   = (regs2[3] >> 25) & 1; /* 0x2000000 */
+      __cpu_detect_caps.hasSSE2  = (regs2[3] >> 26) & 1; /* 0x4000000 */
+      __cpu_detect_caps.hasSSE3  = regs2[2] & 1;         /* 0x0000001 */
+      __cpu_detect_caps.hasSSSE3 = (regs2[2] >>  9) & 1; /* 0x0000200 */
+      __cpu_detect_caps.hasMMX2  = __cpu_detect_caps.hasSSE; /* SSE cpus supports mmxext too */
+
+      cacheline = ((regs2[1] >> 8) & 0xFF) * 8;
+      if (cacheline > 0)
+         __cpu_detect_caps.cacheline = cacheline;
+   }
+
+   cpuid(0x80000000, regs);
+
+   if (regs[0] >= 0x80000001) {
+
+      cpuid(0x80000001, regs2);
+
+      /* Shift the unsigned register value instead of building the mask with
+       * a signed "1 << 31", which overflows int. */
+      __cpu_detect_caps.hasMMX  |= (regs2[3] >> 23) & 1; /* 0x0800000 */
+      __cpu_detect_caps.hasMMX2 |= (regs2[3] >> 22) & 1; /* 0x0400000 */
+      __cpu_detect_caps.has3DNow    = (regs2[3] >> 31) & 1; /* 0x80000000 */
+      __cpu_detect_caps.has3DNowExt = (regs2[3] >> 30) & 1; /* 0x40000000 */
+   }
+
+   if (regs[0] >= 0x80000006) {
+      cpuid(0x80000006, regs2);
+      __cpu_detect_caps.cacheline = regs2[2] & 0xFF;
+   }
+
+
+#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_CYGWIN) || defined(OS_OPENBSD)
+   if (__cpu_detect_caps.hasSSE)
+      check_os_katmai_support();
+
+   /* Without usable SSE none of the later SSE generations can be used. */
+   if (!__cpu_detect_caps.hasSSE) {
+      __cpu_detect_caps.hasSSE2 = 0;
+      __cpu_detect_caps.hasSSE3 = 0;
+      __cpu_detect_caps.hasSSSE3 = 0;
+   }
+#else
+   __cpu_detect_caps.hasSSE = 0;
+   __cpu_detect_caps.hasSSE2 = 0;
+   __cpu_detect_caps.hasSSE3 = 0;
+   __cpu_detect_caps.hasSSSE3 = 0;
+#endif
+#endif /* ARCH_X86 */
+
+#if defined(ARCH_POWERPC)
+   check_os_altivec_support();
+#endif /* ARCH_POWERPC */
+
+   __cpu_detect_initialized = 1;
+}
+
+/**
+ * Return a pointer to the file-level capability record filled in by
+ * cpu_detect_initialize.  The caller does not own the storage.
+ */
+struct cpu_detect_caps *cpu_detect_get_caps(void)
+{
+   return &__cpu_detect_caps;
+}
+
+/*
+ * Getters for the individual feature flags.  Each one returns the matching
+ * field of the capability record as it currently stands.
+ */
+int cpu_detect_get_tsc(void)
+{
+   return __cpu_detect_caps.hasTSC;
+}
+
+int cpu_detect_get_mmx(void)
+{
+   return __cpu_detect_caps.hasMMX;
+}
+
+int cpu_detect_get_mmx2(void)
+{
+   return __cpu_detect_caps.hasMMX2;
+}
+
+int cpu_detect_get_sse(void)
+{
+   return __cpu_detect_caps.hasSSE;
+}
+
+int cpu_detect_get_sse2(void)
+{
+   return __cpu_detect_caps.hasSSE2;
+}
+
+int cpu_detect_get_sse3(void)
+{
+   return __cpu_detect_caps.hasSSE3;
+}
+
+int cpu_detect_get_ssse3(void)
+{
+   return __cpu_detect_caps.hasSSSE3;
+}
+
+int cpu_detect_get_3dnow(void)
+{
+   return __cpu_detect_caps.has3DNow;
+}
+
+/* Reports the extended 3DNow! flag (has3DNowExt). */
+int cpu_detect_get_3dnow2(void)
+{
+   return __cpu_detect_caps.has3DNowExt;
+}
+
+int cpu_detect_get_altivec(void)
+{
+   return __cpu_detect_caps.hasAltiVec;
+}
+
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h
new file mode 100644
index 0000000000..1612d49286
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_cpu_detect.h
@@ -0,0 +1,78 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Dennis Smit
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ ***************************************************************************/
+
+/*
+ * Based on the work of Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#ifndef _CPU_DETECT_H
+#define _CPU_DETECT_H
+
+/** CPU architecture, selected at compile time from the ARCH_* macros. */
+typedef enum {
+ CPU_DETECT_TYPE_MIPS,
+ CPU_DETECT_TYPE_ALPHA,
+ CPU_DETECT_TYPE_SPARC,
+ CPU_DETECT_TYPE_X86,
+ CPU_DETECT_TYPE_POWERPC,
+ CPU_DETECT_TYPE_OTHER /* none of the known ARCH_* macros is defined */
+} cpu_detect_type;
+
+/**
+ * Capabilities of the host CPU, as filled in by cpu_detect_initialize.
+ * Every has* field is 0 or 1.
+ */
+struct cpu_detect_caps {
+ cpu_detect_type type;
+ int nrcpu; /* number of CPUs; 1 when it cannot be determined */
+
+ /* Feature flags */
+ int x86cpuType; /* family number taken from CPUID leaf 1 (x86 only) */
+ int cacheline; /* cache line size reported by CPUID; starts at 32 on x86 */
+
+ int hasTSC;
+ int hasMMX;
+ int hasMMX2;
+ int hasSSE;
+ int hasSSE2;
+ int hasSSE3;
+ int hasSSSE3;
+ int has3DNow;
+ int has3DNowExt;
+ int hasAltiVec;
+};
+
+/* prototypes */
+void cpu_detect_initialize(void);
+struct cpu_detect_caps *cpu_detect_get_caps(void);
+
+int cpu_detect_get_tsc(void);
+int cpu_detect_get_mmx(void);
+int cpu_detect_get_mmx2(void);
+int cpu_detect_get_sse(void);
+int cpu_detect_get_sse2(void);
+int cpu_detect_get_sse3(void);
+int cpu_detect_get_ssse3(void);
+int cpu_detect_get_3dnow(void);
+int cpu_detect_get_3dnow2(void);
+int cpu_detect_get_altivec(void);
+
+#endif /* _CPU_DETECT_H */
diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c
new file mode 100644
index 0000000000..8a298f7c41
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_handle_table.c
@@ -0,0 +1,207 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Generic handle table implementation.
+ *
+ * @author José Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_debug.h"
+#include "pipe/p_util.h"
+
+#include "u_handle_table.h"
+
+
+#define HANDLE_TABLE_INITIAL_SIZE 16
+
+
+/**
+ * Handle table state.
+ *
+ * Handles are one-based: handle h refers to objects[h - 1], and 0 is never
+ * a valid handle.
+ */
+struct handle_table
+{
+ /** Object array. Empty handles have a null object */
+ void **objects;
+
+ /** Number of objects the table can currently hold */
+ unsigned size;
+ /**
+ * Index at which the search for a free slot starts. It is advanced past
+ * occupied slots when adding and lowered when a slot below it is freed.
+ */
+ unsigned filled;
+
+ /** Optional object destructor */
+ void (*destroy)(void *object);
+};
+
+
+/**
+ * Allocate an empty handle table with room for HANDLE_TABLE_INITIAL_SIZE
+ * objects and no destructor.  Returns NULL when either allocation fails.
+ */
+struct handle_table *
+handle_table_create(void)
+{
+   struct handle_table *table = MALLOC_STRUCT(handle_table);
+
+   if(table) {
+      table->objects = (void **)CALLOC(HANDLE_TABLE_INITIAL_SIZE, sizeof(void *));
+      if(table->objects) {
+         table->size = HANDLE_TABLE_INITIAL_SIZE;
+         table->filled = 0;
+         table->destroy = NULL;
+         return table;
+      }
+
+      /* the slot array could not be allocated: release the header too */
+      FREE(table);
+   }
+
+   return NULL;
+}
+
+
+/**
+ * Install (or, with NULL, clear) the callback invoked on objects when they
+ * are removed from the table or the table is destroyed.
+ */
+void
+handle_table_set_destroy(struct handle_table *ht,
+ void (*destroy)(void *object))
+{
+ assert(ht);
+ ht->destroy = destroy;
+}
+
+
+/**
+ * Store an object in the first free slot and return its handle.
+ *
+ * Handles are one-based (slot index + 1).  The table doubles in size when no
+ * free slot is left.  Returns 0 when `object` is NULL, when the table cannot
+ * grow (out of memory, or the doubled size would overflow) or when the
+ * handle value itself would wrap around to 0.
+ */
+unsigned
+handle_table_add(struct handle_table *ht,
+                 void *object)
+{
+   unsigned index;
+   unsigned handle;
+
+   assert(ht);
+   assert(object);
+   if(!object)
+      return 0;
+
+   /* linear search for an empty handle */
+   while(ht->filled < ht->size && ht->objects[ht->filled])
+      ++ht->filled;
+
+   /* grow the table */
+   if(ht->filled == ht->size) {
+      unsigned new_size = ht->size*2;
+      void **new_objects;
+
+      /* Check at run time as well: asserts vanish from release builds.
+       * Reject a slot count that wrapped around and a byte size that
+       * would not fit in size_t. */
+      if(new_size <= ht->size || new_size > ~(size_t)0/sizeof(void *))
+         return 0;
+
+      new_objects = (void **)REALLOC((void *)ht->objects,
+                                     ht->size*sizeof(void *),
+                                     new_size*sizeof(void *));
+      if(!new_objects)
+         return 0;
+
+      /* the newly gained slots start out empty */
+      memset(new_objects + ht->size, 0, (new_size - ht->size)*sizeof(void *));
+
+      ht->size = new_size;
+      ht->objects = new_objects;
+   }
+
+   index = ht->filled;
+
+   handle = index + 1;
+
+   /* check integer overflow */
+   if(!handle)
+      return 0;
+
+   assert(!ht->objects[index]);
+   ht->objects[index] = object;
+   ++ht->filled;
+
+   return handle;
+}
+
+
+/**
+ * Look up the object stored under a handle.
+ *
+ * Returns NULL for handle 0 or a handle beyond the current table size.  An
+ * in-range handle whose slot is empty trips an assert and yields the (null)
+ * slot contents.
+ */
+void *
+handle_table_get(struct handle_table *ht,
+                 unsigned handle)
+{
+   void *object = NULL;
+
+   assert(ht);
+   assert(handle > 0);
+   assert(handle <= ht->size);
+
+   if(handle >= 1 && handle <= ht->size) {
+      /* handles are one-based */
+      object = ht->objects[handle - 1];
+      assert(object);
+   }
+
+   return object;
+}
+
+
+/**
+ * Release a handle.
+ *
+ * The destructor, if one is set, is run on the stored object; the slot is
+ * then emptied so that a later add can reuse it.  Handle 0 and handles
+ * beyond the current table size are ignored.
+ */
+void
+handle_table_remove(struct handle_table *ht,
+                    unsigned handle)
+{
+   unsigned slot;
+   void *object;
+
+   assert(ht);
+   assert(handle > 0);
+   assert(handle <= ht->size);
+   if(handle == 0 || handle > ht->size)
+      return;
+
+   slot = handle - 1;
+   object = ht->objects[slot];
+   assert(object);
+
+   if(object != NULL && ht->destroy != NULL)
+      ht->destroy(object);
+
+   ht->objects[slot] = NULL;
+
+   /* let the next search for a free slot start no later than this one */
+   if(ht->filled > slot)
+      ht->filled = slot;
+}
+
+
+/**
+ * Tear down the table: run the destructor (when one is set) on every object
+ * still stored, then free the slot array and the table itself.
+ */
+void
+handle_table_destroy(struct handle_table *ht)
+{
+   assert(ht);
+
+   if(ht->destroy) {
+      unsigned i;
+
+      for(i = 0; i < ht->size; ++i) {
+         void *entry = ht->objects[i];
+
+         if(entry)
+            ht->destroy(entry);
+      }
+   }
+
+   FREE(ht->objects);
+   FREE(ht);
+}
+
diff --git a/src/gallium/auxiliary/util/u_handle_table.h b/src/gallium/auxiliary/util/u_handle_table.h
new file mode 100644
index 0000000000..51fc273865
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_handle_table.h
@@ -0,0 +1,96 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Generic handle table.
+ *
+ * @author José Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+#ifndef U_HANDLE_TABLE_H_
+#define U_HANDLE_TABLE_H_
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/**
+ * Abstract data type to map integer handles to objects.
+ */
+struct handle_table;
+
+
+struct handle_table *
+handle_table_create(void);
+
+
+/**
+ * Set an optional destructor callback.
+ *
+ * If set, it will be called during handle_table_remove and
+ * handle_table_destroy calls.
+ */
+void
+handle_table_set_destroy(struct handle_table *ht,
+ void (*destroy)(void *object));
+
+
+/**
+ * Add a new object.
+ *
+ * Returns a zero handle on failure (out of memory).
+ */
+unsigned
+handle_table_add(struct handle_table *ht,
+ void *object);
+
+/**
+ * Fetch an existing object.
+ *
+ * Returns NULL for an invalid handle.
+ */
+void *
+handle_table_get(struct handle_table *ht,
+ unsigned handle);
+
+
+/**
+ * Remove an object.
+ *
+ * Calls the destructor callback, if one is set, and frees the handle for
+ * reuse. Invalid handles are ignored.
+ */
+void
+handle_table_remove(struct handle_table *ht,
+ unsigned handle);
+
+
+/**
+ * Destroy the table.
+ *
+ * Calls the destructor callback, if one is set, on every object still in
+ * the table before releasing the table's memory.
+ */
+void
+handle_table_destroy(struct handle_table *ht);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* U_HANDLE_TABLE_H_ */
diff --git a/src/gallium/auxiliary/util/u_snprintf.c b/src/gallium/auxiliary/util/u_snprintf.c
new file mode 100644
index 0000000000..61c20b48f7
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_snprintf.c
@@ -0,0 +1,1478 @@
+/*
+ * Copyright (c) 1995 Patrick Powell.
+ *
+ * This code is based on code written by Patrick Powell <papowell@astart.com>.
+ * It may be used for any purpose as long as this notice remains intact on all
+ * source code distributions.
+ */
+
+/*
+ * Copyright (c) 2008 Holger Weiss.
+ *
+ * This version of the code is maintained by Holger Weiss <holger@jhweiss.de>.
+ * My changes to the code may freely be used, modified and/or redistributed for
+ * any purpose. It would be nice if additions and fixes to this file (including
+ * trivial code cleanups) would be sent back in order to let me include them in
+ * the version available at <http://www.jhweiss.de/software/snprintf.html>.
+ * However, this is not a requirement for using or redistributing (possibly
+ * modified) versions of this file, nor is leaving this notice intact mandatory.
+ */
+
+/*
+ * History
+ *
+ * 2008-01-20 Holger Weiss <holger@jhweiss.de> for C99-snprintf 1.1:
+ *
+ * Fixed the detection of infinite floating point values on IRIX (and
+ * possibly other systems) and applied another few minor cleanups.
+ *
+ * 2008-01-06 Holger Weiss <holger@jhweiss.de> for C99-snprintf 1.0:
+ *
+ * Added a lot of new features, fixed many bugs, and incorporated various
+ * improvements done by Andrew Tridgell <tridge@samba.org>, Russ Allbery
+ * <rra@stanford.edu>, Hrvoje Niksic <hniksic@xemacs.org>, Damien Miller
+ * <djm@mindrot.org>, and others for the Samba, INN, Wget, and OpenSSH
+ * projects. The additions include: support the "e", "E", "g", "G", and
+ * "F" conversion specifiers (and use conversion style "f" or "F" for the
+ * still unsupported "a" and "A" specifiers); support the "hh", "ll", "j",
+ * "t", and "z" length modifiers; support the "#" flag and the (non-C99)
+ * "'" flag; use localeconv(3) (if available) to get both the current
+ * locale's decimal point character and the separator between groups of
+ * digits; fix the handling of various corner cases of field width and
+ * precision specifications; fix various floating point conversion bugs;
+ * handle infinite and NaN floating point values; don't attempt to write to
+ * the output buffer (which may be NULL) if a size of zero was specified;
+ * check for integer overflow of the field width, precision, and return
+ * values and during the floating point conversion; use the OUTCHAR() macro
+ * instead of a function for better performance; provide asprintf(3) and
+ * vasprintf(3) functions; add new test cases. The replacement functions
+ * have been renamed to use an "rpl_" prefix, the function calls in the
+ * main project (and in this file) must be redefined accordingly for each
+ * replacement function which is needed (by using Autoconf or other means).
+ * Various other minor improvements have been applied and the coding style
+ * was cleaned up for consistency.
+ *
+ * 2007-07-23 Holger Weiss <holger@jhweiss.de> for Mutt 1.5.13:
+ *
+ * C99 compliant snprintf(3) and vsnprintf(3) functions return the number
+ * of characters that would have been written to a sufficiently sized
+ * buffer (excluding the '\0'). The original code simply returned the
+ * length of the resulting output string, so that's been fixed.
+ *
+ * 1998-03-05 Michael Elkins <me@mutt.org> for Mutt 0.90.8:
+ *
+ * The original code assumed that both snprintf(3) and vsnprintf(3) were
+ * missing. Some systems only have snprintf(3) but not vsnprintf(3), so
+ * the code is now broken down under HAVE_SNPRINTF and HAVE_VSNPRINTF.
+ *
+ * 1998-01-27 Thomas Roessler <roessler@does-not-exist.org> for Mutt 0.89i:
+ *
+ * The PGP code was using unsigned hexadecimal formats. Unfortunately,
+ * unsigned formats simply didn't work.
+ *
+ * 1997-10-22 Brandon Long <blong@fiction.net> for Mutt 0.87.1:
+ *
+ * Ok, added some minimal floating point support, which means this probably
+ * requires libm on most operating systems. Don't yet support the exponent
+ * (e,E) and sigfig (g,G). Also, fmtint() was pretty badly broken, it just
+ * wasn't being exercised in ways which showed it, so that's been fixed.
+ * Also, formatted the code to Mutt conventions, and removed dead code left
+ * over from the original. Also, there is now a builtin-test, run with:
+ * gcc -DTEST_SNPRINTF -o snprintf snprintf.c -lm && ./snprintf
+ *
+ * 1996-09-15 Brandon Long <blong@fiction.net> for Mutt 0.43:
+ *
+ * This was ugly. It is still ugly. I opted out of floating point
+ * numbers, but the formatter understands just about everything from the
+ * normal C string format, at least as far as I can tell from the Solaris
+ * 2.5 printf(3S) man page.
+ */
+
+/*
+ * ToDo
+ *
+ * - Add wide character support.
+ * - Add support for "%a" and "%A" conversions.
+ * - Create test routines which predefine the expected results. Our test cases
+ * usually expose bugs in system implementations rather than in ours :-)
+ */
+
+/*
+ * Usage
+ *
+ * 1) The following preprocessor macros should be defined to 1 if the feature or
+ * file in question is available on the target system (by using Autoconf or
+ * other means), though basic functionality should be available as long as
+ * HAVE_STDARG_H and HAVE_STDLIB_H are defined correctly:
+ *
+ * HAVE_VSNPRINTF
+ * HAVE_SNPRINTF
+ * HAVE_VASPRINTF
+ * HAVE_ASPRINTF
+ * HAVE_STDARG_H
+ * HAVE_STDDEF_H
+ * HAVE_STDINT_H
+ * HAVE_STDLIB_H
+ * HAVE_INTTYPES_H
+ * HAVE_LOCALE_H
+ * HAVE_LOCALECONV
+ * HAVE_LCONV_DECIMAL_POINT
+ * HAVE_LCONV_THOUSANDS_SEP
+ * HAVE_LONG_DOUBLE
+ * HAVE_LONG_LONG_INT
+ * HAVE_UNSIGNED_LONG_LONG_INT
+ * HAVE_INTMAX_T
+ * HAVE_UINTMAX_T
+ * HAVE_UINTPTR_T
+ * HAVE_PTRDIFF_T
+ * HAVE_VA_COPY
+ * HAVE___VA_COPY
+ *
+ * 2) The calls to the functions which should be replaced must be redefined
+ * throughout the project files (by using Autoconf or other means):
+ *
+ * #define vsnprintf rpl_vsnprintf
+ * #define snprintf rpl_snprintf
+ * #define vasprintf rpl_vasprintf
+ * #define asprintf rpl_asprintf
+ *
+ * 3) The required replacement functions should be declared in some header file
+ * included throughout the project files:
+ *
+ * #if HAVE_CONFIG_H
+ * #include <config.h>
+ * #endif
+ * #if HAVE_STDARG_H
+ * #include <stdarg.h>
+ * #if !HAVE_VSNPRINTF
+ * int rpl_vsnprintf(char *, size_t, const char *, va_list);
+ * #endif
+ * #if !HAVE_SNPRINTF
+ * int rpl_snprintf(char *, size_t, const char *, ...);
+ * #endif
+ * #if !HAVE_VASPRINTF
+ * int rpl_vasprintf(char **, const char *, va_list);
+ * #endif
+ * #if !HAVE_ASPRINTF
+ * int rpl_asprintf(char **, const char *, ...);
+ * #endif
+ * #endif
+ *
+ * Autoconf macros for handling step 1 and step 2 are available at
+ * <http://www.jhweiss.de/software/snprintf.html>.
+ */
+
+/*
+ * Feature configuration.
+ *
+ * With HAVE_CONFIG_H the HAVE_* settings come from <config.h>. Otherwise
+ * they are hard-coded here: WIN32 builds declare snprintf/vsnprintf as
+ * missing and redirect both names to the rpl_ replacements, while every
+ * other platform declares all four functions as available, so the
+ * "#if !HAVE_SNPRINTF || ..." guard below evaluates to false there.
+ */
+#if HAVE_CONFIG_H
+#include <config.h>
+#else
+#ifdef WIN32
+#define vsnprintf rpl_vsnprintf
+#define snprintf rpl_snprintf
+#define HAVE_VSNPRINTF 0
+#define HAVE_SNPRINTF 0
+#define HAVE_VASPRINTF 1 /* not needed */
+#define HAVE_ASPRINTF 1 /* not needed */
+#define HAVE_STDARG_H 1
+#define HAVE_STDDEF_H 1
+#define HAVE_STDINT_H 0
+#define HAVE_STDLIB_H 1
+#define HAVE_INTTYPES_H 0
+#define HAVE_LOCALE_H 0
+#define HAVE_LOCALECONV 0
+#define HAVE_LCONV_DECIMAL_POINT 0
+#define HAVE_LCONV_THOUSANDS_SEP 0
+#define HAVE_LONG_DOUBLE 0
+#define HAVE_LONG_LONG_INT 1
+#define HAVE_UNSIGNED_LONG_LONG_INT 1
+#define HAVE_INTMAX_T 0
+#define HAVE_UINTMAX_T 0
+#define HAVE_UINTPTR_T 1
+#define HAVE_PTRDIFF_T 1
+#define HAVE_VA_COPY 0
+#define HAVE___VA_COPY 0
+#else
+/* The HAVE_* macros not defined here evaluate to 0 inside "#if". */
+#define HAVE_VSNPRINTF 1
+#define HAVE_SNPRINTF 1
+#define HAVE_VASPRINTF 1
+#define HAVE_ASPRINTF 1
+#endif
+#endif /* HAVE_CONFIG_H */
+
+#if !HAVE_SNPRINTF || !HAVE_VSNPRINTF || !HAVE_ASPRINTF || !HAVE_VASPRINTF
+#include <stdio.h> /* For NULL, size_t, vsnprintf(3), and vasprintf(3). */
+/*
+ * VA_START() and VA_SHIFT() hide the difference between <stdarg.h> and the
+ * older <varargs.h>: with <varargs.h>, va_start() takes no "last" argument
+ * and VA_SHIFT() fetches a named parameter from the argument list with
+ * va_arg(). With <stdarg.h>, VA_SHIFT() expands to nothing.
+ */
+#ifdef VA_START
+#undef VA_START
+#endif /* defined(VA_START) */
+#ifdef VA_SHIFT
+#undef VA_SHIFT
+#endif /* defined(VA_SHIFT) */
+#if HAVE_STDARG_H
+#include <stdarg.h>
+#define VA_START(ap, last) va_start(ap, last)
+#define VA_SHIFT(ap, value, type) /* No-op for ANSI C. */
+#else /* Assume <varargs.h> is available. */
+#include <varargs.h>
+#define VA_START(ap, last) va_start(ap) /* "last" is ignored. */
+#define VA_SHIFT(ap, value, type) value = va_arg(ap, type)
+#endif /* HAVE_STDARG_H */
+
+/*
+ * VA_COPY() and VA_END_COPY() are only set up when the vasprintf(3)
+ * replacement is needed. They map to va_copy() or __va_copy() when one of
+ * them is available; otherwise the va_list is duplicated with a plain byte
+ * copy through mymemcpy() and VA_END_COPY() expands to nothing.
+ *
+ * NOTE(review): the byte-copy fallback assumes that a va_list can be
+ * duplicated by copying sizeof(va_list) bytes -- confirm this holds for every
+ * target that ends up on that path.
+ */
+#if !HAVE_VASPRINTF
+#if HAVE_STDLIB_H
+#include <stdlib.h> /* For malloc(3). */
+#endif /* HAVE_STDLIB_H */
+#ifdef VA_COPY
+#undef VA_COPY
+#endif /* defined(VA_COPY) */
+#ifdef VA_END_COPY
+#undef VA_END_COPY
+#endif /* defined(VA_END_COPY) */
+#if HAVE_VA_COPY
+#define VA_COPY(dest, src) va_copy(dest, src)
+#define VA_END_COPY(ap) va_end(ap)
+#elif HAVE___VA_COPY
+#define VA_COPY(dest, src) __va_copy(dest, src)
+#define VA_END_COPY(ap) va_end(ap)
+#else
+#define VA_COPY(dest, src) (void)mymemcpy(&dest, &src, sizeof(va_list))
+#define VA_END_COPY(ap) /* No-op. */
+#define NEED_MYMEMCPY 1
+static void *mymemcpy(void *, void *, size_t);
+#endif /* HAVE_VA_COPY */
+#endif /* !HAVE_VASPRINTF */
+
+#if !HAVE_VSNPRINTF
+#include <limits.h> /* For *_MAX. */
+#if HAVE_INTTYPES_H
+#include <inttypes.h> /* For intmax_t (if not defined in <stdint.h>). */
+#endif /* HAVE_INTTYPES_H */
+#if HAVE_LOCALE_H
+#include <locale.h> /* For localeconv(3). */
+#endif /* HAVE_LOCALE_H */
+#if HAVE_STDDEF_H
+#include <stddef.h> /* For ptrdiff_t. */
+#endif /* HAVE_STDDEF_H */
+#if HAVE_STDINT_H
+#include <stdint.h> /* For intmax_t. */
+#endif /* HAVE_STDINT_H */
+
+/*
+ * Support for unsigned long long int. We may also need ULLONG_MAX.
+ *
+ * ULLONG is the widest unsigned integer type assumed to be available; it
+ * falls back to unsigned long int when HAVE_UNSIGNED_LONG_LONG_INT is 0.
+ *
+ * NOTE(review): when unsigned long long int is available but <limits.h> does
+ * not provide ULLONG_MAX, ULLONG_MAX is set to ULONG_MAX, which is smaller
+ * than the real maximum wherever long is narrower than long long -- confirm
+ * this is acceptable for the targets in use.
+ */
+#ifndef ULONG_MAX /* We may need ULONG_MAX as a fallback. */
+#ifdef UINT_MAX
+#define ULONG_MAX UINT_MAX
+#else
+#define ULONG_MAX INT_MAX
+#endif /* defined(UINT_MAX) */
+#endif /* !defined(ULONG_MAX) */
+#ifdef ULLONG
+#undef ULLONG
+#endif /* defined(ULLONG) */
+#if HAVE_UNSIGNED_LONG_LONG_INT
+#define ULLONG unsigned long long int
+#ifndef ULLONG_MAX
+#define ULLONG_MAX ULONG_MAX
+#endif /* !defined(ULLONG_MAX) */
+#else
+#define ULLONG unsigned long int
+#ifdef ULLONG_MAX
+#undef ULLONG_MAX
+#endif /* defined(ULLONG_MAX) */
+#define ULLONG_MAX ULONG_MAX
+#endif /* HAVE_UNSIGNED_LONG_LONG_INT */
+
+/*
+ * Type aliases used by the formatter. Each alias selects the native type
+ * when the matching HAVE_* macro is non-zero (or when the type name is itself
+ * defined as a macro) and otherwise falls back to the widest substitute set
+ * up in this file.
+ */
+
+/* Support for uintmax_t. We also need UINTMAX_MAX. */
+#ifdef UINTMAX_T
+#undef UINTMAX_T
+#endif /* defined(UINTMAX_T) */
+#if HAVE_UINTMAX_T || defined(uintmax_t)
+#define UINTMAX_T uintmax_t
+#ifndef UINTMAX_MAX
+#define UINTMAX_MAX ULLONG_MAX
+#endif /* !defined(UINTMAX_MAX) */
+#else
+#define UINTMAX_T ULLONG
+#ifdef UINTMAX_MAX
+#undef UINTMAX_MAX
+#endif /* defined(UINTMAX_MAX) */
+#define UINTMAX_MAX ULLONG_MAX
+#endif /* HAVE_UINTMAX_T || defined(uintmax_t) */
+
+/* Support for long double. */
+#ifndef LDOUBLE
+#if HAVE_LONG_DOUBLE
+#define LDOUBLE long double
+#else
+#define LDOUBLE double
+#endif /* HAVE_LONG_DOUBLE */
+#endif /* !defined(LDOUBLE) */
+
+/* Support for long long int. */
+#ifndef LLONG
+#if HAVE_LONG_LONG_INT
+#define LLONG long long int
+#else
+#define LLONG long int
+#endif /* HAVE_LONG_LONG_INT */
+#endif /* !defined(LLONG) */
+
+/* Support for intmax_t. */
+#ifndef INTMAX_T
+#if HAVE_INTMAX_T || defined(intmax_t)
+#define INTMAX_T intmax_t
+#else
+#define INTMAX_T LLONG
+#endif /* HAVE_INTMAX_T || defined(intmax_t) */
+#endif /* !defined(INTMAX_T) */
+
+/* Support for uintptr_t. */
+#ifndef UINTPTR_T
+#if HAVE_UINTPTR_T || defined(uintptr_t)
+#define UINTPTR_T uintptr_t
+#else
+#define UINTPTR_T unsigned long int
+#endif /* HAVE_UINTPTR_T || defined(uintptr_t) */
+#endif /* !defined(UINTPTR_T) */
+
+/* Support for ptrdiff_t. */
+#ifndef PTRDIFF_T
+#if HAVE_PTRDIFF_T || defined(ptrdiff_t)
+#define PTRDIFF_T ptrdiff_t
+#else
+#define PTRDIFF_T long int
+#endif /* HAVE_PTRDIFF_T || defined(ptrdiff_t) */
+#endif /* !defined(PTRDIFF_T) */
+
+/*
+ * We need an unsigned integer type corresponding to ptrdiff_t (cf. C99:
+ * 7.19.6.1, 7). However, we'll simply use PTRDIFF_T and convert it to an
+ * unsigned type if necessary. This should work just fine in practice.
+ */
+#ifndef UPTRDIFF_T
+#define UPTRDIFF_T PTRDIFF_T
+#endif /* !defined(UPTRDIFF_T) */
+
+/*
+ * We need a signed integer type corresponding to size_t (cf. C99: 7.19.6.1, 7).
+ * However, we'll simply use size_t and convert it to a signed type if
+ * necessary. This should work just fine in practice.
+ *
+ * Because SSIZE_T is size_t itself, a "%zd" argument is fetched as an
+ * unsigned value and then converted to INTMAX_T; where size_t is narrower
+ * than INTMAX_T the converted value can therefore never be negative.
+ */
+#ifndef SSIZE_T
+#define SSIZE_T size_t
+#endif /* !defined(SSIZE_T) */
+
+/* Either ERANGE or E2BIG should be available everywhere. */
+#ifndef ERANGE
+#define ERANGE E2BIG
+#endif /* !defined(ERANGE) */
+#ifndef EOVERFLOW
+#define EOVERFLOW ERANGE
+#endif /* !defined(EOVERFLOW) */
+
+/*
+ * Buffer size to hold the octal string representation of UINT128_MAX without
+ * nul-termination ("3777777777777777777777777777777777777777777").
+ * Octal is the smallest base used by the integer conversions in this file,
+ * so it yields the longest digit string.
+ */
+#ifdef MAX_CONVERT_LENGTH
+#undef MAX_CONVERT_LENGTH
+#endif /* defined(MAX_CONVERT_LENGTH) */
+#define MAX_CONVERT_LENGTH 43
+
+/*
+ * Format read states. rpl_vsnprintf() walks a conversion specification
+ * through these states in order: flags, field width, '.', precision, length
+ * modifier, conversion specifier.
+ */
+#define PRINT_S_DEFAULT 0
+#define PRINT_S_FLAGS 1
+#define PRINT_S_WIDTH 2
+#define PRINT_S_DOT 3
+#define PRINT_S_PRECISION 4
+#define PRINT_S_MOD 5
+#define PRINT_S_CONV 6
+
+/*
+ * Format flags (bit mask). MINUS, PLUS, SPACE, NUM, ZERO and QUOTE are set
+ * from the '-', '+', ' ', '#', '0' and '\'' flag characters; the remaining
+ * bits are set by rpl_vsnprintf() according to the conversion specifier.
+ */
+#define PRINT_F_MINUS (1 << 0)
+#define PRINT_F_PLUS (1 << 1)
+#define PRINT_F_SPACE (1 << 2)
+#define PRINT_F_NUM (1 << 3)
+#define PRINT_F_ZERO (1 << 4)
+#define PRINT_F_QUOTE (1 << 5)
+#define PRINT_F_UP (1 << 6)
+#define PRINT_F_UNSIGNED (1 << 7)
+#define PRINT_F_TYPE_G (1 << 8)
+#define PRINT_F_TYPE_E (1 << 9)
+
+/*
+ * Conversion flags. These record the length modifier of a conversion:
+ * "hh", "h", "l", "ll", "L", "z", "t" and "j" respectively. Zero means that
+ * no length modifier was given.
+ */
+#define PRINT_C_CHAR 1
+#define PRINT_C_SHORT 2
+#define PRINT_C_LONG 3
+#define PRINT_C_LLONG 4
+#define PRINT_C_LDOUBLE 5
+#define PRINT_C_SIZE 6
+#define PRINT_C_PTRDIFF 7
+#define PRINT_C_INTMAX 8
+
+/*
+ * Small helper macros. Every argument is parenthesized so that expressions
+ * containing operators of lower precedence expand correctly. Arguments are
+ * still evaluated more than once, so they must not have side effects.
+ */
+#ifndef MAX
+#define MAX(x, y) (((x) >= (y)) ? (x) : (y))
+#endif /* !defined(MAX) */
+#ifndef CHARTOINT
+/* Numeric value of the decimal digit character "ch". */
+#define CHARTOINT(ch) ((ch) - '0')
+#endif /* !defined(CHARTOINT) */
+#ifndef ISDIGIT
+/* Locale-independent test for '0'..'9'. */
+#define ISDIGIT(ch) ('0' <= (unsigned char)(ch) && (unsigned char)(ch) <= '9')
+#endif /* !defined(ISDIGIT) */
+#ifndef ISNAN
+/* A NaN is the only value which compares unequal to itself. */
+#define ISNAN(x) ((x) != (x))
+#endif /* !defined(ISNAN) */
+#ifndef ISINF
+/* An infinity is the only non-zero value for which x + x == x. */
+#define ISINF(x) ((x) != 0.0 && (x) + (x) == (x))
+#endif /* !defined(ISINF) */
+
+/*
+ * Append the character "ch" to the output.
+ *
+ * The character is only stored if it fits into the "size" bytes of "str"
+ * while leaving room for the terminating nul; "len" is incremented in either
+ * case, so it keeps counting the characters which would have been written to
+ * a sufficiently sized buffer. "len" must be an lvalue; "len" and "size" are
+ * evaluated more than once.
+ */
+#ifdef OUTCHAR
+#undef OUTCHAR
+#endif /* defined(OUTCHAR) */
+#define OUTCHAR(str, len, size, ch)					\
+do {									\
+	if ((len) + 1 < (size))						\
+		(str)[len] = (ch);					\
+	(len)++;							\
+} while (/* CONSTCOND */ 0)
+
+/*
+ * Forward declarations of the file-local helpers. The fmt*() functions take
+ * the output buffer, a pointer to the running output length and the buffer
+ * size as their first three arguments.
+ */
+static void fmtstr(char *, size_t *, size_t, const char *, int, int, int);
+static void fmtint(char *, size_t *, size_t, INTMAX_T, int, int, int, int);
+static void fmtflt(char *, size_t *, size_t, LDOUBLE, int, int, int, int *);
+static void printsep(char *, size_t *, size_t);
+static int getnumsep(int);
+static int getexponent(LDOUBLE);
+static int convert(UINTMAX_T, char *, size_t, int, int);
+static UINTMAX_T cast(LDOUBLE);
+static UINTMAX_T myround(LDOUBLE);
+static LDOUBLE mypow10(int);
+
+/*
+ * Replacement for vsnprintf(3).
+ *
+ * Formats "args" according to "format" and stores at most "size" - 1
+ * characters plus a terminating nul in "str". "str" may be NULL, in which
+ * case nothing is stored and only the length is computed.
+ *
+ * The format string is parsed by a small state machine (PRINT_S_*): for each
+ * conversion specification the flags, field width, precision and length
+ * modifier are collected before the conversion itself is performed, after
+ * which all of them are reset.
+ *
+ * Returns the number of characters which would have been written to a
+ * sufficiently sized buffer (excluding the terminating nul), or -1 if the
+ * field width, the precision, a floating point conversion or the return
+ * value overflowed.
+ */
+int
+rpl_vsnprintf(char *str, size_t size, const char *format, va_list args)
+{
+	LDOUBLE fvalue;
+	INTMAX_T value;
+	unsigned char cvalue;
+	const char *strvalue;
+	INTMAX_T *intmaxptr;
+	PTRDIFF_T *ptrdiffptr;
+	SSIZE_T *sizeptr;
+	LLONG *llongptr;
+	long int *longptr;
+	int *intptr;
+	short int *shortptr;
+	signed char *charptr;
+	size_t len = 0;
+	int overflow = 0;
+	int base = 0;
+	int cflags = 0;
+	int flags = 0;
+	int width = 0;
+	int precision = -1;
+	int state = PRINT_S_DEFAULT;
+	char ch = *format++;
+
+	/*
+	 * C99 says: "If `n' is zero, nothing is written, and `s' may be a null
+	 * pointer." (7.19.6.5, 2)  We're forgiving and allow a NULL pointer
+	 * even if a size larger than zero was specified.  At least NetBSD's
+	 * snprintf(3) does the same, as well as other versions of this file.
+	 * (Though some of these versions will write to a non-NULL buffer even
+	 * if a size of zero was specified, which violates the standard.)
+	 */
+	if (str == NULL && size != 0)
+		size = 0;
+
+	while (ch != '\0')
+		switch (state) {
+		case PRINT_S_DEFAULT:
+			if (ch == '%')
+				state = PRINT_S_FLAGS;
+			else
+				OUTCHAR(str, len, size, ch);
+			ch = *format++;
+			break;
+		case PRINT_S_FLAGS:
+			switch (ch) {
+			case '-':
+				flags |= PRINT_F_MINUS;
+				ch = *format++;
+				break;
+			case '+':
+				flags |= PRINT_F_PLUS;
+				ch = *format++;
+				break;
+			case ' ':
+				flags |= PRINT_F_SPACE;
+				ch = *format++;
+				break;
+			case '#':
+				flags |= PRINT_F_NUM;
+				ch = *format++;
+				break;
+			case '0':
+				flags |= PRINT_F_ZERO;
+				ch = *format++;
+				break;
+			case '\'':	/* SUSv2 flag (not in C99). */
+				flags |= PRINT_F_QUOTE;
+				ch = *format++;
+				break;
+			default:
+				state = PRINT_S_WIDTH;
+				break;
+			}
+			break;
+		case PRINT_S_WIDTH:
+			if (ISDIGIT(ch)) {
+				ch = CHARTOINT(ch);
+				if (width > (INT_MAX - ch) / 10) {
+					overflow = 1;
+					goto out;
+				}
+				width = 10 * width + ch;
+				ch = *format++;
+			} else if (ch == '*') {
+				/*
+				 * C99 says: "A negative field width argument is
+				 * taken as a `-' flag followed by a positive
+				 * field width." (7.19.6.1, 5)
+				 */
+				if ((width = va_arg(args, int)) < 0) {
+					/*
+					 * INT_MIN has no positive counterpart;
+					 * negating it would overflow.  Treat it
+					 * like any other width which is too
+					 * large.
+					 */
+					if (width == INT_MIN) {
+						overflow = 1;
+						goto out;
+					}
+					flags |= PRINT_F_MINUS;
+					width = -width;
+				}
+				ch = *format++;
+				state = PRINT_S_DOT;
+			} else
+				state = PRINT_S_DOT;
+			break;
+		case PRINT_S_DOT:
+			if (ch == '.') {
+				state = PRINT_S_PRECISION;
+				ch = *format++;
+			} else
+				state = PRINT_S_MOD;
+			break;
+		case PRINT_S_PRECISION:
+			if (precision == -1)
+				precision = 0;
+			if (ISDIGIT(ch)) {
+				ch = CHARTOINT(ch);
+				if (precision > (INT_MAX - ch) / 10) {
+					overflow = 1;
+					goto out;
+				}
+				precision = 10 * precision + ch;
+				ch = *format++;
+			} else if (ch == '*') {
+				/*
+				 * C99 says: "A negative precision argument is
+				 * taken as if the precision were omitted."
+				 * (7.19.6.1, 5)
+				 */
+				if ((precision = va_arg(args, int)) < 0)
+					precision = -1;
+				ch = *format++;
+				state = PRINT_S_MOD;
+			} else
+				state = PRINT_S_MOD;
+			break;
+		case PRINT_S_MOD:
+			switch (ch) {
+			case 'h':
+				ch = *format++;
+				if (ch == 'h') {	/* It's a char. */
+					ch = *format++;
+					cflags = PRINT_C_CHAR;
+				} else
+					cflags = PRINT_C_SHORT;
+				break;
+			case 'l':
+				ch = *format++;
+				if (ch == 'l') {	/* It's a long long. */
+					ch = *format++;
+					cflags = PRINT_C_LLONG;
+				} else
+					cflags = PRINT_C_LONG;
+				break;
+			case 'L':
+				cflags = PRINT_C_LDOUBLE;
+				ch = *format++;
+				break;
+			case 'j':
+				cflags = PRINT_C_INTMAX;
+				ch = *format++;
+				break;
+			case 't':
+				cflags = PRINT_C_PTRDIFF;
+				ch = *format++;
+				break;
+			case 'z':
+				cflags = PRINT_C_SIZE;
+				ch = *format++;
+				break;
+			}
+			state = PRINT_S_CONV;
+			break;
+		case PRINT_S_CONV:
+			switch (ch) {
+			case 'd':
+				/* FALLTHROUGH */
+			case 'i':
+				switch (cflags) {
+				case PRINT_C_CHAR:
+					value = (signed char)va_arg(args, int);
+					break;
+				case PRINT_C_SHORT:
+					value = (short int)va_arg(args, int);
+					break;
+				case PRINT_C_LONG:
+					value = va_arg(args, long int);
+					break;
+				case PRINT_C_LLONG:
+					value = va_arg(args, LLONG);
+					break;
+				case PRINT_C_SIZE:
+					value = va_arg(args, SSIZE_T);
+					break;
+				case PRINT_C_INTMAX:
+					value = va_arg(args, INTMAX_T);
+					break;
+				case PRINT_C_PTRDIFF:
+					value = va_arg(args, PTRDIFF_T);
+					break;
+				default:
+					value = va_arg(args, int);
+					break;
+				}
+				fmtint(str, &len, size, value, 10, width,
+				    precision, flags);
+				break;
+			case 'X':
+				flags |= PRINT_F_UP;
+				/* FALLTHROUGH */
+			case 'x':
+				base = 16;
+				/* FALLTHROUGH */
+			case 'o':
+				if (base == 0)
+					base = 8;
+				/* FALLTHROUGH */
+			case 'u':
+				if (base == 0)
+					base = 10;
+				flags |= PRINT_F_UNSIGNED;
+				switch (cflags) {
+				case PRINT_C_CHAR:
+					value = (unsigned char)va_arg(args,
+					    unsigned int);
+					break;
+				case PRINT_C_SHORT:
+					value = (unsigned short int)va_arg(args,
+					    unsigned int);
+					break;
+				case PRINT_C_LONG:
+					value = va_arg(args, unsigned long int);
+					break;
+				case PRINT_C_LLONG:
+					value = va_arg(args, ULLONG);
+					break;
+				case PRINT_C_SIZE:
+					value = va_arg(args, size_t);
+					break;
+				case PRINT_C_INTMAX:
+					value = va_arg(args, UINTMAX_T);
+					break;
+				case PRINT_C_PTRDIFF:
+					value = va_arg(args, UPTRDIFF_T);
+					break;
+				default:
+					value = va_arg(args, unsigned int);
+					break;
+				}
+				fmtint(str, &len, size, value, base, width,
+				    precision, flags);
+				break;
+			case 'A':
+				/* Not yet supported, we'll use "%F". */
+				/* FALLTHROUGH */
+			case 'F':
+				flags |= PRINT_F_UP;
+				/* FALLTHROUGH */
+			case 'a':
+				/* Not yet supported, we'll use "%f". */
+				/* FALLTHROUGH */
+			case 'f':
+				if (cflags == PRINT_C_LDOUBLE)
+					fvalue = va_arg(args, LDOUBLE);
+				else
+					fvalue = va_arg(args, double);
+				fmtflt(str, &len, size, fvalue, width,
+				    precision, flags, &overflow);
+				if (overflow)
+					goto out;
+				break;
+			case 'E':
+				flags |= PRINT_F_UP;
+				/* FALLTHROUGH */
+			case 'e':
+				flags |= PRINT_F_TYPE_E;
+				if (cflags == PRINT_C_LDOUBLE)
+					fvalue = va_arg(args, LDOUBLE);
+				else
+					fvalue = va_arg(args, double);
+				fmtflt(str, &len, size, fvalue, width,
+				    precision, flags, &overflow);
+				if (overflow)
+					goto out;
+				break;
+			case 'G':
+				flags |= PRINT_F_UP;
+				/* FALLTHROUGH */
+			case 'g':
+				flags |= PRINT_F_TYPE_G;
+				if (cflags == PRINT_C_LDOUBLE)
+					fvalue = va_arg(args, LDOUBLE);
+				else
+					fvalue = va_arg(args, double);
+				/*
+				 * If the precision is zero, it is treated as
+				 * one (cf. C99: 7.19.6.1, 8).
+				 */
+				if (precision == 0)
+					precision = 1;
+				fmtflt(str, &len, size, fvalue, width,
+				    precision, flags, &overflow);
+				if (overflow)
+					goto out;
+				break;
+			case 'c':
+				cvalue = (unsigned char)va_arg(args, int);
+				OUTCHAR(str, len, size, cvalue);
+				break;
+			case 's':
+				strvalue = va_arg(args, char *);
+				fmtstr(str, &len, size, strvalue, width,
+				    precision, flags);
+				break;
+			case 'p':
+				/*
+				 * C99 says: "The value of the pointer is
+				 * converted to a sequence of printing
+				 * characters, in an implementation-defined
+				 * manner." (C99: 7.19.6.1, 8)
+				 */
+				if ((strvalue = va_arg(args, void *)) == NULL)
+					/*
+					 * We use the glibc format.  BSD prints
+					 * "0x0", SysV "0".
+					 */
+					fmtstr(str, &len, size, "(nil)", width,
+					    -1, flags);
+				else {
+					/*
+					 * We use the BSD/glibc format.  SysV
+					 * omits the "0x" prefix (which we emit
+					 * using the PRINT_F_NUM flag).
+					 */
+					flags |= PRINT_F_NUM;
+					flags |= PRINT_F_UNSIGNED;
+					fmtint(str, &len, size,
+					    (UINTPTR_T)strvalue, 16, width,
+					    precision, flags);
+				}
+				break;
+			case 'n':
+				/* Store the number of characters output so far. */
+				switch (cflags) {
+				case PRINT_C_CHAR:
+					charptr = va_arg(args, signed char *);
+					*charptr = len;
+					break;
+				case PRINT_C_SHORT:
+					shortptr = va_arg(args, short int *);
+					*shortptr = len;
+					break;
+				case PRINT_C_LONG:
+					longptr = va_arg(args, long int *);
+					*longptr = len;
+					break;
+				case PRINT_C_LLONG:
+					llongptr = va_arg(args, LLONG *);
+					*llongptr = len;
+					break;
+				case PRINT_C_SIZE:
+					/*
+					 * C99 says that with the "z" length
+					 * modifier, "a following `n' conversion
+					 * specifier applies to a pointer to a
+					 * signed integer type corresponding to
+					 * size_t argument." (7.19.6.1, 7)
+					 */
+					sizeptr = va_arg(args, SSIZE_T *);
+					*sizeptr = len;
+					break;
+				case PRINT_C_INTMAX:
+					intmaxptr = va_arg(args, INTMAX_T *);
+					*intmaxptr = len;
+					break;
+				case PRINT_C_PTRDIFF:
+					ptrdiffptr = va_arg(args, PTRDIFF_T *);
+					*ptrdiffptr = len;
+					break;
+				default:
+					intptr = va_arg(args, int *);
+					*intptr = len;
+					break;
+				}
+				break;
+			case '%':	/* Print a "%" character verbatim. */
+				OUTCHAR(str, len, size, ch);
+				break;
+			default:	/* Skip other characters. */
+				break;
+			}
+			/* The conversion is done; reset the per-conversion state. */
+			ch = *format++;
+			state = PRINT_S_DEFAULT;
+			base = cflags = flags = width = 0;
+			precision = -1;
+			break;
+		}
+out:
+	/* Terminate the output, truncating it if it didn't fit. */
+	if (len < size)
+		str[len] = '\0';
+	else if (size > 0)
+		str[size - 1] = '\0';
+
+	if (overflow || len >= INT_MAX) {
+		return -1;
+	}
+	return (int)len;
+}
+
+/*
+ * Output a string, padded with spaces to the given field width.
+ *
+ * str, len, size: output buffer, running output length, and buffer size (as
+ *                 used by OUTCHAR()).
+ * value:          the string to print; NULL is printed as "(null)".
+ * width:          minimum field width.
+ * precision:      maximum number of characters taken from "value", or -1 for
+ *                 no limit.  "value" is never read past this limit.
+ * flags:          PRINT_F_MINUS selects left justification.
+ */
+static void
+fmtstr(char *str, size_t *len, size_t size, const char *value, int width,
+       int precision, int flags)
+{
+	const int unbounded = (precision == -1);
+	const int leftjustify = (flags & PRINT_F_MINUS) != 0;
+	int nchars = 0;	/* Number of characters taken from "value". */
+	int npad;	/* Number of padding spaces. */
+	int i;
+
+	if (value == NULL)	/* We're forgiving. */
+		value = "(null)";
+
+	/* Measure the string, but stop at the precision (if any). */
+	while (value[nchars] != '\0' && (unbounded || nchars < precision))
+		nchars++;
+
+	npad = (width > nchars) ? width - nchars : 0;
+
+	if (!leftjustify) {	/* Right justify: pad in front. */
+		for (i = 0; i < npad; i++) {
+			OUTCHAR(str, *len, size, ' ');
+		}
+	}
+	for (i = 0; i < nchars; i++) {
+		OUTCHAR(str, *len, size, value[i]);
+	}
+	if (leftjustify) {	/* Left justify: pad behind. */
+		for (i = 0; i < npad; i++) {
+			OUTCHAR(str, *len, size, ' ');
+		}
+	}
+}
+
+/*
+ * Output an integer.
+ *
+ * str, len, size: output buffer, running output length, and buffer size (as
+ *                 used by OUTCHAR()).
+ * value:          the number to print.  If PRINT_F_UNSIGNED is set, it is
+ *                 converted back to UINTMAX_T and printed as an unsigned
+ *                 value.
+ * base:           numeric base handed to convert(); the callers in this file
+ *                 use 8, 10 and 16.
+ * width:          minimum field width.
+ * precision:      minimum number of digits, or -1 if none was specified.
+ * flags:          PRINT_F_* bit mask.
+ */
+static void
+fmtint(char *str, size_t *len, size_t size, INTMAX_T value, int base, int width,
+       int precision, int flags)
+{
+	UINTMAX_T uvalue;
+	char iconvert[MAX_CONVERT_LENGTH];
+	char sign = 0;
+	char hexprefix = 0;
+	int spadlen = 0;	/* Amount to space pad. */
+	int zpadlen = 0;	/* Amount to zero pad. */
+	int pos;
+	int separators = (flags & PRINT_F_QUOTE);
+	int noprecision = (precision == -1);
+
+	if (flags & PRINT_F_UNSIGNED)
+		uvalue = value;
+	else {
+		/*
+		 * Compute the magnitude in unsigned arithmetic: negating the
+		 * most negative INTMAX_T value as a signed quantity would
+		 * overflow, which is undefined behaviour.
+		 */
+		if (value >= 0)
+			uvalue = (UINTMAX_T)value;
+		else
+			uvalue = (UINTMAX_T)0 - (UINTMAX_T)value;
+		if (value < 0)
+			sign = '-';
+		else if (flags & PRINT_F_PLUS)	/* Do a sign. */
+			sign = '+';
+		else if (flags & PRINT_F_SPACE)
+			sign = ' ';
+	}
+
+	/* The digits are stored in reverse order; "pos" is their number. */
+	pos = convert(uvalue, iconvert, sizeof(iconvert), base,
+	    flags & PRINT_F_UP);
+
+	if (flags & PRINT_F_NUM && uvalue != 0) {
+		/*
+		 * C99 says: "The result is converted to an `alternative form'.
+		 * For `o' conversion, it increases the precision, if and only
+		 * if necessary, to force the first digit of the result to be a
+		 * zero (if the value and precision are both 0, a single 0 is
+		 * printed).  For `x' (or `X') conversion, a nonzero result has
+		 * `0x' (or `0X') prefixed to it." (7.19.6.1, 6)
+		 */
+		switch (base) {
+		case 8:
+			if (precision <= pos)
+				precision = pos + 1;
+			break;
+		case 16:
+			hexprefix = (flags & PRINT_F_UP) ? 'X' : 'x';
+			break;
+		}
+	}
+
+	if (separators)	/* Get the number of group separators we'll print. */
+		separators = getnumsep(pos);
+
+	zpadlen = precision - pos - separators;
+	spadlen = width                         /* Minimum field width. */
+	    - separators                        /* Number of separators. */
+	    - MAX(precision, pos)               /* Number of integer digits. */
+	    - ((sign != 0) ? 1 : 0)             /* Will we print a sign? */
+	    - ((hexprefix != 0) ? 2 : 0);       /* Will we print a prefix? */
+
+	if (zpadlen < 0)
+		zpadlen = 0;
+	if (spadlen < 0)
+		spadlen = 0;
+
+	/*
+	 * C99 says: "If the `0' and `-' flags both appear, the `0' flag is
+	 * ignored.  For `d', `i', `o', `u', `x', and `X' conversions, if a
+	 * precision is specified, the `0' flag is ignored." (7.19.6.1, 6)
+	 */
+	if (flags & PRINT_F_MINUS)	/* Left justify. */
+		spadlen = -spadlen;
+	else if (flags & PRINT_F_ZERO && noprecision) {
+		zpadlen += spadlen;
+		spadlen = 0;
+	}
+	while (spadlen > 0) {	/* Leading spaces. */
+		OUTCHAR(str, *len, size, ' ');
+		spadlen--;
+	}
+	if (sign != 0)	/* Sign. */
+		OUTCHAR(str, *len, size, sign);
+	if (hexprefix != 0) {	/* A "0x" or "0X" prefix. */
+		OUTCHAR(str, *len, size, '0');
+		OUTCHAR(str, *len, size, hexprefix);
+	}
+	while (zpadlen > 0) {	/* Leading zeros. */
+		OUTCHAR(str, *len, size, '0');
+		zpadlen--;
+	}
+	while (pos > 0) {	/* The actual digits. */
+		pos--;
+		OUTCHAR(str, *len, size, iconvert[pos]);
+		if (separators > 0 && pos > 0 && pos % 3 == 0)
+			printsep(str, len, size);
+	}
+	while (spadlen < 0) {	/* Trailing spaces. */
+		OUTCHAR(str, *len, size, ' ');
+		spadlen++;
+	}
+}
+
+static void
+fmtflt(char *str, size_t *len, size_t size, LDOUBLE fvalue, int width,
+ int precision, int flags, int *overflow)
+{
+ LDOUBLE ufvalue;
+ UINTMAX_T intpart;
+ UINTMAX_T fracpart;
+ UINTMAX_T mask;
+ const char *infnan = NULL;
+ char iconvert[MAX_CONVERT_LENGTH];
+ char fconvert[MAX_CONVERT_LENGTH];
+ char econvert[4]; /* "e-12" (without nul-termination). */
+ char esign = 0;
+ char sign = 0;
+ int leadfraczeros = 0;
+ int exponent = 0;
+ int emitpoint = 0;
+ int omitzeros = 0;
+ int omitcount = 0;
+ int padlen = 0;
+ int epos = 0;
+ int fpos = 0;
+ int ipos = 0;
+ int separators = (flags & PRINT_F_QUOTE);
+ int estyle = (flags & PRINT_F_TYPE_E);
+#if HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT
+ struct lconv *lc = localeconv();
+#endif /* HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT */
+
+ /*
+ * AIX' man page says the default is 0, but C99 and at least Solaris'
+ * and NetBSD's man pages say the default is 6, and sprintf(3) on AIX
+ * defaults to 6.
+ */
+ if (precision == -1)
+ precision = 6;
+
+ if (fvalue < 0.0)
+ sign = '-';
+ else if (flags & PRINT_F_PLUS) /* Do a sign. */
+ sign = '+';
+ else if (flags & PRINT_F_SPACE)
+ sign = ' ';
+
+ if (ISNAN(fvalue))
+ infnan = (flags & PRINT_F_UP) ? "NAN" : "nan";
+ else if (ISINF(fvalue))
+ infnan = (flags & PRINT_F_UP) ? "INF" : "inf";
+
+ if (infnan != NULL) {
+ if (sign != 0)
+ iconvert[ipos++] = sign;
+ while (*infnan != '\0')
+ iconvert[ipos++] = *infnan++;
+ fmtstr(str, len, size, iconvert, width, ipos, flags);
+ return;
+ }
+
+ /* "%e" (or "%E") or "%g" (or "%G") conversion. */
+ if (flags & PRINT_F_TYPE_E || flags & PRINT_F_TYPE_G) {
+ if (flags & PRINT_F_TYPE_G) {
+ /*
+ * For "%g" (and "%G") conversions, the precision
+ * specifies the number of significant digits, which
+ * includes the digits in the integer part. The
+ * conversion will or will not be using "e-style" (like
+ * "%e" or "%E" conversions) depending on the precision
+ * and on the exponent. However, the exponent can be
+ * affected by rounding the converted value, so we'll
+ * leave this decision for later. Until then, we'll
+ * assume that we're going to do an "e-style" conversion
+ * (in order to get the exponent calculated). For
+ * "e-style", the precision must be decremented by one.
+ */
+ precision--;
+ /*
+ * For "%g" (and "%G") conversions, trailing zeros are
+ * removed from the fractional portion of the result
+ * unless the "#" flag was specified.
+ */
+ if (!(flags & PRINT_F_NUM))
+ omitzeros = 1;
+ }
+ exponent = getexponent(fvalue);
+ estyle = 1;
+ }
+
+again:
+ /*
+ * Sorry, we only support 9, 19, or 38 digits (that is, the number of
+ * digits of the 32-bit, the 64-bit, or the 128-bit UINTMAX_MAX value
+ * minus one) past the decimal point due to our conversion method.
+ */
+ switch (sizeof(UINTMAX_T)) {
+ case 16:
+ if (precision > 38)
+ precision = 38;
+ break;
+ case 8:
+ if (precision > 19)
+ precision = 19;
+ break;
+ default:
+ if (precision > 9)
+ precision = 9;
+ break;
+ }
+
+ ufvalue = (fvalue >= 0.0) ? fvalue : -fvalue;
+ if (estyle) /* We want exactly one integer digit. */
+ ufvalue /= mypow10(exponent);
+
+ if ((intpart = cast(ufvalue)) == UINTMAX_MAX) {
+ *overflow = 1;
+ return;
+ }
+
+ /*
+ * Factor of ten with the number of digits needed for the fractional
+ * part. For example, if the precision is 3, the mask will be 1000.
+ */
+ mask = (UINTMAX_T)mypow10(precision);
+ /*
+ * We "cheat" by converting the fractional part to integer by
+ * multiplying by a factor of ten.
+ */
+ if ((fracpart = myround(mask * (ufvalue - intpart))) >= mask) {
+ /*
+ * For example, ufvalue = 2.99962, intpart = 2, and mask = 1000
+ * (because precision = 3). Now, myround(1000 * 0.99962) will
+ * return 1000. So, the integer part must be incremented by one
+ * and the fractional part must be set to zero.
+ */
+ intpart++;
+ fracpart = 0;
+ if (estyle && intpart == 10) {
+ /*
+ * The value was rounded up to ten, but we only want one
+ * integer digit if using "e-style". So, the integer
+ * part must be set to one and the exponent must be
+ * incremented by one.
+ */
+ intpart = 1;
+ exponent++;
+ }
+ }
+
+ /*
+ * Now that we know the real exponent, we can check whether or not to
+ * use "e-style" for "%g" (and "%G") conversions. If we don't need
+ * "e-style", the precision must be adjusted and the integer and
+ * fractional parts must be recalculated from the original value.
+ *
+ * C99 says: "Let P equal the precision if nonzero, 6 if the precision
+ * is omitted, or 1 if the precision is zero. Then, if a conversion
+ * with style `E' would have an exponent of X:
+ *
+ * - if P > X >= -4, the conversion is with style `f' (or `F') and
+ * precision P - (X + 1).
+ *
+ * - otherwise, the conversion is with style `e' (or `E') and precision
+ * P - 1." (7.19.6.1, 8)
+ *
+ * Note that we had decremented the precision by one.
+ */
+ if (flags & PRINT_F_TYPE_G && estyle &&
+ precision + 1 > exponent && exponent >= -4) {
+ precision -= exponent;
+ estyle = 0;
+ goto again;
+ }
+
+ if (estyle) {
+ if (exponent < 0) {
+ exponent = -exponent;
+ esign = '-';
+ } else
+ esign = '+';
+
+ /*
+ * Convert the exponent. The sizeof(econvert) is 4. So, the
+ * econvert buffer can hold e.g. "e+99" and "e-99". We don't
+ * support an exponent which contains more than two digits.
+ * Therefore, the following stores are safe.
+ */
+ epos = convert(exponent, econvert, 2, 10, 0);
+ /*
+ * C99 says: "The exponent always contains at least two digits,
+ * and only as many more digits as necessary to represent the
+ * exponent." (7.19.6.1, 8)
+ */
+ if (epos == 1)
+ econvert[epos++] = '0';
+ econvert[epos++] = esign;
+ econvert[epos++] = (flags & PRINT_F_UP) ? 'E' : 'e';
+ }
+
+ /* Convert the integer part and the fractional part. */
+ ipos = convert(intpart, iconvert, sizeof(iconvert), 10, 0);
+ if (fracpart != 0) /* convert() would return 1 if fracpart == 0. */
+ fpos = convert(fracpart, fconvert, sizeof(fconvert), 10, 0);
+
+ leadfraczeros = precision - fpos;
+
+ if (omitzeros) {
+ if (fpos > 0) /* Omit trailing fractional part zeros. */
+ while (omitcount < fpos && fconvert[omitcount] == '0')
+ omitcount++;
+ else { /* The fractional part is zero, omit it completely. */
+ omitcount = precision;
+ leadfraczeros = 0;
+ }
+ precision -= omitcount;
+ }
+
+ /*
+ * Print a decimal point if either the fractional part is non-zero
+ * and/or the "#" flag was specified.
+ */
+ if (precision > 0 || flags & PRINT_F_NUM)
+ emitpoint = 1;
+ if (separators) /* Get the number of group separators we'll print. */
+ separators = getnumsep(ipos);
+
+ padlen = width /* Minimum field width. */
+ - ipos /* Number of integer digits. */
+ - epos /* Number of exponent characters. */
+ - precision /* Number of fractional digits. */
+ - separators /* Number of group separators. */
+ - (emitpoint ? 1 : 0) /* Will we print a decimal point? */
+ - ((sign != 0) ? 1 : 0); /* Will we print a sign character? */
+
+ if (padlen < 0)
+ padlen = 0;
+
+ /*
+ * C99 says: "If the `0' and `-' flags both appear, the `0' flag is
+ * ignored." (7.19.6.1, 6)
+ */
+ if (flags & PRINT_F_MINUS) /* Left justify. */
+ padlen = -padlen;
+ else if (flags & PRINT_F_ZERO && padlen > 0) {
+ if (sign != 0) { /* Sign. */
+ OUTCHAR(str, *len, size, sign);
+ sign = 0;
+ }
+ while (padlen > 0) { /* Leading zeros. */
+ OUTCHAR(str, *len, size, '0');
+ padlen--;
+ }
+ }
+ while (padlen > 0) { /* Leading spaces. */
+ OUTCHAR(str, *len, size, ' ');
+ padlen--;
+ }
+ if (sign != 0) /* Sign. */
+ OUTCHAR(str, *len, size, sign);
+ while (ipos > 0) { /* Integer part. */
+ ipos--;
+ OUTCHAR(str, *len, size, iconvert[ipos]);
+ if (separators > 0 && ipos > 0 && ipos % 3 == 0)
+ printsep(str, len, size);
+ }
+ if (emitpoint) { /* Decimal point. */
+#if HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT
+ if (lc->decimal_point != NULL && *lc->decimal_point != '\0')
+ OUTCHAR(str, *len, size, *lc->decimal_point);
+ else /* We'll always print some decimal point character. */
+#endif /* HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT */
+ OUTCHAR(str, *len, size, '.');
+ }
+ while (leadfraczeros > 0) { /* Leading fractional part zeros. */
+ OUTCHAR(str, *len, size, '0');
+ leadfraczeros--;
+ }
+ while (fpos > omitcount) { /* The remaining fractional part. */
+ fpos--;
+ OUTCHAR(str, *len, size, fconvert[fpos]);
+ }
+ while (epos > 0) { /* Exponent. */
+ epos--;
+ OUTCHAR(str, *len, size, econvert[epos]);
+ }
+ while (padlen < 0) { /* Trailing spaces. */
+ OUTCHAR(str, *len, size, ' ');
+ padlen++;
+ }
+}
+
+/*
+ * Emit one digit-group separator through OUTCHAR().
+ *
+ * When locale support is compiled in and the current locale supplies a
+ * thousands_sep string, every character of that string is emitted (an
+ * empty string therefore emits nothing).  In all other cases a single
+ * comma is emitted.
+ */
+static void
+printsep(char *str, size_t *len, size_t size)
+{
+#if HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP
+	const char *sep = localeconv()->thousands_sep;
+
+	if (sep != NULL) {
+		while (*sep != '\0') {
+			OUTCHAR(str, *len, size, *sep);
+			sep++;
+		}
+		return;
+	}
+#endif /* HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP */
+	/* No usable locale information: fall back to a comma. */
+	OUTCHAR(str, *len, size, ',');
+}
+
+/*
+ * Return the number of characters that group separators will add to an
+ * integer part consisting of `digits' digits.
+ *
+ * One separator is placed in front of every complete group of three
+ * digits except the leading one, e.g. 1,234 or 123,456.  If locale
+ * support is compiled in and the locale provides a thousands_sep string,
+ * the count is multiplied by the length of that string (which may be
+ * zero).
+ */
+static int
+getnumsep(int digits)
+{
+	int count = digits;
+#if HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP
+	const char *sep = localeconv()->thousands_sep;
+	int seplen = 0;
+#endif /* HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP */
+
+	/* A digit count divisible by three has no separator at the front. */
+	if (digits % 3 == 0)
+		count--;
+	count /= 3;
+
+#if HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP
+	/* Scale by the separator's length, whatever it is. */
+	if (sep != NULL) {
+		while (sep[seplen] != '\0')
+			seplen++;
+		count *= seplen;
+	}
+#endif /* HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP */
+	return count;
+}
+
+/*
+ * Compute the decimal exponent of `value', i.e. the power of ten by which
+ * its magnitude has to be divided to end up in the range [1, 10).  Zero
+ * yields an exponent of zero.
+ *
+ * The result is clamped to the range [-99, 99].  Besides matching the
+ * two-digit exponent limit, the clamp guarantees that both loops
+ * terminate even for an infinite argument (which callers are expected to
+ * have filtered out beforehand, so this is purely defensive).
+ */
+static int
+getexponent(LDOUBLE value)
+{
+	LDOUBLE magnitude;
+	int exponent = 0;
+
+	if (value >= 0.0)
+		magnitude = value;
+	else
+		magnitude = -value;
+
+	/* Scale small values up until they reach one. */
+	while (magnitude < 1.0 && magnitude > 0.0) {
+		if (--exponent <= -99)
+			break;
+		magnitude *= 10;
+	}
+	/* Scale large values down until they drop below ten. */
+	while (magnitude >= 10.0) {
+		if (++exponent >= 99)
+			break;
+		magnitude /= 10;
+	}
+	return exponent;
+}
+
+/*
+ * Write the digits of `value' in the given `base' into `buf', least
+ * significant digit first, and return how many digits were stored.
+ *
+ * The buffer is NOT NUL-terminated.  At least one digit is always
+ * written (so zero becomes "0"); conversion stops once `size' digits have
+ * been stored, silently dropping any more significant digits.  Letters
+ * are upper case if `caps' is non-zero.
+ */
+static int
+convert(UINTMAX_T value, char *buf, size_t size, int base, int caps)
+{
+	const char *alphabet;
+	size_t used = 0;
+
+	if (caps)
+		alphabet = "0123456789ABCDEF";
+	else
+		alphabet = "0123456789abcdef";
+
+	for (;;) {
+		buf[used++] = alphabet[value % base];
+		value /= base;
+		if (value == 0 || used >= size)
+			break;
+	}
+	return (int)used;
+}
+
+/*
+ * Truncate a floating point value to UINTMAX_T, saturating at
+ * UINTMAX_MAX instead of overflowing.
+ */
+static UINTMAX_T
+cast(LDOUBLE value)
+{
+	UINTMAX_T truncated;
+
+	/*
+	 * Deliberately ">=" rather than ">": when UINTMAX_MAX has no exact
+	 * LDOUBLE representation, the comparison may use the next higher
+	 * representable value (cf. C99: 6.3.1.4, 2).  A `value' equal to
+	 * that would pass a ">" test and then overflow in the conversion.
+	 */
+	if (value >= UINTMAX_MAX)
+		return UINTMAX_MAX;
+
+	truncated = (UINTMAX_T)value;
+	/*
+	 * Some platforms (seen on NetBSD/sparc64 3.0.2 and 4.99.30) round
+	 * instead of truncating when converting long double to an integer,
+	 * turning e.g. 1.9 into 2.  Undo that if it happened.
+	 */
+	if (!(truncated <= value))
+		truncated -= 1;
+	return truncated;
+}
+
+/*
+ * Round `value' to the nearest integer, with halves rounded up.  The
+ * integer part is obtained through cast(), so its saturation behaviour
+ * applies here as well.
+ */
+static UINTMAX_T
+myround(LDOUBLE value)
+{
+	UINTMAX_T whole = cast(value);
+	LDOUBLE rest = value - whole;
+
+	if (rest < 0.5)
+		return whole;
+	return whole + 1;
+}
+
+/*
+ * Return ten raised to the power of `exponent'.  Positive exponents are
+ * handled by repeated multiplication, negative ones by repeated division;
+ * an exponent of zero yields one.
+ */
+static LDOUBLE
+mypow10(int exponent)
+{
+	LDOUBLE power = 1;
+	int steps;
+
+	for (steps = exponent; steps > 0; steps--)
+		power *= 10;
+	for (steps = exponent; steps < 0; steps++)
+		power /= 10;
+	return power;
+}
+#endif /* !HAVE_VSNPRINTF */
+
+#if !HAVE_VASPRINTF
+#if NEED_MYMEMCPY
+/*
+ * Minimal memcpy(3) stand-in: copy `len' bytes from `src' to `dst' in
+ * ascending order and return `dst'.
+ *
+ * Deliberately simple; it is only used as a substitute for va_copy(3),
+ * so speed does not matter.
+ */
+void *
+mymemcpy(void *dst, void *src, size_t len)
+{
+	const char *in = src;
+	char *out = dst;
+	size_t i;
+
+	for (i = 0; i < len; i++)
+		out[i] = in[i];
+	return dst;
+}
+#endif /* NEED_MYMEMCPY */
+
+/*
+ * Replacement for vasprintf(3): format into a freshly malloc'ed string.
+ *
+ * The needed length is measured first by running vsnprintf() with a zero
+ * size on a copy of the argument list; then a buffer of exactly that
+ * length plus one is allocated and the string is formatted for real.
+ *
+ * On success, *ret points to the new string (to be released by the
+ * caller with free(3)) and the vsnprintf() result is returned.  On
+ * failure a negative value is returned, *ret is set to NULL, and no
+ * memory is left allocated.
+ */
+int
+rpl_vasprintf(char **ret, const char *format, va_list ap)
+{
+	size_t size;
+	int len;
+	va_list aq;
+
+	VA_COPY(aq, ap);
+	len = vsnprintf(NULL, 0, format, aq);
+	VA_END_COPY(aq);
+	if (len < 0) {
+		/* Don't hand an indeterminate pointer back to the caller. */
+		*ret = NULL;
+		return -1;
+	}
+
+	/* Widen before adding one so that len == INT_MAX cannot overflow. */
+	size = (size_t)len + 1;
+	if ((*ret = malloc(size)) == NULL)
+		return -1;
+
+	len = vsnprintf(*ret, size, format, ap);
+	if (len < 0) {
+		/* The caller sees an error and won't free, so we must. */
+		free(*ret);
+		*ret = NULL;
+	}
+	return len;
+}
+#endif /* !HAVE_VASPRINTF */
+
+#if !HAVE_SNPRINTF
+/*
+ * Replacement for snprintf(3): gather the variable arguments into a
+ * va_list, hand them to vsnprintf() and return its result unchanged.
+ *
+ * Two signatures are provided: an ANSI prototype when HAVE_STDARG_H is
+ * set, and an old-style va_alist/va_dcl definition otherwise.  In the
+ * latter case the "named" parameters are ordinary locals.
+ */
+#if HAVE_STDARG_H
+int
+rpl_snprintf(char *str, size_t size, const char *format, ...)
+#else
+int
+rpl_snprintf(va_alist) va_dcl
+#endif /* HAVE_STDARG_H */
+{
+#if !HAVE_STDARG_H
+ char *str;
+ size_t size;
+ char *format;
+#endif /* !HAVE_STDARG_H */
+ va_list ap;
+ int len;
+
+ /*
+  * VA_START and VA_SHIFT are defined elsewhere in this file.
+  * NOTE(review): presumably VA_SHIFT pulls str, size and format out of
+  * ap in the old-style build and does nothing in the stdarg build --
+  * confirm against the macro definitions.
+  */
+ VA_START(ap, format);
+ VA_SHIFT(ap, str, char *);
+ VA_SHIFT(ap, size, size_t);
+ VA_SHIFT(ap, format, const char *);
+ len = vsnprintf(str, size, format, ap);
+ va_end(ap);
+ return len;
+}
+#endif /* !HAVE_SNPRINTF */
+
+#if !HAVE_ASPRINTF
+/*
+ * Replacement for asprintf(3): gather the variable arguments into a
+ * va_list, hand them to vasprintf() and return its result unchanged.
+ * The resulting string, if any, is stored through `ret' by vasprintf().
+ *
+ * Two signatures are provided: an ANSI prototype when HAVE_STDARG_H is
+ * set, and an old-style va_alist/va_dcl definition otherwise.  In the
+ * latter case the "named" parameters are ordinary locals.
+ */
+#if HAVE_STDARG_H
+int
+rpl_asprintf(char **ret, const char *format, ...)
+#else
+int
+rpl_asprintf(va_alist) va_dcl
+#endif /* HAVE_STDARG_H */
+{
+#if !HAVE_STDARG_H
+ char **ret;
+ char *format;
+#endif /* !HAVE_STDARG_H */
+ va_list ap;
+ int len;
+
+ /*
+  * VA_START and VA_SHIFT are defined elsewhere in this file.
+  * NOTE(review): presumably VA_SHIFT pulls ret and format out of ap in
+  * the old-style build and does nothing in the stdarg build -- confirm
+  * against the macro definitions.
+  */
+ VA_START(ap, format);
+ VA_SHIFT(ap, ret, char **);
+ VA_SHIFT(ap, format, const char *);
+ len = vasprintf(ret, format, ap);
+ va_end(ap);
+ return len;
+}
+#endif /* !HAVE_ASPRINTF */
+#else /* Dummy declaration to avoid empty translation unit warnings. */
+int main(void);
+#endif /* !HAVE_SNPRINTF || !HAVE_VSNPRINTF || !HAVE_ASPRINTF || [...] */
+
+
+/* vim: set joinspaces textwidth=80: */