summary | refs | log | tree | commit | diff
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/SConscript | 11
-rw-r--r-- src/gallium/auxiliary/cso_cache/cso_cache.c | 196
-rw-r--r-- src/gallium/auxiliary/cso_cache/cso_cache.h | 87
-rw-r--r-- src/gallium/auxiliary/cso_cache/cso_hash.c | 45
-rw-r--r-- src/gallium/auxiliary/cso_cache/cso_hash.h | 22
-rw-r--r-- src/gallium/auxiliary/draw/SConscript | 3
-rw-r--r-- src/gallium/auxiliary/draw/draw_aapoint.c | 202
-rw-r--r-- src/gallium/auxiliary/draw/draw_context.c | 24
-rw-r--r-- src/gallium/auxiliary/draw/draw_pstipple.c | 4
-rw-r--r-- src/gallium/auxiliary/draw/draw_validate.c | 16
-rw-r--r-- src/gallium/auxiliary/pipebuffer/pb_buffer.h | 1
-rw-r--r-- src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c | 2
-rw-r--r-- src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 4
-rw-r--r-- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 19
-rw-r--r-- src/gallium/auxiliary/tgsi/Makefile | 1
-rw-r--r-- src/gallium/auxiliary/tgsi/SConscript | 2
-rw-r--r-- src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 2
-rw-r--r-- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 4
-rw-r--r-- src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 117
-rw-r--r-- src/gallium/auxiliary/tgsi/util/tgsi_scan.h | 57
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_context.h | 2
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 1
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_flush.h | 4
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 6
-rw-r--r-- src/gallium/drivers/cell/spu/spu_dcache.c | 52
-rw-r--r-- src/gallium/drivers/cell/spu/spu_exec.c | 8
-rw-r--r-- src/gallium/drivers/cell/spu/spu_main.c | 5
-rw-r--r-- src/gallium/drivers/cell/spu/spu_main.h | 5
-rw-r--r-- src/gallium/drivers/cell/spu/spu_vertex_fetch.c | 2
-rw-r--r-- src/gallium/drivers/i915simple/i915_context.c | 4
-rw-r--r-- src/gallium/drivers/i915simple/i915_context.h | 43
-rw-r--r-- src/gallium/drivers/i915simple/i915_flush.c | 3
-rw-r--r-- src/gallium/drivers/i915simple/i915_fpc.h | 23
-rw-r--r-- src/gallium/drivers/i915simple/i915_fpc_emit.c | 163
-rw-r--r-- src/gallium/drivers/i915simple/i915_fpc_translate.c | 183
-rw-r--r-- src/gallium/drivers/i915simple/i915_prim_emit.c | 40
-rw-r--r-- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 9
-rw-r--r-- src/gallium/drivers/i915simple/i915_state.c | 38
-rw-r--r-- src/gallium/drivers/i915simple/i915_state_derived.c | 104
-rw-r--r-- src/gallium/drivers/i915simple/i915_state_emit.c | 39
-rw-r--r-- src/gallium/drivers/softpipe/SConscript | 3
-rw-r--r-- src/gallium/drivers/softpipe/sp_fs_exec.c | 4
-rw-r--r-- src/gallium/drivers/softpipe/sp_fs_llvm.c | 2
-rw-r--r-- src/gallium/drivers/softpipe/sp_prim_setup.c | 4
-rw-r--r-- src/gallium/drivers/softpipe/sp_quad.c | 5
-rw-r--r-- src/gallium/drivers/softpipe/sp_state.h | 7
-rw-r--r-- src/gallium/drivers/softpipe/sp_state_derived.c | 39
-rw-r--r-- src/gallium/drivers/softpipe/sp_state_fs.c | 22
-rw-r--r-- src/gallium/drivers/softpipe/sp_tex_sample.c | 6
-rw-r--r-- src/gallium/include/pipe/p_compiler.h | 12
-rw-r--r-- src/gallium/include/pipe/p_context.h | 10
-rw-r--r-- src/gallium/include/pipe/p_defines.h | 8
-rw-r--r-- src/gallium/include/pipe/p_format.h | 8
-rw-r--r-- src/gallium/include/pipe/p_inlines.h | 9
-rw-r--r-- src/gallium/include/pipe/p_pointer.h | 95
-rw-r--r-- src/gallium/include/pipe/p_shader_tokens.h | 2
-rw-r--r-- src/gallium/include/pipe/p_state.h | 10
-rw-r--r-- src/gallium/include/pipe/p_util.h | 44
-rw-r--r-- src/gallium/include/pipe/p_winsys.h | 20
-rw-r--r-- src/gallium/winsys/SConscript | 3
-rw-r--r-- src/gallium/winsys/dri/intel/intel_batchbuffer.c | 1
-rw-r--r-- src/gallium/winsys/dri/intel/intel_batchbuffer.h | 1
-rw-r--r-- src/gallium/winsys/dri/intel/intel_context.c | 3
63 files changed, 1345 insertions, 526 deletions
diff --git a/src/gallium/SConscript b/src/gallium/SConscript
index a08b4b830e..fa4833cbcf 100644
--- a/src/gallium/SConscript
+++ b/src/gallium/SConscript
@@ -1,3 +1,5 @@
+import os
+
Import('*')
env = env.Clone()
@@ -20,10 +22,5 @@ SConscript([
if llvm:
SConscript(['auxiliary/gallivm/SConscript'])
-SConscript([
- 'drivers/softpipe/SConscript',
- 'drivers/i915simple/SConscript',
- 'drivers/i965simple/SConscript',
- 'drivers/failover/SConscript',
- #'drivers/cell/SConscript',
-])
+for driver in env['drivers']:
+ SConscript(os.path.join('drivers', driver, 'SConscript'))
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c
index 9e77e0774d..b427b509f8 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.c
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.c
@@ -28,9 +28,22 @@
/* Authors: Zack Rusin <zack@tungstengraphics.com>
*/
+#include "pipe/p_util.h"
+
#include "cso_cache.h"
#include "cso_hash.h"
+
+struct cso_cache {
+ struct cso_hash *blend_hash;
+ struct cso_hash *depth_stencil_hash;
+ struct cso_hash *fs_hash;
+ struct cso_hash *vs_hash;
+ struct cso_hash *rasterizer_hash;
+ struct cso_hash *sampler_hash;
+ int max_size;
+};
+
#if 1
static unsigned hash_key(const void *key, unsigned key_size)
{
@@ -114,12 +127,106 @@ static int _cso_size_for_type(enum cso_cache_type type)
return 0;
}
+
+static void delete_blend_state(void *state, void *data)
+{
+ struct cso_blend *cso = (struct cso_blend *)state;
+ if (cso->delete_state && cso->data != &cso->state)
+ cso->delete_state(cso->context, cso->data);
+}
+
+static void delete_depth_stencil_state(void *state, void *data)
+{
+ struct cso_depth_stencil_alpha *cso = (struct cso_depth_stencil_alpha *)state;
+ if (cso->delete_state && cso->data != &cso->state)
+ cso->delete_state(cso->context, cso->data);
+}
+
+static void delete_sampler_state(void *state, void *data)
+{
+ struct cso_sampler *cso = (struct cso_sampler *)state;
+ if (cso->delete_state && cso->data != &cso->state)
+ cso->delete_state(cso->context, cso->data);
+}
+
+static void delete_rasterizer_state(void *state, void *data)
+{
+ struct cso_rasterizer *cso = (struct cso_rasterizer *)state;
+ if (cso->delete_state && cso->data != &cso->state)
+ cso->delete_state(cso->context, cso->data);
+}
+
+static void delete_fs_state(void *state, void *data)
+{
+ struct cso_fragment_shader *cso = (struct cso_fragment_shader *)state;
+ if (cso->delete_state && cso->data != &cso->state)
+ cso->delete_state(cso->context, cso->data);
+}
+
+static void delete_vs_state(void *state, void *data)
+{
+ struct cso_vertex_shader *cso = (struct cso_vertex_shader *)state;
+ if (cso->delete_state && cso->data != &cso->state)
+ cso->delete_state(cso->context, cso->data);
+}
+
+
+static INLINE void delete_cso(void *state, enum cso_cache_type type)
+{
+ switch (type) {
+ case CSO_BLEND: {
+ delete_blend_state(state, 0);
+ }
+ break;
+ case CSO_SAMPLER: {
+ delete_sampler_state(state, 0);
+ }
+ break;
+ case CSO_DEPTH_STENCIL_ALPHA: {
+ delete_depth_stencil_state(state, 0);
+ }
+ break;
+ case CSO_RASTERIZER: {
+ delete_rasterizer_state(state, 0);
+ }
+ break;
+ case CSO_FRAGMENT_SHADER: {
+ delete_fs_state(state, 0);
+ }
+ break;
+ case CSO_VERTEX_SHADER: {
+ delete_vs_state(state, 0);
+ }
+ break;
+ }
+ FREE(state);
+}
+
+static INLINE void sanitize_hash(struct cso_hash *hash, enum cso_cache_type type,
+ int max_size)
+{
+ /* if we've exceeded the maximum size, remove a fourth of the entries,
+ * otherwise every subsequent call would go through the same eviction */
+ int max_entries = (max_size > cso_hash_size(hash)) ? max_size : cso_hash_size(hash);
+ int to_remove = (max_size < max_entries) * max_entries/4;
+ while (to_remove) {
+ /* remove elements until we're good */
+ /* fixme: the node to evict is picked arbitrarily (whichever comes first in the hash) */
+ struct cso_hash_iter iter = cso_hash_first_node(hash);
+ void *cso = cso_hash_take(hash, cso_hash_iter_key(iter));
+ delete_cso(cso, type);
+ --to_remove;
+ }
+}
+
struct cso_hash_iter
cso_insert_state(struct cso_cache *sc,
unsigned hash_key, enum cso_cache_type type,
void *state)
{
struct cso_hash *hash = _cso_hash_for_type(sc, type);
+ sanitize_hash(hash, type, sc->max_size);
+
return cso_hash_insert(hash, hash_key, state);
}
@@ -132,6 +239,26 @@ cso_find_state(struct cso_cache *sc,
return cso_hash_find(hash, hash_key);
}
+
+void *cso_hash_find_data_from_template( struct cso_hash *hash,
+ unsigned hash_key,
+ void *templ,
+ int size )
+{
+ struct cso_hash_iter iter = cso_hash_find(hash, hash_key);
+ while (!cso_hash_iter_is_null(iter)) {
+ void *iter_data = cso_hash_iter_data(iter);
+ if (!memcmp(iter_data, templ, size)) {
+ /* Return the payload:
+ */
+ return (unsigned char *)iter_data + size;
+ }
+ iter = cso_hash_iter_next(iter);
+ }
+ return NULL;
+}
+
+
struct cso_hash_iter cso_find_state_template(struct cso_cache *sc,
unsigned hash_key, enum cso_cache_type type,
void *templ)
@@ -156,8 +283,9 @@ void * cso_take_state(struct cso_cache *sc,
struct cso_cache *cso_cache_create(void)
{
- struct cso_cache *sc = malloc(sizeof(struct cso_cache));
+ struct cso_cache *sc = MALLOC_STRUCT(cso_cache);
+ sc->max_size = 4096;
sc->blend_hash = cso_hash_create();
sc->sampler_hash = cso_hash_create();
sc->depth_stencil_hash = cso_hash_create();
@@ -168,14 +296,78 @@ struct cso_cache *cso_cache_create(void)
return sc;
}
+void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type,
+ cso_state_callback func, void *user_data)
+{
+ struct cso_hash *hash = 0;
+ struct cso_hash_iter iter;
+
+ switch (type) {
+ case CSO_BLEND:
+ hash = sc->blend_hash;
+ break;
+ case CSO_SAMPLER:
+ hash = sc->sampler_hash;
+ break;
+ case CSO_DEPTH_STENCIL_ALPHA:
+ hash = sc->depth_stencil_hash;
+ break;
+ case CSO_RASTERIZER:
+ hash = sc->rasterizer_hash;
+ break;
+ case CSO_FRAGMENT_SHADER:
+ hash = sc->fs_hash;
+ break;
+ case CSO_VERTEX_SHADER:
+ hash = sc->vs_hash;
+ break;
+ }
+
+ iter = cso_hash_first_node(hash);
+ while (!cso_hash_iter_is_null(iter)) {
+ void *state = cso_hash_iter_data(iter);
+ if (state) {
+ func(state, user_data);
+ }
+ iter = cso_hash_iter_next(iter);
+ }
+}
+
void cso_cache_delete(struct cso_cache *sc)
{
assert(sc);
+ /* delete driver data */
+ cso_for_each_state(sc, CSO_BLEND, delete_blend_state, 0);
+ cso_for_each_state(sc, CSO_DEPTH_STENCIL_ALPHA, delete_depth_stencil_state, 0);
+ cso_for_each_state(sc, CSO_FRAGMENT_SHADER, delete_fs_state, 0);
+ cso_for_each_state(sc, CSO_VERTEX_SHADER, delete_vs_state, 0);
+ cso_for_each_state(sc, CSO_RASTERIZER, delete_rasterizer_state, 0);
+ cso_for_each_state(sc, CSO_SAMPLER, delete_sampler_state, 0);
+
cso_hash_delete(sc->blend_hash);
cso_hash_delete(sc->sampler_hash);
cso_hash_delete(sc->depth_stencil_hash);
cso_hash_delete(sc->rasterizer_hash);
cso_hash_delete(sc->fs_hash);
cso_hash_delete(sc->vs_hash);
- free(sc);
+ FREE(sc);
}
+
+void cso_set_maximum_cache_size(struct cso_cache *sc, int number)
+{
+ sc->max_size = number;
+
+ sanitize_hash(sc->blend_hash, CSO_BLEND, sc->max_size);
+ sanitize_hash(sc->depth_stencil_hash, CSO_DEPTH_STENCIL_ALPHA,
+ sc->max_size);
+ sanitize_hash(sc->fs_hash, CSO_FRAGMENT_SHADER, sc->max_size);
+ sanitize_hash(sc->vs_hash, CSO_VERTEX_SHADER, sc->max_size);
+ sanitize_hash(sc->rasterizer_hash, CSO_RASTERIZER, sc->max_size);
+ sanitize_hash(sc->sampler_hash, CSO_SAMPLER, sc->max_size);
+}
+
+int cso_maximum_cache_size(const struct cso_cache *sc)
+{
+ return sc->max_size;
+}
+
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h
index 116e2eaa2c..3b0fe100b8 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.h
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.h
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,9 +25,48 @@
*
**************************************************************************/
- /*
- * Authors:
- * Zack Rusin <zack@tungstengraphics.com>
+ /**
+ * @file
+ * Constant State Object (CSO) cache.
+ *
+ * The basic idea is that the states are created via the
+ * create_state/bind_state/delete_state semantics. The driver is expected to
+ * perform as much of the Gallium state translation to whatever its internal
+ * representation is during the create call. Gallium then has a caching
+ * mechanism where it stores the created states. When the pipeline needs an
+ * actual state change, a bind call is issued. In the bind call the driver
+ * gets its already translated representation.
+ *
+ * Those semantics mean that the driver doesn't do the repeated translations
+ * of states on every frame, but only once, when a new state is actually
+ * created.
+ *
+ * Even on hardware that doesn't do any kind of state cache, it makes the
+ * driver look a lot neater, plus it avoids all the redundant state
+ * translations on every frame.
+ *
+ * Currently our constant state objects are:
+ * - alpha test
+ * - blend
+ * - depth stencil
+ * - fragment shader
+ * - rasterizer (old setup)
+ * - sampler
+ * - vertex shader
+ *
+ * Things that are not constant state objects include:
+ * - blend_color
+ * - clip_state
+ * - clear_color_state
+ * - constant_buffer
+ * - feedback_state
+ * - framebuffer_state
+ * - polygon_stipple
+ * - scissor_state
+ * - texture_state
+ * - viewport_state
+ *
+ * @author Zack Rusin <zack@tungstengraphics.com>
*/
#ifndef CSO_CACHE_H
@@ -36,46 +75,57 @@
#include "pipe/p_context.h"
#include "pipe/p_state.h"
+/* cso_hash.h is necessary for cso_hash_iter, as MSVC requires structures
+ * returned by value to be fully defined */
+#include "cso_hash.h"
-struct cso_hash;
-struct cso_cache {
- struct cso_hash *blend_hash;
- struct cso_hash *depth_stencil_hash;
- struct cso_hash *fs_hash;
- struct cso_hash *vs_hash;
- struct cso_hash *rasterizer_hash;
- struct cso_hash *sampler_hash;
-};
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct cso_cache;
struct cso_blend {
struct pipe_blend_state state;
void *data;
+ void (*delete_state)(void *, void *);
+ void *context;
};
struct cso_depth_stencil_alpha {
struct pipe_depth_stencil_alpha_state state;
void *data;
+ void (*delete_state)(void *, void *);
+ void *context;
};
struct cso_rasterizer {
struct pipe_rasterizer_state state;
void *data;
+ void (*delete_state)(void *, void *);
+ void *context;
};
struct cso_fragment_shader {
struct pipe_shader_state state;
void *data;
+ void (*delete_state)(void *, void *);
+ void *context;
};
struct cso_vertex_shader {
struct pipe_shader_state state;
void *data;
+ void (*delete_state)(void *, void *);
+ void *context;
};
struct cso_sampler {
struct pipe_sampler_state state;
void *data;
+ void (*delete_state)(void *, void *);
+ void *context;
};
@@ -88,6 +138,8 @@ enum cso_cache_type {
CSO_VERTEX_SHADER
};
+typedef void (*cso_state_callback)(void *, void *);
+
unsigned cso_construct_key(void *item, int item_size);
struct cso_cache *cso_cache_create(void);
@@ -101,7 +153,16 @@ struct cso_hash_iter cso_find_state(struct cso_cache *sc,
struct cso_hash_iter cso_find_state_template(struct cso_cache *sc,
unsigned hash_key, enum cso_cache_type type,
void *templ);
+void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type,
+ cso_state_callback func, void *user_data);
void * cso_take_state(struct cso_cache *sc, unsigned hash_key,
enum cso_cache_type type);
+void cso_set_maximum_cache_size(struct cso_cache *sc, int number);
+int cso_maximum_cache_size(const struct cso_cache *sc);
+
+#ifdef __cplusplus
+}
+#endif
+
#endif
diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c
index b40217c524..b3b4d667d2 100644
--- a/src/gallium/auxiliary/cso_cache/cso_hash.c
+++ b/src/gallium/auxiliary/cso_cache/cso_hash.c
@@ -30,12 +30,10 @@
* Zack Rusin <zack@tungstengraphics.com>
*/
-#include "cso_hash.h"
+#include "pipe/p_debug.h"
+#include "pipe/p_util.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
+#include "cso_hash.h"
#define MAX(a, b) ((a > b) ? (a) : (b))
@@ -98,7 +96,7 @@ struct cso_hash {
static void *cso_data_allocate_node(struct cso_hash_data *hash)
{
- return malloc(hash->nodeSize);
+ return MALLOC(hash->nodeSize);
}
static void cso_data_free_node(struct cso_node *node)
@@ -107,10 +105,10 @@ static void cso_data_free_node(struct cso_node *node)
* Need to cast value ptr to original cso type, then free the
* driver-specific data hanging off of it. For example:
struct cso_sampler *csamp = (struct cso_sampler *) node->value;
- free(csamp->data);
+ FREE(csamp->data);
*/
- free(node->value);
- free(node);
+ FREE(node->value);
+ FREE(node);
}
static struct cso_node *
@@ -134,7 +132,7 @@ static void cso_data_rehash(struct cso_hash_data *hash, int hint)
hint = countBits(-hint);
if (hint < MinNumBits)
hint = MinNumBits;
- hash->userNumBits = hint;
+ hash->userNumBits = (short)hint;
while (primeForNumBits(hint) < (hash->size >> 1))
++hint;
} else if (hint < MinNumBits) {
@@ -147,9 +145,9 @@ static void cso_data_rehash(struct cso_hash_data *hash, int hint)
int oldNumBuckets = hash->numBuckets;
int i = 0;
- hash->numBits = hint;
+ hash->numBits = (short)hint;
hash->numBuckets = primeForNumBits(hint);
- hash->buckets = malloc(sizeof(struct cso_node*) * hash->numBuckets);
+ hash->buckets = MALLOC(sizeof(struct cso_node*) * hash->numBuckets);
for (i = 0; i < hash->numBuckets; ++i)
hash->buckets[i] = e;
@@ -173,7 +171,7 @@ static void cso_data_rehash(struct cso_hash_data *hash, int hint)
firstNode = afterLastNode;
}
}
- free(oldBuckets);
+ FREE(oldBuckets);
}
}
@@ -235,13 +233,13 @@ struct cso_hash_iter cso_hash_insert(struct cso_hash *hash,
struct cso_hash * cso_hash_create(void)
{
- struct cso_hash *hash = malloc(sizeof(struct cso_hash));
- hash->data.d = malloc(sizeof(struct cso_hash_data));
+ struct cso_hash *hash = MALLOC_STRUCT(cso_hash);
+ hash->data.d = MALLOC_STRUCT(cso_hash_data);
hash->data.d->fakeNext = 0;
hash->data.d->buckets = 0;
hash->data.d->size = 0;
hash->data.d->nodeSize = sizeof(struct cso_node);
- hash->data.d->userNumBits = MinNumBits;
+ hash->data.d->userNumBits = (short)MinNumBits;
hash->data.d->numBits = 0;
hash->data.d->numBuckets = 0;
@@ -261,9 +259,9 @@ void cso_hash_delete(struct cso_hash *hash)
cur = next;
}
}
- free(hash->data.d->buckets);
- free(hash->data.d);
- free(hash);
+ FREE(hash->data.d->buckets);
+ FREE(hash->data.d);
+ FREE(hash);
}
struct cso_hash_iter cso_hash_find(struct cso_hash *hash,
@@ -301,7 +299,7 @@ static struct cso_node *cso_hash_data_next(struct cso_node *node)
a.next = node->next;
if (!a.next) {
- fprintf(stderr, "iterating beyond the last element\n");
+ debug_printf("iterating beyond the last element\n");
return 0;
}
if (a.next->next)
@@ -352,7 +350,7 @@ static struct cso_node *cso_hash_data_prev(struct cso_node *node)
--bucket;
--start;
}
- fprintf(stderr, "iterating backward beyond first element\n");
+ debug_printf("iterating backward beyond first element\n");
return a.e;
}
@@ -397,3 +395,8 @@ struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash)
struct cso_hash_iter iter = {hash, cso_data_first_node(hash->data.d)};
return iter;
}
+
+int cso_hash_size(struct cso_hash *hash)
+{
+ return hash->data.d->size;
+}
diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h
index b4aa111860..d5bca9d591 100644
--- a/src/gallium/auxiliary/cso_cache/cso_hash.h
+++ b/src/gallium/auxiliary/cso_cache/cso_hash.h
@@ -33,6 +33,11 @@
#ifndef CSO_HASH_H
#define CSO_HASH_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
struct cso_hash;
struct cso_node;
@@ -42,7 +47,9 @@ struct cso_hash_iter {
};
struct cso_hash *cso_hash_create(void);
-void cso_hash_delete(struct cso_hash *hash);
+void cso_hash_delete(struct cso_hash *hash);
+
+int cso_hash_size(struct cso_hash *hash);
struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, unsigned key,
void *data);
@@ -59,4 +66,17 @@ void *cso_hash_iter_data(struct cso_hash_iter iter);
struct cso_hash_iter cso_hash_iter_next(struct cso_hash_iter iter);
struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter);
+
+/* KW: a convenience routine:
+ */
+void *cso_hash_find_data_from_template( struct cso_hash *hash,
+ unsigned hash_key,
+ void *templ,
+ int size );
+
+
+#ifdef __cplusplus
+}
+#endif
+
#endif
diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript
index 8e3a8caa74..3302dc44f7 100644
--- a/src/gallium/auxiliary/draw/SConscript
+++ b/src/gallium/auxiliary/draw/SConscript
@@ -3,6 +3,8 @@ Import('*')
draw = env.ConvenienceLibrary(
target = 'draw',
source = [
+ 'draw_aaline.c',
+ 'draw_aapoint.c',
'draw_clip.c',
'draw_vs_exec.c',
'draw_vs_sse.c',
@@ -13,6 +15,7 @@ draw = env.ConvenienceLibrary(
'draw_flatshade.c',
'draw_offset.c',
'draw_prim.c',
+ 'draw_pstipple.c',
'draw_stipple.c',
'draw_twoside.c',
'draw_unfilled.c',
diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c
index 43119cc70b..cae6fcd4d2 100644
--- a/src/gallium/auxiliary/draw/draw_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_aapoint.c
@@ -222,7 +222,7 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
*
* Temp reg0 usage:
* t0.x = distance of fragment from center point
- * t0.y = boolean, is t0.x > 1 ?
+ * t0.y = boolean, is t0.x > 1.0, also misc temp usage
* t0.z = temporary for computing 1/(1-k) value
* t0.w = final coverage value
*/
@@ -313,9 +313,73 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
newInst.FullSrcRegisters[0].SrcRegister.Negate = 1;
ctx->emit_instruction(ctx, &newInst);
- /* SGT t0.y, t0.x, tex.z; # bool b = distance > k */
+
+ /* compute coverage factor = (1-d)/(1-k) */
+
+ /* SUB t0.z, tex.w, tex.z; # m = 1 - k */
newInst = tgsi_default_full_instruction();
- newInst.Instruction.Opcode = TGSI_OPCODE_SGT;
+ newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
+ newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
+ newInst.Instruction.NumSrcRegs = 2;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
+ newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
+ newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
+ newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
+ newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z;
+ ctx->emit_instruction(ctx, &newInst);
+
+ /* RCP t0.z, t0.z; # t0.z = 1 / m */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
+ newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
+ newInst.Instruction.NumSrcRegs = 1;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
+ newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z;
+ ctx->emit_instruction(ctx, &newInst);
+
+ /* SUB t0.y, 1, t0.x; # d = 1 - d */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
+ newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
+ newInst.Instruction.NumSrcRegs = 2;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
+ newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
+ newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
+ newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+ ctx->emit_instruction(ctx, &newInst);
+
+ /* MUL t0.w, t0.y, t0.z; # coverage = d * m */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
+ newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
+ newInst.Instruction.NumSrcRegs = 2;
+ newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
+ newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+ newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
+ newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z;
+ ctx->emit_instruction(ctx, &newInst);
+
+ /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */
+ newInst = tgsi_default_full_instruction();
+ newInst.Instruction.Opcode = TGSI_OPCODE_SLE;
newInst.Instruction.NumDstRegs = 1;
newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
@@ -329,111 +393,40 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z;
ctx->emit_instruction(ctx, &newInst);
- /* IF t0.y # if b then */
+ /* CMP t0.w, -t0.y, tex.w, t0.w;
+ * # if -t0.y < 0 then
+ * t0.w = 1
+ * else
+ * t0.w = t0.w
+ */
newInst = tgsi_default_full_instruction();
- newInst.Instruction.Opcode = TGSI_OPCODE_IF;
- newInst.Instruction.NumDstRegs = 0;
- newInst.Instruction.NumSrcRegs = 1;
+ newInst.Instruction.Opcode = TGSI_OPCODE_CMP;
+ newInst.Instruction.NumDstRegs = 1;
+ newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
+ newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
+ newInst.Instruction.NumSrcRegs = 3;
newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+ newInst.FullSrcRegisters[0].SrcRegister.Negate = 1;
+ newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
+ newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
+ newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
+ newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
+ newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
+ newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
+ newInst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ newInst.FullSrcRegisters[2].SrcRegister.Index = tmp0;
+ newInst.FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
+ newInst.FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
+ newInst.FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
+ newInst.FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
ctx->emit_instruction(ctx, &newInst);
- {
- /* compute coverage factor = (1-d)/(1-k) */
-
- /* SUB t0.z, tex.w, tex.z; # m = 1 - k */
- newInst = tgsi_default_full_instruction();
- newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
- newInst.Instruction.NumDstRegs = 1;
- newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
- newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
- newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
- newInst.Instruction.NumSrcRegs = 2;
- newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
- newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
- newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
- newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
- newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
- newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z;
- ctx->emit_instruction(ctx, &newInst);
-
- /* RCP t0.z, t0.z; # t0.z = 1 / m */
- newInst = tgsi_default_full_instruction();
- newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
- newInst.Instruction.NumDstRegs = 1;
- newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
- newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
- newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
- newInst.Instruction.NumSrcRegs = 1;
- newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
- newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
- newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z;
- ctx->emit_instruction(ctx, &newInst);
-
- /* SUB t0.x, 1, t0.x; # d = 1 - d */
- newInst = tgsi_default_full_instruction();
- newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
- newInst.Instruction.NumDstRegs = 1;
- newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
- newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
- newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
- newInst.Instruction.NumSrcRegs = 2;
- newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
- newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
- newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
- newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
- newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
- newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
- ctx->emit_instruction(ctx, &newInst);
-
- /* MUL t0.w, t0.x, t0.z; # coverage = d * m */
- newInst = tgsi_default_full_instruction();
- newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
- newInst.Instruction.NumDstRegs = 1;
- newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
- newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
- newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
- newInst.Instruction.NumSrcRegs = 2;
- newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
- newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
- newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
- newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
- newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
- newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z;
- ctx->emit_instruction(ctx, &newInst);
- }
-
- /* ELSE */
- newInst = tgsi_default_full_instruction();
- newInst.Instruction.Opcode = TGSI_OPCODE_ELSE;
- newInst.Instruction.NumDstRegs = 0;
- newInst.Instruction.NumSrcRegs = 0;
- ctx->emit_instruction(ctx, &newInst);
-
- {
- /* MOV t0.w, tex.w; # coverage = 1.0 */
- newInst = tgsi_default_full_instruction();
- newInst.Instruction.Opcode = TGSI_OPCODE_MOV;
- newInst.Instruction.NumDstRegs = 1;
- newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
- newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
- newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
- newInst.Instruction.NumSrcRegs = 1;
- newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
- newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
- ctx->emit_instruction(ctx, &newInst);
- }
-
- /* ENDIF */
- newInst = tgsi_default_full_instruction();
- newInst.Instruction.Opcode = TGSI_OPCODE_ENDIF;
- newInst.Instruction.NumDstRegs = 0;
- newInst.Instruction.NumSrcRegs = 0;
- ctx->emit_instruction(ctx, &newInst);
}
if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
@@ -516,7 +509,7 @@ generate_aapoint_fs(struct aapoint_stage *aapoint)
(struct tgsi_token *) aapoint_fs.tokens,
MAX, &transform.base);
-#if 0 /* DEBUG */
+#if 1 /* DEBUG */
tgsi_dump(orig_fs->tokens, 0);
tgsi_dump(aapoint_fs.tokens, 0);
#endif
@@ -613,13 +606,16 @@ aapoint_point(struct draw_stage *stage, struct prim_header *header)
* ELSE
* coverage = 1.0; // full coverage
* ENDIF
+ *
+ * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to
+ * avoid using IF/ELSE/ENDIF TGSI opcodes.
*/
#if !NORMALIZE
- k = 1.0 / radius;
- k = 1.0 - 2.0 * k + k * k;
+ k = 1.0f / radius;
+ k = 1.0f - 2.0f * k + k * k;
#else
- k = 1.0 - 1.0 / radius;
+ k = 1.0f - 1.0f / radius;
#endif
/* allocate/dup new verts */
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index c28e78d33a..7dd1c6f6fa 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -244,14 +244,32 @@ draw_convert_wide_lines(struct draw_context *draw, boolean enable)
/**
- * The draw module may sometimes generate vertices with extra attributes
- * (such as texcoords for AA lines). The driver can call this function
- * to find those attributes.
+ * Ask the draw module for the location/slot of the given vertex attribute in
+ * a post-transformed vertex.
+ *
+ * With this function, drivers that use the draw module should have no reason
+ * to track the current vertex shader.
+ *
+ * Note that the draw module may sometimes generate vertices with extra
+ * attributes (such as texcoords for AA lines). The driver can call this
+ * function to find those attributes.
+ *
+ * Zero is returned if the attribute is not found since this is
+ * a don't care / undefined situation. Returning -1 would be a bit more
+ * work for the drivers.
*/
int
draw_find_vs_output(struct draw_context *draw,
uint semantic_name, uint semantic_index)
{
+ const struct pipe_shader_state *vs = draw->vertex_shader->state;
+ uint i;
+ for (i = 0; i < vs->num_outputs; i++) {
+ if (vs->output_semantic_name[i] == semantic_name &&
+ vs->output_semantic_index[i] == semantic_index)
+ return i;
+ }
+
/* XXX there may be more than one extra vertex attrib.
* For example, simulated gl_FragCoord and gl_PointCoord.
*/
diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c
index 4048abf856..1ab04cd959 100644
--- a/src/gallium/auxiliary/draw/draw_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pstipple.c
@@ -133,7 +133,7 @@ pstip_transform_decl(struct tgsi_transform_context *ctx,
pctx->maxSampler = (int) decl->u.DeclarationRange.Last;
}
else if (decl->Declaration.File == TGSI_FILE_INPUT) {
- pctx->maxInput = MAX2(pctx->maxInput, decl->u.DeclarationRange.Last);
+ pctx->maxInput = MAX2(pctx->maxInput, (int) decl->u.DeclarationRange.Last);
if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION)
pctx->wincoordInput = (int) decl->u.DeclarationRange.First;
}
@@ -332,7 +332,7 @@ generate_pstip_fs(struct pstip_stage *pstip)
if (transform.wincoordInput < 0) {
pstip_fs.input_semantic_name[pstip_fs.num_inputs] = TGSI_SEMANTIC_POSITION;
- pstip_fs.input_semantic_index[pstip_fs.num_inputs] = transform.maxInput;
+ pstip_fs.input_semantic_index[pstip_fs.num_inputs] = (ubyte)transform.maxInput;
pstip_fs.num_inputs++;
}
diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c
index efd6793f2b..3a19dd4cd7 100644
--- a/src/gallium/auxiliary/draw/draw_validate.c
+++ b/src/gallium/auxiliary/draw/draw_validate.c
@@ -45,6 +45,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
struct draw_stage *next = draw->pipeline.rasterize;
int need_det = 0;
int precalc_flat = 0;
+ boolean wide_lines, wide_points;
/* Set the validate's next stage to the rasterize stage, so that it
* can be found later if needed for flushing.
@@ -68,9 +69,18 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
next = draw->pipeline.aapoint;
}
- if ((draw->rasterizer->line_width != 1.0 && draw->convert_wide_lines
- && !draw->rasterizer->line_smooth) ||
- (draw->rasterizer->point_size != 1.0 && draw->convert_wide_points) ||
+ /* drawing wide lines? */
+ wide_lines = (draw->rasterizer->line_width != 1.0
+ && draw->convert_wide_lines
+ && !draw->rasterizer->line_smooth);
+
+ /* drawing large points? */
+ wide_points = (draw->rasterizer->point_size != 1.0
+ && draw->convert_wide_points
+ && !draw->pipeline.aapoint);
+
+ if (wide_lines ||
+ wide_points ||
draw->rasterizer->point_sprite) {
draw->pipeline.wide->next = next;
next = draw->pipeline.wide;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
index 97beb5f72a..f5b5f4052f 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
@@ -166,6 +166,7 @@ static INLINE void
pb_destroy(struct pb_buffer *buf)
{
assert(buf);
+ assert(buf->vtbl);
buf->vtbl->destroy(buf);
}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
index e2ee72ed1f..6e217eb2e0 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
@@ -285,7 +285,9 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list)
/* Wait on outstanding fences */
while (fenced_list->numDelayed) {
_glthread_UNLOCK_MUTEX(fenced_list->mutex);
+#ifndef __MSC__
sched_yield();
+#endif
_fenced_buffer_list_check_free(fenced_list, 1);
_glthread_LOCK_MUTEX(fenced_list->mutex);
}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
index ff4fd123f3..66256f3fa7 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
@@ -192,8 +192,8 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr,
}
/* Some sanity checks */
- assert(0 <= mm_buf->block->ofs && mm_buf->block->ofs < mm->size);
- assert(size <= mm_buf->block->size && mm_buf->block->ofs + mm_buf->block->size <= mm->size);
+ assert(0 <= (unsigned)mm_buf->block->ofs && (unsigned)mm_buf->block->ofs < mm->size);
+ assert(size <= (unsigned)mm_buf->block->size && (unsigned)mm_buf->block->ofs + (unsigned)mm_buf->block->size <= mm->size);
_glthread_UNLOCK_MUTEX(mm->mutex);
return SUPER(mm_buf);
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index dcbf76f600..4d33950e99 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -25,6 +25,7 @@
#include "pipe/p_compiler.h"
#include "pipe/p_debug.h"
+#include "pipe/p_pointer.h"
#include "rtasm_execmem.h"
#include "rtasm_x86sse.h"
@@ -34,7 +35,7 @@
static unsigned char *cptr( void (*label)() )
{
- return (unsigned char *)(unsigned long)label;
+ return (unsigned char *) label;
}
@@ -46,7 +47,7 @@ static void do_realloc( struct x86_function *p )
p->csr = p->store;
}
else {
- unsigned used = p->csr - p->store;
+ uintptr_t used = pointer_to_uintptr( p->csr ) - pointer_to_uintptr( p->store );
unsigned char *tmp = p->store;
p->size *= 2;
p->store = rtasm_exec_malloc(p->size);
@@ -60,7 +61,7 @@ static void do_realloc( struct x86_function *p )
*/
static unsigned char *reserve( struct x86_function *p, int bytes )
{
- if (p->csr + bytes - p->store > p->size)
+ if (p->csr + bytes - p->store > (int) p->size)
do_realloc(p);
{
@@ -135,7 +136,7 @@ static void emit_modrm( struct x86_function *p,
case mod_INDIRECT:
break;
case mod_DISP8:
- emit_1b(p, regmem.disp);
+ emit_1b(p, (char) regmem.disp);
break;
case mod_DISP32:
emit_1i(p, regmem.disp);
@@ -251,14 +252,14 @@ void x86_jcc( struct x86_function *p,
enum x86_cc cc,
unsigned char *label )
{
- int offset = label - (x86_get_label(p) + 2);
+ intptr_t offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 2);
if (offset <= 127 && offset >= -128) {
emit_1ub(p, 0x70 + cc);
emit_1b(p, (char) offset);
}
else {
- offset = label - (x86_get_label(p) + 6);
+ offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 6);
emit_2ub(p, 0x0f, 0x80 + cc);
emit_1i(p, offset);
}
@@ -293,13 +294,13 @@ unsigned char *x86_call_forward( struct x86_function *p)
void x86_fixup_fwd_jump( struct x86_function *p,
unsigned char *fixup )
{
- *(int *)(fixup - 4) = x86_get_label(p) - fixup;
+ *(int *)(fixup - 4) = pointer_to_intptr( x86_get_label(p) ) - pointer_to_intptr( fixup );
}
void x86_jmp( struct x86_function *p, unsigned char *label)
{
emit_1ub(p, 0xe9);
- emit_1i(p, label - x86_get_label(p) - 4);
+ emit_1i(p, pointer_to_intptr( label ) - pointer_to_intptr( x86_get_label(p) ) - 4);
}
#if 0
@@ -1207,7 +1208,7 @@ void (*x86_get_func( struct x86_function *p ))(void)
{
if (DISASSEM && p->store)
debug_printf("disassemble %p %p\n", p->store, p->csr);
- return (void (*)(void)) (unsigned long) p->store;
+ return (void (*)(void)) p->store;
}
#else
diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile
index 71f64b747c..5555639b70 100644
--- a/src/gallium/auxiliary/tgsi/Makefile
+++ b/src/gallium/auxiliary/tgsi/Makefile
@@ -9,6 +9,7 @@ C_SOURCES = \
util/tgsi_build.c \
util/tgsi_dump.c \
util/tgsi_parse.c \
+ util/tgsi_scan.c \
util/tgsi_transform.c \
util/tgsi_util.c
diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript
index 8464bfe944..4632dcc072 100644
--- a/src/gallium/auxiliary/tgsi/SConscript
+++ b/src/gallium/auxiliary/tgsi/SConscript
@@ -8,6 +8,8 @@ tgsi = env.ConvenienceLibrary(
'util/tgsi_build.c',
'util/tgsi_dump.c',
'util/tgsi_parse.c',
+ 'util/tgsi_scan.c',
+ 'util/tgsi_transform.c',
'util/tgsi_util.c',
])
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index d7b18dc9c5..ac52441400 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -2455,7 +2455,7 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
/* execute instructions, until pc is set to -1 */
while (pc != -1) {
- assert(pc < mach->NumInstructions);
+ assert(pc < (int) mach->NumInstructions);
exec_instruction( mach, mach->Instructions + pc, &pc );
}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
index b5c54847e0..ff74e6117c 100644
--- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c
@@ -299,7 +299,8 @@ static const char *TGSI_SEMANTICS[] =
"SEMANTIC_BCOLOR",
"SEMANTIC_FOG",
"SEMANTIC_PSIZE",
- "SEMANTIC_GENERIC,"
+ "SEMANTIC_GENERIC",
+ "SEMANTIC_NORMAL"
};
static const char *TGSI_SEMANTICS_SHORT[] =
@@ -310,6 +311,7 @@ static const char *TGSI_SEMANTICS_SHORT[] =
"FOG",
"PSIZE",
"GENERIC",
+ "NORMAL"
};
static const char *TGSI_IMMS[] =
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
new file mode 100644
index 0000000000..4b99ac37cc
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c
@@ -0,0 +1,117 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * TGSI program scan utility.
+ * Used to determine which registers and instructions are used by a shader.
+ *
+ * Authors: Brian Paul
+ */
+
+
+#include "tgsi_scan.h"
+#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/util/tgsi_build.h"
+
+
+
+
+/**
+ */
+void
+tgsi_scan_shader(const struct tgsi_token *tokens,
+ struct tgsi_shader_info *info)
+{
+ uint procType;
+ struct tgsi_parse_context parse;
+
+ memset(info, 0, sizeof(*info));
+
+ /**
+ ** Setup to begin parsing input shader
+ **/
+ if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) {
+ debug_printf("tgsi_parse_init() failed in tgsi_scan_shader()!\n");
+ return;
+ }
+ procType = parse.FullHeader.Processor.Processor;
+ assert(procType == TGSI_PROCESSOR_FRAGMENT ||
+ procType == TGSI_PROCESSOR_VERTEX ||
+ procType == TGSI_PROCESSOR_GEOMETRY);
+
+
+ /**
+ ** Loop over incoming program tokens/instructions
+ */
+ while( !tgsi_parse_end_of_tokens( &parse ) ) {
+
+ tgsi_parse_token( &parse );
+
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ struct tgsi_full_instruction *fullinst
+ = &parse.FullToken.FullInstruction;
+
+ assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
+ info->opcode_count[fullinst->Instruction.Opcode]++;
+ }
+ break;
+
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ struct tgsi_full_declaration *fulldecl
+ = &parse.FullToken.FullDeclaration;
+ uint file = fulldecl->Declaration.File;
+ uint i;
+ for (i = fulldecl->u.DeclarationRange.First;
+ i <= fulldecl->u.DeclarationRange.Last;
+ i++) {
+ info->file_mask[file] |= (1 << i);
+ info->file_count[file]++;
+
+ /* special case */
+ if (procType == TGSI_PROCESSOR_FRAGMENT &&
+ file == TGSI_FILE_OUTPUT &&
+ fulldecl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
+ info->writes_z = TRUE;
+ }
+ }
+ }
+ break;
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ info->immediate_count++;
+ break;
+
+ default:
+ assert( 0 );
+ }
+ }
+
+ tgsi_parse_free (&parse);
+}
diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
new file mode 100644
index 0000000000..757446437c
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h
@@ -0,0 +1,57 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef TGSI_SCAN_H
+#define TGSI_SCAN_H
+
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+
+
+/**
+ * Shader summary info
+ */
+struct tgsi_shader_info
+{
+ uint file_mask[TGSI_FILE_COUNT]; /**< bitmask of declared registers */
+ uint file_count[TGSI_FILE_COUNT]; /**< number of declared registers */
+
+ uint immediate_count; /**< number of immediates declared */
+
+ uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */
+
+ boolean writes_z; /**< does fragment shader write Z value? */
+};
+
+
+extern void
+tgsi_scan_shader(const struct tgsi_token *tokens,
+ struct tgsi_shader_info *info);
+
+
+#endif /* TGSI_SCAN_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h
index 3b687bb868..1433a4925f 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.h
+++ b/src/gallium/drivers/cell/ppu/cell_context.h
@@ -134,6 +134,8 @@ extern void
cell_vertex_shader_queue_flush(struct draw_context *draw);
+/* XXX find a better home for this */
+extern void cell_update_vertex_fetch(struct draw_context *draw);
#endif /* CELL_CONTEXT_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
index cbd387f014..c839fb4d12 100644
--- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
+++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
@@ -38,6 +38,7 @@
#include "cell_context.h"
#include "cell_draw_arrays.h"
#include "cell_state.h"
+#include "cell_flush.h"
#include "draw/draw_context.h"
diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h
index eda351b1cb..7f940ae76b 100644
--- a/src/gallium/drivers/cell/ppu/cell_flush.h
+++ b/src/gallium/drivers/cell/ppu/cell_flush.h
@@ -35,4 +35,8 @@ cell_flush(struct pipe_context *pipe, unsigned flags);
extern void
cell_flush_int(struct pipe_context *pipe, unsigned flags);
+extern void
+cell_flush_buffer_range(struct cell_context *cell, void *ptr,
+ unsigned size);
+
#endif
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c
index f7ef72e5a2..f5c27852c1 100644
--- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c
+++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c
@@ -55,7 +55,6 @@ cell_vertex_shader_queue_flush(struct draw_context *draw)
struct cell_command_vs *const vs = &cell_global.command[0].vs;
uint64_t *batch;
struct cell_array_info *array_info;
- struct cell_shader_info *shader_info;
unsigned i, j;
struct cell_attribute_fetch_code *cf;
@@ -123,12 +122,12 @@ cell_vertex_shader_queue_flush(struct draw_context *draw)
for (j = 0; j < n; j++) {
vs->elts[j] = draw->vs.queue[i + j].elt;
- vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].dest;
+ vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex;
}
for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) {
vs->elts[j] = vs->elts[0];
- vs->vOut[j] = vs->vOut[0];
+ vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex;
}
vs->num_elts = n;
@@ -137,5 +136,6 @@ cell_vertex_shader_queue_flush(struct draw_context *draw)
cell_flush_int(& cell->pipe, PIPE_FLUSH_WAIT);
}
+ draw->vs.post_nr = draw->vs.queue_nr;
draw->vs.queue_nr = 0;
}
diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c
index 698a5790bb..a1701d80d1 100644
--- a/src/gallium/drivers/cell/spu/spu_dcache.c
+++ b/src/gallium/drivers/cell/spu/spu_dcache.c
@@ -33,7 +33,7 @@
#define CACHE_NAME data
#define CACHED_TYPE qword
#define CACHE_TYPE CACHE_TYPE_RO
-#define CACHE_SET_TAGID(set) TAG_VERTEX_BUFFER
+#define CACHE_SET_TAGID(set) (((set) & 0x03) + TAG_DCACHE0)
#define CACHE_LOG2NNWAY 2
#define CACHE_LOG2NSETS 6
#include <cache-api.h>
@@ -49,43 +49,57 @@
/**
* Fetch between arbitrary number of bytes from an unaligned address
+ *
+ * \param dst Destination data buffer
+ * \param ea Main memory effective address of source data
+ * \param size Number of bytes to read
+ *
+ * \warning
+ * As is hinted by the type of the \c dst pointer, this function writes
+ * multiples of 16-bytes.
*/
void
spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size)
{
const int shift = ea & 0x0f;
- const unsigned aligned_start_ea = ea & ~0x0f;
- const unsigned aligned_end_ea = ROUNDUP16(ea + size);
- const unsigned num_entries = (aligned_end_ea - aligned_start_ea) / 16;
+ const unsigned read_size = ROUNDUP16(size + shift);
+ const unsigned last_read = ROUNDUP16(ea + size);
+ const qword *const last_write = dst + (ROUNDUP16(size) / 16);
unsigned i;
if (shift == 0) {
/* Data is already aligned. Fetch directly into the destination buffer.
*/
- for (i = 0; i < num_entries; i++) {
- dst[i] = cache_rd(data, ea + (i * 16));
+ for (i = 0; i < size; i += 16) {
+ *(dst++) = cache_rd(data, ea + i);
}
} else {
- qword tmp[2] ALIGN16_ATTRIB;
-
+ qword hi;
- tmp[0] = cache_rd(data, (ea & ~0x0f));
- for (i = 0; i < (num_entries & ~1); i++) {
- const unsigned curr = i & 1;
- const unsigned next = curr ^ 1;
- tmp[next] = cache_rd(data, (ea & ~0x0f) + (next * 16));
-
- dst[i] = si_or((qword) spu_slqwbyte(tmp[curr], shift),
- (qword) spu_rlmaskqwbyte(tmp[next], shift - 16));
+ /* Please exercise extreme caution when modifying this code. This code
+ * must not read past the end of the page containing the source data,
+ * and it must not write more than ((size + 15) / 16) qwords to the
+ * destination buffer.
+ */
+ ea &= ~0x0f;
+ hi = cache_rd(data, ea);
+ for (i = 16; i < read_size; i += 16) {
+ qword lo = cache_rd(data, ea + i);
+
+ *(dst++) = si_or((qword) spu_slqwbyte(hi, shift),
+ (qword) spu_rlmaskqwbyte(lo, shift - 16));
+ hi = lo;
}
- if (i < num_entries) {
- dst[i] = si_or((qword) spu_slqwbyte(tmp[(i & 1)], shift),
- si_il(0));
+ if (dst != last_write) {
+ *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), si_il(0));
}
}
+
+ ASSERT((ea + i) == last_read);
+ ASSERT(dst == last_write);
}
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
index cf81bee8fd..1560c0f157 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -50,8 +50,6 @@
* Brian Paul
*/
-#include <libmisc.h>
-#include <spu_mfcio.h>
#include <transpose_matrix4x4.h>
#include <simdmath/ceilf4.h>
#include <simdmath/cosf4.h>
@@ -151,6 +149,7 @@ spu_exec_machine_init(struct spu_exec_machine *mach,
const qword zero = si_il(0);
const qword not_zero = si_il(~0);
+ (void) numSamplers;
mach->Samplers = samplers;
mach->Processor = processor;
mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];
@@ -659,9 +658,10 @@ fetch_texel( struct spu_sampler *sampler,
qword rgba[4];
qword out[4];
- sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, (float *) rgba);
+ sampler->get_samples(sampler, s->f, t->f, p->f, lodbias,
+ (float (*)[4]) rgba);
- _transpose_matrix4x4(out, rgba);
+ _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba);
r->q = out[0];
g->q = out[1];
b->q = out[2];
diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c
index 1136dba62d..cc4bafdb3a 100644
--- a/src/gallium/drivers/cell/spu/spu_main.c
+++ b/src/gallium/drivers/cell/spu/spu_main.c
@@ -38,6 +38,7 @@
#include "spu_tile.h"
//#include "spu_test.h"
#include "spu_vertex_shader.h"
+#include "spu_dcache.h"
#include "cell/common.h"
#include "pipe/p_defines.h"
@@ -434,7 +435,7 @@ cmd_batch(uint opcode)
pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8);
break;
case CELL_CMD_STATE_UNIFORMS:
- draw.constants = (float (*)[4]) (uintptr_t) buffer[pos + 1];
+ draw.constants = (const float (*)[4]) (uintptr_t) buffer[pos + 1];
pos += 2;
break;
case CELL_CMD_STATE_VS_ARRAY_INFO:
@@ -583,7 +584,7 @@ main(main_param_t speid, main_param_t argp)
one_time_init();
if (Debug)
- printf("SPU: main() speid=%lu\n", speid);
+ printf("SPU: main() speid=%lu\n", (unsigned long) speid);
mfc_get(&spu.init, /* dest */
(unsigned int) argp, /* src */
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index 5c95d112ac..d14f1abbe7 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -131,7 +131,10 @@ extern boolean Debug;
#define TAG_BATCH_BUFFER 17
#define TAG_MISC 18
#define TAG_TEXTURE_TILE 19
-#define TAG_INSTRUCTION_FETCH 20
+#define TAG_DCACHE0 20
+#define TAG_DCACHE1 21
+#define TAG_DCACHE2 22
+#define TAG_DCACHE3 23
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
index f7e4e653e3..219fd90cc0 100644
--- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
+++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c
@@ -32,8 +32,6 @@
* Ian Romanick <idr@us.ibm.com>
*/
-#include <spu_mfcio.h>
-
#include "pipe/p_util.h"
#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"
diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c
index acfa349439..c3955bbd2d 100644
--- a/src/gallium/drivers/i915simple/i915_context.c
+++ b/src/gallium/drivers/i915simple/i915_context.c
@@ -298,10 +298,12 @@ struct pipe_context *i915_create( struct pipe_winsys *pipe_winsys,
i915_init_string_functions(i915);
i915_init_texture_functions(i915);
+ draw_install_aaline_stage(i915->draw, &i915->pipe);
+ draw_install_aapoint_stage(i915->draw, &i915->pipe);
+
i915->pci_id = pci_id;
i915->flags.is_i945 = is_i945;
-
i915->dirty = ~0;
i915->hardware_dirty = ~0;
diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h
index 2d876925b2..d32dded6bd 100644
--- a/src/gallium/drivers/i915simple/i915_context.h
+++ b/src/gallium/drivers/i915simple/i915_context.h
@@ -79,6 +79,40 @@
#define I915_MAX_CONSTANT 32
+/** See constant_flags[] below */
+#define I915_CONSTFLAG_USER 0x1f
+
+
+/**
+ * Subclass of pipe_shader_state
+ */
+struct i915_fragment_shader
+{
+ struct pipe_shader_state state;
+ uint *program;
+ uint program_len;
+
+ /**
+ * constants introduced during translation.
+ * These are placed at the end of the constant buffer and grow toward
+ * the beginning (eg: slot 31, 30, 29, ...)
+ * User-provided constants start at 0.
+ * This allows both types of constants to co-exist (until there are too many)
+ * and doesn't require regenerating/changing the fragment program to
+ * shuffle constants around.
+ */
+ uint num_constants;
+ float constants[I915_MAX_CONSTANT][4];
+
+ /**
+ * Status of each constant
+ * if I915_CONSTFLAG_USER, the value must be taken from the corresponding
+ * slot of the user's constant buffer. (set by pipe->set_constant_buffer())
+ * Else, the bitmask indicates which components are occupied by immediates.
+ */
+ ubyte constant_flags[I915_MAX_CONSTANT];
+};
+
struct i915_cache_context;
@@ -93,11 +127,6 @@ struct i915_state
float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4];
/** number of constants passed in through a constant buffer */
uint num_user_constants[PIPE_SHADER_TYPES];
- /** user constants, plus extra constants from shader translation */
- uint num_constants[PIPE_SHADER_TYPES];
-
- uint *program;
- uint program_len;
/* texture sampler state */
unsigned sampler[I915_TEX_UNITS][3];
@@ -187,7 +216,8 @@ struct i915_context
const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS];
const struct i915_depth_stencil_state *depth_stencil;
const struct i915_rasterizer_state *rasterizer;
- const struct pipe_shader_state *fs;
+
+ struct i915_fragment_shader *fs;
struct pipe_blend_color blend_color;
struct pipe_clip_state clip;
@@ -233,6 +263,7 @@ struct i915_context
#define I915_NEW_TEXTURE 0x800
#define I915_NEW_CONSTANTS 0x1000
#define I915_NEW_VBO 0x2000
+#define I915_NEW_VS 0x4000
/* Driver's internally generated state flags:
diff --git a/src/gallium/drivers/i915simple/i915_flush.c b/src/gallium/drivers/i915simple/i915_flush.c
index 3c2069b827..96a54281f1 100644
--- a/src/gallium/drivers/i915simple/i915_flush.c
+++ b/src/gallium/drivers/i915simple/i915_flush.c
@@ -31,6 +31,7 @@
#include "pipe/p_defines.h"
+#include "draw/draw_context.h"
#include "i915_context.h"
#include "i915_reg.h"
#include "i915_batch.h"
@@ -44,6 +45,8 @@ static void i915_flush( struct pipe_context *pipe,
{
struct i915_context *i915 = i915_context(pipe);
+ draw_flush(i915->draw);
+
/* Do we need to emit an MI_FLUSH command to flush the hardware
* caches?
*/
diff --git a/src/gallium/drivers/i915simple/i915_fpc.h b/src/gallium/drivers/i915simple/i915_fpc.h
index 8c7b68aefb..250dfe6dbf 100644
--- a/src/gallium/drivers/i915simple/i915_fpc.h
+++ b/src/gallium/drivers/i915simple/i915_fpc.h
@@ -44,9 +44,16 @@
* Program translation state
*/
struct i915_fp_compile {
- const struct pipe_shader_state *shader;
+ struct i915_fragment_shader *shader; /* the shader we're compiling */
- struct vertex_info *vertex_info;
+ boolean used_constants[I915_MAX_CONSTANT];
+
+ /** maps TGSI immediate index to constant slot */
+ uint num_immediates;
+ uint immediates_map[I915_MAX_CONSTANT];
+ float immediates[I915_MAX_CONSTANT][4];
+
+ boolean first_instruction;
uint declarations[I915_PROGRAM_SIZE];
uint program[I915_PROGRAM_SIZE];
@@ -57,11 +64,6 @@ struct i915_fp_compile {
uint output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
uint output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
- /** points into the i915->current.constants array: */
- float (*constants)[4];
- uint num_constants;
- uint constant_flags[I915_MAX_CONSTANT]; /**< status of each constant */
-
uint *csr; /**< Cursor, points into program. */
uint *decl; /**< Cursor, points into declarations. */
@@ -155,7 +157,9 @@ swizzle(int reg, uint x, uint y, uint z, uint w)
/***********************************************************************
* Public interface for the compiler
*/
-extern void i915_translate_fragment_program( struct i915_context *i915 );
+extern void
+i915_translate_fragment_program( struct i915_context *i915,
+ struct i915_fragment_shader *fs);
@@ -206,8 +210,5 @@ extern void i915_disassemble_program(const uint * program, uint sz);
extern void
i915_program_error(struct i915_fp_compile *p, const char *msg, ...);
-extern void
-i915_translate_fragment_program(struct i915_context *i915);
-
#endif
diff --git a/src/gallium/drivers/i915simple/i915_fpc_emit.c b/src/gallium/drivers/i915simple/i915_fpc_emit.c
index 74924ff0a1..4bdeefb449 100644
--- a/src/gallium/drivers/i915simple/i915_fpc_emit.c
+++ b/src/gallium/drivers/i915simple/i915_fpc_emit.c
@@ -61,8 +61,6 @@
(REG_NR_MASK << UREG_NR_SHIFT))
-#define I915_CONSTFLAG_PARAM 0x1f
-
uint
i915_get_temp(struct i915_fp_compile *p)
{
@@ -73,10 +71,21 @@ i915_get_temp(struct i915_fp_compile *p)
}
p->temp_flag |= 1 << (bit - 1);
- return UREG(REG_TYPE_R, (bit - 1));
+ return bit - 1;
+}
+
+
+static void
+i915_release_temp(struct i915_fp_compile *p, int reg)
+{
+ p->temp_flag &= ~(1 << reg);
}
+/**
+ * Get unpreserved temporary, a temp whose value is not preserved between
+ * PS program phases.
+ */
uint
i915_get_utemp(struct i915_fp_compile * p)
{
@@ -185,41 +194,62 @@ i915_emit_arith(struct i915_fp_compile * p,
return dest;
}
+
+/**
+ * Emit a texture load or texkill instruction.
+ * \param dest the dest i915 register
+ * \param destmask the dest register writemask
+ * \param sampler the i915 sampler register
+ * \param coord the i915 source texcoord operand
+ * \param opcode the instruction opcode
+ */
uint i915_emit_texld( struct i915_fp_compile *p,
uint dest,
uint destmask,
uint sampler,
uint coord,
- uint op )
+ uint opcode )
{
- uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord));
+ const uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord));
+ int temp = -1;
+
if (coord != k) {
- /* No real way to work around this in the general case - need to
- * allocate and declare a new temporary register (a utemp won't
- * do). Will fallback for now.
+ /* texcoord is swizzled or negated. Need to allocate a new temporary
+ * register (a utemp / unpreserved temp won't do).
*/
- i915_program_error(p, "Can't (yet) swizzle TEX arguments");
- assert(0);
- return 0;
+ uint tempReg;
+
+ temp = i915_get_temp(p); /* get temp reg index */
+ tempReg = UREG(REG_TYPE_R, temp); /* make i915 register */
+
+ i915_emit_arith( p, A0_MOV,
+ tempReg, A0_DEST_CHANNEL_ALL, /* dest reg, writemask */
+ 0, /* saturate */
+ coord, 0, 0 ); /* src0, src1, src2 */
+
+ /* new src texcoord is tempReg */
+ coord = tempReg;
}
/* Don't worry about saturate as we only support
*/
if (destmask != A0_DEST_CHANNEL_ALL) {
+ /* if not writing to XYZW... */
uint tmp = i915_get_utemp(p);
- i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op );
+ i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode );
i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 );
- return dest;
+ /* XXX release utemp here? */
}
else {
assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)));
+ /* is the sampler coord a texcoord input reg? */
if (GET_UREG_TYPE(coord) != REG_TYPE_T) {
p->nr_tex_indirect++;
}
- *(p->csr++) = (op |
+ *(p->csr++) = (opcode |
T0_DEST( dest ) |
T0_SAMPLER( sampler ));
@@ -227,14 +257,19 @@ uint i915_emit_texld( struct i915_fp_compile *p,
*(p->csr++) = T2_MBZ;
p->nr_tex_insn++;
- return dest;
}
+
+ if (temp >= 0)
+ i915_release_temp(p, temp);
+
+ return dest;
}
uint
i915_emit_const1f(struct i915_fp_compile * p, float c0)
{
+ struct i915_fragment_shader *ifs = p->shader;
unsigned reg, idx;
if (c0 == 0.0)
@@ -243,15 +278,15 @@ i915_emit_const1f(struct i915_fp_compile * p, float c0)
return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE);
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
- if (p->constant_flags[reg] == I915_CONSTFLAG_PARAM)
+ if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER)
continue;
for (idx = 0; idx < 4; idx++) {
- if (!(p->constant_flags[reg] & (1 << idx)) ||
- p->constants[reg][idx] == c0) {
- p->constants[reg][idx] = c0;
- p->constant_flags[reg] |= 1 << idx;
- if (reg + 1 > p->num_constants)
- p->num_constants = reg + 1;
+ if (!(ifs->constant_flags[reg] & (1 << idx)) ||
+ ifs->constants[reg][idx] == c0) {
+ ifs->constants[reg][idx] = c0;
+ ifs->constant_flags[reg] |= 1 << idx;
+ if (reg + 1 > ifs->num_constants)
+ ifs->num_constants = reg + 1;
return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE);
}
}
@@ -264,6 +299,7 @@ i915_emit_const1f(struct i915_fp_compile * p, float c0)
uint
i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1)
{
+ struct i915_fragment_shader *ifs = p->shader;
unsigned reg, idx;
if (c0 == 0.0)
@@ -277,16 +313,16 @@ i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1)
return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W);
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
- if (p->constant_flags[reg] == 0xf ||
- p->constant_flags[reg] == I915_CONSTFLAG_PARAM)
+ if (ifs->constant_flags[reg] == 0xf ||
+ ifs->constant_flags[reg] == I915_CONSTFLAG_USER)
continue;
for (idx = 0; idx < 3; idx++) {
- if (!(p->constant_flags[reg] & (3 << idx))) {
- p->constants[reg][idx + 0] = c0;
- p->constants[reg][idx + 1] = c1;
- p->constant_flags[reg] |= 3 << idx;
- if (reg + 1 > p->num_constants)
- p->num_constants = reg + 1;
+ if (!(ifs->constant_flags[reg] & (3 << idx))) {
+ ifs->constants[reg][idx + 0] = c0;
+ ifs->constants[reg][idx + 1] = c1;
+ ifs->constant_flags[reg] |= 3 << idx;
+ if (reg + 1 > ifs->num_constants)
+ ifs->num_constants = reg + 1;
return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE);
}
}
@@ -302,25 +338,26 @@ uint
i915_emit_const4f(struct i915_fp_compile * p,
float c0, float c1, float c2, float c3)
{
+ struct i915_fragment_shader *ifs = p->shader;
unsigned reg;
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
- if (p->constant_flags[reg] == 0xf &&
- p->constants[reg][0] == c0 &&
- p->constants[reg][1] == c1 &&
- p->constants[reg][2] == c2 &&
- p->constants[reg][3] == c3) {
+ if (ifs->constant_flags[reg] == 0xf &&
+ ifs->constants[reg][0] == c0 &&
+ ifs->constants[reg][1] == c1 &&
+ ifs->constants[reg][2] == c2 &&
+ ifs->constants[reg][3] == c3) {
return UREG(REG_TYPE_CONST, reg);
}
- else if (p->constant_flags[reg] == 0) {
-
- p->constants[reg][0] = c0;
- p->constants[reg][1] = c1;
- p->constants[reg][2] = c2;
- p->constants[reg][3] = c3;
- p->constant_flags[reg] = 0xf;
- if (reg + 1 > p->num_constants)
- p->num_constants = reg + 1;
+ else if (ifs->constant_flags[reg] == 0) {
+
+ ifs->constants[reg][0] = c0;
+ ifs->constants[reg][1] = c1;
+ ifs->constants[reg][2] = c2;
+ ifs->constants[reg][3] = c3;
+ ifs->constant_flags[reg] = 0xf;
+ if (reg + 1 > ifs->num_constants)
+ ifs->num_constants = reg + 1;
return UREG(REG_TYPE_CONST, reg);
}
}
@@ -335,41 +372,3 @@ i915_emit_const4fv(struct i915_fp_compile * p, const float * c)
{
return i915_emit_const4f(p, c[0], c[1], c[2], c[3]);
}
-
-
-#if 00000/*UNUSED*/
-/* Reserve a slot in the constant file for a Mesa state parameter.
- * These will later need to be tracked on statechanges, but that is
- * done elsewhere.
- */
-uint
-i915_emit_param4fv(struct i915_fp_compile * p, const float * values)
-{
- struct i915_fragment_program *fp = p->fp;
- int i;
-
- for (i = 0; i < fp->nr_params; i++) {
- if (fp->param[i].values == values)
- return UREG(REG_TYPE_CONST, fp->param[i].reg);
- }
-
- if (p->constants->nr_constants == I915_MAX_CONSTANT ||
- fp->nr_params == I915_MAX_CONSTANT) {
- i915_program_error(p, "i915_emit_param4fv: out of constants\n");
- return 0;
- }
-
- {
- int reg = p->constants->nr_constants++;
- int i = fp->nr_params++;
-
- assert (p->constant_flags[reg] == 0);
- p->constant_flags[reg] = I915_CONSTFLAG_PARAM;
-
- fp->param[i].values = values;
- fp->param[i].reg = reg;
-
- return UREG(REG_TYPE_CONST, reg);
- }
-}
-#endif
diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c
index 6c1524c768..76a2184e9a 100644
--- a/src/gallium/drivers/i915simple/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c
@@ -34,6 +34,7 @@
#include "pipe/p_shader_tokens.h"
#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/util/tgsi_dump.h"
#include "draw/draw_vertex.h"
@@ -97,19 +98,19 @@ negate(int reg, int x, int y, int z, int w)
}
+/**
+ * In the event of a translation failure, we'll generate a simple color
+ * pass-through program.
+ */
static void
-i915_use_passthrough_shader(struct i915_context *i915)
+i915_use_passthrough_shader(struct i915_fragment_shader *fs)
{
- debug_printf("**** Using i915 pass-through fragment shader\n");
-
- i915->current.program = (uint *) MALLOC(sizeof(passthrough));
- if (i915->current.program) {
- memcpy(i915->current.program, passthrough, sizeof(passthrough));
- i915->current.program_len = Elements(passthrough);
+ fs->program = (uint *) MALLOC(sizeof(passthrough));
+ if (fs->program) {
+ memcpy(fs->program, passthrough, sizeof(passthrough));
+ fs->program_len = Elements(passthrough);
}
-
- i915->current.num_constants[PIPE_SHADER_FRAGMENT] = 0;
- i915->current.num_user_constants[PIPE_SHADER_FRAGMENT] = 0;
+ fs->num_constants = 0;
}
@@ -161,9 +162,6 @@ src_vector(struct i915_fp_compile *p,
* We also use a texture coordinate to pass wpos when possible.
*/
- /* use vertex format info to map a slot number to a VF attrib */
- assert(index < p->vertex_info->num_attribs);
-
sem_name = p->input_semantic_name[index];
sem_ind = p->input_semantic_index[index];
@@ -201,7 +199,8 @@ src_vector(struct i915_fp_compile *p,
break;
case TGSI_FILE_IMMEDIATE:
- /* XXX unfinished - need to append immediates onto const buffer */
+ assert(index < p->num_immediates);
+ index = p->immediates_map[index];
/* fall-through */
case TGSI_FILE_CONSTANT:
src = UREG(REG_TYPE_CONST, index);
@@ -386,6 +385,26 @@ emit_simple_arith(struct i915_fp_compile *p,
arg3 );
}
+
+/** Like emit_simple_arith(), but emits with src regs 0 and 1 exchanged. */
+static void
+emit_simple_arith_swap2(struct i915_fp_compile *p,
+ const struct tgsi_full_instruction *inst,
+ uint opcode, uint numArgs)
+{
+ struct tgsi_full_instruction inst2;
+
+ assert(numArgs == 2);
+
+ /* inst is const, so swap the two operands in a local copy (inst2) */
+ inst2 = *inst;
+ inst2.FullSrcRegisters[0] = inst->FullSrcRegisters[1];
+ inst2.FullSrcRegisters[1] = inst->FullSrcRegisters[0];
+
+ emit_simple_arith(p, &inst2, opcode, numArgs);
+}
+
+
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
@@ -556,8 +575,12 @@ i915_translate_instruction(struct i915_fp_compile *p,
src0 = src_vector(p, &inst->FullSrcRegisters[0]);
tmp = i915_get_utemp(p);
- i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */
- 0, src0, T0_TEXKILL);
+ i915_emit_texld(p,
+ tmp, /* dest reg: a dummy reg */
+ A0_DEST_CHANNEL_ALL, /* dest writemask */
+ 0, /* sampler */
+ src0, /* coord */
+ T0_TEXKILL); /* opcode */
break;
case TGSI_OPCODE_LG2:
@@ -773,6 +796,11 @@ i915_translate_instruction(struct i915_fp_compile *p,
emit_simple_arith(p, inst, A0_SGE, 2);
break;
+ case TGSI_OPCODE_SLE:
+ /* like SGE, but swap reg0, reg1 */
+ emit_simple_arith_swap2(p, inst, A0_SGE, 2);
+ break;
+
case TGSI_OPCODE_SIN:
src0 = src_vector(p, &inst->FullSrcRegisters[0]);
tmp = i915_get_utemp(p);
@@ -827,6 +855,11 @@ i915_translate_instruction(struct i915_fp_compile *p,
emit_simple_arith(p, inst, A0_SLT, 2);
break;
+ case TGSI_OPCODE_SGT:
+ /* like SLT, but swap reg0, reg1 */
+ emit_simple_arith_swap2(p, inst, A0_SLT, 2);
+ break;
+
case TGSI_OPCODE_SUB:
src0 = src_vector(p, &inst->FullSrcRegisters[0]);
src1 = src_vector(p, &inst->FullSrcRegisters[1]);
@@ -880,6 +913,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
default:
i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode);
+ p->error = 1;
return;
}
@@ -896,6 +930,7 @@ static void
i915_translate_instructions(struct i915_fp_compile *p,
const struct tgsi_token *tokens)
{
+ struct i915_fragment_shader *ifs = p->shader;
struct tgsi_parse_context parse;
tgsi_parse_init( &parse, tokens );
@@ -928,13 +963,64 @@ i915_translate_instructions(struct i915_fp_compile *p,
p->output_semantic_name[ind] = sem;
p->output_semantic_index[ind] = semi;
}
+ else if (parse.FullToken.FullDeclaration.Declaration.File
+ == TGSI_FILE_CONSTANT) {
+ uint i;
+ for (i = parse.FullToken.FullDeclaration.u.DeclarationRange.First;
+ i <= parse.FullToken.FullDeclaration.u.DeclarationRange.Last;
+ i++) {
+ assert(ifs->constant_flags[i] == 0x0);
+ ifs->constant_flags[i] = I915_CONSTFLAG_USER;
+ ifs->num_constants = MAX2(ifs->num_constants, i + 1);
+ }
+ }
+ else if (parse.FullToken.FullDeclaration.Declaration.File
+ == TGSI_FILE_TEMPORARY) {
+ uint i;
+ for (i = parse.FullToken.FullDeclaration.u.DeclarationRange.First;
+ i <= parse.FullToken.FullDeclaration.u.DeclarationRange.Last;
+ i++) {
+ assert(i < I915_MAX_TEMPORARY);
+ p->temp_flag |= (1 << i); /* mark temp as used */
+ }
+ }
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
- /* XXX append the immediate to the const buffer... */
+ {
+ const struct tgsi_full_immediate *imm
+ = &parse.FullToken.FullImmediate;
+ const uint pos = p->num_immediates++;
+ uint j;
+ for (j = 0; j < imm->Immediate.Size; j++) {
+ p->immediates[pos][j] = imm->u.ImmediateFloat32[j].Float;
+ }
+ }
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
+ if (p->first_instruction) {
+ /* resolve location of immediates */
+ uint i, j;
+ for (i = 0; i < p->num_immediates; i++) {
+ /* find constant slot for this immediate */
+ for (j = 0; j < I915_MAX_CONSTANT; j++) {
+ if (ifs->constant_flags[j] == 0x0) {
+ memcpy(ifs->constants[j],
+ p->immediates[i],
+ 4 * sizeof(float));
+ /*printf("immediate %d maps to const %d\n", i, j);*/
+ ifs->constant_flags[j] = 0xf; /* all four comps used */
+ p->immediates_map[i] = j;
+ ifs->num_constants = MAX2(ifs->num_constants, j + 1);
+ break;
+ }
+ }
+ }
+
+ p->first_instruction = FALSE;
+ }
+
i915_translate_instruction(p, &parse.FullToken.FullInstruction);
break;
@@ -950,32 +1036,33 @@ i915_translate_instructions(struct i915_fp_compile *p,
static struct i915_fp_compile *
i915_init_compile(struct i915_context *i915,
- const struct pipe_shader_state *fs)
+ struct i915_fragment_shader *ifs)
{
struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
- p->shader = i915->fs;
+ p->shader = ifs;
- p->vertex_info = &i915->current.vertex_info;
-
- /* new constants found during translation get appended after the
- * user-provided constants.
+ /* Start from an empty constant table. Slots named by the program's
+ * CONSTANT declarations are flagged I915_CONSTFLAG_USER; immediates and
+ * generated constants then take the lowest-numbered free slots.
+ * XXX we still don't know how many constants pipe->set_constant_buffer()
+ * may supply; should pre-scan for the highest-numbered one referenced.
*/
- p->constants = i915->current.constants[PIPE_SHADER_FRAGMENT];
- p->num_constants = i915->current.num_user_constants[PIPE_SHADER_FRAGMENT];
+ ifs->num_constants = 0;
+ memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));
+
+ p->first_instruction = TRUE;
p->nr_tex_indirect = 1; /* correct? */
p->nr_tex_insn = 0;
p->nr_alu_insn = 0;
p->nr_decl_insn = 0;
- memset(p->constant_flags, 0, sizeof(p->constant_flags));
-
p->csr = p->program;
p->decl = p->declarations;
p->decl_s = 0;
p->decl_t = 0;
- p->temp_flag = 0xffff000;
+ p->temp_flag = ~0x0 << I915_MAX_TEMPORARY;
p->utemp_flag = ~0x7;
p->wpos_tex = -1;
@@ -993,6 +1080,7 @@ i915_init_compile(struct i915_context *i915,
static void
i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
{
+ struct i915_fragment_shader *ifs = p->shader;
unsigned long program_size = (unsigned long) (p->csr - p->program);
unsigned long decl_size = (unsigned long) (p->decl - p->declarations);
@@ -1008,19 +1096,13 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
if (p->nr_decl_insn > I915_MAX_DECL_INSN)
i915_program_error(p, "Exceeded max DECL instructions");
- /* free old program, if present */
- if (i915->current.program) {
- FREE(i915->current.program);
- i915->current.program_len = 0;
- }
-
if (p->error) {
p->NumNativeInstructions = 0;
p->NumNativeAluInstructions = 0;
p->NumNativeTexInstructions = 0;
p->NumNativeTexIndirections = 0;
- i915_use_passthrough_shader(i915);
+ i915_use_passthrough_shader(ifs);
}
else {
p->NumNativeInstructions
@@ -1034,24 +1116,20 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
/* Copy compilation results to fragment program struct:
*/
- i915->current.program
+ assert(!ifs->program);
+ ifs->program
= (uint *) MALLOC((program_size + decl_size) * sizeof(uint));
- if (i915->current.program) {
- i915->current.program_len = program_size + decl_size;
+ if (ifs->program) {
+ ifs->program_len = program_size + decl_size;
- memcpy(i915->current.program,
+ memcpy(ifs->program,
p->declarations,
decl_size * sizeof(uint));
- memcpy(i915->current.program + decl_size,
+ memcpy(ifs->program + decl_size,
p->program,
program_size * sizeof(uint));
}
-
- /* update number of constants */
- i915->current.num_constants[PIPE_SHADER_FRAGMENT] = p->num_constants;
- assert(i915->current.num_constants[PIPE_SHADER_FRAGMENT]
- >= i915->current.num_user_constants[PIPE_SHADER_FRAGMENT]);
}
/* Release the compilation struct:
@@ -1085,7 +1163,7 @@ i915_find_wpos_space(struct i915_fp_compile *p)
i915_program_error(p, "No free texcoord for wpos value");
}
#else
- if (p->shader->input_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
+ if (p->shader->state.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
/* frag shader using the fragment position input */
#if 0
assert(0);
@@ -1106,7 +1184,7 @@ static void
i915_fixup_depth_write(struct i915_fp_compile *p)
{
/* XXX assuming pos/depth is always in output[0] */
- if (p->shader->output_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
+ if (p->shader->state.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
const uint depth = UREG(REG_TYPE_OD, 0);
i915_emit_arith(p,
@@ -1121,13 +1199,18 @@ i915_fixup_depth_write(struct i915_fp_compile *p)
void
-i915_translate_fragment_program( struct i915_context *i915 )
+i915_translate_fragment_program( struct i915_context *i915,
+ struct i915_fragment_shader *fs)
{
- struct i915_fp_compile *p = i915_init_compile(i915, i915->fs);
- const struct tgsi_token *tokens = i915->fs->tokens;
+ struct i915_fp_compile *p = i915_init_compile(i915, fs);
+ const struct tgsi_token *tokens = fs->state.tokens;
i915_find_wpos_space(p);
+#if 0
+ tgsi_dump(tokens, 0);
+#endif
+
i915_translate_instructions(p, tokens);
i915_fixup_depth_write(p);
diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c
index 44c4325936..d8de5178f6 100644
--- a/src/gallium/drivers/i915simple/i915_prim_emit.c
+++ b/src/gallium/drivers/i915simple/i915_prim_emit.c
@@ -72,38 +72,42 @@ emit_hw_vertex( struct i915_context *i915,
uint i;
uint count = 0; /* for debug/sanity */
+ assert(!i915->dirty);
+
for (i = 0; i < vinfo->num_attribs; i++) {
+ const uint j = vinfo->src_index[i];
+ const float *attrib = vertex->data[j];
switch (vinfo->emit[i]) {
case EMIT_OMIT:
/* no-op */
break;
case EMIT_1F:
- OUT_BATCH( fui(vertex->data[i][0]) );
+ OUT_BATCH( fui(attrib[0]) );
count++;
break;
case EMIT_2F:
- OUT_BATCH( fui(vertex->data[i][0]) );
- OUT_BATCH( fui(vertex->data[i][1]) );
+ OUT_BATCH( fui(attrib[0]) );
+ OUT_BATCH( fui(attrib[1]) );
count += 2;
break;
case EMIT_3F:
- OUT_BATCH( fui(vertex->data[i][0]) );
- OUT_BATCH( fui(vertex->data[i][1]) );
- OUT_BATCH( fui(vertex->data[i][2]) );
+ OUT_BATCH( fui(attrib[0]) );
+ OUT_BATCH( fui(attrib[1]) );
+ OUT_BATCH( fui(attrib[2]) );
count += 3;
break;
case EMIT_4F:
- OUT_BATCH( fui(vertex->data[i][0]) );
- OUT_BATCH( fui(vertex->data[i][1]) );
- OUT_BATCH( fui(vertex->data[i][2]) );
- OUT_BATCH( fui(vertex->data[i][3]) );
+ OUT_BATCH( fui(attrib[0]) );
+ OUT_BATCH( fui(attrib[1]) );
+ OUT_BATCH( fui(attrib[2]) );
+ OUT_BATCH( fui(attrib[3]) );
count += 4;
break;
case EMIT_4UB:
- OUT_BATCH( pack_ub4(float_to_ubyte( vertex->data[i][2] ),
- float_to_ubyte( vertex->data[i][1] ),
- float_to_ubyte( vertex->data[i][0] ),
- float_to_ubyte( vertex->data[i][3] )) );
+ OUT_BATCH( pack_ub4(float_to_ubyte( attrib[2] ),
+ float_to_ubyte( attrib[1] ),
+ float_to_ubyte( attrib[0] ),
+ float_to_ubyte( attrib[3] )) );
count += 1;
break;
default:
@@ -122,17 +126,19 @@ emit_prim( struct draw_stage *stage,
unsigned nr )
{
struct i915_context *i915 = setup_stage(stage)->i915;
- unsigned vertex_size = i915->current.vertex_info.size * 4; /* in bytes */
+ unsigned vertex_size;
unsigned i;
- assert(vertex_size >= 12); /* never smaller than 12 bytes */
-
if (i915->dirty)
i915_update_derived( i915 );
if (i915->hardware_dirty)
i915_emit_hardware_state( i915 );
+ /* need to do this after validation! */
+ vertex_size = i915->current.vertex_info.size * 4; /* in bytes */
+ assert(vertex_size >= 12); /* never smaller than 12 bytes */
+
if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) {
FLUSH_BATCH();
diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
index c5bf6174f6..9d5f609220 100644
--- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c
+++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
@@ -83,6 +83,12 @@ i915_vbuf_render_get_vertex_info( struct vbuf_render *render )
{
struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
struct i915_context *i915 = i915_render->i915;
+
+ if (i915->dirty) {
+ /* make sure we have up to date vertex layout */
+ i915_update_derived( i915 );
+ }
+
return &i915->current.vertex_info;
}
@@ -143,7 +149,8 @@ i915_vbuf_render_draw( struct vbuf_render *render,
assert(nr_indices);
- assert((i915->dirty & ~I915_NEW_VBO) == 0);
+ /* this seems to be bogus, since we validate state right after this */
+ /*assert((i915->dirty & ~I915_NEW_VBO) == 0);*/
if (i915->dirty)
i915_update_derived( i915 );
diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c
index e055eed7e0..a35bdf941f 100644
--- a/src/gallium/drivers/i915simple/i915_state.c
+++ b/src/gallium/drivers/i915simple/i915_state.c
@@ -38,6 +38,7 @@
#include "i915_reg.h"
#include "i915_state.h"
#include "i915_state_inlines.h"
+#include "i915_fpc.h"
/* The i915 (and related graphics cores) do not support GL_CLAMP. The
@@ -416,26 +417,47 @@ static void i915_set_polygon_stipple( struct pipe_context *pipe,
}
-static void * i915_create_fs_state(struct pipe_context *pipe,
- const struct pipe_shader_state *templ)
+
+static void *
+i915_create_fs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
{
- return 0;
+ struct i915_context *i915 = i915_context(pipe);
+ struct i915_fragment_shader *ifs = CALLOC_STRUCT(i915_fragment_shader);
+ if (!ifs)
+ return NULL;
+
+ ifs->state = *templ;
+
+ /* The shader's compiled to i915 instructions here */
+ i915_translate_fragment_program(i915, ifs);
+
+ return ifs;
}
-static void i915_bind_fs_state(struct pipe_context *pipe, void *fs)
+static void
+i915_bind_fs_state(struct pipe_context *pipe, void *shader)
{
struct i915_context *i915 = i915_context(pipe);
- i915->fs = (struct pipe_shader_state *)fs;
+ i915->fs = (struct i915_fragment_shader*) shader;
i915->dirty |= I915_NEW_FS;
}
-static void i915_delete_fs_state(struct pipe_context *pipe, void *shader)
+static
+void i915_delete_fs_state(struct pipe_context *pipe, void *shader)
{
- /*do nothing*/
+ struct i915_fragment_shader *ifs = (struct i915_fragment_shader *) shader;
+
+ if (ifs->program)
+ FREE(ifs->program);
+ ifs->program_len = 0;
+
+ FREE(ifs);
}
+
static void *
i915_create_vs_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ)
@@ -452,6 +474,8 @@ static void i915_bind_vs_state(struct pipe_context *pipe, void *shader)
/* just pass-through to draw module */
draw_bind_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader);
+
+ i915->dirty |= I915_NEW_VS;
}
static void i915_delete_vs_state(struct pipe_context *pipe, void *shader)
diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c
index 4767584fc6..5cf70acdf3 100644
--- a/src/gallium/drivers/i915simple/i915_state_derived.c
+++ b/src/gallium/drivers/i915simple/i915_state_derived.c
@@ -27,104 +27,111 @@
#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
#include "draw/draw_context.h"
#include "draw/draw_vertex.h"
#include "i915_context.h"
#include "i915_state.h"
#include "i915_reg.h"
#include "i915_fpc.h"
-#include "pipe/p_shader_tokens.h"
+
/**
- * Determine which post-transform / pre-rasterization vertex attributes
- * we need.
- * Derived from: fs, setup states.
+ * Determine the hardware vertex layout.
+ * Depends on vertex/fragment shader state.
*/
static void calculate_vertex_layout( struct i915_context *i915 )
{
- const struct pipe_shader_state *fs = i915->fs;
+ const struct pipe_shader_state *fs = &i915->fs->state;
const enum interp_mode colorInterp = i915->rasterizer->color_interp;
struct vertex_info vinfo;
- uint front0 = 0, back0 = 0, front1 = 0, back1 = 0;
- boolean needW = 0;
+ boolean texCoords[8], colors[2], fog, needW;
uint i;
- boolean texCoords[8];
- uint src = 0;
+ int src;
memset(texCoords, 0, sizeof(texCoords));
+ colors[0] = colors[1] = fog = needW = FALSE;
memset(&vinfo, 0, sizeof(vinfo));
- /* pos */
- draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src++);
- /* Note: we'll set the S4_VFMT_XYZ[W] bits below */
-
+ /* Determine which fragment program inputs are needed. Set up HW vertex
+ * layout below, in the HW-specific attribute order.
+ */
for (i = 0; i < fs->num_inputs; i++) {
switch (fs->input_semantic_name[i]) {
case TGSI_SEMANTIC_POSITION:
break;
case TGSI_SEMANTIC_COLOR:
- if (fs->input_semantic_index[i] == 0) {
- front0 = draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src++);
- vinfo.hwfmt[0] |= S4_VFMT_COLOR;
- }
- else {
- assert(fs->input_semantic_index[i] == 1);
- front1 = draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src++);
- vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG;
- }
+ assert(fs->input_semantic_index[i] < 2);
+ colors[fs->input_semantic_index[i]] = TRUE;
break;
case TGSI_SEMANTIC_GENERIC:
/* usually a texcoord */
{
const uint unit = fs->input_semantic_index[i];
- uint hwtc;
+ assert(unit < 8);
texCoords[unit] = TRUE;
- draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++);
- hwtc = TEXCOORDFMT_4D;
needW = TRUE;
- vinfo.hwfmt[1] |= hwtc << (unit * 4);
}
break;
case TGSI_SEMANTIC_FOG:
- debug_printf("i915 fogcoord not implemented yet\n");
- draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src++);
+ fog = TRUE;
break;
default:
assert(0);
}
-
}
- /* finish up texcoord fields */
- for (i = 0; i < 8; i++) {
- if (!texCoords[i]) {
- const uint hwtc = TEXCOORDFMT_NOT_PRESENT;
- vinfo.hwfmt[1] |= hwtc << (i* 4);
- }
- }
-
- /* go back and fill in the vertex position info now that we have needW */
+
+ /* pos */
+ src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_POSITION, 0);
if (needW) {
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src);
vinfo.hwfmt[0] |= S4_VFMT_XYZW;
vinfo.emit[0] = EMIT_4F;
}
else {
+ draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src);
vinfo.hwfmt[0] |= S4_VFMT_XYZ;
vinfo.emit[0] = EMIT_3F;
}
- /* Additional attributes required for setup: Just twosided
- * lighting. Edgeflag is dealt with specially by setting bits in
- * the vertex header.
- */
- if (i915->rasterizer->light_twoside) {
- if (front0) {
- back0 = draw_emit_vertex_attr(&vinfo, EMIT_OMIT, colorInterp, src++);
+ /* hardware point size */
+ /* XXX todo */
+
+ /* primary color */
+ if (colors[0]) {
+ src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 0);
+ draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src);
+ vinfo.hwfmt[0] |= S4_VFMT_COLOR;
+ }
+
+ /* secondary color */
+ if (colors[1]) {
+ src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 1);
+ draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src);
+ vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG;
+ }
+
+ /* fog coord, not fog blend factor */
+ if (fog) {
+ src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_FOG, 0);
+ draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
+ vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM;
+ }
+
+ /* texcoords */
+ for (i = 0; i < 8; i++) {
+ uint hwtc;
+ if (texCoords[i]) {
+ hwtc = TEXCOORDFMT_4D;
+ src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_GENERIC, i);
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
}
- if (back0) {
- back1 = draw_emit_vertex_attr(&vinfo, EMIT_OMIT, colorInterp, src++);
+ else {
+ hwtc = TEXCOORDFMT_NOT_PRESENT;
}
+ vinfo.hwfmt[1] |= hwtc << (i * 4);
}
draw_compute_vertex_size(&vinfo);
@@ -148,7 +155,7 @@ static void calculate_vertex_layout( struct i915_context *i915 )
*/
void i915_update_derived( struct i915_context *i915 )
{
- if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS))
+ if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS))
calculate_vertex_layout( i915 );
if (i915->dirty & (I915_NEW_SAMPLER | I915_NEW_TEXTURE))
@@ -164,7 +171,6 @@ void i915_update_derived( struct i915_context *i915 )
i915_update_dynamic( i915 );
if (i915->dirty & I915_NEW_FS) {
- i915_translate_fragment_program(i915);
i915->hardware_dirty |= I915_HW_PROGRAM; /* XXX right? */
}
diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c
index 3339287f49..6bbaac4e34 100644
--- a/src/gallium/drivers/i915simple/i915_state_emit.c
+++ b/src/gallium/drivers/i915simple/i915_state_emit.c
@@ -99,7 +99,11 @@ i915_emit_hardware_state(struct i915_context *i915 )
2 + I915_TEX_UNITS*3 +
2 + I915_TEX_UNITS*3 +
2 + I915_MAX_CONSTANT*4 +
+#if 0
i915->current.program_len +
+#else
+ i915->fs->program_len +
+#endif
6
) * 3/2; /* plus 50% margin */
const unsigned relocs = ( I915_TEX_UNITS +
@@ -325,15 +329,34 @@ i915_emit_hardware_state(struct i915_context *i915 )
/* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */
if (i915->hardware_dirty & I915_HW_PROGRAM)
{
- const uint nr = i915->current.num_constants[PIPE_SHADER_FRAGMENT];
- assert(nr <= I915_MAX_CONSTANT);
- if (nr > 0) {
- const uint *c
- = (const uint *) i915->current.constants[PIPE_SHADER_FRAGMENT];
+ /* Collate the user-defined constants with the fragment shader's
+ * immediates according to the constant_flags[] array.
+ */
+ const uint nr = i915->fs->num_constants;
+ if (nr) {
uint i;
+
OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) );
OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) );
+
for (i = 0; i < nr; i++) {
+ const uint *c;
+ if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) {
+ /* grab user-defined constant */
+ c = (uint *) i915->current.constants[PIPE_SHADER_FRAGMENT][i];
+ }
+ else {
+ /* emit program constant */
+ c = (uint *) i915->fs->constants[i];
+ }
+#if 0 /* debug */
+ {
+ float *f = (float *) c;
+ printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3],
+ (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER
+ ? "user" : "immediate"));
+ }
+#endif
OUT_BATCH(*c++);
OUT_BATCH(*c++);
OUT_BATCH(*c++);
@@ -348,9 +371,9 @@ i915_emit_hardware_state(struct i915_context *i915 )
{
uint i;
/* we should always have, at least, a pass-through program */
- assert(i915->current.program_len > 0);
- for (i = 0; i < i915->current.program_len; i++) {
- OUT_BATCH(i915->current.program[i]);
+ assert(i915->fs->program_len > 0);
+ for (i = 0; i < i915->fs->program_len; i++) {
+ OUT_BATCH(i915->fs->program[i]);
}
}
diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript
index d581ee8d3c..4c1a6d5df0 100644
--- a/src/gallium/drivers/softpipe/SConscript
+++ b/src/gallium/drivers/softpipe/SConscript
@@ -5,6 +5,9 @@ env = env.Clone()
softpipe = env.ConvenienceLibrary(
target = 'softpipe',
source = [
+ 'sp_fs_exec.c',
+ 'sp_fs_sse.c',
+ 'sp_fs_llvm.c',
'sp_clear.c',
'sp_context.c',
'sp_draw_arrays.c',
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index 8cb0534342..d5bd7a702f 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -81,7 +81,7 @@ sp_setup_pos_vector(const struct tgsi_interp_coef *coef,
static void
-exec_prepare( struct sp_fragment_shader *base,
+exec_prepare( const struct sp_fragment_shader *base,
struct tgsi_exec_machine *machine,
struct tgsi_sampler *samplers )
{
@@ -98,7 +98,7 @@ exec_prepare( struct sp_fragment_shader *base,
* interface:
*/
static unsigned
-exec_run( struct sp_fragment_shader *base,
+exec_run( const struct sp_fragment_shader *base,
struct tgsi_exec_machine *machine,
struct quad_header *quad )
{
diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c
index 22da471453..34b2b7d4e2 100644
--- a/src/gallium/drivers/softpipe/sp_fs_llvm.c
+++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c
@@ -146,7 +146,7 @@ shade_quad_llvm(struct quad_stage *qs,
unsigned
-run_llvm_fs( struct sp_fragment_shader *base,
+run_llvm_fs( const struct sp_fragment_shader *base,
struct foo *machine )
{
}
diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c
index 7b1e131ee1..b6a3fddb29 100644
--- a/src/gallium/drivers/softpipe/sp_prim_setup.c
+++ b/src/gallium/drivers/softpipe/sp_prim_setup.c
@@ -1165,6 +1165,10 @@ static void setup_begin( struct draw_stage *stage )
struct softpipe_context *sp = setup->softpipe;
const struct pipe_shader_state *fs = &setup->softpipe->fs->shader;
+ if (sp->dirty) {
+ softpipe_update_derived(sp);
+ }
+
setup->quad.nr_attrs = fs->num_inputs;
sp->quad.first->begin(sp->quad.first);
diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c
index 15b5594547..142dbcc771 100644
--- a/src/gallium/drivers/softpipe/sp_quad.c
+++ b/src/gallium/drivers/softpipe/sp_quad.c
@@ -56,11 +56,12 @@ sp_build_depth_stencil(
void
sp_build_quad_pipeline(struct softpipe_context *sp)
{
- boolean early_depth_test =
+ boolean early_depth_test =
sp->depth_stencil->depth.enabled &&
sp->framebuffer.zsbuf &&
!sp->depth_stencil->alpha.enabled &&
- sp->fs->shader.output_semantic_name[0] != TGSI_SEMANTIC_POSITION;
+ !sp->fs->uses_kill &&
+ !sp->fs->writes_z;
/* build up the pipeline in reverse order... */
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index ef8cf67d4c..5aaa9e346b 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -63,14 +63,17 @@ struct tgsi_exec_machine;
struct sp_fragment_shader {
struct pipe_shader_state shader;
- void (*prepare)( struct sp_fragment_shader *shader,
+ boolean uses_kill;
+ boolean writes_z;
+
+ void (*prepare)( const struct sp_fragment_shader *shader,
struct tgsi_exec_machine *machine,
struct tgsi_sampler *samplers);
/* Run the shader - this interface will get cleaned up in the
* future:
*/
- unsigned (*run)( struct sp_fragment_shader *shader,
+ unsigned (*run)( const struct sp_fragment_shader *shader,
struct tgsi_exec_machine *machine,
struct quad_header *quad );
diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c
index f9f2c5eaa8..4c6313001f 100644
--- a/src/gallium/drivers/softpipe/sp_state_derived.c
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -35,33 +35,6 @@
/**
- * Search vertex program's outputs to find a match for the given
- * semantic name/index. Return the index of the output slot.
- *
- * Return 0 if not found. This will cause the fragment program to use
- * vertex attrib 0 (position) in the cases where the fragment program
- * attempts to use a missing vertex program output. This is an undefined
- * condition that users shouldn't hit anyway.
- */
-static int
-find_vs_output(struct softpipe_context *sp,
- const struct pipe_shader_state *vs,
- uint semantic_name,
- uint semantic_index)
-{
- uint i;
- for (i = 0; i < vs->num_outputs; i++) {
- if (vs->output_semantic_name[i] == semantic_name &&
- vs->output_semantic_index[i] == semantic_index)
- return i;
- }
-
- /* See if the draw module is introducing a new attribute... */
- return draw_find_vs_output(sp->draw, semantic_name, semantic_index);
-}
-
-
-/**
* Mark the current vertex layout as "invalid".
* We'll validate the vertex layout later, when we start to actually
* render a point or line or tri.
@@ -114,24 +87,25 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
int src;
switch (fs->input_semantic_name[i]) {
case TGSI_SEMANTIC_POSITION:
- src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_POSITION, 0);
+ src = draw_find_vs_output(softpipe->draw,
+ TGSI_SEMANTIC_POSITION, 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src);
break;
case TGSI_SEMANTIC_COLOR:
- src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_COLOR,
+ src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_COLOR,
fs->input_semantic_index[i]);
draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
break;
case TGSI_SEMANTIC_FOG:
- src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_FOG, 0);
+ src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_FOG, 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
break;
case TGSI_SEMANTIC_GENERIC:
/* this includes texcoords and varying vars */
- src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_GENERIC,
+ src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_GENERIC,
fs->input_semantic_index[i]);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
break;
@@ -141,7 +115,8 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
}
}
- softpipe->psize_slot = find_vs_output(softpipe, vs, TGSI_SEMANTIC_PSIZE, 0);
+ softpipe->psize_slot = draw_find_vs_output(softpipe->draw,
+ TGSI_SEMANTIC_PSIZE, 0);
if (softpipe->psize_slot > 0) {
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT,
softpipe->psize_slot);
diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c
index b0238f8173..b184ac61bb 100644
--- a/src/gallium/drivers/softpipe/sp_state_fs.c
+++ b/src/gallium/drivers/softpipe/sp_state_fs.c
@@ -36,6 +36,7 @@
#include "pipe/p_shader_tokens.h"
#include "draw/draw_context.h"
#include "tgsi/util/tgsi_dump.h"
+#include "tgsi/util/tgsi_scan.h"
void *
@@ -44,21 +45,24 @@ softpipe_create_fs_state(struct pipe_context *pipe,
{
struct softpipe_context *softpipe = softpipe_context(pipe);
struct sp_fragment_shader *state;
+ struct tgsi_shader_info info;
+
+ tgsi_scan_shader(templ->tokens, &info);
if (softpipe->dump_fs)
tgsi_dump(templ->tokens, 0);
state = softpipe_create_fs_llvm( softpipe, templ );
- if (state)
- return state;
-
- state = softpipe_create_fs_sse( softpipe, templ );
- if (state)
- return state;
-
- state = softpipe_create_fs_exec( softpipe, templ );
-
+ if (!state) {
+ state = softpipe_create_fs_sse( softpipe, templ );
+ if (!state) {
+ state = softpipe_create_fs_exec( softpipe, templ );
+ }
+ }
assert(state);
+ state->uses_kill = (info.opcode_count[TGSI_OPCODE_KIL] ||
+ info.opcode_count[TGSI_OPCODE_KILP]);
+ state->writes_z = info.writes_z;
return state;
}
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 43d5085895..0ced585c7f 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -343,7 +343,7 @@ nearest_texcoord_unnorm(unsigned wrapMode, float s, unsigned size)
switch (wrapMode) {
case PIPE_TEX_WRAP_CLAMP:
i = ifloor(s);
- return CLAMP(i, 0, size-1);
+ return CLAMP(i, 0, (int) size-1);
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
/* fall-through */
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
@@ -366,7 +366,7 @@ linear_texcoord_unnorm(unsigned wrapMode, float s, unsigned size,
switch (wrapMode) {
case PIPE_TEX_WRAP_CLAMP:
/* Not exactly what the spec says, but it matches NVIDIA output */
- s = CLAMP(s - 0.5F, 0.0, (float) size - 1.0);
+ s = CLAMP(s - 0.5F, 0.0f, (float) size - 1.0f);
*i0 = ifloor(s);
*i1 = *i0 + 1;
break;
@@ -377,7 +377,7 @@ linear_texcoord_unnorm(unsigned wrapMode, float s, unsigned size,
s -= 0.5F;
*i0 = ifloor(s);
*i1 = *i0 + 1;
- if (*i1 > size - 1)
+ if (*i1 > (int) size - 1)
*i1 = size - 1;
break;
default:
diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h
index 30cd729c56..91f3d2ac2d 100644
--- a/src/gallium/include/pipe/p_compiler.h
+++ b/src/gallium/include/pipe/p_compiler.h
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -42,6 +42,14 @@
#endif
+#if defined(__MSC__)
+
+/* Avoid 'expression is always true' warning */
+#pragma warning(disable: 4296)
+
+#endif /* __MSC__ */
+
+
typedef unsigned int uint;
typedef unsigned char ubyte;
typedef unsigned char boolean;
@@ -61,8 +69,10 @@ typedef long long int64_t;
typedef unsigned long long uint64_t;
#if defined(_WIN64)
+typedef __int64 intptr_t;
typedef unsigned __int64 uintptr_t;
#else
+typedef int intptr_t;
typedef unsigned int uintptr_t;
#endif
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index 036c4c8964..f69b52f5e3 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -31,6 +31,11 @@
#include "p_state.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
struct pipe_state_cache;
/* Opaque driver handles:
@@ -226,4 +231,9 @@ struct pipe_context {
unsigned flags );
};
+
+#ifdef __cplusplus
+}
+#endif
+
#endif /* PIPE_CONTEXT_H */
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 0bf53ecb79..d84ddbc27a 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -30,6 +30,10 @@
#include "p_format.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define PIPE_BLENDFACTOR_ONE 0x1
#define PIPE_BLENDFACTOR_SRC_COLOR 0x2
#define PIPE_BLENDFACTOR_SRC_ALPHA 0x3
@@ -267,4 +271,8 @@ enum pipe_texture_target {
#define PIPE_CAP_MAX_TEXTURE_LOD_BIAS 19
#define PIPE_CAP_BITMAP_TEXCOORD_BIAS 20
+#ifdef __cplusplus
+}
+#endif
+
#endif
diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h
index c9ad324315..561d2e5921 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -33,6 +33,10 @@
#include "p_compiler.h"
#include "p_debug.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/**
* The PIPE_FORMAT is a 32-bit wide bitfield that encodes all the information
* needed to uniquely describe a pixel format.
@@ -418,4 +422,8 @@ static INLINE uint pf_get_size( enum pipe_format format ) {
return pf_get_bits(format) / 8;
}
+#ifdef __cplusplus
+}
+#endif
+
#endif
diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h
index ebf6ed86bc..de3fa555c5 100644
--- a/src/gallium/include/pipe/p_inlines.h
+++ b/src/gallium/include/pipe/p_inlines.h
@@ -33,6 +33,11 @@
#include "p_winsys.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
static INLINE void *
pipe_surface_map(struct pipe_surface *surface)
{
@@ -109,4 +114,8 @@ pipe_texture_reference(struct pipe_context *pipe, struct pipe_texture **ptr,
}
+#ifdef __cplusplus
+}
+#endif
+
#endif /* P_INLINES_H */
diff --git a/src/gallium/include/pipe/p_pointer.h b/src/gallium/include/pipe/p_pointer.h
new file mode 100644
index 0000000000..3a1e6be88e
--- /dev/null
+++ b/src/gallium/include/pipe/p_pointer.h
@@ -0,0 +1,95 @@
+/**************************************************************************
+ *
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef P_POINTER_H
+#define P_POINTER_H
+
+#include "p_compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static INLINE intptr_t
+pointer_to_intptr( const void *p )
+{
+ union {
+ const void *p;
+ intptr_t i;
+ } pi;
+ pi.p = p;
+ return pi.i;
+}
+
+static INLINE void *
+intptr_to_pointer( intptr_t i )
+{
+ union {
+ void *p;
+ intptr_t i;
+ } pi;
+ pi.i = i;
+ return pi.p;
+}
+
+static INLINE uintptr_t
+pointer_to_uintptr( const void *ptr )
+{
+ union {
+ const void *p;
+ uintptr_t u;
+ } pu;
+ pu.p = ptr;
+ return pu.u;
+}
+
+static INLINE void *
+uintptr_to_pointer( uintptr_t u )
+{
+ union {
+ void *p;
+ uintptr_t u;
+ } pu;
+ pu.u = u;
+ return pu.p;
+}
+
+/**
+ * Return the pointer rounded up to the next multiple of alignment bytes (alignment must be a power of two).
+ */
+static INLINE void *
+align_pointer( const void *unaligned, uintptr_t alignment )
+{
+ uintptr_t aligned = (pointer_to_uintptr( unaligned ) + alignment - 1) & ~(alignment - 1);
+ return uintptr_to_pointer( aligned );
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* P_POINTER_H */
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 10c47e0ef0..1806877f6c 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -50,6 +50,8 @@ struct tgsi_token
#define TGSI_FILE_SAMPLER 5
#define TGSI_FILE_ADDRESS 6
#define TGSI_FILE_IMMEDIATE 7
+#define TGSI_FILE_COUNT 8 /**< how many TGSI_FILE_ types */
+
#define TGSI_DECLARE_RANGE 0
#define TGSI_DECLARE_MASK 1
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index 47fa78c31d..15c88881eb 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -42,6 +42,12 @@
#include "p_defines.h"
#include "p_format.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
/**
* Implementation limits
*/
@@ -326,4 +332,8 @@ struct pipe_vertex_element
};
+#ifdef __cplusplus
+}
+#endif
+
#endif
diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h
index d7da2801c9..3b32ba1d24 100644
--- a/src/gallium/include/pipe/p_util.h
+++ b/src/gallium/include/pipe/p_util.h
@@ -30,16 +30,17 @@
#include "p_compiler.h"
#include "p_debug.h"
+#include "p_pointer.h"
#include <math.h>
-#ifdef WIN32
-
#ifdef __cplusplus
-extern "C"
-{
+extern "C" {
#endif
+
+#ifdef WIN32
+
void * __stdcall
EngAllocMem(
unsigned long Flags,
@@ -50,10 +51,6 @@ void __stdcall
EngFreeMem(
void *Mem );
-#ifdef __cplusplus
-}
-#endif
-
static INLINE void *
MALLOC( unsigned size )
{
@@ -115,33 +112,6 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size )
/**
- * Return a pointer aligned to next multiple of N bytes.
- */
-static INLINE void *
-align_pointer( void *unaligned, uint alignment )
-{
- if (sizeof(void *) == 64) {
- union {
- void *p;
- uint64 u;
- } pu;
- pu.p = unaligned;
- pu.u = (pu.u + alignment - 1) & ~(uint64) (alignment - 1);
- return pu.p;
- }
- else {
- /* 32-bit pointers */
- union {
- void *p;
- uint u;
- } pu;
- pu.p = unaligned;
- pu.u = (pu.u + alignment - 1) & ~(alignment - 1);
- return pu.p;
- }
-}
-
-/**
* Return memory on given byte alignment
*/
static INLINE void *
@@ -405,4 +375,8 @@ extern void pipe_copy_rect(ubyte * dst, unsigned cpp, unsigned dst_pitch,
int src_pitch, unsigned src_x, int src_y);
+#ifdef __cplusplus
+}
+#endif
+
#endif
diff --git a/src/gallium/include/pipe/p_winsys.h b/src/gallium/include/pipe/p_winsys.h
index 1e81eebd78..e784c92491 100644
--- a/src/gallium/include/pipe/p_winsys.h
+++ b/src/gallium/include/pipe/p_winsys.h
@@ -25,12 +25,6 @@
*
**************************************************************************/
-#ifndef P_WINSYS_H
-#define P_WINSYS_H
-
-
-#include "p_format.h"
-
/**
* \file
* This is the interface that Gallium3D requires any window system
@@ -38,6 +32,17 @@
* which is public.
*/
+#ifndef P_WINSYS_H
+#define P_WINSYS_H
+
+
+#include "p_format.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/** Opaque type */
struct pipe_fence_handle;
@@ -156,5 +161,8 @@ struct pipe_winsys
};
+#ifdef __cplusplus
+}
+#endif
#endif /* P_WINSYS_H */
diff --git a/src/gallium/winsys/SConscript b/src/gallium/winsys/SConscript
index 32215d8d58..635a68eea2 100644
--- a/src/gallium/winsys/SConscript
+++ b/src/gallium/winsys/SConscript
@@ -4,7 +4,8 @@ if dri:
SConscript([
'dri/SConscript',
])
-else:
+
+if 'xlib' in env['drivers'] and not dri:
SConscript([
'xlib/SConscript',
])
diff --git a/src/gallium/winsys/dri/intel/intel_batchbuffer.c b/src/gallium/winsys/dri/intel/intel_batchbuffer.c
index 49e04d81ec..5830b88b37 100644
--- a/src/gallium/winsys/dri/intel/intel_batchbuffer.c
+++ b/src/gallium/winsys/dri/intel/intel_batchbuffer.c
@@ -26,6 +26,7 @@
**************************************************************************/
#include <errno.h>
+#include <stdio.h>
#include "intel_batchbuffer.h"
#include "intel_context.h"
#include "intel_screen.h"
diff --git a/src/gallium/winsys/dri/intel/intel_batchbuffer.h b/src/gallium/winsys/dri/intel/intel_batchbuffer.h
index 82feafa21f..caf6870a3c 100644
--- a/src/gallium/winsys/dri/intel/intel_batchbuffer.h
+++ b/src/gallium/winsys/dri/intel/intel_batchbuffer.h
@@ -28,6 +28,7 @@
#ifndef INTEL_BATCHBUFFER_H
#define INTEL_BATCHBUFFER_H
+#include "pipe/p_debug.h"
#include "pipe/p_compiler.h"
#include "dri_bufmgr.h"
diff --git a/src/gallium/winsys/dri/intel/intel_context.c b/src/gallium/winsys/dri/intel/intel_context.c
index c033f2a592..79b320c6bf 100644
--- a/src/gallium/winsys/dri/intel/intel_context.c
+++ b/src/gallium/winsys/dri/intel/intel_context.c
@@ -188,7 +188,8 @@ intelCreateContext(const __GLcontextModes * visual,
/*
* Pipe-related setup
*/
- if (!getenv("INTEL_HW")) {
+ if (getenv("INTEL_SP")) {
+ /* use softpipe driver instead of hw */
pipe = intel_create_softpipe( intel, intelScreen->winsys );
}
else {