diff options
Diffstat (limited to 'src/gallium')
276 files changed, 13243 insertions, 4371 deletions
diff --git a/src/gallium/Makefile b/src/gallium/Makefile index 89e068a449..568747e157 100644 --- a/src/gallium/Makefile +++ b/src/gallium/Makefile @@ -18,3 +18,4 @@ subdirs: clean: rm -f `find . -name \*.[oa]` + rm -f `find . -name depend` diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 83b25f9b47..4e462b5c97 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -16,10 +16,8 @@ OBJECTS = $(C_SOURCES:.c=.o) \ INCLUDES = \ -I. \ -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/include/pipe \ -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers \ - -I$(TOP)/src/mesa \ -I$(TOP)/include \ $(DRIVER_INCLUDES) diff --git a/src/gallium/SConscript b/src/gallium/SConscript index d9c20e0100..fa4833cbcf 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -1,9 +1,26 @@ +import os + Import('*') -#env = env.Clone() +env = env.Clone() + +auxiliaries = [] + +Export('auxiliaries') + SConscript([ - 'softpipe/SConscript', - 'i915simple/SConscript', - 'i965simple/SConscript', + # NOTE: order matters! + 'auxiliary/util/SConscript', + 'auxiliary/rtasm/SConscript', + 'auxiliary/tgsi/SConscript', + 'auxiliary/cso_cache/SConscript', + 'auxiliary/draw/SConscript', + 'auxiliary/pipebuffer/SConscript', ]) + +if llvm: + SConscript(['auxiliary/gallivm/SConscript']) + +for driver in env['drivers']: + SConscript(os.path.join('drivers', driver, 'SConscript')) diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index da68498aa1..eaa0f2fe4e 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -2,11 +2,7 @@ TOP = ../../.. include $(TOP)/configs/current -ifeq ($(CONFIG_NAME), linux-llvm) -LLVM_DIR = llvm -endif - -SUBDIRS = pipebuffer $(LLVM_DIR) +SUBDIRS = $(GALLIUM_AUXILIARY_DIRS) default: subdirs diff --git a/src/gallium/auxiliary/cso_cache/Makefile b/src/gallium/auxiliary/cso_cache/Makefile new file mode 100644 index 0000000000..3e49266163 --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/Makefile @@ -0,0 +1,13 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = cso_cache + +C_SOURCES = \ + cso_cache.c \ + cso_hash.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/cso_cache/SConscript b/src/gallium/auxiliary/cso_cache/SConscript new file mode 100644 index 0000000000..9751881613 --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/SConscript @@ -0,0 +1,10 @@ +Import('*') + +cso_cache = env.ConvenienceLibrary( + target = 'cso_cache', + source = [ + 'cso_cache.c', + 'cso_hash.c', + ]) + +auxiliaries.insert(0, cso_cache) diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index 9e77e0774d..b427b509f8 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -28,9 +28,22 @@ /* Authors: Zack Rusin <zack@tungstengraphics.com> */ +#include "pipe/p_util.h" + #include "cso_cache.h" #include "cso_hash.h" + +struct cso_cache { + struct cso_hash *blend_hash; + struct cso_hash *depth_stencil_hash; + struct cso_hash *fs_hash; + struct cso_hash *vs_hash; + struct cso_hash *rasterizer_hash; + struct cso_hash *sampler_hash; + int max_size; +}; + #if 1 static unsigned hash_key(const void *key, unsigned key_size) { @@ -114,12 +127,106 @@ static int _cso_size_for_type(enum cso_cache_type type) return 0; } + +static void delete_blend_state(void *state, void *data) +{ + struct cso_blend *cso = (struct cso_blend *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + +static void delete_depth_stencil_state(void *state, void *data) +{ + struct cso_depth_stencil_alpha *cso = (struct cso_depth_stencil_alpha *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + +static void delete_sampler_state(void *state, void *data) +{ + struct cso_sampler *cso = (struct cso_sampler *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + +static void delete_rasterizer_state(void *state, void *data) +{ + struct cso_rasterizer *cso = (struct cso_rasterizer *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + +static void delete_fs_state(void *state, void *data) +{ + struct cso_fragment_shader *cso = (struct cso_fragment_shader *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + +static void delete_vs_state(void *state, void *data) +{ + struct cso_vertex_shader *cso = (struct cso_vertex_shader *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + + +static INLINE void delete_cso(void *state, enum cso_cache_type type) +{ + switch (type) { + case CSO_BLEND: { + delete_blend_state(state, 0); + } + break; + case CSO_SAMPLER: { + delete_sampler_state(state, 0); + } + break; + case CSO_DEPTH_STENCIL_ALPHA: { + delete_depth_stencil_state(state, 0); + } + break; + case CSO_RASTERIZER: { + delete_rasterizer_state(state, 0); + } + break; + case CSO_FRAGMENT_SHADER: { + delete_fs_state(state, 0); + } + break; + case CSO_VERTEX_SHADER: { + delete_vs_state(state, 0); + } + break; + } + FREE(state); +} + +static INLINE void sanitize_hash(struct cso_hash *hash, enum cso_cache_type type, + int max_size) +{ + /* if we're approach the maximum size, remove fourth of the entries + * otherwise every subsequent call will go through the same */ + int max_entries = (max_size > cso_hash_size(hash)) ? max_size : cso_hash_size(hash); + int to_remove = (max_size < max_entries) * max_entries/4; + while (to_remove) { + /*remove elements until we're good */ + /*fixme: currently we pick the nodes to remove at random*/ + struct cso_hash_iter iter = cso_hash_first_node(hash); + void *cso = cso_hash_take(hash, cso_hash_iter_key(iter)); + delete_cso(cso, type); + --to_remove; + } +} + struct cso_hash_iter cso_insert_state(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type, void *state) { struct cso_hash *hash = _cso_hash_for_type(sc, type); + sanitize_hash(hash, type, sc->max_size); + return cso_hash_insert(hash, hash_key, state); } @@ -132,6 +239,26 @@ cso_find_state(struct cso_cache *sc, return cso_hash_find(hash, hash_key); } + +void *cso_hash_find_data_from_template( struct cso_hash *hash, + unsigned hash_key, + void *templ, + int size ) +{ + struct cso_hash_iter iter = cso_hash_find(hash, hash_key); + while (!cso_hash_iter_is_null(iter)) { + void *iter_data = cso_hash_iter_data(iter); + if (!memcmp(iter_data, templ, size)) { + /* Return the payload: + */ + return (unsigned char *)iter_data + size; + } + iter = cso_hash_iter_next(iter); + } + return NULL; +} + + struct cso_hash_iter cso_find_state_template(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type, void *templ) @@ -156,8 +283,9 @@ void * cso_take_state(struct cso_cache *sc, struct cso_cache *cso_cache_create(void) { - struct cso_cache *sc = malloc(sizeof(struct cso_cache)); + struct cso_cache *sc = MALLOC_STRUCT(cso_cache); + sc->max_size = 4096; sc->blend_hash = cso_hash_create(); sc->sampler_hash = cso_hash_create(); sc->depth_stencil_hash = cso_hash_create(); @@ -168,14 +296,78 @@ struct cso_cache *cso_cache_create(void) return sc; } +void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, + cso_state_callback func, void *user_data) +{ + struct cso_hash *hash = 0; + struct cso_hash_iter iter; + + switch (type) { + case CSO_BLEND: + hash = sc->blend_hash; + break; + case CSO_SAMPLER: + hash = sc->sampler_hash; + break; + case CSO_DEPTH_STENCIL_ALPHA: + hash = sc->depth_stencil_hash; + break; + case CSO_RASTERIZER: + hash = sc->rasterizer_hash; + break; + case CSO_FRAGMENT_SHADER: + hash = sc->fs_hash; + break; + case CSO_VERTEX_SHADER: + hash = sc->vs_hash; + break; + } + + iter = cso_hash_first_node(hash); + while (!cso_hash_iter_is_null(iter)) { + void *state = cso_hash_iter_data(iter); + if (state) { + func(state, user_data); + } + iter = cso_hash_iter_next(iter); + } +} + void cso_cache_delete(struct cso_cache *sc) { assert(sc); + /* delete driver data */ + cso_for_each_state(sc, CSO_BLEND, delete_blend_state, 0); + cso_for_each_state(sc, CSO_DEPTH_STENCIL_ALPHA, delete_depth_stencil_state, 0); + cso_for_each_state(sc, CSO_FRAGMENT_SHADER, delete_fs_state, 0); + cso_for_each_state(sc, CSO_VERTEX_SHADER, delete_vs_state, 0); + cso_for_each_state(sc, CSO_RASTERIZER, delete_rasterizer_state, 0); + cso_for_each_state(sc, CSO_SAMPLER, delete_sampler_state, 0); + cso_hash_delete(sc->blend_hash); cso_hash_delete(sc->sampler_hash); cso_hash_delete(sc->depth_stencil_hash); cso_hash_delete(sc->rasterizer_hash); cso_hash_delete(sc->fs_hash); cso_hash_delete(sc->vs_hash); - free(sc); + FREE(sc); } + +void cso_set_maximum_cache_size(struct cso_cache *sc, int number) +{ + sc->max_size = number; + + sanitize_hash(sc->blend_hash, CSO_BLEND, sc->max_size); + sanitize_hash(sc->depth_stencil_hash, CSO_DEPTH_STENCIL_ALPHA, + sc->max_size); + sanitize_hash(sc->fs_hash, CSO_FRAGMENT_SHADER, sc->max_size); + sanitize_hash(sc->vs_hash, CSO_VERTEX_SHADER, sc->max_size); + sanitize_hash(sc->rasterizer_hash, CSO_RASTERIZER, sc->max_size); + sanitize_hash(sc->sampler_hash, CSO_SAMPLER, sc->max_size); +} + +int cso_maximum_cache_size(const struct cso_cache *sc) +{ + return sc->max_size; +} + diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h index 116e2eaa2c..44ee128a4a 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.h +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,9 +25,48 @@ * **************************************************************************/ - /* - * Authors: - * Zack Rusin <zack@tungstengraphics.com> + /** + * @file + * Constant State Object (CSO) cache. + * + * The basic idea is that the states are created via the + * create_state/bind_state/delete_state semantics. The driver is expected to + * perform as much of the Gallium state translation to whatever its internal + * representation is during the create call. Gallium then has a caching + * mechanism where it stores the created states. When the pipeline needs an + * actual state change, a bind call is issued. In the bind call the driver + * gets its already translated representation. + * + * Those semantics mean that the driver doesn't do the repeated translations + * of states on every frame, but only once, when a new state is actually + * created. + * + * Even on hardware that doesn't do any kind of state cache, it makes the + * driver look a lot neater, plus it avoids all the redundant state + * translations on every frame. + * + * Currently our constant state objects are: + * - alpha test + * - blend + * - depth stencil + * - fragment shader + * - rasterizer (old setup) + * - sampler + * - vertex shader + * + * Things that are not constant state objects include: + * - blend_color + * - clip_state + * - clear_color_state + * - constant_buffer + * - feedback_state + * - framebuffer_state + * - polygon_stipple + * - scissor_state + * - texture_state + * - viewport_state + * + * @author Zack Rusin <zack@tungstengraphics.com> */ #ifndef CSO_CACHE_H @@ -36,46 +75,57 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" +/* cso_hash.h is necessary for cso_hash_iter, as MSVC requires structures + * returned by value to be fully defined */ +#include "cso_hash.h" -struct cso_hash; -struct cso_cache { - struct cso_hash *blend_hash; - struct cso_hash *depth_stencil_hash; - struct cso_hash *fs_hash; - struct cso_hash *vs_hash; - struct cso_hash *rasterizer_hash; - struct cso_hash *sampler_hash; -}; +#ifdef __cplusplus +extern "C" { +#endif + +struct cso_cache; struct cso_blend { struct pipe_blend_state state; - void *data; + void *data; + void (*delete_state)(struct pipe_context *, void *); + struct pipe_context *context; }; struct cso_depth_stencil_alpha { struct pipe_depth_stencil_alpha_state state; void *data; + void (*delete_state)(struct pipe_context *, void *); + struct pipe_context *context; }; struct cso_rasterizer { struct pipe_rasterizer_state state; void *data; + void (*delete_state)(struct pipe_context *, void *); + struct pipe_context *context; }; struct cso_fragment_shader { struct pipe_shader_state state; void *data; + void (*delete_state)(struct pipe_context *, void *); + struct pipe_context *context; }; struct cso_vertex_shader { struct pipe_shader_state state; void *data; + void (*delete_state)(struct pipe_context *, void *); + struct pipe_context *context; }; struct cso_sampler { struct pipe_sampler_state state; void *data; + void (*delete_state)(struct pipe_context *, void *); + struct pipe_context *context; }; @@ -88,6 +138,8 @@ enum cso_cache_type { CSO_VERTEX_SHADER }; +typedef void (*cso_state_callback)(void *, void *); + unsigned cso_construct_key(void *item, int item_size); struct cso_cache *cso_cache_create(void); @@ -101,7 +153,16 @@ struct cso_hash_iter cso_find_state(struct cso_cache *sc, struct cso_hash_iter cso_find_state_template(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type, void *templ); +void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, + cso_state_callback func, void *user_data); void * cso_take_state(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type); +void cso_set_maximum_cache_size(struct cso_cache *sc, int number); +int cso_maximum_cache_size(const struct cso_cache *sc); + +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c index 0338cb3b47..b3b4d667d2 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.c +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -30,12 +30,10 @@ * Zack Rusin <zack@tungstengraphics.com> */ -#include "cso_hash.h" +#include "pipe/p_debug.h" +#include "pipe/p_util.h" -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <assert.h> +#include "cso_hash.h" #define MAX(a, b) ((a > b) ? (a) : (b)) @@ -98,7 +96,7 @@ struct cso_hash { static void *cso_data_allocate_node(struct cso_hash_data *hash) { - return malloc(hash->nodeSize); + return MALLOC(hash->nodeSize); } static void cso_data_free_node(struct cso_node *node) @@ -107,10 +105,10 @@ static void cso_data_free_node(struct cso_node *node) * Need to cast value ptr to original cso type, then free the * driver-specific data hanging off of it. For example: struct cso_sampler *csamp = (struct cso_sampler *) node->value; - free(csamp->data); + FREE(csamp->data); */ - free(node->value); - free(node); + FREE(node->value); + FREE(node); } static struct cso_node * @@ -134,7 +132,7 @@ static void cso_data_rehash(struct cso_hash_data *hash, int hint) hint = countBits(-hint); if (hint < MinNumBits) hint = MinNumBits; - hash->userNumBits = hint; + hash->userNumBits = (short)hint; while (primeForNumBits(hint) < (hash->size >> 1)) ++hint; } else if (hint < MinNumBits) { @@ -147,9 +145,9 @@ static void cso_data_rehash(struct cso_hash_data *hash, int hint) int oldNumBuckets = hash->numBuckets; int i = 0; - hash->numBits = hint; + hash->numBits = (short)hint; hash->numBuckets = primeForNumBits(hint); - hash->buckets = malloc(sizeof(struct cso_node*) * hash->numBuckets); + hash->buckets = MALLOC(sizeof(struct cso_node*) * hash->numBuckets); for (i = 0; i < hash->numBuckets; ++i) hash->buckets[i] = e; @@ -158,11 +156,14 @@ static void cso_data_rehash(struct cso_hash_data *hash, int hint) while (firstNode != e) { unsigned h = firstNode->key; struct cso_node *lastNode = firstNode; + struct cso_node *afterLastNode; + struct cso_node **beforeFirstNode; + while (lastNode->next != e && lastNode->next->key == h) lastNode = lastNode->next; - struct cso_node *afterLastNode = lastNode->next; - struct cso_node **beforeFirstNode = &hash->buckets[h % hash->numBuckets]; + afterLastNode = lastNode->next; + beforeFirstNode = &hash->buckets[h % hash->numBuckets]; while (*beforeFirstNode != e) beforeFirstNode = &(*beforeFirstNode)->next; lastNode->next = *beforeFirstNode; @@ -170,7 +171,7 @@ static void cso_data_rehash(struct cso_hash_data *hash, int hint) firstNode = afterLastNode; } } - free(oldBuckets); + FREE(oldBuckets); } } @@ -222,21 +223,23 @@ struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, { cso_data_might_grow(hash->data.d); - struct cso_node **nextNode = cso_hash_find_node(hash, key); - struct cso_node *node = cso_hash_create_node(hash, key, data, nextNode); - struct cso_hash_iter iter = {hash, node}; - return iter; + { + struct cso_node **nextNode = cso_hash_find_node(hash, key); + struct cso_node *node = cso_hash_create_node(hash, key, data, nextNode); + struct cso_hash_iter iter = {hash, node}; + return iter; + } } struct cso_hash * cso_hash_create(void) { - struct cso_hash *hash = malloc(sizeof(struct cso_hash)); - hash->data.d = malloc(sizeof(struct cso_hash_data)); + struct cso_hash *hash = MALLOC_STRUCT(cso_hash); + hash->data.d = MALLOC_STRUCT(cso_hash_data); hash->data.d->fakeNext = 0; hash->data.d->buckets = 0; hash->data.d->size = 0; hash->data.d->nodeSize = sizeof(struct cso_node); - hash->data.d->userNumBits = MinNumBits; + hash->data.d->userNumBits = (short)MinNumBits; hash->data.d->numBits = 0; hash->data.d->numBuckets = 0; @@ -256,9 +259,9 @@ void cso_hash_delete(struct cso_hash *hash) cur = next; } } - free(hash->data.d->buckets); - free(hash->data.d); - free(hash); + FREE(hash->data.d->buckets); + FREE(hash->data.d); + FREE(hash); } struct cso_hash_iter cso_hash_find(struct cso_hash *hash, @@ -290,17 +293,21 @@ static struct cso_node *cso_hash_data_next(struct cso_node *node) struct cso_node *e; struct cso_hash_data *d; } a; + int start; + struct cso_node **bucket; + int n; + a.next = node->next; if (!a.next) { - fprintf(stderr, "iterating beyond the last element\n"); + debug_printf("iterating beyond the last element\n"); return 0; } if (a.next->next) return a.next; - int start = (node->key % a.d->numBuckets) + 1; - struct cso_node **bucket = a.d->buckets + start; - int n = a.d->numBuckets - start; + start = (node->key % a.d->numBuckets) + 1; + bucket = a.d->buckets + start; + n = a.d->numBuckets - start; while (n--) { if (*bucket != a.e) return *bucket; @@ -316,19 +323,21 @@ static struct cso_node *cso_hash_data_prev(struct cso_node *node) struct cso_node *e; struct cso_hash_data *d; } a; + int start; + struct cso_node *sentinel; + struct cso_node **bucket; a.e = node; while (a.e->next) a.e = a.e->next; - int start; if (node == a.e) start = a.d->numBuckets - 1; else start = node->key % a.d->numBuckets; - struct cso_node *sentinel = node; - struct cso_node **bucket = a.d->buckets + start; + sentinel = node; + bucket = a.d->buckets + start; while (start >= 0) { if (*bucket != sentinel) { struct cso_node *prev = *bucket; @@ -341,7 +350,7 @@ static struct cso_node *cso_hash_data_prev(struct cso_node *node) --bucket; --start; } - fprintf(stderr, "iterating backward beyond first element\n"); + debug_printf("iterating backward beyond first element\n"); return a.e; } @@ -386,3 +395,8 @@ struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash) struct cso_hash_iter iter = {hash, cso_data_first_node(hash->data.d)}; return iter; } + +int cso_hash_size(struct cso_hash *hash) +{ + return hash->data.d->size; +} diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h index b4aa111860..d5bca9d591 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.h +++ b/src/gallium/auxiliary/cso_cache/cso_hash.h @@ -33,6 +33,11 @@ #ifndef CSO_HASH_H #define CSO_HASH_H + +#ifdef __cplusplus +extern "C" { +#endif + struct cso_hash; struct cso_node; @@ -42,7 +47,9 @@ struct cso_hash_iter { }; struct cso_hash *cso_hash_create(void); -void cso_hash_delete(struct cso_hash *hash); +void cso_hash_delete(struct cso_hash *hash); + +int cso_hash_size(struct cso_hash *hash); struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, unsigned key, void *data); @@ -59,4 +66,17 @@ void *cso_hash_iter_data(struct cso_hash_iter iter); struct cso_hash_iter cso_hash_iter_next(struct cso_hash_iter iter); struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter); + +/* KW: a convenience routine: + */ +void *cso_hash_find_data_from_template( struct cso_hash *hash, + unsigned hash_key, + void *templ, + int size ); + + +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index fe9b150f30..2daa1636f3 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -1,2 +1,39 @@ -default: - cd ../../../mesa ; make +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = draw + +C_SOURCES = \ + draw_aaline.c \ + draw_aapoint.c \ + draw_clip.c \ + draw_vs_exec.c \ + draw_vs_sse.c \ + draw_vs_llvm.c \ + draw_context.c\ + draw_cull.c \ + draw_debug.c \ + draw_flatshade.c \ + draw_offset.c \ + draw_prim.c \ + draw_pstipple.c \ + draw_stipple.c \ + draw_twoside.c \ + draw_unfilled.c \ + draw_validate.c \ + draw_vbuf.c \ + draw_vertex.c \ + draw_vertex_cache.c \ + draw_vertex_fetch.c \ + draw_vertex_shader.c \ + draw_vf.c \ + draw_vf_generic.c \ + draw_vf_sse.c \ + draw_wide_line.c \ + draw_wide_point.c + + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript new file mode 100644 index 0000000000..c18dcb2927 --- /dev/null +++ b/src/gallium/auxiliary/draw/SConscript @@ -0,0 +1,35 @@ +Import('*') + +draw = env.ConvenienceLibrary( + target = 'draw', + source = [ + 'draw_aaline.c', + 'draw_aapoint.c', + 'draw_clip.c', + 'draw_vs_exec.c', + 'draw_vs_sse.c', + 'draw_vs_llvm.c', + 'draw_context.c', + 'draw_cull.c', + 'draw_debug.c', + 'draw_flatshade.c', + 'draw_offset.c', + 'draw_prim.c', + 'draw_pstipple.c', + 'draw_stipple.c', + 'draw_twoside.c', + 'draw_unfilled.c', + 'draw_validate.c', + 'draw_vbuf.c', + 'draw_vertex.c', + 'draw_vertex_cache.c', + 'draw_vertex_fetch.c', + 'draw_vertex_shader.c', + 'draw_vf.c', + 'draw_vf_generic.c', + 'draw_vf_sse.c', + 'draw_wide_point.c', + 'draw_wide_line.c' + ]) + +auxiliaries.insert(0, draw) diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c new file mode 100644 index 0000000000..7660e56fe6 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -0,0 +1,811 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * AA line stage: AA lines are converted to texture mapped triangles. + * + * Authors: Brian Paul + */ + + +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/util/tgsi_transform.h" +#include "tgsi/util/tgsi_dump.h" + +#include "draw_context.h" +#include "draw_private.h" + + +/** + * Max texture level for the alpha texture used for antialiasing + */ +#define MAX_TEXTURE_LEVEL 5 /* 32 x 32 */ + + +/** + * Subclass of pipe_shader_state to carry extra fragment shader info. + */ +struct aaline_fragment_shader +{ + struct pipe_shader_state state; + void *driver_fs; + void *aaline_fs; + void *aapoint_fs; /* not yet */ + void *sprite_fs; /* not yet */ +}; + + +/** + * Subclass of draw_stage + */ +struct aaline_stage +{ + struct draw_stage stage; + + float half_line_width; + + /** For AA lines, this is the vertex attrib slot for the new texcoords */ + uint tex_slot; + + void *sampler_cso; + struct pipe_texture *texture; + uint sampler_unit; + + + /* + * Currently bound state + */ + struct aaline_fragment_shader *fs; + struct { + void *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + } state; + + /* + * Driver interface/override functions + */ + void * (*driver_create_fs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*driver_bind_fs_state)(struct pipe_context *, void *); + void (*driver_delete_fs_state)(struct pipe_context *, void *); + + void (*driver_bind_sampler_state)(struct pipe_context *, unsigned, void *); + + void (*driver_set_sampler_texture)(struct pipe_context *, + unsigned sampler, + struct pipe_texture *); + + struct pipe_context *pipe; +}; + + + +/** + * Subclass of tgsi_transform_context, used for transforming the + * user's fragment shader to add the special AA instructions. + */ +struct aa_transform_context { + struct tgsi_transform_context base; + uint tempsUsed; /**< bitmask */ + int colorOutput; /**< which output is the primary color */ + int maxSampler; /**< max sampler index found */ + int maxInput, maxGeneric; /**< max input index found */ + int colorTemp, texTemp; /**< temp registers */ + boolean firstInstruction; +}; + + +/** + * TGSI declaration transform callback. + * Look for a free sampler, a free input attrib, and two free temp regs. + */ +static void +aa_transform_decl(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + + if (decl->Declaration.File == TGSI_FILE_OUTPUT && + decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && + decl->Semantic.SemanticIndex == 0) { + aactx->colorOutput = decl->u.DeclarationRange.First; + } + else if (decl->Declaration.File == TGSI_FILE_SAMPLER) { + if ((int) decl->u.DeclarationRange.Last > aactx->maxSampler) + aactx->maxSampler = decl->u.DeclarationRange.Last + 1; + } + else if (decl->Declaration.File == TGSI_FILE_INPUT) { + if ((int) decl->u.DeclarationRange.Last > aactx->maxInput) + aactx->maxInput = decl->u.DeclarationRange.Last; + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && + (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) { + aactx->maxGeneric = decl->Semantic.SemanticIndex; + } + } + else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { + uint i; + for (i = decl->u.DeclarationRange.First; + i <= decl->u.DeclarationRange.Last; i++) { + aactx->tempsUsed |= (1 << i); + } + } + + ctx->emit_declaration(ctx, decl); +} + + +/** + * TGSI instruction transform callback. + * Replace writes to result.color w/ a temp reg. + * Upon END instruction, insert texture sampling code for antialiasing. + */ +static void +aa_transform_inst(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + + if (aactx->firstInstruction) { + /* emit our new declarations before the first instruction */ + + struct tgsi_full_declaration decl; + uint i; + + /* find two free temp regs */ + for (i = 0; i < 32; i++) { + if ((aactx->tempsUsed & (1 << i)) == 0) { + /* found a free temp */ + if (aactx->colorTemp < 0) + aactx->colorTemp = i; + else if (aactx->texTemp < 0) + aactx->texTemp = i; + else + break; + } + } + assert(aactx->colorTemp >= 0); + assert(aactx->texTemp >= 0); + + /* declare new generic input/texcoord */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; + decl.Declaration.Interpolate = 1; + /* XXX this could be linear... */ + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->maxInput + 1; + ctx->emit_declaration(ctx, &decl); + + /* declare new sampler */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->maxSampler + 1; + ctx->emit_declaration(ctx, &decl); + + /* declare new temp regs */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->texTemp; + ctx->emit_declaration(ctx, &decl); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->colorTemp; + ctx->emit_declaration(ctx, &decl); + + aactx->firstInstruction = FALSE; + } + + if (inst->Instruction.Opcode == TGSI_OPCODE_END && + aactx->colorOutput != -1) { + struct tgsi_full_instruction newInst; + + /* TEX */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_TEX; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->maxInput + 1; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->maxSampler + 1; + + ctx->emit_instruction(ctx, &newInst); + + /* MOV rgb */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MOV; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + ctx->emit_instruction(ctx, &newInst); + + /* MUL alpha */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->texTemp; + ctx->emit_instruction(ctx, &newInst); + + /* END */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_END; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 0; + ctx->emit_instruction(ctx, &newInst); + } + else { + /* Not an END instruction. + * Look for writes to result.color and replace with colorTemp reg. + */ + uint i; + + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + if (dst->DstRegister.File == TGSI_FILE_OUTPUT && + dst->DstRegister.Index == aactx->colorOutput) { + dst->DstRegister.File = TGSI_FILE_TEMPORARY; + dst->DstRegister.Index = aactx->colorTemp; + } + } + + ctx->emit_instruction(ctx, inst); + } +} + + +/** + * Generate the frag shader we'll use for drawing AA lines. + * This will be the user's shader plus some texture/modulate instructions. + */ +static void +generate_aaline_fs(struct aaline_stage *aaline) +{ + const struct pipe_shader_state *orig_fs = &aaline->fs->state; + struct draw_context *draw = aaline->stage.draw; + struct pipe_shader_state aaline_fs; + struct aa_transform_context transform; + +#define MAX 1000 + + aaline_fs = *orig_fs; /* copy to init */ + aaline_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + + memset(&transform, 0, sizeof(transform)); + transform.colorOutput = -1; + transform.maxSampler = -1; + transform.maxInput = -1; + transform.maxGeneric = -1; + transform.colorTemp = -1; + transform.texTemp = -1; + transform.firstInstruction = TRUE; + transform.base.transform_instruction = aa_transform_inst; + transform.base.transform_declaration = aa_transform_decl; + + tgsi_transform_shader(orig_fs->tokens, + (struct tgsi_token *) aaline_fs.tokens, + MAX, &transform.base); + +#if 0 /* DEBUG */ + tgsi_dump(orig_fs->tokens, 0); + tgsi_dump(aaline_fs.tokens, 0); +#endif + +#if 1 /* XXX remove */ + aaline_fs.input_semantic_name[aaline_fs.num_inputs] = TGSI_SEMANTIC_GENERIC; + aaline_fs.input_semantic_index[aaline_fs.num_inputs] = transform.maxGeneric + 1; + aaline_fs.num_inputs++; +#endif + + aaline->fs->aaline_fs + = aaline->driver_create_fs_state(aaline->pipe, &aaline_fs); + + /* advertise the extra post-transform vertex attributes which will have + * the texcoords. + */ + draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_vp_outputs.semantic_index = transform.maxGeneric + 1; +} + + +/** + * Create the texture map we'll use for antialiasing the lines. + */ +static void +aaline_create_texture(struct aaline_stage *aaline) +{ + struct pipe_context *pipe = aaline->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_texture texTemp; + uint level; + + memset(&texTemp, 0, sizeof(texTemp)); + texTemp.target = PIPE_TEXTURE_2D; + texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */ + texTemp.last_level = MAX_TEXTURE_LEVEL; + texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL; + texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL; + texTemp.depth[0] = 1; + texTemp.cpp = 1; + + aaline->texture = screen->texture_create(screen, &texTemp); + + /* Fill in mipmap images. + * Basically each level is solid opaque, except for the outermost + * texels which are zero. Special case the 1x1 and 2x2 levels. + */ + for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) { + struct pipe_surface *surface; + const uint size = aaline->texture->width[level]; + ubyte *data; + uint i, j; + + assert(aaline->texture->width[level] == aaline->texture->height[level]); + + surface = screen->get_tex_surface(screen, aaline->texture, 0, level, 0); + data = pipe_surface_map(surface); + + for (i = 0; i < size; i++) { + for (j = 0; j < size; j++) { + ubyte d; + if (size == 1) { + d = 255; + } + else if (size == 2) { + d = 200; /* tuneable */ + } + else if (i == 0 || j == 0 || i == size - 1 || j == size - 1) { + d = 0; + } + else { + d = 255; + } + data[i * surface->pitch + j] = d; + } + } + + /* unmap */ + pipe_surface_unmap(surface); + pipe_surface_reference(&surface, NULL); + } +} + + +/** + * Create the sampler CSO that'll be used for antialiasing. + * By using a mipmapped texture, we don't have to generate a different + * texture image for each line size. + */ +static void +aaline_create_sampler(struct aaline_stage *aaline) +{ + struct pipe_sampler_state sampler; + struct pipe_context *pipe = aaline->pipe; + + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR; + sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.normalized_coords = 1; + sampler.min_lod = 0.0f; + sampler.max_lod = MAX_TEXTURE_LEVEL; + + aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler); +} + + +/** + * When we're about to draw our first AA line in a batch, this function is + * called to tell the driver to bind our modified fragment shader. + */ +static void +bind_aaline_fragment_shader(struct aaline_stage *aaline) +{ + if (!aaline->fs->aaline_fs) { + generate_aaline_fs(aaline); + } + aaline->driver_bind_fs_state(aaline->pipe, aaline->fs->aaline_fs); +} + + + +static INLINE struct aaline_stage * +aaline_stage( struct draw_stage *stage ) +{ + return (struct aaline_stage *) stage; +} + + +static void +passthrough_point(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->point(stage->next, header); +} + + +static void +passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + +/** + * Draw a wide line by drawing a quad, using geometry which will + * fullfill GL's antialiased line requirements. + */ +static void +aaline_line(struct draw_stage *stage, struct prim_header *header) +{ + const struct aaline_stage *aaline = aaline_stage(stage); + const float half_width = aaline->half_line_width; + struct prim_header tri; + struct vertex_header *v[8]; + uint texPos = aaline->tex_slot; + float *pos, *tex; + float dx = header->v[1]->data[0][0] - header->v[0]->data[0][0]; + float dy = header->v[1]->data[0][1] - header->v[0]->data[0][1]; + double a = atan2(dy, dx); + float c_a = (float) cos(a), s_a = (float) sin(a); + uint i; + + /* XXX the ends of lines aren't quite perfect yet, but probably passable */ + dx = 0.5F * half_width; + dy = half_width; + + /* allocate/dup new verts */ + for (i = 0; i < 8; i++) { + v[i] = dup_vert(stage, header->v[i/4], i); + } + + /* + * Quad strip for line from v0 to v1 (*=endpoints): + * + * 1 3 5 7 + * +---+---------------------+---+ + * | | + * | *v0 v1* | + * | | + * +---+---------------------+---+ + * 0 2 4 6 + */ + + /* new verts */ + pos = v[0]->data[0]; + pos[0] += (-dx * c_a - dy * s_a); + pos[1] += (-dx * s_a + dy * c_a); + + pos = v[1]->data[0]; + pos[0] += (-dx * c_a - -dy * s_a); + pos[1] += (-dx * s_a + -dy * c_a); + + pos = v[2]->data[0]; + pos[0] += ( dx * c_a - dy * s_a); + pos[1] += ( dx * s_a + dy * c_a); + + pos = v[3]->data[0]; + pos[0] += ( dx * c_a - -dy * s_a); + pos[1] += ( dx * s_a + -dy * c_a); + + pos = v[4]->data[0]; + pos[0] += (-dx * c_a - dy * s_a); + pos[1] += (-dx * s_a + dy * c_a); + + pos = v[5]->data[0]; + pos[0] += (-dx * c_a - -dy * s_a); + pos[1] += (-dx * s_a + -dy * c_a); + + pos = v[6]->data[0]; + pos[0] += ( dx * c_a - dy * s_a); + pos[1] += ( dx * s_a + dy * c_a); + + pos = v[7]->data[0]; + pos[0] += ( dx * c_a - -dy * s_a); + pos[1] += ( dx * s_a + -dy * c_a); + + /* new texcoords */ + tex = v[0]->data[texPos]; + ASSIGN_4V(tex, 0, 0, 0, 1); + + tex = v[1]->data[texPos]; + ASSIGN_4V(tex, 0, 1, 0, 1); + + tex = v[2]->data[texPos]; + ASSIGN_4V(tex, .5, 0, 0, 1); + + tex = v[3]->data[texPos]; + ASSIGN_4V(tex, .5, 1, 0, 1); + + tex = v[4]->data[texPos]; + ASSIGN_4V(tex, .5, 0, 0, 1); + + tex = v[5]->data[texPos]; + ASSIGN_4V(tex, .5, 1, 0, 1); + + tex = v[6]->data[texPos]; + ASSIGN_4V(tex, 1, 0, 0, 1); + + tex = v[7]->data[texPos]; + ASSIGN_4V(tex, 1, 1, 0, 1); + + /* emit 6 tris for the quad strip */ + tri.v[0] = v[2]; tri.v[1] = v[1]; tri.v[2] = v[0]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[3]; tri.v[1] = v[1]; tri.v[2] = v[2]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[4]; tri.v[1] = v[3]; tri.v[2] = v[2]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[5]; tri.v[1] = v[3]; tri.v[2] = v[4]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[6]; tri.v[1] = v[5]; tri.v[2] = v[4]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[7]; tri.v[1] = v[5]; tri.v[2] = v[6]; + stage->next->tri( stage->next, &tri ); +} + + +static void +aaline_first_line(struct draw_stage *stage, struct prim_header *header) +{ + auto struct aaline_stage *aaline = aaline_stage(stage); + struct draw_context *draw = stage->draw; + struct pipe_context *pipe = aaline->pipe; + + assert(draw->rasterizer->line_smooth); + + if (draw->rasterizer->line_width <= 3.0) + aaline->half_line_width = 1.5f; + else + aaline->half_line_width = 0.5f * draw->rasterizer->line_width; + + aaline->tex_slot = draw->num_vs_outputs; + assert(aaline->tex_slot > 0); /* output[0] is vertex pos */ + draw->extra_vp_outputs.slot = aaline->tex_slot; + + /* + * Bind our fragprog, sampler and texture + */ + bind_aaline_fragment_shader(aaline); + + aaline->driver_bind_sampler_state(pipe, aaline->sampler_unit, aaline->sampler_cso); + aaline->driver_set_sampler_texture(pipe, aaline->sampler_unit, aaline->texture); + + /* now really draw first line */ + stage->line = aaline_line; + stage->line(stage, header); +} + + +static void +aaline_flush(struct draw_stage *stage, unsigned flags) +{ + struct draw_context *draw = stage->draw; + struct aaline_stage *aaline = aaline_stage(stage); + struct pipe_context *pipe = aaline->pipe; + + stage->line = aaline_first_line; + stage->next->flush( stage->next, flags ); + + /* restore original frag shader */ + aaline->driver_bind_fs_state(pipe, aaline->fs->driver_fs); + + /* XXX restore original texture, sampler state */ + aaline->driver_bind_sampler_state(pipe, aaline->sampler_unit, + aaline->state.sampler[aaline->sampler_unit]); + aaline->driver_set_sampler_texture(pipe, aaline->sampler_unit, + aaline->state.texture[aaline->sampler_unit]); + + draw->extra_vp_outputs.slot = 0; +} + + +static void +aaline_reset_stipple_counter(struct draw_stage *stage) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void +aaline_destroy(struct draw_stage *stage) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +static struct aaline_stage * +draw_aaline_stage(struct draw_context *draw) +{ + struct aaline_stage *aaline = CALLOC_STRUCT(aaline_stage); + + draw_alloc_temp_verts( &aaline->stage, 8 ); + + aaline->stage.draw = draw; + aaline->stage.next = NULL; + aaline->stage.point = passthrough_point; + aaline->stage.line = aaline_first_line; + aaline->stage.tri = passthrough_tri; + aaline->stage.flush = aaline_flush; + aaline->stage.reset_stipple_counter = aaline_reset_stipple_counter; + aaline->stage.destroy = aaline_destroy; + + return aaline; +} + + +static struct aaline_stage * +aaline_stage_from_pipe(struct pipe_context *pipe) +{ + struct draw_context *draw = (struct draw_context *) pipe->draw; + return aaline_stage(draw->pipeline.aaline); +} + + +/** + * This function overrides the driver's create_fs_state() function and + * will typically be called by the state tracker. + */ +static void * +aaline_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *fs) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + struct aaline_fragment_shader *aafs = CALLOC_STRUCT(aaline_fragment_shader); + + if (aafs) { + aafs->state = *fs; + + /* pass-through */ + aafs->driver_fs = aaline->driver_create_fs_state(aaline->pipe, fs); + } + + return aafs; +} + + +static void +aaline_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs; + /* save current */ + aaline->fs = aafs; + /* pass-through */ + aaline->driver_bind_fs_state(aaline->pipe, aafs->driver_fs); +} + + +static void +aaline_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs; + /* pass-through */ + aaline->driver_delete_fs_state(aaline->pipe, aafs->driver_fs); + FREE(aafs); +} + + +static void +aaline_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + /* save current */ + aaline->state.sampler[unit] = sampler; + /* pass-through */ + aaline->driver_bind_sampler_state(aaline->pipe, unit, sampler); +} + + +static void +aaline_set_sampler_texture(struct pipe_context *pipe, + unsigned sampler, struct pipe_texture *texture) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + /* save current */ + aaline->state.texture[sampler] = texture; + /* pass-through */ + aaline->driver_set_sampler_texture(aaline->pipe, sampler, texture); +} + + +/** + * Called by drivers that want to install this AA line prim stage + * into the draw module's pipeline. This will not be used if the + * hardware has native support for AA lines. + */ +void +draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) +{ + struct aaline_stage *aaline; + + pipe->draw = (void *) draw; + + /* + * Create / install AA line drawing / prim stage + */ + aaline = draw_aaline_stage( draw ); + assert(aaline); + draw->pipeline.aaline = &aaline->stage; + + aaline->pipe = pipe; + + /* create special texture, sampler state */ + aaline_create_texture(aaline); + aaline_create_sampler(aaline); + + /* save original driver functions */ + aaline->driver_create_fs_state = pipe->create_fs_state; + aaline->driver_bind_fs_state = pipe->bind_fs_state; + aaline->driver_delete_fs_state = pipe->delete_fs_state; + + aaline->driver_bind_sampler_state = pipe->bind_sampler_state; + aaline->driver_set_sampler_texture = pipe->set_sampler_texture; + + /* override the driver's functions */ + pipe->create_fs_state = aaline_create_fs_state; + pipe->bind_fs_state = aaline_bind_fs_state; + pipe->delete_fs_state = aaline_delete_fs_state; + + pipe->bind_sampler_state = aaline_bind_sampler_state; + pipe->set_sampler_texture = aaline_set_sampler_texture; +} diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c new file mode 100644 index 0000000000..70f696475f --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_aapoint.c @@ -0,0 +1,849 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * AA point stage: AA points are converted to quads and rendered with a + * special fragment shader. Another approach would be to use a texture + * map image of a point, but experiments indicate the quality isn't nearly + * as good as this approach. + * + * Note: this looks a lot like draw_aaline.c but there's actually little + * if any code that can be shared. + * + * Authors: Brian Paul + */ + + +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/util/tgsi_transform.h" +#include "tgsi/util/tgsi_dump.h" + +#include "draw_context.h" +#include "draw_private.h" + + +/* + * Enabling NORMALIZE might give _slightly_ better results. + * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or + * d=x*x+y*y. Since we're working with a unit circle, the later seems + * close enough and saves some costly instructions. + */ +#define NORMALIZE 0 + + +/** + * Subclass of pipe_shader_state to carry extra fragment shader info. + */ +struct aapoint_fragment_shader +{ + struct pipe_shader_state state; + void *driver_fs; /**< the regular shader */ + void *aapoint_fs; /**< the aa point-augmented shader */ +}; + + +/** + * Subclass of draw_stage + */ +struct aapoint_stage +{ + struct draw_stage stage; + + int psize_slot; + float radius; + + /** this is the vertex attrib slot for the new texcoords */ + uint tex_slot; + + /* + * Currently bound state + */ + struct aapoint_fragment_shader *fs; + + /* + * Driver interface/override functions + */ + void * (*driver_create_fs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*driver_bind_fs_state)(struct pipe_context *, void *); + void (*driver_delete_fs_state)(struct pipe_context *, void *); + + struct pipe_context *pipe; +}; + + + +/** + * Subclass of tgsi_transform_context, used for transforming the + * user's fragment shader to add the special AA instructions. + */ +struct aa_transform_context { + struct tgsi_transform_context base; + uint tempsUsed; /**< bitmask */ + int colorOutput; /**< which output is the primary color */ + int maxInput, maxGeneric; /**< max input index found */ + int tmp0, colorTemp; /**< temp registers */ + boolean firstInstruction; +}; + + +/** + * TGSI declaration transform callback. + * Look for two free temp regs and available input reg for new texcoords. + */ +static void +aa_transform_decl(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + + if (decl->Declaration.File == TGSI_FILE_OUTPUT && + decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && + decl->Semantic.SemanticIndex == 0) { + aactx->colorOutput = decl->u.DeclarationRange.First; + } + else if (decl->Declaration.File == TGSI_FILE_INPUT) { + if ((int) decl->u.DeclarationRange.Last > aactx->maxInput) + aactx->maxInput = decl->u.DeclarationRange.Last; + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && + (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) { + aactx->maxGeneric = decl->Semantic.SemanticIndex; + } + } + else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { + uint i; + for (i = decl->u.DeclarationRange.First; + i <= decl->u.DeclarationRange.Last; i++) { + aactx->tempsUsed |= (1 << i); + } + } + + ctx->emit_declaration(ctx, decl); +} + + +/** + * TGSI instruction transform callback. + * Replace writes to result.color w/ a temp reg. + * Upon END instruction, insert texture sampling code for antialiasing. + */ +static void +aa_transform_inst(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + struct tgsi_full_instruction newInst; + + if (aactx->firstInstruction) { + /* emit our new declarations before the first instruction */ + + struct tgsi_full_declaration decl; + const int texInput = aactx->maxInput + 1; + int tmp0; + uint i; + + /* find two free temp regs */ + for (i = 0; i < 32; i++) { + if ((aactx->tempsUsed & (1 << i)) == 0) { + /* found a free temp */ + if (aactx->tmp0 < 0) + aactx->tmp0 = i; + else if (aactx->colorTemp < 0) + aactx->colorTemp = i; + else + break; + } + } + + assert(aactx->colorTemp != aactx->tmp0); + + tmp0 = aactx->tmp0; + + /* declare new generic input/texcoord */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; + decl.Declaration.Interpolate = 1; + /* XXX this could be linear... */ + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = texInput; + ctx->emit_declaration(ctx, &decl); + + /* declare new temp regs */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = tmp0; + ctx->emit_declaration(ctx, &decl); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->colorTemp; + ctx->emit_declaration(ctx, &decl); + + aactx->firstInstruction = FALSE; + + + /* + * Emit code to compute fragment coverage, kill if outside point radius + * + * Temp reg0 usage: + * t0.x = distance of fragment from center point + * t0.y = boolean, is t0.x > 1.0, also misc temp usage + * t0.z = temporary for computing 1/(1-k) value + * t0.w = final coverage value + */ + + /* MUL t0.xy, tex, tex; # compute x^2, y^2 */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + ctx->emit_instruction(ctx, &newInst); + + /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_ADD; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + ctx->emit_instruction(ctx, &newInst); + +#if NORMALIZE /* OPTIONAL normalization of length */ + /* RSQ t0.x, t0.x; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_RSQ; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + ctx->emit_instruction(ctx, &newInst); + + /* RCP t0.x, t0.x; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_RCP; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + ctx->emit_instruction(ctx, &newInst); +#endif + + /* SGT t0.y, t0.xxxx, t0.wwww; # bool b = d > 1 (NOTE t0.w == 1) */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SGT; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + ctx->emit_instruction(ctx, &newInst); + + /* KILP -t0.yyyy; # if b, KILL */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_KILP; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + ctx->emit_instruction(ctx, &newInst); + + + /* compute coverage factor = (1-d)/(1-k) */ + + /* SUB t0.z, tex.w, tex.z; # m = 1 - k */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SUB; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* RCP t0.z, t0.z; # t0.z = 1 / m */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_RCP; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* SUB t0.y, 1, t0.x; # d = 1 - d */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SUB; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + ctx->emit_instruction(ctx, &newInst); + + /* MUL t0.w, t0.y, t0.z; # coverage = d * m */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SLE; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* CMP t0.w, -t0.y, tex.w, t0.w; + * # if -t0.y < 0 then + * t0.w = 1 + * else + * t0.w = t0.w + */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_CMP; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 3; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[2].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + ctx->emit_instruction(ctx, &newInst); + + } + + if (inst->Instruction.Opcode == TGSI_OPCODE_END) { + /* add alpha modulation code at tail of program */ + + /* MOV result.color.xyz, colorTemp; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MOV; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + ctx->emit_instruction(ctx, &newInst); + + /* MUL result.color.w, colorTemp, tmp0.w; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->tmp0; + ctx->emit_instruction(ctx, &newInst); + } + else { + /* Not an END instruction. + * Look for writes to result.color and replace with colorTemp reg. + */ + uint i; + + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + if (dst->DstRegister.File == TGSI_FILE_OUTPUT && + dst->DstRegister.Index == aactx->colorOutput) { + dst->DstRegister.File = TGSI_FILE_TEMPORARY; + dst->DstRegister.Index = aactx->colorTemp; + } + } + } + + ctx->emit_instruction(ctx, inst); +} + + +/** + * Generate the frag shader we'll use for drawing AA lines. + * This will be the user's shader plus some texture/modulate instructions. + */ +static void +generate_aapoint_fs(struct aapoint_stage *aapoint) +{ + const struct pipe_shader_state *orig_fs = &aapoint->fs->state; + struct draw_context *draw = aapoint->stage.draw; + struct pipe_shader_state aapoint_fs; + struct aa_transform_context transform; + +#define MAX 1000 + + aapoint_fs = *orig_fs; /* copy to init */ + aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + + memset(&transform, 0, sizeof(transform)); + transform.colorOutput = -1; + transform.maxInput = -1; + transform.maxGeneric = -1; + transform.colorTemp = -1; + transform.tmp0 = -1; + transform.firstInstruction = TRUE; + transform.base.transform_instruction = aa_transform_inst; + transform.base.transform_declaration = aa_transform_decl; + + tgsi_transform_shader(orig_fs->tokens, + (struct tgsi_token *) aapoint_fs.tokens, + MAX, &transform.base); + +#if 0 /* DEBUG */ + tgsi_dump(orig_fs->tokens, 0); + tgsi_dump(aapoint_fs.tokens, 0); +#endif + +#if 1 /* XXX remove */ + aapoint_fs.input_semantic_name[aapoint_fs.num_inputs] = TGSI_SEMANTIC_GENERIC; + aapoint_fs.input_semantic_index[aapoint_fs.num_inputs] = transform.maxGeneric + 1; + aapoint_fs.num_inputs++; +#endif + + aapoint->fs->aapoint_fs + = aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs); + + /* advertise the extra post-transform vertex attributes which will have + * the texcoords. + */ + draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_vp_outputs.semantic_index = transform.maxGeneric + 1; +} + + +/** + * When we're about to draw our first AA line in a batch, this function is + * called to tell the driver to bind our modified fragment shader. + */ +static void +bind_aapoint_fragment_shader(struct aapoint_stage *aapoint) +{ + if (!aapoint->fs->aapoint_fs) { + generate_aapoint_fs(aapoint); + } + aapoint->driver_bind_fs_state(aapoint->pipe, aapoint->fs->aapoint_fs); +} + + + +static INLINE struct aapoint_stage * +aapoint_stage( struct draw_stage *stage ) +{ + return (struct aapoint_stage *) stage; +} + + +static void +passthrough_line(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->line(stage->next, header); +} + + +static void +passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + +/** + * Draw an AA point by drawing a quad. + */ +static void +aapoint_point(struct draw_stage *stage, struct prim_header *header) +{ + const struct aapoint_stage *aapoint = aapoint_stage(stage); + struct prim_header tri; + struct vertex_header *v[4]; + uint texPos = aapoint->tex_slot; + float radius, *pos, *tex; + uint i; + float k; + + if (aapoint->psize_slot >= 0) { + radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0]; + } + else { + radius = aapoint->radius; + } + + /* + * Note: the texcoords (generic attrib, really) we use are special: + * The S and T components simply vary from -1 to +1. + * The R component is k, below. + * The Q component is 1.0 and will used as a handy constant in the + * fragment shader. + */ + + /* + * k is the threshold distance from the point's center at which + * we begin alpha attenuation (the coverage value). + * Operating within a unit circle, we'll compute the fragment's + * distance 'd' from the center point using the texcoords. + * IF d > 1.0 THEN + * KILL fragment + * ELSE IF d > k THEN + * compute coverage in [0,1] proportional to d in [k, 1]. + * ELSE + * coverage = 1.0; // full coverage + * ENDIF + * + * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to + * avoid using IF/ELSE/ENDIF TGSI opcodes. + */ + +#if !NORMALIZE + k = 1.0f / radius; + k = 1.0f - 2.0f * k + k * k; +#else + k = 1.0f - 1.0f / radius; +#endif + + /* allocate/dup new verts */ + for (i = 0; i < 4; i++) { + v[i] = dup_vert(stage, header->v[0], i); + } + + /* new verts */ + pos = v[0]->data[0]; + pos[0] -= radius; + pos[1] -= radius; + + pos = v[1]->data[0]; + pos[0] += radius; + pos[1] -= radius; + + pos = v[2]->data[0]; + pos[0] += radius; + pos[1] += radius; + + pos = v[3]->data[0]; + pos[0] -= radius; + pos[1] += radius; + + /* new texcoords */ + tex = v[0]->data[texPos]; + ASSIGN_4V(tex, -1, -1, k, 1); + + tex = v[1]->data[texPos]; + ASSIGN_4V(tex, 1, -1, k, 1); + + tex = v[2]->data[texPos]; + ASSIGN_4V(tex, 1, 1, k, 1); + + tex = v[3]->data[texPos]; + ASSIGN_4V(tex, -1, 1, k, 1); + + /* emit 2 tris for the quad strip */ + tri.v[0] = v[0]; + tri.v[1] = v[1]; + tri.v[2] = v[2]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[0]; + tri.v[1] = v[2]; + tri.v[2] = v[3]; + stage->next->tri( stage->next, &tri ); +} + + +static void +aapoint_first_point(struct draw_stage *stage, struct prim_header *header) +{ + auto struct aapoint_stage *aapoint = aapoint_stage(stage); + struct draw_context *draw = stage->draw; + + assert(draw->rasterizer->point_smooth); + + if (draw->rasterizer->point_size <= 2.0) + aapoint->radius = 1.0; + else + aapoint->radius = 0.5f * draw->rasterizer->point_size; + + aapoint->tex_slot = draw->num_vs_outputs; + assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */ + draw->extra_vp_outputs.slot = aapoint->tex_slot; + + /* + * Bind our fragprog. + */ + bind_aapoint_fragment_shader(aapoint); + + /* find psize slot in post-transform vertex */ + aapoint->psize_slot = -1; + if (draw->rasterizer->point_size_per_vertex) { + /* find PSIZ vertex output */ + const struct draw_vertex_shader *vs = draw->vertex_shader; + uint i; + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { + aapoint->psize_slot = i; + break; + } + } + } + + /* now really draw first line */ + stage->point = aapoint_point; + stage->point(stage, header); +} + + +static void +aapoint_flush(struct draw_stage *stage, unsigned flags) +{ + struct draw_context *draw = stage->draw; + struct aapoint_stage *aapoint = aapoint_stage(stage); + struct pipe_context *pipe = aapoint->pipe; + + stage->point = aapoint_first_point; + stage->next->flush( stage->next, flags ); + + /* restore original frag shader */ + aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs); + + draw->extra_vp_outputs.slot = 0; +} + + +static void +aapoint_reset_stipple_counter(struct draw_stage *stage) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void +aapoint_destroy(struct draw_stage *stage) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +static struct aapoint_stage * +draw_aapoint_stage(struct draw_context *draw) +{ + struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage); + + draw_alloc_temp_verts( &aapoint->stage, 4 ); + + aapoint->stage.draw = draw; + aapoint->stage.next = NULL; + aapoint->stage.point = aapoint_first_point; + aapoint->stage.line = passthrough_line; + aapoint->stage.tri = passthrough_tri; + aapoint->stage.flush = aapoint_flush; + aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter; + aapoint->stage.destroy = aapoint_destroy; + + return aapoint; +} + + +static struct aapoint_stage * +aapoint_stage_from_pipe(struct pipe_context *pipe) +{ + struct draw_context *draw = (struct draw_context *) pipe->draw; + return aapoint_stage(draw->pipeline.aapoint); +} + + +/** + * This function overrides the driver's create_fs_state() function and + * will typically be called by the state tracker. + */ +static void * +aapoint_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *fs) +{ + struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); + struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader); + + if (aafs) { + aafs->state = *fs; + + /* pass-through */ + aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs); + } + + return aafs; +} + + +static void +aapoint_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); + struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs; + /* save current */ + aapoint->fs = aafs; + /* pass-through */ + aapoint->driver_bind_fs_state(aapoint->pipe, aafs->driver_fs); +} + + +static void +aapoint_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); + struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs; + /* pass-through */ + aapoint->driver_delete_fs_state(aapoint->pipe, aafs->driver_fs); + FREE(aafs); +} + + +/** + * Called by drivers that want to install this AA point prim stage + * into the draw module's pipeline. This will not be used if the + * hardware has native support for AA points. + */ +void +draw_install_aapoint_stage(struct draw_context *draw, + struct pipe_context *pipe) +{ + struct aapoint_stage *aapoint; + + pipe->draw = (void *) draw; + + /* + * Create / install AA point drawing / prim stage + */ + aapoint = draw_aapoint_stage( draw ); + assert(aapoint); + draw->pipeline.aapoint = &aapoint->stage; + + aapoint->pipe = pipe; + + /* save original driver functions */ + aapoint->driver_create_fs_state = pipe->create_fs_state; + aapoint->driver_bind_fs_state = pipe->bind_fs_state; + aapoint->driver_delete_fs_state = pipe->delete_fs_state; + + /* override the driver's functions */ + pipe->create_fs_state = aapoint_create_fs_state; + pipe->bind_fs_state = aapoint_bind_fs_state; + pipe->delete_fs_state = aapoint_delete_fs_state; +} diff --git a/src/gallium/auxiliary/draw/draw_clip.c b/src/gallium/auxiliary/draw/draw_clip.c index e3051507ea..200152ecab 100644 --- a/src/gallium/auxiliary/draw/draw_clip.c +++ b/src/gallium/auxiliary/draw/draw_clip.c @@ -409,13 +409,13 @@ clip_init_state( struct draw_stage *stage ) clipper->flat = stage->draw->rasterizer->flatshade ? TRUE : FALSE; if (clipper->flat) { - const struct pipe_shader_state *vs = stage->draw->vertex_shader->state; + const struct draw_vertex_shader *vs = stage->draw->vertex_shader; uint i; clipper->num_color_attribs = 0; - for (i = 0; i < vs->num_outputs; i++) { - if (vs->output_semantic_name[i] == TGSI_SEMANTIC_COLOR || - vs->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR || + vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { clipper->color_attribs[clipper->num_color_attribs++] = i; } } diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 4be3830316..428b6209e0 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -48,7 +48,8 @@ struct draw_context *draw_create( void ) #endif /* create pipeline stages */ - draw->pipeline.wide = draw_wide_stage( draw ); + draw->pipeline.wide_line = draw_wide_line_stage( draw ); + draw->pipeline.wide_point = draw_wide_point_stage( draw ); draw->pipeline.stipple = draw_stipple_stage( draw ); draw->pipeline.unfilled = draw_unfilled_stage( draw ); draw->pipeline.twoside = draw_twoside_stage( draw ); @@ -72,16 +73,17 @@ struct draw_context *draw_create( void ) { uint i; const unsigned size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f; - char *tmp = align_malloc(Elements(draw->vcache.vertex) * size, 16); + char *tmp = align_malloc(Elements(draw->vs.queue) * size, 16); - for (i = 0; i < Elements(draw->vcache.vertex); i++) - draw->vcache.vertex[i] = (struct vertex_header *)(tmp + i * size); + for (i = 0; i < Elements(draw->vs.queue); i++) + draw->vs.queue[i].vertex = (struct vertex_header *)(tmp + i * size); } draw->shader_queue_flush = draw_vertex_shader_queue_flush; - draw->convert_wide_points = TRUE; - draw->convert_wide_lines = TRUE; + /* these defaults are oriented toward the needs of softpipe */ + draw->wide_point_threshold = 1000000.0; /* infinity */ + draw->wide_line_threshold = 1.0; draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ @@ -94,7 +96,8 @@ struct draw_context *draw_create( void ) void draw_destroy( struct draw_context *draw ) { - draw->pipeline.wide->destroy( draw->pipeline.wide ); + draw->pipeline.wide_line->destroy( draw->pipeline.wide_line ); + draw->pipeline.wide_point->destroy( draw->pipeline.wide_point ); draw->pipeline.stipple->destroy( draw->pipeline.stipple ); draw->pipeline.unfilled->destroy( draw->pipeline.unfilled ); draw->pipeline.twoside->destroy( draw->pipeline.twoside ); @@ -103,10 +106,14 @@ void draw_destroy( struct draw_context *draw ) draw->pipeline.flatshade->destroy( draw->pipeline.flatshade ); draw->pipeline.cull->destroy( draw->pipeline.cull ); draw->pipeline.validate->destroy( draw->pipeline.validate ); + if (draw->pipeline.aaline) + draw->pipeline.aaline->destroy( draw->pipeline.aaline ); + if (draw->pipeline.aapoint) + draw->pipeline.aapoint->destroy( draw->pipeline.aapoint ); if (draw->pipeline.rasterize) draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); tgsi_exec_machine_free_data(&draw->machine); - align_free( draw->vcache.vertex[0] ); /* Frees all the vertices. */ + align_free( draw->vs.queue[0].vertex ); /* Frees all the vertices. */ FREE( draw ); } @@ -216,26 +223,77 @@ draw_set_mapped_constant_buffer(struct draw_context *draw, /** - * Tells the draw module whether to convert wide points (size != 1) - * into triangles. + * Tells the draw module to draw points with triangles if their size + * is greater than this threshold. */ void -draw_convert_wide_points(struct draw_context *draw, boolean enable) +draw_wide_point_threshold(struct draw_context *draw, float threshold) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->convert_wide_points = enable; + draw->wide_point_threshold = threshold; } /** - * Tells the draw module whether to convert wide lines (width != 1) - * into triangles. + * Tells the draw module to draw lines with triangles if their width + * is greater than this threshold. */ void -draw_convert_wide_lines(struct draw_context *draw, boolean enable) +draw_wide_line_threshold(struct draw_context *draw, float threshold) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->convert_wide_lines = enable; + draw->wide_line_threshold = threshold; +} + + +/** + * Ask the draw module for the location/slot of the given vertex attribute in + * a post-transformed vertex. + * + * With this function, drivers that use the draw module should have no reason + * to track the current vertex shader. + * + * Note that the draw module may sometimes generate vertices with extra + * attributes (such as texcoords for AA lines). The driver can call this + * function to find those attributes. + * + * Zero is returned if the attribute is not found since this is + * a don't care / undefined situtation. Returning -1 would be a bit more + * work for the drivers. + */ +int +draw_find_vs_output(struct draw_context *draw, + uint semantic_name, uint semantic_index) +{ + const struct draw_vertex_shader *vs = draw->vertex_shader; + uint i; + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == semantic_name && + vs->info.output_semantic_index[i] == semantic_index) + return i; + } + + /* XXX there may be more than one extra vertex attrib. + * For example, simulated gl_FragCoord and gl_PointCoord. + */ + if (draw->extra_vp_outputs.semantic_name == semantic_name && + draw->extra_vp_outputs.semantic_index == semantic_index) { + return draw->extra_vp_outputs.slot; + } + return 0; +} + + +/** + * Return number of vertex shader outputs. + */ +uint +draw_num_vs_outputs(struct draw_context *draw) +{ + uint count = draw->vertex_shader->info.num_outputs; + if (draw->extra_vp_outputs.slot >= 0) + count++; + return count; } diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index ddeb184497..ab87b4127c 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -41,6 +41,7 @@ #include "pipe/p_state.h" +struct pipe_context; struct vertex_buffer; struct vertex_info; struct draw_context; @@ -89,11 +90,35 @@ void draw_set_rasterizer_state( struct draw_context *draw, void draw_set_rasterize_stage( struct draw_context *draw, struct draw_stage *stage ); -void draw_convert_wide_points(struct draw_context *draw, boolean enable); +void draw_wide_point_threshold(struct draw_context *draw, float threshold); -void draw_convert_wide_lines(struct draw_context *draw, boolean enable); +void draw_wide_line_threshold(struct draw_context *draw, float threshold); + +boolean draw_use_sse(struct draw_context *draw); + +void +draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe); + +void +draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe); + +void +draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe); + + +int +draw_find_vs_output(struct draw_context *draw, + uint semantic_name, uint semantic_index); + +uint +draw_num_vs_outputs(struct draw_context *draw); + +/* + * Vertex shader functions + */ + struct draw_vertex_shader * draw_create_vertex_shader(struct draw_context *draw, const struct pipe_shader_state *shader); @@ -102,7 +127,11 @@ void draw_bind_vertex_shader(struct draw_context *draw, void draw_delete_vertex_shader(struct draw_context *draw, struct draw_vertex_shader *dvs); -boolean draw_use_sse(struct draw_context *draw); + + +/* + * Vertex data functions + */ void draw_set_vertex_buffer(struct draw_context *draw, unsigned attr, diff --git a/src/gallium/auxiliary/draw/draw_flatshade.c b/src/gallium/auxiliary/draw/draw_flatshade.c index 4398abbc60..ccad71d695 100644 --- a/src/gallium/auxiliary/draw/draw_flatshade.c +++ b/src/gallium/auxiliary/draw/draw_flatshade.c @@ -128,14 +128,14 @@ static void flatshade_point( struct draw_stage *stage, static void flatshade_init_state( struct draw_stage *stage ) { struct flat_stage *flat = flat_stage(stage); - const struct pipe_shader_state *vs = stage->draw->vertex_shader->state; + const struct draw_vertex_shader *vs = stage->draw->vertex_shader; uint i; /* Find which vertex shader outputs are colors, make a list */ flat->num_color_attribs = 0; - for (i = 0; i < vs->num_outputs; i++) { - if (vs->output_semantic_name[i] == TGSI_SEMANTIC_COLOR || - vs->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR || + vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { flat->color_attribs[flat->num_color_attribs++] = i; } } diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c index 51e2242719..7d6cd43410 100644 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ b/src/gallium/auxiliary/draw/draw_prim.c @@ -114,6 +114,7 @@ static void draw_prim_queue_flush( struct draw_context *draw ) } draw->pq.queue_nr = 0; + draw->vs.post_nr = 0; draw_vertex_cache_unreference( draw ); } @@ -121,11 +122,15 @@ static void draw_prim_queue_flush( struct draw_context *draw ) void draw_do_flush( struct draw_context *draw, unsigned flags ) { + static boolean flushing = FALSE; + if (0) debug_printf("Flushing with %d verts, %d prims\n", draw->vs.queue_nr, draw->pq.queue_nr ); + if (!flushing) { + flushing = TRUE; if (flags >= DRAW_FLUSH_SHADER_QUEUE) { if (draw->vs.queue_nr) @@ -146,6 +151,9 @@ void draw_do_flush( struct draw_context *draw, unsigned flags ) } } } + + flushing = FALSE; + } } @@ -219,6 +227,7 @@ static void do_triangle( struct draw_context *draw, { struct prim_header *prim = get_queued_prim( draw, 3 ); +// _mesa_printf("tri %d %d %d\n", i0, i1, i2); prim->reset_line_stipple = 1; prim->edgeflags = ~0; prim->pad = 0; diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index bc11259cb2..c732d723a7 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -44,10 +44,12 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "x86/rtasm/x86sse.h" +#include "rtasm/rtasm_x86sse.h" #include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_scan.h" +struct pipe_context; struct gallivm_prog; struct gallivm_cpu_engine; @@ -119,7 +121,7 @@ struct draw_stage }; -#define PRIM_QUEUE_LENGTH 16 +#define PRIM_QUEUE_LENGTH 32 #define VCACHE_SIZE 32 #define VCACHE_OVERFLOW 4 #define VS_QUEUE_LENGTH (VCACHE_SIZE + VCACHE_OVERFLOW + 1) /* can never fill up */ @@ -133,6 +135,8 @@ struct draw_vertex_shader { */ const struct pipe_shader_state *state; + struct tgsi_shader_info info; + void (*prepare)( struct draw_vertex_shader *shader, struct draw_context *draw ); @@ -179,7 +183,11 @@ struct draw_context struct draw_stage *offset; struct draw_stage *unfilled; struct draw_stage *stipple; - struct draw_stage *wide; + struct draw_stage *aapoint; + struct draw_stage *aaline; + struct draw_stage *pstipple; + struct draw_stage *wide_line; + struct draw_stage *wide_point; struct draw_stage *rasterize; } pipeline; @@ -211,10 +219,18 @@ struct draw_context float plane[12][4]; unsigned nr_planes; - boolean convert_wide_points; /**< convert wide points to tris? */ - boolean convert_wide_lines; /**< convert side lines to tris? */ + float wide_point_threshold; /**< convert pnts to tris if larger than this */ + float wide_line_threshold; /**< convert lines to tris if wider than this */ boolean use_sse; + /* If a prim stage introduces new vertex attributes, they'll be stored here + */ + struct { + uint semantic_name; + uint semantic_index; + int slot; + } extra_vp_outputs; + unsigned reduced_prim; /** TGSI program interpreter runtime state */ @@ -234,8 +250,12 @@ struct draw_context */ struct { unsigned referenced; /**< bitfield */ - unsigned idx[VCACHE_SIZE + VCACHE_OVERFLOW]; - struct vertex_header *vertex[VCACHE_SIZE + VCACHE_OVERFLOW]; + + struct { + unsigned in; /* client array element */ + unsigned out; /* index in vs queue/array */ + } idx[VCACHE_SIZE + VCACHE_OVERFLOW]; + unsigned overflow; /** To find space in the vertex cache: */ @@ -248,9 +268,10 @@ struct draw_context struct { struct { unsigned elt; /**< index into the user's vertex arrays */ - struct vertex_header *dest; /**< points into vcache.vertex[] array */ + struct vertex_header *vertex; } queue[VS_QUEUE_LENGTH]; unsigned queue_nr; + unsigned post_nr; } vs; /** @@ -284,7 +305,8 @@ extern struct draw_stage *draw_clip_stage( struct draw_context *context ); extern struct draw_stage *draw_flatshade_stage( struct draw_context *context ); extern struct draw_stage *draw_cull_stage( struct draw_context *context ); extern struct draw_stage *draw_stipple_stage( struct draw_context *context ); -extern struct draw_stage *draw_wide_stage( struct draw_context *context ); +extern struct draw_stage *draw_wide_line_stage( struct draw_context *context ); +extern struct draw_stage *draw_wide_point_stage( struct draw_context *context ); extern struct draw_stage *draw_validate_stage( struct draw_context *context ); diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c new file mode 100644 index 0000000000..2cfeb813b3 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -0,0 +1,697 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Polygon stipple stage: implement polygon stipple with texture map and + * fragment program. The fragment program samples the texture and does + * a fragment kill for the stipple-failing fragments. + * + * Authors: Brian Paul + */ + + +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/util/tgsi_transform.h" +#include "tgsi/util/tgsi_dump.h" + +#include "draw_context.h" +#include "draw_private.h" + + + +/** + * Subclass of pipe_shader_state to carry extra fragment shader info. + */ +struct pstip_fragment_shader +{ + struct pipe_shader_state state; + void *driver_fs; + void *pstip_fs; +}; + + +/** + * Subclass of draw_stage + */ +struct pstip_stage +{ + struct draw_stage stage; + + void *sampler_cso; + struct pipe_texture *texture; + uint sampler_unit; + + /* + * Currently bound state + */ + struct pstip_fragment_shader *fs; + struct { + void *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + const struct pipe_poly_stipple *stipple; + } state; + + /* + * Driver interface/override functions + */ + void * (*driver_create_fs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*driver_bind_fs_state)(struct pipe_context *, void *); + void (*driver_delete_fs_state)(struct pipe_context *, void *); + + void (*driver_bind_sampler_state)(struct pipe_context *, unsigned, void *); + + void (*driver_set_sampler_texture)(struct pipe_context *, + unsigned sampler, + struct pipe_texture *); + + void (*driver_set_polygon_stipple)(struct pipe_context *, + const struct pipe_poly_stipple *); + + struct pipe_context *pipe; +}; + + + +/** + * Subclass of tgsi_transform_context, used for transforming the + * user's fragment shader to add the special AA instructions. + */ +struct pstip_transform_context { + struct tgsi_transform_context base; + uint tempsUsed; /**< bitmask */ + int wincoordInput; + int maxInput; + int maxSampler; /**< max sampler index found */ + int texTemp; /**< temp registers */ + int numImmed; + boolean firstInstruction; +}; + + +/** + * TGSI declaration transform callback. + * Look for a free sampler, a free input attrib, and two free temp regs. + */ +static void +pstip_transform_decl(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl) +{ + struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + + if (decl->Declaration.File == TGSI_FILE_SAMPLER) { + if ((int) decl->u.DeclarationRange.Last > pctx->maxSampler) + pctx->maxSampler = (int) decl->u.DeclarationRange.Last; + } + else if (decl->Declaration.File == TGSI_FILE_INPUT) { + pctx->maxInput = MAX2(pctx->maxInput, (int) decl->u.DeclarationRange.Last); + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) + pctx->wincoordInput = (int) decl->u.DeclarationRange.First; + } + else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { + uint i; + for (i = decl->u.DeclarationRange.First; + i <= decl->u.DeclarationRange.Last; i++) { + pctx->tempsUsed |= (1 << i); + } + } + + ctx->emit_declaration(ctx, decl); +} + + +static void +pstip_transform_immed(struct tgsi_transform_context *ctx, + struct tgsi_full_immediate *immed) +{ + struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + pctx->numImmed++; +} + + +/** + * TGSI instruction transform callback. + * Replace writes to result.color w/ a temp reg. + * Upon END instruction, insert texture sampling code for antialiasing. + */ +static void +pstip_transform_inst(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + + if (pctx->firstInstruction) { + /* emit our new declarations before the first instruction */ + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction newInst; + uint i; + int wincoordInput; + const int sampler = pctx->maxSampler + 1; + + if (pctx->wincoordInput < 0) + wincoordInput = pctx->maxInput + 1; + else + wincoordInput = pctx->wincoordInput; + + /* find one free temp reg */ + for (i = 0; i < 32; i++) { + if ((pctx->tempsUsed & (1 << i)) == 0) { + /* found a free temp */ + if (pctx->texTemp < 0) + pctx->texTemp = i; + else + break; + } + } + assert(pctx->texTemp >= 0); + + if (pctx->wincoordInput < 0) { + /* declare new position input reg */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION; + decl.Semantic.SemanticIndex = 0; + decl.Declaration.Interpolate = 1; + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; /* XXX? */ + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = wincoordInput; + ctx->emit_declaration(ctx, &decl); + } + + /* declare new sampler */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = sampler; + ctx->emit_declaration(ctx, &decl); + + /* declare new temp regs */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = pctx->texTemp; + ctx->emit_declaration(ctx, &decl); + + /* emit immediate = {1/32, 1/32, 1, 1} + * The index/position of this immediate will be pctx->numImmed + */ + { + static const float value[4] = { 1.0/32, 1.0/32, 1.0, 1.0 }; + struct tgsi_full_immediate immed; + uint size = 4; + immed = tgsi_default_full_immediate(); + immed.Immediate.Size = 1 + size; /* one for the token itself */ + immed.u.ImmediateFloat32 = (struct tgsi_immediate_float32 *) value; + ctx->emit_immediate(ctx, &immed); + } + + pctx->firstInstruction = FALSE; + + + /* + * Insert new MUL/TEX/KILP instructions at start of program + * Take gl_FragCoord, divide by 32 (stipple size), sample the + * texture and kill fragment if needed. + * + * We'd like to use non-normalized texcoords to index into a RECT + * texture, but we can only use GL_REPEAT wrap mode with normalized + * texcoords. Darn. + */ + + /* MUL texTemp, INPUT[wincoord], 1/32; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = wincoordInput; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_IMMEDIATE; + newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->numImmed; + ctx->emit_instruction(ctx, &newInst); + + /* TEX texTemp, texTemp, sampler; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_TEX; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + newInst.FullSrcRegisters[1].SrcRegister.Index = sampler; + ctx->emit_instruction(ctx, &newInst); + + /* KILP texTemp; # if texTemp < 0, KILL fragment */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_KILP; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; + newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + ctx->emit_instruction(ctx, &newInst); + } + + /* emit this instruction */ + ctx->emit_instruction(ctx, inst); +} + + +/** + * Generate the frag shader we'll use for doing polygon stipple. + * This will be the user's shader prefixed with a TEX and KIL instruction. + */ +static void +generate_pstip_fs(struct pstip_stage *pstip) +{ + const struct pipe_shader_state *orig_fs = &pstip->fs->state; + /*struct draw_context *draw = pstip->stage.draw;*/ + struct pipe_shader_state pstip_fs; + struct pstip_transform_context transform; + +#define MAX 1000 + + pstip_fs = *orig_fs; /* copy to init */ + pstip_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + + memset(&transform, 0, sizeof(transform)); + transform.wincoordInput = -1; + transform.maxInput = -1; + transform.maxSampler = -1; + transform.texTemp = -1; + transform.firstInstruction = TRUE; + transform.base.transform_instruction = pstip_transform_inst; + transform.base.transform_declaration = pstip_transform_decl; + transform.base.transform_immediate = pstip_transform_immed; + + tgsi_transform_shader(orig_fs->tokens, + (struct tgsi_token *) pstip_fs.tokens, + MAX, &transform.base); + +#if 1 /* DEBUG */ + tgsi_dump(orig_fs->tokens, 0); + tgsi_dump(pstip_fs.tokens, 0); +#endif + + pstip->sampler_unit = transform.maxSampler + 1; + +#if 1 /* XXX remove */ + if (transform.wincoordInput < 0) { + pstip_fs.input_semantic_name[pstip_fs.num_inputs] = TGSI_SEMANTIC_POSITION; + pstip_fs.input_semantic_index[pstip_fs.num_inputs] = (ubyte)transform.maxInput; + pstip_fs.num_inputs++; + } +#endif + + pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs); +} + + +/** + * Load texture image with current stipple pattern. + */ +static void +pstip_update_texture(struct pstip_stage *pstip) +{ + static const uint bit31 = 1 << 31; + struct pipe_context *pipe = pstip->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_surface *surface; + const uint *stipple = pstip->state.stipple->stipple; + uint i, j; + ubyte *data; + + surface = screen->get_tex_surface(screen, pstip->texture, 0, 0, 0); + data = pipe_surface_map(surface); + + /* + * Load alpha texture. + * Note: 0 means keep the fragment, 255 means kill it. + * We'll negate the texel value and use KILP which kills if value + * is negative. + */ + for (i = 0; i < 32; i++) { + for (j = 0; j < 32; j++) { + if (stipple[i] & (bit31 >> j)) { + /* fragment "on" */ + data[i * surface->pitch + j] = 0; + } + else { + /* fragment "off" */ + data[i * surface->pitch + j] = 255; + } + } + } + + /* unmap */ + pipe_surface_unmap(surface); + pipe_surface_reference(&surface, NULL); + pipe->texture_update(pipe, pstip->texture); +} + + +/** + * Create the texture map we'll use for stippling. + */ +static void +pstip_create_texture(struct pstip_stage *pstip) +{ + struct pipe_context *pipe = pstip->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_texture texTemp; + + memset(&texTemp, 0, sizeof(texTemp)); + texTemp.target = PIPE_TEXTURE_2D; + texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */ + texTemp.last_level = 0; + texTemp.width[0] = 32; + texTemp.height[0] = 32; + texTemp.depth[0] = 1; + texTemp.cpp = 1; + + pstip->texture = screen->texture_create(screen, &texTemp); + + //pstip_update_texture(pstip); +} + + +/** + * Create the sampler CSO that'll be used for antialiasing. + * By using a mipmapped texture, we don't have to generate a different + * texture image for each line size. + */ +static void +pstip_create_sampler(struct pstip_stage *pstip) +{ + struct pipe_sampler_state sampler; + struct pipe_context *pipe = pstip->pipe; + + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.normalized_coords = 1; + sampler.min_lod = 0.0f; + sampler.max_lod = 0.0f; + + pstip->sampler_cso = pipe->create_sampler_state(pipe, &sampler); +} + + +/** + * When we're about to draw our first AA line in a batch, this function is + * called to tell the driver to bind our modified fragment shader. + */ +static void +bind_pstip_fragment_shader(struct pstip_stage *pstip) +{ + if (!pstip->fs->pstip_fs) { + generate_pstip_fs(pstip); + } + pstip->driver_bind_fs_state(pstip->pipe, pstip->fs->pstip_fs); +} + + + +static INLINE struct pstip_stage * +pstip_stage( struct draw_stage *stage ) +{ + return (struct pstip_stage *) stage; +} + + +static void +passthrough_point(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->point(stage->next, header); +} + + +static void +passthrough_line(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->line(stage->next, header); +} + + +static void +passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + + +static void +pstip_first_tri(struct draw_stage *stage, struct prim_header *header) +{ + struct pstip_stage *pstip = pstip_stage(stage); + struct draw_context *draw = stage->draw; + struct pipe_context *pipe = pstip->pipe; + + assert(draw->rasterizer->poly_stipple_enable); + + /* + * Bind our fragprog, sampler and texture + */ + bind_pstip_fragment_shader(pstip); + + pstip->driver_bind_sampler_state(pipe, pstip->sampler_unit, pstip->sampler_cso); + pstip->driver_set_sampler_texture(pipe, pstip->sampler_unit, pstip->texture); + + /* now really draw first line */ + stage->tri = passthrough_tri; + stage->tri(stage, header); +} + + +static void +pstip_flush(struct draw_stage *stage, unsigned flags) +{ + /*struct draw_context *draw = stage->draw;*/ + struct pstip_stage *pstip = pstip_stage(stage); + struct pipe_context *pipe = pstip->pipe; + + stage->tri = pstip_first_tri; + stage->next->flush( stage->next, flags ); + + /* restore original frag shader */ + pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs); + + /* XXX restore original texture, sampler state */ + pstip->driver_bind_sampler_state(pipe, pstip->sampler_unit, + pstip->state.sampler[pstip->sampler_unit]); + pstip->driver_set_sampler_texture(pipe, pstip->sampler_unit, + pstip->state.texture[pstip->sampler_unit]); +} + + +static void +pstip_reset_stipple_counter(struct draw_stage *stage) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void +pstip_destroy(struct draw_stage *stage) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +static struct pstip_stage * +draw_pstip_stage(struct draw_context *draw) +{ + struct pstip_stage *pstip = CALLOC_STRUCT(pstip_stage); + + draw_alloc_temp_verts( &pstip->stage, 8 ); + + pstip->stage.draw = draw; + pstip->stage.next = NULL; + pstip->stage.point = passthrough_point; + pstip->stage.line = passthrough_line; + pstip->stage.tri = pstip_first_tri; + pstip->stage.flush = pstip_flush; + pstip->stage.reset_stipple_counter = pstip_reset_stipple_counter; + pstip->stage.destroy = pstip_destroy; + + return pstip; +} + + +static struct pstip_stage * +pstip_stage_from_pipe(struct pipe_context *pipe) +{ + struct draw_context *draw = (struct draw_context *) pipe->draw; + return pstip_stage(draw->pipeline.pstipple); +} + + +/** + * This function overrides the driver's create_fs_state() function and + * will typically be called by the state tracker. + */ +static void * +pstip_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *fs) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct pstip_fragment_shader *aafs = CALLOC_STRUCT(pstip_fragment_shader); + + if (aafs) { + aafs->state = *fs; + + /* pass-through */ + aafs->driver_fs = pstip->driver_create_fs_state(pstip->pipe, fs); + } + + return aafs; +} + + +static void +pstip_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs; + /* save current */ + pstip->fs = aafs; + /* pass-through */ + pstip->driver_bind_fs_state(pstip->pipe, aafs->driver_fs); +} + + +static void +pstip_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs; + /* pass-through */ + pstip->driver_delete_fs_state(pstip->pipe, aafs->driver_fs); + FREE(aafs); +} + + +static void +pstip_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + /* save current */ + pstip->state.sampler[unit] = sampler; + /* pass-through */ + pstip->driver_bind_sampler_state(pstip->pipe, unit, sampler); +} + + +static void +pstip_set_sampler_texture(struct pipe_context *pipe, + unsigned sampler, struct pipe_texture *texture) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + /* save current */ + pstip->state.texture[sampler] = texture; + /* pass-through */ + pstip->driver_set_sampler_texture(pstip->pipe, sampler, texture); +} + + +static void +pstip_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + /* save current */ + pstip->state.stipple = stipple; + /* pass-through */ + pstip->driver_set_polygon_stipple(pstip->pipe, stipple); + + pstip_update_texture(pstip); +} + + + +/** + * Called by drivers that want to install this AA line prim stage + * into the draw module's pipeline. This will not be used if the + * hardware has native support for AA lines. + */ +void +draw_install_pstipple_stage(struct draw_context *draw, + struct pipe_context *pipe) +{ + struct pstip_stage *pstip; + + pipe->draw = (void *) draw; + + /* + * Create / install AA line drawing / prim stage + */ + pstip = draw_pstip_stage( draw ); + assert(pstip); + draw->pipeline.pstipple = &pstip->stage; + + pstip->pipe = pipe; + + /* create special texture, sampler state */ + pstip_create_texture(pstip); + pstip_create_sampler(pstip); + + /* save original driver functions */ + pstip->driver_create_fs_state = pipe->create_fs_state; + pstip->driver_bind_fs_state = pipe->bind_fs_state; + pstip->driver_delete_fs_state = pipe->delete_fs_state; + + pstip->driver_bind_sampler_state = pipe->bind_sampler_state; + pstip->driver_set_sampler_texture = pipe->set_sampler_texture; + pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple; + + /* override the driver's functions */ + pipe->create_fs_state = pstip_create_fs_state; + pipe->bind_fs_state = pstip_bind_fs_state; + pipe->delete_fs_state = pstip_delete_fs_state; + + pipe->bind_sampler_state = pstip_bind_sampler_state; + pipe->set_sampler_texture = pstip_set_sampler_texture; + pipe->set_polygon_stipple = pstip_set_polygon_stipple; +} diff --git a/src/gallium/auxiliary/draw/draw_twoside.c b/src/gallium/auxiliary/draw/draw_twoside.c index 1c38957987..3debaac282 100644 --- a/src/gallium/auxiliary/draw/draw_twoside.c +++ b/src/gallium/auxiliary/draw/draw_twoside.c @@ -119,7 +119,7 @@ static void twoside_first_tri( struct draw_stage *stage, struct prim_header *header ) { struct twoside_stage *twoside = twoside_stage(stage); - const struct pipe_shader_state *vs = stage->draw->vertex_shader->state; + const struct draw_vertex_shader *vs = stage->draw->vertex_shader; uint i; twoside->attrib_front0 = 0; @@ -128,15 +128,15 @@ static void twoside_first_tri( struct draw_stage *stage, twoside->attrib_back1 = 0; /* Find which vertex shader outputs are front/back colors */ - for (i = 0; i < vs->num_outputs; i++) { - if (vs->output_semantic_name[i] == TGSI_SEMANTIC_COLOR) { - if (vs->output_semantic_index[i] == 0) + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR) { + if (vs->info.output_semantic_index[i] == 0) twoside->attrib_front0 = i; else twoside->attrib_front1 = i; } - if (vs->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { - if (vs->output_semantic_index[i] == 0) + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { + if (vs->info.output_semantic_index[i] == 0) twoside->attrib_back0 = i; else twoside->attrib_back1 = i; diff --git a/src/gallium/auxiliary/draw/draw_unfilled.c b/src/gallium/auxiliary/draw/draw_unfilled.c index 8777cfdfc8..4d718d514c 100644 --- a/src/gallium/auxiliary/draw/draw_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_unfilled.c @@ -101,9 +101,9 @@ static void lines( struct draw_stage *stage, assert(((header->edgeflags & 0x4) >> 2) == header->v[2]->edgeflag); #endif + if (header->edgeflags & 0x4) line( stage, v2, v0 ); if (header->edgeflags & 0x1) line( stage, v0, v1 ); if (header->edgeflags & 0x2) line( stage, v1, v2 ); - if (header->edgeflags & 0x4) line( stage, v2, v0 ); } diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c index 4375ebabbc..b43295b586 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -45,12 +45,25 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) struct draw_stage *next = draw->pipeline.rasterize; int need_det = 0; int precalc_flat = 0; + boolean wide_lines, wide_points; /* Set the validate's next stage to the rasterize stage, so that it * can be found later if needed for flushing. */ stage->next = next; + /* drawing wide lines? */ + wide_lines = (draw->rasterizer->line_width > draw->wide_line_threshold + && !draw->rasterizer->line_smooth); + + /* drawing large points? */ + if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) + wide_points = FALSE; + else if (draw->rasterizer->point_size > draw->wide_point_threshold) + wide_points = TRUE; + else + wide_points = FALSE; + /* * NOTE: we build up the pipeline in end-to-start order. * @@ -58,11 +71,25 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) * shorter pipelines for lines & points. */ - if ((draw->rasterizer->line_width != 1.0 && draw->convert_wide_lines) || - (draw->rasterizer->point_size != 1.0 && draw->convert_wide_points) || - draw->rasterizer->point_sprite) { - draw->pipeline.wide->next = next; - next = draw->pipeline.wide; + if (draw->rasterizer->line_smooth && draw->pipeline.aaline) { + draw->pipeline.aaline->next = next; + next = draw->pipeline.aaline; + } + + if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) { + draw->pipeline.aapoint->next = next; + next = draw->pipeline.aapoint; + } + + if (wide_lines) { + draw->pipeline.wide_line->next = next; + next = draw->pipeline.wide_line; + precalc_flat = 1; + } + + if (wide_points || draw->rasterizer->point_sprite) { + draw->pipeline.wide_point->next = next; + next = draw->pipeline.wide_point; } if (draw->rasterizer->line_stipple_enable) { @@ -71,6 +98,12 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) precalc_flat = 1; /* only needed for lines really */ } + if (draw->rasterizer->poly_stipple_enable + && draw->pipeline.pstipple) { + draw->pipeline.pstipple->next = next; + next = draw->pipeline.pstipple; + } + if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) { draw->pipeline.unfilled->next = next; diff --git a/src/gallium/auxiliary/draw/draw_vertex_cache.c b/src/gallium/auxiliary/draw/draw_vertex_cache.c index 44427999cc..53f8bbec44 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_cache.c +++ b/src/gallium/auxiliary/draw/draw_vertex_cache.c @@ -41,7 +41,7 @@ void draw_vertex_cache_invalidate( struct draw_context *draw ) assert(draw->vs.queue_nr == 0); assert(draw->vcache.referenced == 0); - memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx)); +// memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx)); } @@ -62,43 +62,51 @@ static struct vertex_header *get_vertex( struct draw_context *draw, assert(slot < 32); /* so we don't exceed the bitfield size below */ - /* Cache miss? - */ - if (draw->vcache.idx[slot] != i) { - - /* If slot is in use, use the overflow area: + if (draw->vcache.referenced & (1<<slot)) + { + /* Cache hit? */ - if (draw->vcache.referenced & (1 << slot)) { - slot = VCACHE_SIZE + draw->vcache.overflow++; + if (draw->vcache.idx[slot].in == i) { +// _mesa_printf("HIT %d %d\n", slot, i); + assert(draw->vcache.idx[slot].out < draw->vs.queue_nr); + return draw->vs.queue[draw->vcache.idx[slot].out].vertex; } + /* Otherwise a collision + */ + slot = VCACHE_SIZE + draw->vcache.overflow++; +// _mesa_printf("XXX %d --> %d\n", i, slot); + } + + /* Deal with the cache miss: + */ + { + unsigned out; + assert(slot < Elements(draw->vcache.idx)); - draw->vcache.idx[slot] = i; +// _mesa_printf("NEW %d %d\n", slot, i); + draw->vcache.idx[slot].in = i; + draw->vcache.idx[slot].out = out = draw->vs.queue_nr++; + draw->vcache.referenced |= (1 << slot); + /* Add to vertex shader queue: */ assert(draw->vs.queue_nr < VS_QUEUE_LENGTH); - draw->vs.queue[draw->vs.queue_nr].dest = draw->vcache.vertex[slot]; - draw->vs.queue[draw->vs.queue_nr].elt = i; - draw->vs.queue_nr++; + + draw->vs.queue[out].elt = i; + draw->vs.queue[out].vertex->clipmask = 0; + draw->vs.queue[out].vertex->edgeflag = 1; /*XXX use user's edge flag! */ + draw->vs.queue[out].vertex->pad = 0; + draw->vs.queue[out].vertex->vertex_id = UNDEFINED_VERTEX_ID; /* Need to set the vertex's edge flag here. If we're being called * by do_ef_triangle(), that function needs edge flag info! */ - draw->vcache.vertex[slot]->clipmask = 0; - draw->vcache.vertex[slot]->edgeflag = 1; /*XXX use user's edge flag! */ - draw->vcache.vertex[slot]->pad = 0; - draw->vcache.vertex[slot]->vertex_id = UNDEFINED_VERTEX_ID; - } - - /* primitive flushing may have cleared the bitfield but did not - * clear the idx[] array values. Set the bit now. This fixes a - * bug found when drawing long triangle fans. - */ - draw->vcache.referenced |= (1 << slot); - return draw->vcache.vertex[slot]; + return draw->vs.queue[draw->vcache.idx[slot].out].vertex; + } } @@ -130,8 +138,8 @@ void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw ) { unsigned i; - for (i = 0; i < Elements(draw->vcache.vertex); i++) - draw->vcache.vertex[i]->vertex_id = UNDEFINED_VERTEX_ID; + for (i = 0; i < draw->vs.post_nr; i++) + draw->vs.queue[i].vertex->vertex_id = UNDEFINED_VERTEX_ID; } diff --git a/src/gallium/auxiliary/draw/draw_vertex_fetch.c b/src/gallium/auxiliary/draw/draw_vertex_fetch.c index e13df04605..cb8cdd04a3 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_fetch.c +++ b/src/gallium/auxiliary/draw/draw_vertex_fetch.c @@ -473,7 +473,7 @@ void draw_update_vertex_fetch( struct draw_context *draw ) if (!draw->vertex_shader) return; - nr_attrs = draw->vertex_shader->state->num_inputs; + nr_attrs = draw->vertex_shader->info.num_inputs; for (i = 0; i < nr_attrs; i++) { unsigned buf = draw->vertex_element[i].vertex_buffer_index; diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c index f68f6e3244..1e95355555 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c @@ -55,7 +55,7 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) */ shader->prepare( shader, draw ); -// fprintf(stderr, " q(%d) ", draw->vs.queue_nr ); +// fprintf(stderr, "%s %d\n", __FUNCTION__, draw->vs.queue_nr ); /* run vertex shader on vertex cache entries, four per invokation */ for (i = 0; i < draw->vs.queue_nr; i += 4) { @@ -65,12 +65,12 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) for (j = 0; j < n; j++) { elts[j] = draw->vs.queue[i + j].elt; - dests[j] = draw->vs.queue[i + j].dest; + dests[j] = draw->vs.queue[i + j].vertex; } for ( ; j < 4; j++) { elts[j] = elts[0]; - dests[j] = dests[0]; + dests[j] = draw->vs.queue[i + j].vertex; } assert(n > 0); @@ -79,6 +79,7 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) shader->run(shader, draw, elts, n, dests); } + draw->vs.post_nr = draw->vs.queue_nr; draw->vs.queue_nr = 0; } @@ -90,15 +91,16 @@ draw_create_vertex_shader(struct draw_context *draw, struct draw_vertex_shader *vs; vs = draw_create_vs_llvm( draw, shader ); - if (vs) - return vs; + if (!vs) { + vs = draw_create_vs_sse( draw, shader ); + if (!vs) { + vs = draw_create_vs_exec( draw, shader ); + } + } + assert(vs); - vs = draw_create_vs_sse( draw, shader ); - if (vs) - return vs; + tgsi_scan_shader(shader->tokens, &vs->info); - vs = draw_create_vs_exec( draw, shader ); - assert(vs); return vs; } @@ -110,7 +112,7 @@ draw_bind_vertex_shader(struct draw_context *draw, draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); draw->vertex_shader = dvs; - draw->num_vs_outputs = dvs->state->num_outputs; + draw->num_vs_outputs = dvs->info.num_outputs; tgsi_exec_machine_init(&draw->machine); diff --git a/src/gallium/auxiliary/draw/draw_vf.c b/src/gallium/auxiliary/draw/draw_vf.c index dc3a5ecd21..901ff20a7e 100644 --- a/src/gallium/auxiliary/draw/draw_vf.c +++ b/src/gallium/auxiliary/draw/draw_vf.c @@ -30,6 +30,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_util.h" +#include "rtasm/rtasm_execmem.h" #include "draw_vf.h" @@ -37,11 +38,6 @@ #define DRAW_VF_DBG 0 -/* TODO: remove this */ -extern void -_mesa_exec_free( void *addr ); - - static boolean match_fastpath( struct draw_vertex_fetch *vf, const struct draw_vf_fastpath *fp) { @@ -414,12 +410,12 @@ void draw_vf_destroy( struct draw_vertex_fetch *vf ) FREE(fp->attr); /* KW: At the moment, fp->func is constrained to be allocated by - * _mesa_exec_alloc(), as the hardwired fastpaths in + * rtasm_exec_alloc(), as the hardwired fastpaths in * t_vertex_generic.c are handled specially. It would be nice * to unify them, but this probably won't change until this * module gets another overhaul. */ - //_mesa_exec_free((void *) fp->func); + //rtasm_exec_free((void *) fp->func); FREE(fp); } diff --git a/src/gallium/auxiliary/draw/draw_vf_sse.c b/src/gallium/auxiliary/draw/draw_vf_sse.c index 1ad2ae756d..aff4ffd985 100644 --- a/src/gallium/auxiliary/draw/draw_vf_sse.c +++ b/src/gallium/auxiliary/draw/draw_vf_sse.c @@ -26,17 +26,16 @@ */ -#include "simple_list.h" - #include "pipe/p_compiler.h" +#include "util/u_simple_list.h" #include "draw_vf.h" #if defined(USE_SSE_ASM) -#include "x86/rtasm/x86sse.h" -#include "x86/common_x86_asm.h" +#include "rtasm/rtasm_cpu.h" +#include "rtasm/rtasm_x86sse.h" #define X 0 @@ -576,7 +575,7 @@ void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf ) { struct x86_program p; - if (!cpu_has_xmm) { + if (!rtasm_cpu_has_sse()) { vf->codegen_emit = NULL; return; } @@ -586,7 +585,7 @@ void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf ) p.vf = vf; p.inputs_safe = 0; /* for now */ p.outputs_safe = 1; /* for now */ - p.have_sse2 = cpu_has_xmm2; + p.have_sse2 = rtasm_cpu_has_sse2(); p.identity = x86_make_reg(file_XMM, 6); p.chan0 = x86_make_reg(file_XMM, 7); diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 8588879400..583812aadd 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -103,7 +103,7 @@ vs_exec_run( struct draw_vertex_shader *shader, const float *trans = draw->viewport.translate; assert(count <= 4); - assert(draw->vertex_shader->state->output_semantic_name[0] + assert(draw->vertex_shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION); machine->Consts = (float (*)[4]) draw->user.constants; diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 44022b6e07..0fd557d667 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -40,7 +40,7 @@ #ifdef MESA_LLVM -#include "llvm/gallivm.h" +#include "gallivm/gallivm.h" struct draw_llvm_vertex_shader { struct draw_vertex_shader base; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 27bc66812c..0b8bc2bf14 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -41,7 +41,7 @@ #include "draw_private.h" #include "draw_context.h" -#include "x86/rtasm/x86sse.h" +#include "rtasm/rtasm_x86sse.h" #include "tgsi/exec/tgsi_sse2.h" @@ -119,7 +119,7 @@ vs_sse_run( struct draw_vertex_shader *base, const float *trans = draw->viewport.translate; assert(count <= 4); - assert(draw->vertex_shader->state->output_semantic_name[0] + assert(draw->vertex_shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION); /* Consts does not require 16 byte alignment. */ diff --git a/src/gallium/auxiliary/draw/draw_wide_line.c b/src/gallium/auxiliary/draw/draw_wide_line.c new file mode 100644 index 0000000000..9a168ce8bd --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_wide_line.c @@ -0,0 +1,190 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "draw_private.h" + + +struct wideline_stage { + struct draw_stage stage; + + float half_line_width; +}; + + + +static INLINE struct wideline_stage *wideline_stage( struct draw_stage *stage ) +{ + return (struct wideline_stage *)stage; +} + + +static void wideline_point( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->point( stage->next, header ); +} + + +static void wideline_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->tri(stage->next, header); +} + + +/** + * Draw a wide line by drawing a quad (two triangles). + * XXX need to disable polygon stipple. + */ +static void wideline_line( struct draw_stage *stage, + struct prim_header *header ) +{ + /*const struct wideline_stage *wide = wideline_stage(stage);*/ + const float half_width = 0.5f * stage->draw->rasterizer->line_width; + + struct prim_header tri; + + struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); + struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); + struct vertex_header *v2 = dup_vert(stage, header->v[1], 2); + struct vertex_header *v3 = dup_vert(stage, header->v[1], 3); + + float *pos0 = v0->data[0]; + float *pos1 = v1->data[0]; + float *pos2 = v2->data[0]; + float *pos3 = v3->data[0]; + + const float dx = FABSF(pos0[0] - pos2[0]); + const float dy = FABSF(pos0[1] - pos2[1]); + + /* small tweak to meet GL specification */ + const float bias = 0.125f; + + /* + * Draw wide line as a quad (two tris) by "stretching" the line along + * X or Y. + * We need to tweak coords in several ways to be conformant here. + */ + + if (dx > dy) { + /* x-major line */ + pos0[1] = pos0[1] - half_width - bias; + pos1[1] = pos1[1] + half_width - bias; + pos2[1] = pos2[1] - half_width - bias; + pos3[1] = pos3[1] + half_width - bias; + if (pos0[0] < pos2[0]) { + /* left to right line */ + pos0[0] -= 0.5f; + pos1[0] -= 0.5f; + pos2[0] -= 0.5f; + pos3[0] -= 0.5f; + } + else { + /* right to left line */ + pos0[0] += 0.5f; + pos1[0] += 0.5f; + pos2[0] += 0.5f; + pos3[0] += 0.5f; + } + } + else { + /* y-major line */ + pos0[0] = pos0[0] - half_width + bias; + pos1[0] = pos1[0] + half_width + bias; + pos2[0] = pos2[0] - half_width + bias; + pos3[0] = pos3[0] + half_width + bias; + if (pos0[1] < pos2[1]) { + /* top to bottom line */ + pos0[1] -= 0.5f; + pos1[1] -= 0.5f; + pos2[1] -= 0.5f; + pos3[1] -= 0.5f; + } + else { + /* bottom to top line */ + pos0[1] += 0.5f; + pos1[1] += 0.5f; + pos2[1] += 0.5f; + pos3[1] += 0.5f; + } + } + + tri.det = header->det; /* only the sign matters */ + tri.v[0] = v0; + tri.v[1] = v2; + tri.v[2] = v3; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v0; + tri.v[1] = v3; + tri.v[2] = v1; + stage->next->tri( stage->next, &tri ); +} + + +static void wideline_flush( struct draw_stage *stage, unsigned flags ) +{ + stage->next->flush( stage->next, flags ); +} + + +static void wideline_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void wideline_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +struct draw_stage *draw_wide_line_stage( struct draw_context *draw ) +{ + struct wideline_stage *wide = CALLOC_STRUCT(wideline_stage); + + draw_alloc_temp_verts( &wide->stage, 4 ); + + wide->stage.draw = draw; + wide->stage.next = NULL; + wide->stage.point = wideline_point; + wide->stage.line = wideline_line; + wide->stage.tri = wideline_tri; + wide->stage.flush = wideline_flush; + wide->stage.reset_stipple_counter = wideline_reset_stipple_counter; + wide->stage.destroy = wideline_destroy; + + return &wide->stage; +} diff --git a/src/gallium/auxiliary/draw/draw_wide_point.c b/src/gallium/auxiliary/draw/draw_wide_point.c new file mode 100644 index 0000000000..65bd50f2b8 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_wide_point.c @@ -0,0 +1,261 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "draw_private.h" + + +struct widepoint_stage { + struct draw_stage stage; + + float half_point_size; + + uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS]; + uint texcoord_mode[PIPE_MAX_SHADER_OUTPUTS]; + uint num_texcoords; + + int psize_slot; +}; + + + +static INLINE struct widepoint_stage * +widepoint_stage( struct draw_stage *stage ) +{ + return (struct widepoint_stage *)stage; +} + + +static void passthrough_point( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->point( stage->next, header ); +} + +static void widepoint_line( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->line(stage->next, header); +} + +static void widepoint_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->tri(stage->next, header); +} + + +/** + * Set the vertex texcoords for sprite mode. + * Coords may be left untouched or set to a right-side-up or upside-down + * orientation. + */ +static void set_texcoords(const struct widepoint_stage *wide, + struct vertex_header *v, const float tc[4]) +{ + uint i; + for (i = 0; i < wide->num_texcoords; i++) { + if (wide->texcoord_mode[i] != PIPE_SPRITE_COORD_NONE) { + uint j = wide->texcoord_slot[i]; + v->data[j][0] = tc[0]; + if (wide->texcoord_mode[i] == PIPE_SPRITE_COORD_LOWER_LEFT) + v->data[j][1] = 1.0f - tc[1]; + else + v->data[j][1] = tc[1]; + v->data[j][2] = tc[2]; + v->data[j][3] = tc[3]; + } + } +} + + +/* If there are lots of sprite points (and why wouldn't there be?) it + * would probably be more sensible to change hardware setup to + * optimize this rather than doing the whole thing in software like + * this. + */ +static void widepoint_point( struct draw_stage *stage, + struct prim_header *header ) +{ + const struct widepoint_stage *wide = widepoint_stage(stage); + const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite; + float half_size; + float left_adj, right_adj, bot_adj, top_adj; + + struct prim_header tri; + + /* four dups of original vertex */ + struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); + struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); + struct vertex_header *v2 = dup_vert(stage, header->v[0], 2); + struct vertex_header *v3 = dup_vert(stage, header->v[0], 3); + + float *pos0 = v0->data[0]; + float *pos1 = v1->data[0]; + float *pos2 = v2->data[0]; + float *pos3 = v3->data[0]; + + const float xbias = 0.0, ybias = -0.125; + + /* point size is either per-vertex or fixed size */ + if (wide->psize_slot >= 0) { + half_size = 0.5f * header->v[0]->data[wide->psize_slot][0]; + } + else { + half_size = wide->half_point_size; + } + + left_adj = -half_size + xbias; + right_adj = half_size + xbias; + bot_adj = half_size + ybias; + top_adj = -half_size + ybias; + + pos0[0] += left_adj; + pos0[1] += top_adj; + + pos1[0] += left_adj; + pos1[1] += bot_adj; + + pos2[0] += right_adj; + pos2[1] += top_adj; + + pos3[0] += right_adj; + pos3[1] += bot_adj; + + if (sprite) { + static const float tex00[4] = { 0, 0, 0, 1 }; + static const float tex01[4] = { 0, 1, 0, 1 }; + static const float tex11[4] = { 1, 1, 0, 1 }; + static const float tex10[4] = { 1, 0, 0, 1 }; + set_texcoords( wide, v0, tex00 ); + set_texcoords( wide, v1, tex01 ); + set_texcoords( wide, v2, tex10 ); + set_texcoords( wide, v3, tex11 ); + } + + tri.det = header->det; /* only the sign matters */ + tri.v[0] = v0; + tri.v[1] = v2; + tri.v[2] = v3; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v0; + tri.v[1] = v3; + tri.v[2] = v1; + stage->next->tri( stage->next, &tri ); +} + + +static void widepoint_first_point( struct draw_stage *stage, + struct prim_header *header ) +{ + struct widepoint_stage *wide = widepoint_stage(stage); + struct draw_context *draw = stage->draw; + + wide->half_point_size = 0.5f * draw->rasterizer->point_size; + + /* XXX we won't know the real size if it's computed by the vertex shader! */ + if (draw->rasterizer->point_size > draw->wide_point_threshold) { + stage->point = widepoint_point; + } + else { + stage->point = passthrough_point; + } + + if (draw->rasterizer->point_sprite) { + /* find vertex shader texcoord outputs */ + const struct draw_vertex_shader *vs = draw->vertex_shader; + uint i, j = 0; + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { + wide->texcoord_slot[j] = i; + wide->texcoord_mode[j] = draw->rasterizer->sprite_coord_mode[j]; + j++; + } + } + wide->num_texcoords = j; + } + + wide->psize_slot = -1; + if (draw->rasterizer->point_size_per_vertex) { + /* find PSIZ vertex output */ + const struct draw_vertex_shader *vs = draw->vertex_shader; + uint i; + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { + wide->psize_slot = i; + break; + } + } + } + + stage->point( stage, header ); +} + + +static void widepoint_flush( struct draw_stage *stage, unsigned flags ) +{ + stage->point = widepoint_first_point; + stage->next->flush( stage->next, flags ); +} + + +static void widepoint_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void widepoint_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +struct draw_stage *draw_wide_point_stage( struct draw_context *draw ) +{ + struct widepoint_stage *wide = CALLOC_STRUCT(widepoint_stage); + + draw_alloc_temp_verts( &wide->stage, 4 ); + + wide->stage.draw = draw; + wide->stage.next = NULL; + wide->stage.point = widepoint_first_point; + wide->stage.line = widepoint_line; + wide->stage.tri = widepoint_tri; + wide->stage.flush = widepoint_flush; + wide->stage.reset_stipple_counter = widepoint_reset_stipple_counter; + wide->stage.destroy = widepoint_destroy; + + return &wide->stage; +} diff --git a/src/gallium/auxiliary/draw/draw_wide_prims.c b/src/gallium/auxiliary/draw/draw_wide_prims.c index 655774b155..d6bff110b4 100644 --- a/src/gallium/auxiliary/draw/draw_wide_prims.c +++ b/src/gallium/auxiliary/draw/draw_wide_prims.c @@ -162,70 +162,6 @@ static void wide_line( struct draw_stage *stage, /** - * Draw a wide line by drawing a quad, using geometry which will - * fullfill GL's antialiased line requirements. - */ -static void wide_line_aa(struct draw_stage *stage, - struct prim_header *header) -{ - const struct wide_stage *wide = wide_stage(stage); - const float half_width = wide->half_line_width; - struct prim_header tri; - struct vertex_header *v[4]; - float *pos; - float dx = header->v[1]->data[0][0] - header->v[0]->data[0][0]; - float dy = header->v[1]->data[0][1] - header->v[0]->data[0][1]; - const float len = (float) sqrt(dx * dx + dy * dy); - uint i; - - dx = dx * half_width / len; - dy = dy * half_width / len; - - /* allocate/dup new verts */ - for (i = 0; i < 4; i++) { - v[i] = dup_vert(stage, header->v[i/2], i); - } - - /* - * Quad for line from v0 to v1: - * - * 1 3 - * +-------------------------+ - * | | - * *v0 v1* - * | | - * +-------------------------+ - * 0 2 - */ - - pos = v[0]->data[0]; - pos[0] += dy; - pos[1] -= dx; - - pos = v[1]->data[0]; - pos[0] -= dy; - pos[1] += dx; - - pos = v[2]->data[0]; - pos[0] += dy; - pos[1] -= dx; - - pos = v[3]->data[0]; - pos[0] -= dy; - pos[1] += dx; - - tri.det = header->det; /* only the sign matters */ - - tri.v[0] = v[2]; tri.v[1] = v[1]; tri.v[2] = v[0]; - stage->next->tri( stage->next, &tri ); - - tri.v[0] = v[3]; tri.v[1] = v[1]; tri.v[2] = v[2]; - stage->next->tri( stage->next, &tri ); - -} - - -/** * Set the vertex texcoords for sprite mode. * Coords may be left untouched or set to a right-side-up or upside-down * orientation. @@ -283,8 +219,8 @@ static void wide_point( struct draw_stage *stage, half_size = wide->half_point_size; } - left_adj = -half_size + 0.25f; - right_adj = half_size + 0.25f; + left_adj = -half_size; /* + 0.25f;*/ + right_adj = half_size; /* + 0.25f;*/ pos0[0] += left_adj; pos0[1] -= half_size; @@ -330,7 +266,8 @@ static void wide_first_point( struct draw_stage *stage, wide->half_point_size = 0.5f * draw->rasterizer->point_size; - if (draw->rasterizer->point_size != 1.0) { + /* XXX we won't know the real size if it's computed by the vertex shader! */ + if (draw->rasterizer->point_size > draw->wide_point_threshold) { stage->point = wide_point; } else { @@ -341,8 +278,8 @@ static void wide_first_point( struct draw_stage *stage, /* find vertex shader texcoord outputs */ const struct draw_vertex_shader *vs = draw->vertex_shader; uint i, j = 0; - for (i = 0; i < vs->state->num_outputs; i++) { - if (vs->state->output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { wide->texcoord_slot[j] = i; wide->texcoord_mode[j] = draw->rasterizer->sprite_coord_mode[j]; j++; @@ -357,8 +294,8 @@ static void wide_first_point( struct draw_stage *stage, /* find PSIZ vertex output */ const struct draw_vertex_shader *vs = draw->vertex_shader; uint i; - for (i = 0; i < vs->state->num_outputs; i++) { - if (vs->state->output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { wide->psize_slot = i; break; } @@ -379,10 +316,7 @@ static void wide_first_line( struct draw_stage *stage, wide->half_line_width = 0.5f * draw->rasterizer->line_width; if (draw->rasterizer->line_width != 1.0) { - if (draw->rasterizer->line_smooth) - wide->stage.line = wide_line_aa; - else - wide->stage.line = wide_line; + wide->stage.line = wide_line; } else { wide->stage.line = passthrough_line; diff --git a/src/gallium/auxiliary/llvm/Makefile b/src/gallium/auxiliary/gallivm/Makefile index e0abf860c1..39fac6ea4a 100644 --- a/src/gallium/auxiliary/llvm/Makefile +++ b/src/gallium/auxiliary/gallivm/Makefile @@ -30,7 +30,7 @@ OBJECTS = $(C_SOURCES:.c=.o) \ ### Include directories INCLUDES = \ -I. \ - -I$(TOP)/src/gallium/drivers + -I$(TOP)/src/gallium/drivers \ -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/include \ -I$(TOP)/src/mesa \ diff --git a/src/gallium/auxiliary/gallivm/SConscript b/src/gallium/auxiliary/gallivm/SConscript new file mode 100644 index 0000000000..c0aa51b90a --- /dev/null +++ b/src/gallium/auxiliary/gallivm/SConscript @@ -0,0 +1,16 @@ +Import('*') + +gallivm = env.ConvenienceLibrary( + target = 'gallivm', + source = [ + 'gallivm.cpp', + 'gallivm_cpu.cpp', + 'instructions.cpp', + 'loweringpass.cpp', + 'tgsitollvm.cpp', + 'storage.cpp', + 'storagesoa.cpp', + 'instructionssoa.cpp', + ]) + +auxiliaries.insert(0, gallivm) diff --git a/src/gallium/auxiliary/llvm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp index d14bb3b99a..d14bb3b99a 100644 --- a/src/gallium/auxiliary/llvm/gallivm.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm.cpp diff --git a/src/gallium/auxiliary/llvm/gallivm.h b/src/gallium/auxiliary/gallivm/gallivm.h index 92da4bca7f..57912a952f 100644 --- a/src/gallium/auxiliary/llvm/gallivm.h +++ b/src/gallium/auxiliary/gallivm/gallivm.h @@ -97,7 +97,7 @@ void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *ee); #endif /* MESA_LLVM */ #if defined __cplusplus -} // extern "C" +} #endif #endif diff --git a/src/gallium/auxiliary/llvm/gallivm_builtins.cpp b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp index 1796f0a177..1796f0a177 100644 --- a/src/gallium/auxiliary/llvm/gallivm_builtins.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp diff --git a/src/gallium/auxiliary/llvm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp index 8f9830d0b1..8f9830d0b1 100644 --- a/src/gallium/auxiliary/llvm/gallivm_cpu.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp diff --git a/src/gallium/auxiliary/llvm/gallivm_p.h b/src/gallium/auxiliary/gallivm/gallivm_p.h index cfe7b1901b..ebf3e11cd5 100644 --- a/src/gallium/auxiliary/llvm/gallivm_p.h +++ b/src/gallium/auxiliary/gallivm/gallivm_p.h @@ -32,7 +32,7 @@ struct gallivm_ir { int num_components; int num_consts; - //FIXME: this might not be enough for some shaders + /* FIXME: this might not be enough for some shaders */ struct gallivm_interpolate interpolators[32*4]; int num_interp; }; @@ -46,7 +46,7 @@ struct gallivm_prog { int num_consts; - //FIXME: this might not be enough for some shaders + /* FIXME: this might not be enough for some shaders */ struct gallivm_interpolate interpolators[32*4]; int num_interp; }; @@ -104,7 +104,7 @@ static INLINE int gallivm_w_swizzle(int swizzle) #endif /* MESA_LLVM */ #if defined __cplusplus -} // extern "C" +} #endif #endif diff --git a/src/gallium/auxiliary/llvm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index 55d39fa5f1..55d39fa5f1 100644 --- a/src/gallium/auxiliary/llvm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp diff --git a/src/gallium/auxiliary/llvm/instructions.h b/src/gallium/auxiliary/gallivm/instructions.h index 9ebc17dd8e..9ebc17dd8e 100644 --- a/src/gallium/auxiliary/llvm/instructions.h +++ b/src/gallium/auxiliary/gallivm/instructions.h diff --git a/src/gallium/auxiliary/llvm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index a4d5046637..a4d5046637 100644 --- a/src/gallium/auxiliary/llvm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp diff --git a/src/gallium/auxiliary/llvm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h index 4169dcbb2e..4169dcbb2e 100644 --- a/src/gallium/auxiliary/llvm/instructionssoa.h +++ b/src/gallium/auxiliary/gallivm/instructionssoa.h diff --git a/src/gallium/auxiliary/llvm/llvm_builtins.c b/src/gallium/auxiliary/gallivm/llvm_builtins.c index 4f98d754ba..4f98d754ba 100644 --- a/src/gallium/auxiliary/llvm/llvm_builtins.c +++ b/src/gallium/auxiliary/gallivm/llvm_builtins.c diff --git a/src/gallium/auxiliary/llvm/loweringpass.cpp b/src/gallium/auxiliary/gallivm/loweringpass.cpp index 556dbec366..556dbec366 100644 --- a/src/gallium/auxiliary/llvm/loweringpass.cpp +++ b/src/gallium/auxiliary/gallivm/loweringpass.cpp diff --git a/src/gallium/auxiliary/llvm/loweringpass.h b/src/gallium/auxiliary/gallivm/loweringpass.h index f62dcf6ba7..f62dcf6ba7 100644 --- a/src/gallium/auxiliary/llvm/loweringpass.h +++ b/src/gallium/auxiliary/gallivm/loweringpass.h diff --git a/src/gallium/auxiliary/llvm/storage.cpp b/src/gallium/auxiliary/gallivm/storage.cpp index c4326de8c5..c4326de8c5 100644 --- a/src/gallium/auxiliary/llvm/storage.cpp +++ b/src/gallium/auxiliary/gallivm/storage.cpp diff --git a/src/gallium/auxiliary/llvm/storage.h b/src/gallium/auxiliary/gallivm/storage.h index 8574f7554e..8574f7554e 100644 --- a/src/gallium/auxiliary/llvm/storage.h +++ b/src/gallium/auxiliary/gallivm/storage.h diff --git a/src/gallium/auxiliary/llvm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp index ed0674a96f..ed0674a96f 100644 --- a/src/gallium/auxiliary/llvm/storagesoa.cpp +++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp diff --git a/src/gallium/auxiliary/llvm/storagesoa.h b/src/gallium/auxiliary/gallivm/storagesoa.h index 6443351f27..6443351f27 100644 --- a/src/gallium/auxiliary/llvm/storagesoa.h +++ b/src/gallium/auxiliary/gallivm/storagesoa.h diff --git a/src/gallium/auxiliary/llvm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 2cb4acce32..2cb4acce32 100644 --- a/src/gallium/auxiliary/llvm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp diff --git a/src/gallium/auxiliary/llvm/tgsitollvm.h b/src/gallium/auxiliary/gallivm/tgsitollvm.h index 7ada04d629..7ada04d629 100644 --- a/src/gallium/auxiliary/llvm/tgsitollvm.h +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.h diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile index 588629e870..a9fa518c67 100644 --- a/src/gallium/auxiliary/pipebuffer/Makefile +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -1,10 +1,9 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = pipebuffer -DRIVER_SOURCES = \ +C_SOURCES = \ pb_buffer_fenced.c \ pb_buffer_malloc.c \ pb_bufmgr_fenced.c \ @@ -12,11 +11,6 @@ DRIVER_SOURCES = \ pb_bufmgr_pool.c \ pb_winsys.c -C_SOURCES = \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript new file mode 100644 index 0000000000..3d41fd84a6 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -0,0 +1,14 @@ +Import('*') + +pipebuffer = env.ConvenienceLibrary( + target = 'pipebuffer', + source = [ + 'pb_buffer_fenced.c', + 'pb_buffer_malloc.c', + 'pb_bufmgr_fenced.c', + 'pb_bufmgr_mm.c', + 'pb_bufmgr_pool.c', + 'pb_winsys.c', + ]) + +auxiliaries.insert(0, pipebuffer) diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index 97beb5f72a..4b09c80b2a 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -37,7 +37,7 @@ * There is no obligation of a winsys driver to use this library. And a pipe * driver should be completly agnostic about it. * - * \author José Fonseca <jrfonseca@tungstengraphics.com> + * \author Jos� Fonseca <jrfonseca@tungstengraphics.com> */ #ifndef PB_BUFFER_H_ @@ -50,6 +50,11 @@ #include "pipe/p_inlines.h" +#ifdef __cplusplus +extern "C" { +#endif + + struct pb_vtbl; /** @@ -166,6 +171,7 @@ static INLINE void pb_destroy(struct pb_buffer *buf) { assert(buf); + assert(buf->vtbl); buf->vtbl->destroy(buf); } @@ -199,4 +205,8 @@ void pb_init_winsys(struct pipe_winsys *winsys); +#ifdef __cplusplus +} +#endif + #endif /*PB_BUFFER_H_*/ diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index f4fc3f6d71..6e217eb2e0 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -34,13 +34,12 @@ */ -#include "linked_list.h" - -#include "p_compiler.h" -#include "p_debug.h" -#include "p_winsys.h" -#include "p_thread.h" -#include "p_util.h" +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_winsys.h" +#include "pipe/p_thread.h" +#include "pipe/p_util.h" +#include "util/u_double_list.h" #include "pb_buffer.h" #include "pb_buffer_fenced.h" @@ -286,7 +285,9 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) /* Wait on outstanding fences */ while (fenced_list->numDelayed) { _glthread_UNLOCK_MUTEX(fenced_list->mutex); +#ifndef __MSC__ sched_yield(); +#endif _fenced_buffer_list_check_free(fenced_list, 1); _glthread_LOCK_MUTEX(fenced_list->mutex); } diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h index c40b9c75e1..50d5891bdb 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h @@ -44,7 +44,7 @@ * Between the handle's destruction, and the fence signalling, the buffer is * stored in a fenced buffer list. * - * \author José Fonseca <jrfonseca@tungstengraphics.com> + * \author José Fonseca <jrfonseca@tungstengraphics.com> */ #ifndef PB_BUFFER_FENCED_H_ @@ -54,6 +54,11 @@ #include "pipe/p_debug.h" +#ifdef __cplusplus +extern "C" { +#endif + + struct pipe_winsys; struct pipe_buffer; struct pipe_fence_handle; @@ -114,4 +119,8 @@ buffer_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence); +#ifdef __cplusplus +} +#endif + #endif /*PB_BUFFER_FENCED_H_*/ diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 1ddf784c97..0cf8e92e37 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -43,7 +43,7 @@ * - the fenced buffer manager, which will delay buffer destruction until the * the moment the card finishing processing it. * - * \author José Fonseca <jrfonseca@tungstengraphics.com> + * \author José Fonseca <jrfonseca@tungstengraphics.com> */ #ifndef PB_BUFMGR_H_ @@ -53,6 +53,11 @@ #include <stddef.h> +#ifdef __cplusplus +extern "C" { +#endif + + struct pb_desc; struct pipe_buffer; struct pipe_winsys; @@ -123,4 +128,8 @@ fenced_bufmgr_create(struct pb_manager *provider, struct pipe_winsys *winsys); +#ifdef __cplusplus +} +#endif + #endif /*PB_BUFMGR_H_*/ diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c index c535d3276c..bffca5b244 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c @@ -34,8 +34,8 @@ */ -#include "p_debug.h" -#include "p_util.h" +#include "pipe/p_debug.h" +#include "pipe/p_util.h" #include "pb_buffer.h" #include "pb_buffer_fenced.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index 8b1b51c0e2..66256f3fa7 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -1,7 +1,6 @@ /************************************************************************** * * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * Copyright 1999 Wittawat Yamwong * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -34,12 +33,12 @@ */ -#include "linked_list.h" - -#include "p_defines.h" -#include "p_debug.h" -#include "p_thread.h" -#include "p_util.h" +#include "pipe/p_defines.h" +#include "pipe/p_debug.h" +#include "pipe/p_thread.h" +#include "pipe/p_util.h" +#include "util/u_double_list.h" +#include "util/u_mm.h" #include "pb_buffer.h" #include "pb_bufmgr.h" @@ -50,306 +49,6 @@ #define SUPER(__derived) (&(__derived)->base) -struct mem_block -{ - struct mem_block *next, *prev; - struct mem_block *next_free, *prev_free; - struct mem_block *heap; - int ofs, size; - unsigned int free:1; - unsigned int reserved:1; -}; - - -#ifdef DEBUG -/** - * For debugging purposes. - */ -static void -mmDumpMemInfo(const struct mem_block *heap) -{ - debug_printf("Memory heap %p:\n", (void *)heap); - if (heap == 0) { - debug_printf(" heap == 0\n"); - } else { - const struct mem_block *p; - - for(p = heap->next; p != heap; p = p->next) { - debug_printf(" Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, - p->free ? 'F':'.', - p->reserved ? 'R':'.'); - } - - debug_printf("\nFree list:\n"); - - for(p = heap->next_free; p != heap; p = p->next_free) { - debug_printf(" FREE Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, - p->free ? 'F':'.', - p->reserved ? 'R':'.'); - } - - } - debug_printf("End of memory blocks\n"); -} -#endif - - -/** - * input: total size in bytes - * return: a heap pointer if OK, NULL if error - */ -static struct mem_block * -mmInit(int ofs, int size) -{ - struct mem_block *heap, *block; - - if (size <= 0) - return NULL; - - heap = CALLOC_STRUCT(mem_block); - if (!heap) - return NULL; - - block = CALLOC_STRUCT(mem_block); - if (!block) { - FREE(heap); - return NULL; - } - - heap->next = block; - heap->prev = block; - heap->next_free = block; - heap->prev_free = block; - - block->heap = heap; - block->next = heap; - block->prev = heap; - block->next_free = heap; - block->prev_free = heap; - - block->ofs = ofs; - block->size = size; - block->free = 1; - - return heap; -} - - -static struct mem_block * -SliceBlock(struct mem_block *p, - int startofs, int size, - int reserved, int alignment) -{ - struct mem_block *newblock; - - /* break left [p, newblock, p->next], then p = newblock */ - if (startofs > p->ofs) { - newblock = CALLOC_STRUCT(mem_block); - if (!newblock) - return NULL; - newblock->ofs = startofs; - newblock->size = p->size - (startofs - p->ofs); - newblock->free = 1; - newblock->heap = p->heap; - - newblock->next = p->next; - newblock->prev = p; - p->next->prev = newblock; - p->next = newblock; - - newblock->next_free = p->next_free; - newblock->prev_free = p; - p->next_free->prev_free = newblock; - p->next_free = newblock; - - p->size -= newblock->size; - p = newblock; - } - - /* break right, also [p, newblock, p->next] */ - if (size < p->size) { - newblock = CALLOC_STRUCT(mem_block); - if (!newblock) - return NULL; - newblock->ofs = startofs + size; - newblock->size = p->size - size; - newblock->free = 1; - newblock->heap = p->heap; - - newblock->next = p->next; - newblock->prev = p; - p->next->prev = newblock; - p->next = newblock; - - newblock->next_free = p->next_free; - newblock->prev_free = p; - p->next_free->prev_free = newblock; - p->next_free = newblock; - - p->size = size; - } - - /* p = middle block */ - p->free = 0; - - /* Remove p from the free list: - */ - p->next_free->prev_free = p->prev_free; - p->prev_free->next_free = p->next_free; - - p->next_free = 0; - p->prev_free = 0; - - p->reserved = reserved; - return p; -} - - -/** - * Allocate 'size' bytes with 2^align2 bytes alignment, - * restrict the search to free memory after 'startSearch' - * depth and back buffers should be in different 4mb banks - * to get better page hits if possible - * input: size = size of block - * align2 = 2^align2 bytes alignment - * startSearch = linear offset from start of heap to begin search - * return: pointer to the allocated block, 0 if error - */ -static struct mem_block * -mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) -{ - struct mem_block *p; - const int mask = (1 << align2)-1; - int startofs = 0; - int endofs; - - if (!heap || align2 < 0 || size <= 0) - return NULL; - - for (p = heap->next_free; p != heap; p = p->next_free) { - assert(p->free); - - startofs = (p->ofs + mask) & ~mask; - if ( startofs < startSearch ) { - startofs = startSearch; - } - endofs = startofs+size; - if (endofs <= (p->ofs+p->size)) - break; - } - - if (p == heap) - return NULL; - - assert(p->free); - p = SliceBlock(p,startofs,size,0,mask+1); - - return p; -} - - -#if 0 -/** - * Free block starts at offset - * input: pointer to a heap, start offset - * return: pointer to a block - */ -static struct mem_block * -mmFindBlock(struct mem_block *heap, int start) -{ - struct mem_block *p; - - for (p = heap->next; p != heap; p = p->next) { - if (p->ofs == start) - return p; - } - - return NULL; -} -#endif - - -static INLINE int -Join2Blocks(struct mem_block *p) -{ - /* XXX there should be some assertions here */ - - /* NOTE: heap->free == 0 */ - - if (p->free && p->next->free) { - struct mem_block *q = p->next; - - assert(p->ofs + p->size == q->ofs); - p->size += q->size; - - p->next = q->next; - q->next->prev = p; - - q->next_free->prev_free = q->prev_free; - q->prev_free->next_free = q->next_free; - - FREE(q); - return 1; - } - return 0; -} - - -/** - * Free block starts at offset - * input: pointer to a block - * return: 0 if OK, -1 if error - */ -static int -mmFreeMem(struct mem_block *b) -{ - if (!b) - return 0; - - if (b->free) { - debug_printf("block already free\n"); - return -1; - } - if (b->reserved) { - debug_printf("block is reserved\n"); - return -1; - } - - b->free = 1; - b->next_free = b->heap->next_free; - b->prev_free = b->heap; - b->next_free->prev_free = b; - b->prev_free->next_free = b; - - Join2Blocks(b); - if (b->prev != b->heap) - Join2Blocks(b->prev); - - return 0; -} - - -/** - * destroy MM - */ -static void -mmDestroy(struct mem_block *heap) -{ - struct mem_block *p; - - if (!heap) - return; - - for (p = heap->next; p != heap; ) { - struct mem_block *next = p->next; - FREE(p); - p = next; - } - - FREE(heap); -} - - struct mm_pb_manager { struct pb_manager base; @@ -493,8 +192,8 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr, } /* Some sanity checks */ - assert(0 <= mm_buf->block->ofs && mm_buf->block->ofs < mm->size); - assert(size <= mm_buf->block->size && mm_buf->block->ofs + mm_buf->block->size <= mm->size); + assert(0 <= (unsigned)mm_buf->block->ofs && (unsigned)mm_buf->block->ofs < mm->size); + assert(size <= (unsigned)mm_buf->block->size && (unsigned)mm_buf->block->ofs + (unsigned)mm_buf->block->size <= mm->size); _glthread_UNLOCK_MUTEX(mm->mutex); return SUPER(mm_buf); diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c index 04477a865a..528e9528f6 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -35,13 +35,12 @@ */ -#include "linked_list.h" - -#include "p_compiler.h" -#include "p_debug.h" -#include "p_thread.h" -#include "p_defines.h" -#include "p_util.h" +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_thread.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "util/u_double_list.h" #include "pb_buffer.h" #include "pb_bufmgr.h" diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile new file mode 100644 index 0000000000..39b8a4dbd7 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/Makefile @@ -0,0 +1,15 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = rtasm + +C_SOURCES = \ + rtasm_cpu.c \ + rtasm_execmem.c \ + rtasm_x86sse.c \ + rtasm_ppc_spe.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/rtasm/SConscript b/src/gallium/auxiliary/rtasm/SConscript new file mode 100644 index 0000000000..8ea25922aa --- /dev/null +++ b/src/gallium/auxiliary/rtasm/SConscript @@ -0,0 +1,12 @@ +Import('*') + +rtasm = env.ConvenienceLibrary( + target = 'rtasm', + source = [ + 'rtasm_cpu.c', + 'rtasm_execmem.c', + 'rtasm_x86sse.c', + 'rtasm_ppc_spe.c', + ]) + +auxiliaries.insert(0, rtasm) diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c new file mode 100644 index 0000000000..eb3359750b --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c @@ -0,0 +1,50 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "rtasm_cpu.h" + + +int rtasm_cpu_has_sse(void) +{ + /* FIXME: actually detect this at run-time */ +#if defined(__i386__) || defined(__386__) + return 1; +#else + return 0; +#endif +} + +int rtasm_cpu_has_sse2(void) +{ + /* FIXME: actually detect this at run-time */ +#if defined(__i386__) || defined(__386__) + return 1; +#else + return 0; +#endif +} diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.h b/src/gallium/auxiliary/rtasm/rtasm_cpu.h new file mode 100644 index 0000000000..ebc71634fd --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Runtime detection of CPU capabilities. + */ + +#ifndef _RTASM_CPU_H_ +#define _RTASM_CPU_H_ + + +int rtasm_cpu_has_sse(void); + +int rtasm_cpu_has_sse2(void); + + +#endif /* _RTASM_CPU_H_ */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c new file mode 100644 index 0000000000..300c1c2d9d --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -0,0 +1,135 @@ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * \file exemem.c + * Functions for allocating executable memory. + * + * \author Keith Whitwell + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_thread.h" +#include "pipe/p_util.h" + +#include "rtasm_execmem.h" + + +#if defined(__linux__) + +/* + * Allocate a large block of memory which can hold code then dole it out + * in pieces by means of the generic memory manager code. +*/ + +#include <unistd.h> +#include <sys/mman.h> +#include "util/u_mm.h" + +#define EXEC_HEAP_SIZE (10*1024*1024) + +_glthread_DECLARE_STATIC_MUTEX(exec_mutex); + +static struct mem_block *exec_heap = NULL; +static unsigned char *exec_mem = NULL; + + +static void +init_heap(void) +{ + if (!exec_heap) + exec_heap = mmInit( 0, EXEC_HEAP_SIZE ); + + if (!exec_mem) + exec_mem = (unsigned char *) mmap(0, EXEC_HEAP_SIZE, + PROT_EXEC | PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +} + + +void * +rtasm_exec_malloc(size_t size) +{ + struct mem_block *block = NULL; + void *addr = NULL; + + _glthread_LOCK_MUTEX(exec_mutex); + + init_heap(); + + if (exec_heap) { + size = (size + 31) & ~31; + block = mmAllocMem( exec_heap, size, 32, 0 ); + } + + if (block) + addr = exec_mem + block->ofs; + else + debug_printf("rtasm_exec_malloc failed\n"); + + _glthread_UNLOCK_MUTEX(exec_mutex); + + return addr; +} + + +void +rtasm_exec_free(void *addr) +{ + _glthread_LOCK_MUTEX(exec_mutex); + + if (exec_heap) { + struct mem_block *block = mmFindBlock(exec_heap, (unsigned char *)addr - exec_mem); + + if (block) + mmFreeMem(block); + } + + _glthread_UNLOCK_MUTEX(exec_mutex); +} + + +#else + +/* + * Just use regular memory. + */ + +void * +rtasm_exec_malloc(size_t size) +{ + return MALLOC( size ); +} + + +void +rtasm_exec_free(void *addr) +{ + FREE(addr); +} + + +#endif diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.h b/src/gallium/auxiliary/rtasm/rtasm_execmem.h new file mode 100644 index 0000000000..155c6d34e0 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.h @@ -0,0 +1,45 @@ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file exemem.c + * Functions for allocating executable memory. + * + * \author Keith Whitwell + */ + +#ifndef _RTASM_EXECMEM_H_ +#define _RTASM_EXECMEM_H_ + +#include "pipe/p_compiler.h" + + +extern void * +rtasm_exec_malloc( size_t size ); + + +extern void +rtasm_exec_free( void *addr ); + + +#endif diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c new file mode 100644 index 0000000000..95a2d6fcbb --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -0,0 +1,386 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file + * Real-time assembly generation interface for Cell B.E. SPEs. + * + * \author Ian Romanick <idr@us.ibm.com> + */ + +#include "pipe/p_compiler.h" +#include "pipe/p_util.h" +#include "rtasm_ppc_spe.h" + +#ifdef GALLIUM_CELL +/** + * SPE instruction types + * + * There are 6 primary instruction encodings used on the Cell's SPEs. Each of + * the following unions encodes one type. + * + * \bug + * If, at some point, we start generating SPE code from a little-endian host + * these unions will not work. + */ +/*@{*/ +/** + * Encode one output register with two input registers + */ +union spe_inst_RR { + uint32_t bits; + struct { + unsigned op:11; + unsigned rB:7; + unsigned rA:7; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with three input registers + */ +union spe_inst_RRR { + uint32_t bits; + struct { + unsigned op:4; + unsigned rT:7; + unsigned rB:7; + unsigned rA:7; + unsigned rC:7; + } inst; +}; + + +/** + * Encode one output register with one input reg. and a 7-bit signed immed + */ +union spe_inst_RI7 { + uint32_t bits; + struct { + unsigned op:11; + unsigned i7:7; + unsigned rA:7; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with one input reg. and an 8-bit signed immed + */ +union spe_inst_RI8 { + uint32_t bits; + struct { + unsigned op:10; + unsigned i8:8; + unsigned rA:7; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with one input reg. and a 10-bit signed immed + */ +union spe_inst_RI10 { + uint32_t bits; + struct { + unsigned op:8; + unsigned i10:10; + unsigned rA:7; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with a 16-bit signed immediate + */ +union spe_inst_RI16 { + uint32_t bits; + struct { + unsigned op:9; + unsigned i16:16; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with a 18-bit signed immediate + */ +union spe_inst_RI18 { + uint32_t bits; + struct { + unsigned op:7; + unsigned i18:18; + unsigned rT:7; + } inst; +}; +/*@}*/ + + +static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, unsigned rB) +{ + union spe_inst_RR inst; + inst.inst.op = op; + inst.inst.rB = rB; + inst.inst.rA = rA; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + +static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, unsigned rB, unsigned rC) +{ + union spe_inst_RRR inst; + inst.inst.op = op; + inst.inst.rT = rT; + inst.inst.rB = rB; + inst.inst.rA = rA; + inst.inst.rC = rC; + *p->csr = inst.bits; + p->csr++; +} + + +static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, int imm) +{ + union spe_inst_RI7 inst; + inst.inst.op = op; + inst.inst.i7 = imm; + inst.inst.rA = rA; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + + +static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, int imm) +{ + union spe_inst_RI8 inst; + inst.inst.op = op; + inst.inst.i8 = imm; + inst.inst.rA = rA; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + + +static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, int imm) +{ + union spe_inst_RI10 inst; + inst.inst.op = op; + inst.inst.i10 = imm; + inst.inst.rA = rA; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + +static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT, + int imm) +{ + union spe_inst_RI16 inst; + inst.inst.op = op; + inst.inst.i16 = imm; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + +static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, + int imm) +{ + union spe_inst_RI18 inst; + inst.inst.op = op; + inst.inst.i18 = imm; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + + + +#define EMIT_(_name, _op) \ +void _name (struct spe_function *p, unsigned rT) \ +{ \ + emit_RR(p, _op, rT, 0, 0); \ +} + +#define EMIT_R(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA) \ +{ \ + emit_RR(p, _op, rT, rA, 0); \ +} + +#define EMIT_RR(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) \ +{ \ + emit_RR(p, _op, rT, rA, rB); \ +} + +#define EMIT_RRR(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB, unsigned rC) \ +{ \ + emit_RRR(p, _op, rT, rA, rB, rC); \ +} + +#define EMIT_RI7(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +{ \ + emit_RI7(p, _op, rT, rA, imm); \ +} + +#define EMIT_RI8(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +{ \ + emit_RI8(p, _op, rT, rA, 155 - imm); \ +} + +#define EMIT_RI10(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +{ \ + emit_RI10(p, _op, rT, rA, imm); \ +} + +#define EMIT_RI16(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, int imm) \ +{ \ + emit_RI16(p, _op, rT, imm); \ +} + +#define EMIT_RI18(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, int imm) \ +{ \ + emit_RI18(p, _op, rT, imm); \ +} + +#define EMIT_I16(_name, _op) \ +void _name (struct spe_function *p, int imm) \ +{ \ + emit_RI16(p, _op, 0, imm); \ +} + +#include "rtasm_ppc_spe.h" + + +/* + */ +void spe_init_func(struct spe_function *p, unsigned code_size) +{ + p->store = align_malloc(code_size, 16); + p->csr = p->store; +} + + +void spe_release_func(struct spe_function *p) +{ + align_free(p->store); + p->store = NULL; + p->csr = NULL; +} + + +void spe_bi(struct spe_function *p, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4)); +} + +void spe_iret(struct spe_function *p, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4)); +} + +void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, int d, + int e) +{ + emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4)); +} + +void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, int d, + int e) +{ + emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4)); +} + +void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, + int e) +{ + emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4)); +} + +void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4)); +} + +void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4)); +} + +void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4)); +} + + +/* Hint-for-branch instructions + */ +#if 0 +hbr; +hbra; +hbrr; +#endif + + +/* Control instructions + */ +#if 0 +stop; +EMIT_RR (spe_stopd, 0x140); +EMIT_ (spe_lnop, 0x001); +EMIT_ (spe_nop, 0x201); +sync; +EMIT_ (spe_dsync, 0x003); +EMIT_R (spe_mfspr, 0x00c); +EMIT_R (spe_mtspr, 0x10c); +#endif + +#endif /* GALLIUM_CELL */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h new file mode 100644 index 0000000000..10ce44b3a0 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -0,0 +1,314 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file + * Real-time assembly generation interface for Cell B.E. SPEs. + * + * \author Ian Romanick <idr@us.ibm.com> + */ + +#ifndef RTASM_PPC_SPE_H +#define RTASM_PPC_SPE_H + +struct spe_function { + /** + * + */ + uint32_t *store; + uint32_t *csr; + const char *fn; +}; + +extern void spe_init_func(struct spe_function *p, unsigned code_size); +extern void spe_release_func(struct spe_function *p); + +#endif /* RTASM_PPC_SPE_H */ + +#ifndef EMIT_ +#define EMIT_(name, _op) \ + extern void _name (struct spe_function *p, unsigned rT) +#define EMIT_R(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA) +#define EMIT_RR(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + unsigned rB) +#define EMIT_RRR(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + unsigned rB, unsigned rC) +#define EMIT_RI7(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + int imm) +#define EMIT_RI8(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + int imm) +#define EMIT_RI10(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + int imm) +#define EMIT_RI16(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, int imm) +#define EMIT_RI18(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, int imm) +#define EMIT_I16(_name, _op) \ + extern void _name (struct spe_function *p, int imm) +#define UNDEF_EMIT_MACROS +#endif /* EMIT_ */ + + +/* Memory load / store instructions + */ +EMIT_RI10(spe_lqd, 0x034); +EMIT_RR (spe_lqx, 0x1c4); +EMIT_RI16(spe_lqa, 0x061); +EMIT_RI16(spe_lqr, 0x067); +EMIT_RI10(spe_stqd, 0x024); +EMIT_RR (spe_stqx, 0x144); +EMIT_RI16(spe_stqa, 0x041); +EMIT_RI16(spe_stqr, 0x047); +EMIT_RI7 (spe_cbd, 0x1f4); +EMIT_RR (spe_cbx, 0x1d4); +EMIT_RI7 (spe_chd, 0x1f5); +EMIT_RI7 (spe_chx, 0x1d5); +EMIT_RI7 (spe_cwd, 0x1f6); +EMIT_RI7 (spe_cwx, 0x1d6); +EMIT_RI7 (spe_cdd, 0x1f7); +EMIT_RI7 (spe_cdx, 0x1d7); + + +/* Constant formation instructions + */ +EMIT_RI16(spe_ilh, 0x083); +EMIT_RI16(spe_ilhu, 0x082); +EMIT_RI16(spe_il, 0x081); +EMIT_RI18(spe_ila, 0x021); +EMIT_RI16(spe_iohl, 0x0c1); +EMIT_RI16(spe_fsmbi, 0x0c5); + + +/* Integer and logical instructions + */ +EMIT_RR (spe_ah, 0x0c8); +EMIT_RI10(spe_ahi, 0x01d); +EMIT_RR (spe_a, 0x0c0); +EMIT_RI10(spe_ai, 0x01c); +EMIT_RR (spe_sfh, 0x048); +EMIT_RI10(spe_sfhi, 0x00d); +EMIT_RR (spe_sf, 0x040); +EMIT_RI10(spe_sfi, 0x00c); +EMIT_RR (spe_addx, 0x340); +EMIT_RR (spe_cg, 0x0c2); +EMIT_RR (spe_cgx, 0x342); +EMIT_RR (spe_sfx, 0x341); +EMIT_RR (spe_bg, 0x042); +EMIT_RR (spe_bgx, 0x343); +EMIT_RR (spe_mpy, 0x3c4); +EMIT_RR (spe_mpyu, 0x3cc); +EMIT_RI10(spe_mpyi, 0x074); +EMIT_RI10(spe_mpyui, 0x075); +EMIT_RRR (spe_mpya, 0x00c); +EMIT_RR (spe_mpyh, 0x3c5); +EMIT_RR (spe_mpys, 0x3c7); +EMIT_RR (spe_mpyhh, 0x3c6); +EMIT_RR (spe_mpyhha, 0x346); +EMIT_RR (spe_mpyhhu, 0x3ce); +EMIT_RR (spe_mpyhhau, 0x34e); +EMIT_R (spe_clz, 0x2a5); +EMIT_R (spe_cntb, 0x2b4); +EMIT_R (spe_fsmb, 0x1b6); +EMIT_R (spe_fsmh, 0x1b5); +EMIT_R (spe_fsm, 0x1b4); +EMIT_R (spe_gbb, 0x1b2); +EMIT_R (spe_gbh, 0x1b1); +EMIT_R (spe_gb, 0x1b0); +EMIT_RR (spe_avgb, 0x0d3); +EMIT_RR (spe_absdb, 0x053); +EMIT_RR (spe_sumb, 0x253); +EMIT_R (spe_xsbh, 0x2b6); +EMIT_R (spe_xshw, 0x2ae); +EMIT_R (spe_xswd, 0x2a6); +EMIT_RR (spe_and, 0x0c1); +EMIT_RR (spe_andc, 0x2c1); +EMIT_RI10(spe_andbi, 0x016); +EMIT_RI10(spe_andhi, 0x015); +EMIT_RI10(spe_andi, 0x014); +EMIT_RR (spe_or, 0x041); +EMIT_RR (spe_orc, 0x2c9); +EMIT_RI10(spe_orbi, 0x006); +EMIT_RI10(spe_orhi, 0x005); +EMIT_RI10(spe_ori, 0x004); +EMIT_R (spe_orx, 0x1f0); +EMIT_RR (spe_xor, 0x241); +EMIT_RI10(spe_xorbi, 0x026); +EMIT_RI10(spe_xorhi, 0x025); +EMIT_RI10(spe_xori, 0x024); +EMIT_RR (spe_nand, 0x0c9); +EMIT_RR (spe_nor, 0x049); +EMIT_RR (spe_eqv, 0x249); +EMIT_RRR (spe_selb, 0x008); +EMIT_RRR (spe_shufb, 0x00b); + + +/* Shift and rotate instructions + */ +EMIT_RR (spe_shlh, 0x05f); +EMIT_RI7 (spe_shlhi, 0x07f); +EMIT_RR (spe_shl, 0x05b); +EMIT_RI7 (spe_shli, 0x07b); +EMIT_RR (spe_shlqbi, 0x1db); +EMIT_RI7 (spe_shlqbii, 0x1fb); +EMIT_RR (spe_shlqby, 0x1df); +EMIT_RI7 (spe_shlqbyi, 0x1ff); +EMIT_RR (spe_shlqbybi, 0x1cf); +EMIT_RR (spe_roth, 0x05c); +EMIT_RI7 (spe_rothi, 0x07c); +EMIT_RR (spe_rot, 0x058); +EMIT_RI7 (spe_roti, 0x078); +EMIT_RR (spe_rotqby, 0x1dc); +EMIT_RI7 (spe_rotqbyi, 0x1fc); +EMIT_RR (spe_rotqbybi, 0x1cc); +EMIT_RR (spe_rotqbi, 0x1d8); +EMIT_RI7 (spe_rotqbii, 0x1f8); +EMIT_RR (spe_rothm, 0x05d); +EMIT_RI7 (spe_rothmi, 0x07d); +EMIT_RR (spe_rotm, 0x059); +EMIT_RI7 (spe_rotmi, 0x079); +EMIT_RR (spe_rotqmby, 0x1dd); +EMIT_RI7 (spe_rotqmbyi, 0x1fd); +EMIT_RR (spe_rotqmbybi, 0x1cd); +EMIT_RR (spe_rotqmbi, 0x1c9); +EMIT_RI7 (spe_rotqmbii, 0x1f9); +EMIT_RR (spe_rotmah, 0x05e); +EMIT_RI7 (spe_rotmahi, 0x07e); +EMIT_RR (spe_rotma, 0x05a); +EMIT_RI7 (spe_rotmai, 0x07a); + + +/* Compare, branch, and halt instructions + */ +EMIT_RR (spe_heq, 0x3d8); +EMIT_RI10(spe_heqi, 0x07f); +EMIT_RR (spe_hgt, 0x258); +EMIT_RI10(spe_hgti, 0x04f); +EMIT_RR (spe_hlgt, 0x2d8); +EMIT_RI10(spe_hlgti, 0x05f); +EMIT_RR (spe_ceqb, 0x3d0); +EMIT_RI10(spe_ceqbi, 0x07e); +EMIT_RR (spe_ceqh, 0x3c8); +EMIT_RI10(spe_ceqhi, 0x07d); +EMIT_RR (spe_ceq, 0x3c0); +EMIT_RI10(spe_ceqi, 0x07c); +EMIT_RR (spe_cgtb, 0x250); +EMIT_RI10(spe_cgtbi, 0x04e); +EMIT_RR (spe_cgth, 0x248); +EMIT_RI10(spe_cgthi, 0x04d); +EMIT_RR (spe_cgt, 0x240); +EMIT_RI10(spe_cgti, 0x04c); +EMIT_RR (spe_clgtb, 0x2d0); +EMIT_RI10(spe_clgtbi, 0x05e); +EMIT_RR (spe_clgth, 0x2c8); +EMIT_RI10(spe_clgthi, 0x05d); +EMIT_RR (spe_clgt, 0x2c0); +EMIT_RI10(spe_clgti, 0x05c); +EMIT_I16 (spe_br, 0x064); +EMIT_I16 (spe_bra, 0x060); +EMIT_RI16(spe_brsl, 0x066); +EMIT_RI16(spe_brasl, 0x062); +EMIT_RI16(spe_brnz, 0x042); +EMIT_RI16(spe_brz, 0x040); +EMIT_RI16(spe_brhnz, 0x046); +EMIT_RI16(spe_brhz, 0x044); + +extern void spe_bi(struct spe_function *p, unsigned rA, int d, int e); +extern void spe_iret(struct spe_function *p, unsigned rA, int d, int e); +extern void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); + + +/* Floating-point instructions + */ +EMIT_RR (spe_fa, 0x2c4); +EMIT_RR (spe_dfa, 0x2cc); +EMIT_RR (spe_fs, 0x2c5); +EMIT_RR (spe_dfs, 0x2cd); +EMIT_RR (spe_fm, 0x2c6); +EMIT_RR (spe_dfm, 0x2ce); +EMIT_RRR (spe_fma, 0x00e); +EMIT_RR (spe_dfma, 0x35c); +EMIT_RRR (spe_fnms, 0x00d); +EMIT_RR (spe_dfnms, 0x35e); +EMIT_RRR (spe_fms, 0x00f); +EMIT_RR (spe_dfms, 0x35d); +EMIT_RR (spe_dfnma, 0x35f); +EMIT_R (spe_frest, 0x1b8); +EMIT_R (spe_frsqest, 0x1b9); +EMIT_RR (spe_fi, 0x3d4); +EMIT_RI8 (spe_csflt, 0x1da); +EMIT_RI8 (spe_cflts, 0x1d8); +EMIT_RI8 (spe_cuflt, 0x1db); +EMIT_RI8 (spe_cfltu, 0x1d9); +EMIT_R (spe_frds, 0x3b9); +EMIT_R (spe_fesd, 0x3b8); +EMIT_RR (spe_dfceq, 0x3c3); +EMIT_RR (spe_dfcmeq, 0x3cb); +EMIT_RR (spe_dfcgt, 0x2c3); +EMIT_RR (spe_dfcmgt, 0x2cb); +EMIT_RI7 (spe_dftsv, 0x3bf); +EMIT_RR (spe_fceq, 0x3c2); +EMIT_RR (spe_fcmeq, 0x3ca); +EMIT_RR (spe_fcgt, 0x2c2); +EMIT_RR (spe_fcmgt, 0x2ca); +EMIT_R (spe_fscrwr, 0x3ba); +EMIT_ (spe_fscrrd, 0x398); + + +/* Channel instructions + */ +EMIT_R (spe_rdch, 0x00d); +EMIT_R (spe_rdchcnt, 0x00f); +EMIT_R (spe_wrch, 0x10d); + + +#ifdef UNDEF_EMIT_MACROS +#undef EMIT_ +#undef EMIT_R +#undef EMIT_RR +#undef EMIT_RRR +#undef EMIT_RI7 +#undef EMIT_RI8 +#undef EMIT_RI10 +#undef EMIT_RI16 +#undef EMIT_RI18 +#undef EMIT_I16 +#undef UNDEF_EMIT_MACROS +#endif /* EMIT_ */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c new file mode 100644 index 0000000000..4d33950e99 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -0,0 +1,1220 @@ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#if defined(__i386__) || defined(__386__) || defined(i386) + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_pointer.h" + +#include "rtasm_execmem.h" +#include "rtasm_x86sse.h" + +#define DISASSEM 0 +#define X86_TWOB 0x0f + +static unsigned char *cptr( void (*label)() ) +{ + return (unsigned char *) label; +} + + +static void do_realloc( struct x86_function *p ) +{ + if (p->size == 0) { + p->size = 1024; + p->store = rtasm_exec_malloc(p->size); + p->csr = p->store; + } + else { + uintptr_t used = pointer_to_uintptr( p->csr ) - pointer_to_uintptr( p->store ); + unsigned char *tmp = p->store; + p->size *= 2; + p->store = rtasm_exec_malloc(p->size); + memcpy(p->store, tmp, used); + p->csr = p->store + used; + rtasm_exec_free(tmp); + } +} + +/* Emit bytes to the instruction stream: + */ +static unsigned char *reserve( struct x86_function *p, int bytes ) +{ + if (p->csr + bytes - p->store > (int) p->size) + do_realloc(p); + + { + unsigned char *csr = p->csr; + p->csr += bytes; + return csr; + } +} + + + +static void emit_1b( struct x86_function *p, char b0 ) +{ + char *csr = (char *)reserve(p, 1); + *csr = b0; +} + +static void emit_1i( struct x86_function *p, int i0 ) +{ + int *icsr = (int *)reserve(p, sizeof(i0)); + *icsr = i0; +} + +static void emit_1ub( struct x86_function *p, unsigned char b0 ) +{ + unsigned char *csr = reserve(p, 1); + *csr++ = b0; +} + +static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 ) +{ + unsigned char *csr = reserve(p, 2); + *csr++ = b0; + *csr++ = b1; +} + +static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 ) +{ + unsigned char *csr = reserve(p, 3); + *csr++ = b0; + *csr++ = b1; + *csr++ = b2; +} + + +/* Build a modRM byte + possible displacement. No treatment of SIB + * indexing. BZZT - no way to encode an absolute address. + */ +static void emit_modrm( struct x86_function *p, + struct x86_reg reg, + struct x86_reg regmem ) +{ + unsigned char val = 0; + + assert(reg.mod == mod_REG); + + val |= regmem.mod << 6; /* mod field */ + val |= reg.idx << 3; /* reg field */ + val |= regmem.idx; /* r/m field */ + + emit_1ub(p, val); + + /* Oh-oh we've stumbled into the SIB thing. + */ + if (regmem.file == file_REG32 && + regmem.idx == reg_SP) { + emit_1ub(p, 0x24); /* simplistic! */ + } + + switch (regmem.mod) { + case mod_REG: + case mod_INDIRECT: + break; + case mod_DISP8: + emit_1b(p, (char) regmem.disp); + break; + case mod_DISP32: + emit_1i(p, regmem.disp); + break; + default: + assert(0); + break; + } +} + + +static void emit_modrm_noreg( struct x86_function *p, + unsigned op, + struct x86_reg regmem ) +{ + struct x86_reg dummy = x86_make_reg(file_REG32, op); + emit_modrm(p, dummy, regmem); +} + +/* Many x86 instructions have two opcodes to cope with the situations + * where the destination is a register or memory reference + * respectively. This function selects the correct opcode based on + * the arguments presented. + */ +static void emit_op_modrm( struct x86_function *p, + unsigned char op_dst_is_reg, + unsigned char op_dst_is_mem, + struct x86_reg dst, + struct x86_reg src ) +{ + switch (dst.mod) { + case mod_REG: + emit_1ub(p, op_dst_is_reg); + emit_modrm(p, dst, src); + break; + case mod_INDIRECT: + case mod_DISP32: + case mod_DISP8: + assert(src.mod == mod_REG); + emit_1ub(p, op_dst_is_mem); + emit_modrm(p, src, dst); + break; + default: + assert(0); + break; + } +} + + + + + + + +/* Create and manipulate registers and regmem values: + */ +struct x86_reg x86_make_reg( enum x86_reg_file file, + enum x86_reg_name idx ) +{ + struct x86_reg reg; + + reg.file = file; + reg.idx = idx; + reg.mod = mod_REG; + reg.disp = 0; + + return reg; +} + +struct x86_reg x86_make_disp( struct x86_reg reg, + int disp ) +{ + assert(reg.file == file_REG32); + + if (reg.mod == mod_REG) + reg.disp = disp; + else + reg.disp += disp; + + if (reg.disp == 0) + reg.mod = mod_INDIRECT; + else if (reg.disp <= 127 && reg.disp >= -128) + reg.mod = mod_DISP8; + else + reg.mod = mod_DISP32; + + return reg; +} + +struct x86_reg x86_deref( struct x86_reg reg ) +{ + return x86_make_disp(reg, 0); +} + +struct x86_reg x86_get_base_reg( struct x86_reg reg ) +{ + return x86_make_reg( reg.file, reg.idx ); +} + +unsigned char *x86_get_label( struct x86_function *p ) +{ + return p->csr; +} + + + +/*********************************************************************** + * x86 instructions + */ + + +void x86_jcc( struct x86_function *p, + enum x86_cc cc, + unsigned char *label ) +{ + intptr_t offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 2); + + if (offset <= 127 && offset >= -128) { + emit_1ub(p, 0x70 + cc); + emit_1b(p, (char) offset); + } + else { + offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 6); + emit_2ub(p, 0x0f, 0x80 + cc); + emit_1i(p, offset); + } +} + +/* Always use a 32bit offset for forward jumps: + */ +unsigned char *x86_jcc_forward( struct x86_function *p, + enum x86_cc cc ) +{ + emit_2ub(p, 0x0f, 0x80 + cc); + emit_1i(p, 0); + return x86_get_label(p); +} + +unsigned char *x86_jmp_forward( struct x86_function *p) +{ + emit_1ub(p, 0xe9); + emit_1i(p, 0); + return x86_get_label(p); +} + +unsigned char *x86_call_forward( struct x86_function *p) +{ + emit_1ub(p, 0xe8); + emit_1i(p, 0); + return x86_get_label(p); +} + +/* Fixup offset from forward jump: + */ +void x86_fixup_fwd_jump( struct x86_function *p, + unsigned char *fixup ) +{ + *(int *)(fixup - 4) = pointer_to_intptr( x86_get_label(p) ) - pointer_to_intptr( fixup ); +} + +void x86_jmp( struct x86_function *p, unsigned char *label) +{ + emit_1ub(p, 0xe9); + emit_1i(p, pointer_to_intptr( label ) - pointer_to_intptr( x86_get_label(p) ) - 4); +} + +#if 0 +/* This doesn't work once we start reallocating & copying the + * generated code on buffer fills, because the call is relative to the + * current pc. + */ +void x86_call( struct x86_function *p, void (*label)()) +{ + emit_1ub(p, 0xe8); + emit_1i(p, cptr(label) - x86_get_label(p) - 4); +} +#else +void x86_call( struct x86_function *p, struct x86_reg reg) +{ + emit_1ub(p, 0xff); + emit_modrm(p, reg, reg); +} +#endif + + +/* michal: + * Temporary. As I need immediate operands, and dont want to mess with the codegen, + * I load the immediate into general purpose register and use it. + */ +void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + assert(dst.mod == mod_REG); + emit_1ub(p, 0xb8 + dst.idx); + emit_1i(p, imm); +} + +void x86_push( struct x86_function *p, + struct x86_reg reg ) +{ + assert(reg.mod == mod_REG); + emit_1ub(p, 0x50 + reg.idx); + p->stack_offset += 4; +} + +void x86_pop( struct x86_function *p, + struct x86_reg reg ) +{ + assert(reg.mod == mod_REG); + emit_1ub(p, 0x58 + reg.idx); + p->stack_offset -= 4; +} + +void x86_inc( struct x86_function *p, + struct x86_reg reg ) +{ + assert(reg.mod == mod_REG); + emit_1ub(p, 0x40 + reg.idx); +} + +void x86_dec( struct x86_function *p, + struct x86_reg reg ) +{ + assert(reg.mod == mod_REG); + emit_1ub(p, 0x48 + reg.idx); +} + +void x86_ret( struct x86_function *p ) +{ + emit_1ub(p, 0xc3); +} + +void x86_sahf( struct x86_function *p ) +{ + emit_1ub(p, 0x9e); +} + +void x86_mov( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x8b, 0x89, dst, src ); +} + +void x86_xor( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x33, 0x31, dst, src ); +} + +void x86_cmp( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x3b, 0x39, dst, src ); +} + +void x86_lea( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_1ub(p, 0x8d); + emit_modrm( p, dst, src ); +} + +void x86_test( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_1ub(p, 0x85); + emit_modrm( p, dst, src ); +} + +void x86_add( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm(p, 0x03, 0x01, dst, src ); +} + +void x86_mul( struct x86_function *p, + struct x86_reg src ) +{ + assert (src.file == file_REG32 && src.mod == mod_REG); + emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src ); +} + +void x86_sub( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm(p, 0x2b, 0x29, dst, src ); +} + +void x86_or( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x0b, 0x09, dst, src ); +} + +void x86_and( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x23, 0x21, dst, src ); +} + + + +/*********************************************************************** + * SSE instructions + */ + + +void sse_movss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, 0xF3, X86_TWOB); + emit_op_modrm( p, 0x10, 0x11, dst, src ); +} + +void sse_movaps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x28, 0x29, dst, src ); +} + +void sse_movups( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x10, 0x11, dst, src ); +} + +void sse_movhps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.mod != mod_REG || src.mod != mod_REG); + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */ +} + +void sse_movlps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.mod != mod_REG || src.mod != mod_REG); + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */ +} + +void sse_maxps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x5F); + emit_modrm( p, dst, src ); +} + +void sse_maxss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x5F); + emit_modrm( p, dst, src ); +} + +void sse_divss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x5E); + emit_modrm( p, dst, src ); +} + +void sse_minps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x5D); + emit_modrm( p, dst, src ); +} + +void sse_subps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x5C); + emit_modrm( p, dst, src ); +} + +void sse_mulps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x59); + emit_modrm( p, dst, src ); +} + +void sse_mulss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x59); + emit_modrm( p, dst, src ); +} + +void sse_addps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x58); + emit_modrm( p, dst, src ); +} + +void sse_addss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x58); + emit_modrm( p, dst, src ); +} + +void sse_andnps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x55); + emit_modrm( p, dst, src ); +} + +void sse_andps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x54); + emit_modrm( p, dst, src ); +} + +void sse_rsqrtps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x52); + emit_modrm( p, dst, src ); +} + +void sse_rsqrtss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x52); + emit_modrm( p, dst, src ); + +} + +void sse_movhlps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.mod == mod_REG && src.mod == mod_REG); + emit_2ub(p, X86_TWOB, 0x12); + emit_modrm( p, dst, src ); +} + +void sse_movlhps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.mod == mod_REG && src.mod == mod_REG); + emit_2ub(p, X86_TWOB, 0x16); + emit_modrm( p, dst, src ); +} + +void sse_orps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x56); + emit_modrm( p, dst, src ); +} + +void sse_xorps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x57); + emit_modrm( p, dst, src ); +} + +void sse_cvtps2pi( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.file == file_MMX && + (src.file == file_XMM || src.mod != mod_REG)); + + p->need_emms = 1; + + emit_2ub(p, X86_TWOB, 0x2d); + emit_modrm( p, dst, src ); +} + + +/* Shufps can also be used to implement a reduced swizzle when dest == + * arg0. + */ +void sse_shufps( struct x86_function *p, + struct x86_reg dest, + struct x86_reg arg0, + unsigned char shuf) +{ + emit_2ub(p, X86_TWOB, 0xC6); + emit_modrm(p, dest, arg0); + emit_1ub(p, shuf); +} + +void sse_cmpps( struct x86_function *p, + struct x86_reg dest, + struct x86_reg arg0, + unsigned char cc) +{ + emit_2ub(p, X86_TWOB, 0xC2); + emit_modrm(p, dest, arg0); + emit_1ub(p, cc); +} + +void sse_pmovmskb( struct x86_function *p, + struct x86_reg dest, + struct x86_reg src) +{ + emit_3ub(p, 0x66, X86_TWOB, 0xD7); + emit_modrm(p, dest, src); +} + +/*********************************************************************** + * SSE2 instructions + */ + +/** + * Perform a reduced swizzle: + */ +void sse2_pshufd( struct x86_function *p, + struct x86_reg dest, + struct x86_reg arg0, + unsigned char shuf) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x70); + emit_modrm(p, dest, arg0); + emit_1ub(p, shuf); +} + +void sse2_cvttps2dq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); + emit_modrm( p, dst, src ); +} + +void sse2_cvtps2dq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x5B); + emit_modrm( p, dst, src ); +} + +void sse2_packssdw( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x6B); + emit_modrm( p, dst, src ); +} + +void sse2_packsswb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x63); + emit_modrm( p, dst, src ); +} + +void sse2_packuswb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x67); + emit_modrm( p, dst, src ); +} + +void sse2_rcpps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x53); + emit_modrm( p, dst, src ); +} + +void sse2_rcpss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x53); + emit_modrm( p, dst, src ); +} + +void sse2_movd( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, 0x66, X86_TWOB); + emit_op_modrm( p, 0x6e, 0x7e, dst, src ); +} + + + + +/*********************************************************************** + * x87 instructions + */ +void x87_fist( struct x86_function *p, struct x86_reg dst ) +{ + emit_1ub(p, 0xdb); + emit_modrm_noreg(p, 2, dst); +} + +void x87_fistp( struct x86_function *p, struct x86_reg dst ) +{ + emit_1ub(p, 0xdb); + emit_modrm_noreg(p, 3, dst); +} + +void x87_fild( struct x86_function *p, struct x86_reg arg ) +{ + emit_1ub(p, 0xdf); + emit_modrm_noreg(p, 0, arg); +} + +void x87_fldz( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xee); +} + + +void x87_fldcw( struct x86_function *p, struct x86_reg arg ) +{ + assert(arg.file == file_REG32); + assert(arg.mod != mod_REG); + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 5, arg); +} + +void x87_fld1( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xe8); +} + +void x87_fldl2e( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xea); +} + +void x87_fldln2( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xed); +} + +void x87_fwait( struct x86_function *p ) +{ + emit_1ub(p, 0x9b); +} + +void x87_fnclex( struct x86_function *p ) +{ + emit_2ub(p, 0xdb, 0xe2); +} + +void x87_fclex( struct x86_function *p ) +{ + x87_fwait(p); + x87_fnclex(p); +} + + +static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, + unsigned char dst0ub0, + unsigned char dst0ub1, + unsigned char arg0ub0, + unsigned char arg0ub1, + unsigned char argmem_noreg) +{ + assert(dst.file == file_x87); + + if (arg.file == file_x87) { + if (dst.idx == 0) + emit_2ub(p, dst0ub0, dst0ub1+arg.idx); + else if (arg.idx == 0) + emit_2ub(p, arg0ub0, arg0ub1+arg.idx); + else + assert(0); + } + else if (dst.idx == 0) { + assert(arg.file == file_REG32); + emit_1ub(p, 0xd8); + emit_modrm_noreg(p, argmem_noreg, arg); + } + else + assert(0); +} + +void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xc8, + 0xdc, 0xc8, + 4); +} + +void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xe0, + 0xdc, 0xe8, + 4); +} + +void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xe8, + 0xdc, 0xe0, + 5); +} + +void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xc0, + 0xdc, 0xc0, + 0); +} + +void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xf0, + 0xdc, 0xf8, + 6); +} + +void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xf8, + 0xdc, 0xf0, + 7); +} + +void x87_fmulp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xc8+dst.idx); +} + +void x87_fsubp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xe8+dst.idx); +} + +void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xe0+dst.idx); +} + +void x87_faddp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xc0+dst.idx); +} + +void x87_fdivp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xf8+dst.idx); +} + +void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xf0+dst.idx); +} + +void x87_fucom( struct x86_function *p, struct x86_reg arg ) +{ + assert(arg.file == file_x87); + emit_2ub(p, 0xdd, 0xe0+arg.idx); +} + +void x87_fucomp( struct x86_function *p, struct x86_reg arg ) +{ + assert(arg.file == file_x87); + emit_2ub(p, 0xdd, 0xe8+arg.idx); +} + +void x87_fucompp( struct x86_function *p ) +{ + emit_2ub(p, 0xda, 0xe9); +} + +void x87_fxch( struct x86_function *p, struct x86_reg arg ) +{ + assert(arg.file == file_x87); + emit_2ub(p, 0xd9, 0xc8+arg.idx); +} + +void x87_fabs( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xe1); +} + +void x87_fchs( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xe0); +} + +void x87_fcos( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xff); +} + + +void x87_fprndint( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfc); +} + +void x87_fscale( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfd); +} + +void x87_fsin( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfe); +} + +void x87_fsincos( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfb); +} + +void x87_fsqrt( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfa); +} + +void x87_fxtract( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xf4); +} + +/* st0 = (2^st0)-1 + * + * Restrictions: -1.0 <= st0 <= 1.0 + */ +void x87_f2xm1( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xf0); +} + +/* st1 = st1 * log2(st0); + * pop_stack; + */ +void x87_fyl2x( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xf1); +} + +/* st1 = st1 * log2(st0 + 1.0); + * pop_stack; + * + * A fast operation, with restrictions: -.29 < st0 < .29 + */ +void x87_fyl2xp1( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xf9); +} + + +void x87_fld( struct x86_function *p, struct x86_reg arg ) +{ + if (arg.file == file_x87) + emit_2ub(p, 0xd9, 0xc0 + arg.idx); + else { + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 0, arg); + } +} + +void x87_fst( struct x86_function *p, struct x86_reg dst ) +{ + if (dst.file == file_x87) + emit_2ub(p, 0xdd, 0xd0 + dst.idx); + else { + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 2, dst); + } +} + +void x87_fstp( struct x86_function *p, struct x86_reg dst ) +{ + if (dst.file == file_x87) + emit_2ub(p, 0xdd, 0xd8 + dst.idx); + else { + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 3, dst); + } +} + +void x87_fcom( struct x86_function *p, struct x86_reg dst ) +{ + if (dst.file == file_x87) + emit_2ub(p, 0xd8, 0xd0 + dst.idx); + else { + emit_1ub(p, 0xd8); + emit_modrm_noreg(p, 2, dst); + } +} + +void x87_fcomp( struct x86_function *p, struct x86_reg dst ) +{ + if (dst.file == file_x87) + emit_2ub(p, 0xd8, 0xd8 + dst.idx); + else { + emit_1ub(p, 0xd8); + emit_modrm_noreg(p, 3, dst); + } +} + + +void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_REG32); + + if (dst.idx == reg_AX && + dst.mod == mod_REG) + emit_2ub(p, 0xdf, 0xe0); + else { + emit_1ub(p, 0xdd); + emit_modrm_noreg(p, 7, dst); + } +} + + + + +/*********************************************************************** + * MMX instructions + */ + +void mmx_emms( struct x86_function *p ) +{ + assert(p->need_emms); + emit_2ub(p, 0x0f, 0x77); + p->need_emms = 0; +} + +void mmx_packssdw( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.file == file_MMX && + (src.file == file_MMX || src.mod != mod_REG)); + + p->need_emms = 1; + + emit_2ub(p, X86_TWOB, 0x6b); + emit_modrm( p, dst, src ); +} + +void mmx_packuswb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.file == file_MMX && + (src.file == file_MMX || src.mod != mod_REG)); + + p->need_emms = 1; + + emit_2ub(p, X86_TWOB, 0x67); + emit_modrm( p, dst, src ); +} + +void mmx_movd( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + p->need_emms = 1; + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x6e, 0x7e, dst, src ); +} + +void mmx_movq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + p->need_emms = 1; + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x6f, 0x7f, dst, src ); +} + + +/*********************************************************************** + * Helper functions + */ + + +/* Retreive a reference to one of the function arguments, taking into + * account any push/pop activity: + */ +struct x86_reg x86_fn_arg( struct x86_function *p, + unsigned arg ) +{ + return x86_make_disp(x86_make_reg(file_REG32, reg_SP), + p->stack_offset + arg * 4); /* ??? */ +} + + +void x86_init_func( struct x86_function *p ) +{ + p->size = 0; + p->store = NULL; + p->csr = p->store; +} + +void x86_init_func_size( struct x86_function *p, unsigned code_size ) +{ + p->size = code_size; + p->store = rtasm_exec_malloc(code_size); + p->csr = p->store; +} + +void x86_release_func( struct x86_function *p ) +{ + rtasm_exec_free(p->store); + p->store = NULL; + p->csr = NULL; + p->size = 0; +} + + +void (*x86_get_func( struct x86_function *p ))(void) +{ + if (DISASSEM && p->store) + debug_printf("disassemble %p %p\n", p->store, p->csr); + return (void (*)(void)) p->store; +} + +#else + +void x86sse_dummy( void ) +{ +} + +#endif diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h new file mode 100644 index 0000000000..606b41eb35 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -0,0 +1,278 @@ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef _RTASM_X86SSE_H_ +#define _RTASM_X86SSE_H_ + +#if defined(__i386__) || defined(__386__) || defined(i386) + +/* It is up to the caller to ensure that instructions issued are + * suitable for the host cpu. There are no checks made in this module + * for mmx/sse/sse2 support on the cpu. + */ +struct x86_reg { + unsigned file:3; + unsigned idx:3; + unsigned mod:2; /* mod_REG if this is just a register */ + int disp:24; /* only +/- 23bits of offset - should be enough... */ +}; + +struct x86_function { + unsigned size; + unsigned char *store; + unsigned char *csr; + unsigned stack_offset; + int need_emms; + const char *fn; +}; + +enum x86_reg_file { + file_REG32, + file_MMX, + file_XMM, + file_x87 +}; + +/* Values for mod field of modr/m byte + */ +enum x86_reg_mod { + mod_INDIRECT, + mod_DISP8, + mod_DISP32, + mod_REG +}; + +enum x86_reg_name { + reg_AX, + reg_CX, + reg_DX, + reg_BX, + reg_SP, + reg_BP, + reg_SI, + reg_DI +}; + + +enum x86_cc { + cc_O, /* overflow */ + cc_NO, /* not overflow */ + cc_NAE, /* not above or equal / carry */ + cc_AE, /* above or equal / not carry */ + cc_E, /* equal / zero */ + cc_NE /* not equal / not zero */ +}; + +enum sse_cc { + cc_Equal, + cc_LessThan, + cc_LessThanEqual, + cc_Unordered, + cc_NotEqual, + cc_NotLessThan, + cc_NotLessThanEqual, + cc_Ordered +}; + +#define cc_Z cc_E +#define cc_NZ cc_NE + +/* Begin/end/retreive function creation: + */ + + +void x86_init_func( struct x86_function *p ); +void x86_init_func_size( struct x86_function *p, unsigned code_size ); +void x86_release_func( struct x86_function *p ); +void (*x86_get_func( struct x86_function *p ))( void ); + + + +/* Create and manipulate registers and regmem values: + */ +struct x86_reg x86_make_reg( enum x86_reg_file file, + enum x86_reg_name idx ); + +struct x86_reg x86_make_disp( struct x86_reg reg, + int disp ); + +struct x86_reg x86_deref( struct x86_reg reg ); + +struct x86_reg x86_get_base_reg( struct x86_reg reg ); + + +/* Labels, jumps and fixup: + */ +unsigned char *x86_get_label( struct x86_function *p ); + +void x86_jcc( struct x86_function *p, + enum x86_cc cc, + unsigned char *label ); + +unsigned char *x86_jcc_forward( struct x86_function *p, + enum x86_cc cc ); + +unsigned char *x86_jmp_forward( struct x86_function *p); + +unsigned char *x86_call_forward( struct x86_function *p); + +void x86_fixup_fwd_jump( struct x86_function *p, + unsigned char *fixup ); + +void x86_jmp( struct x86_function *p, unsigned char *label ); + +/* void x86_call( struct x86_function *p, void (*label)() ); */ +void x86_call( struct x86_function *p, struct x86_reg reg); + +/* michal: + * Temporary. As I need immediate operands, and dont want to mess with the codegen, + * I load the immediate into general purpose register and use it. + */ +void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); + + +/* Macro for sse_shufps() and sse2_pshufd(): + */ +#define SHUF(_x,_y,_z,_w) (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6)) +#define SHUF_NOOP RSW(0,1,2,3) +#define GET_SHUF(swz, idx) (((swz) >> ((idx)*2)) & 0x3) + +void mmx_emms( struct x86_function *p ); +void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); + +void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, + unsigned char shuf ); +void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); + +void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, + unsigned char cc ); +void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, + unsigned char shuf ); +void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); + +void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_dec( struct x86_function *p, struct x86_reg reg ); +void x86_inc( struct x86_function *p, struct x86_reg reg ); +void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_mul( struct x86_function *p, struct x86_reg src ); +void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_pop( struct x86_function *p, struct x86_reg reg ); +void x86_push( struct x86_function *p, struct x86_reg reg ); +void x86_ret( struct x86_function *p ); +void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_sahf( struct x86_function *p ); + +void x87_f2xm1( struct x86_function *p ); +void x87_fabs( struct x86_function *p ); +void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_faddp( struct x86_function *p, struct x86_reg dst ); +void x87_fchs( struct x86_function *p ); +void x87_fclex( struct x86_function *p ); +void x87_fcom( struct x86_function *p, struct x86_reg dst ); +void x87_fcomp( struct x86_function *p, struct x86_reg dst ); +void x87_fcos( struct x86_function *p ); +void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fdivp( struct x86_function *p, struct x86_reg dst ); +void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fdivrp( struct x86_function *p, struct x86_reg dst ); +void x87_fild( struct x86_function *p, struct x86_reg arg ); +void x87_fist( struct x86_function *p, struct x86_reg dst ); +void x87_fistp( struct x86_function *p, struct x86_reg dst ); +void x87_fld( struct x86_function *p, struct x86_reg arg ); +void x87_fld1( struct x86_function *p ); +void x87_fldcw( struct x86_function *p, struct x86_reg arg ); +void x87_fldl2e( struct x86_function *p ); +void x87_fldln2( struct x86_function *p ); +void x87_fldz( struct x86_function *p ); +void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fmulp( struct x86_function *p, struct x86_reg dst ); +void x87_fnclex( struct x86_function *p ); +void x87_fprndint( struct x86_function *p ); +void x87_fscale( struct x86_function *p ); +void x87_fsin( struct x86_function *p ); +void x87_fsincos( struct x86_function *p ); +void x87_fsqrt( struct x86_function *p ); +void x87_fst( struct x86_function *p, struct x86_reg dst ); +void x87_fstp( struct x86_function *p, struct x86_reg dst ); +void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fsubp( struct x86_function *p, struct x86_reg dst ); +void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fsubrp( struct x86_function *p, struct x86_reg dst ); +void x87_fxch( struct x86_function *p, struct x86_reg dst ); +void x87_fxtract( struct x86_function *p ); +void x87_fyl2x( struct x86_function *p ); +void x87_fyl2xp1( struct x86_function *p ); +void x87_fwait( struct x86_function *p ); +void x87_fnstsw( struct x86_function *p, struct x86_reg dst ); +void x87_fucompp( struct x86_function *p ); +void x87_fucomp( struct x86_function *p, struct x86_reg arg ); +void x87_fucom( struct x86_function *p, struct x86_reg arg ); + + + +/* Retreive a reference to one of the function arguments, taking into + * account any push/pop activity. Note - doesn't track explict + * manipulation of ESP by other instructions. + */ +struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg ); + +#endif +#endif diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index 12a8bd0409..5555639b70 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -1,3 +1,19 @@ -default: - cd ../.. ; make +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = tgsi + +C_SOURCES = \ + exec/tgsi_exec.c \ + exec/tgsi_sse2.c \ + util/tgsi_build.c \ + util/tgsi_dump.c \ + util/tgsi_parse.c \ + util/tgsi_scan.c \ + util/tgsi_transform.c \ + util/tgsi_util.c + +include ../../Makefile.template + +symlinks: diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript new file mode 100644 index 0000000000..4632dcc072 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/SConscript @@ -0,0 +1,16 @@ +Import('*') + +tgsi = env.ConvenienceLibrary( + target = 'tgsi', + source = [ + 'exec/tgsi_exec.c', + 'exec/tgsi_sse2.c', + 'util/tgsi_build.c', + 'util/tgsi_dump.c', + 'util/tgsi_parse.c', + 'util/tgsi_scan.c', + 'util/tgsi_transform.c', + 'util/tgsi_util.c', + ]) + +auxiliaries.insert(0, tgsi) diff --git a/src/gallium/auxiliary/tgsi/exec/Makefile b/src/gallium/auxiliary/tgsi/exec/Makefile deleted file mode 100644 index eb8b14e0e8..0000000000 --- a/src/gallium/auxiliary/tgsi/exec/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -default: - cd ../../.. ; make - diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index d7b18dc9c5..ac52441400 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -2455,7 +2455,7 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) /* execute instructions, until pc is set to -1 */ while (pc != -1) { - assert(pc < mach->NumInstructions); + assert(pc < (int) mach->NumInstructions); exec_instruction( mach, mach->Instructions + pc, &pc ); } diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 62c6a69c63..0da3b0bdb7 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -32,7 +32,7 @@ #include "tgsi_exec.h" #include "tgsi_sse2.h" -#include "x86/rtasm/x86sse.h" +#include "rtasm/rtasm_x86sse.h" #if defined(__i386__) || defined(__386__) @@ -1268,7 +1268,7 @@ emit_store( break; case TGSI_SAT_ZERO_ONE: -// assert( 0 ); + /* assert( 0 ); */ break; case TGSI_SAT_MINUS_PLUS_ONE: @@ -2268,7 +2268,7 @@ tgsi_emit_sse2( &parse.FullToken.FullInstruction ); if (!ok) { - debug_printf("failed to translate tgsi opcode %d\n", + debug_printf("failed to translate tgsi opcode %d to SSE\n", parse.FullToken.FullInstruction.Instruction.Opcode ); } break; @@ -2276,7 +2276,7 @@ tgsi_emit_sse2( case TGSI_TOKEN_TYPE_IMMEDIATE: /* XXX implement this */ ok = 0; - debug_printf("failed to emit immediate value\n"); + debug_printf("failed to emit immediate value to SSE\n"); break; default: @@ -2356,11 +2356,17 @@ tgsi_emit_sse2_fs( ok = emit_instruction( func, &parse.FullToken.FullInstruction ); + + if (!ok) { + debug_printf("failed to translate tgsi opcode %d to SSE\n", + parse.FullToken.FullInstruction.Instruction.Opcode ); + } break; case TGSI_TOKEN_TYPE_IMMEDIATE: /* XXX implement this */ ok = 0; + debug_printf("failed to emit immediate value to SSE\n"); break; default: diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h index 9bee371766..63b8ef3911 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h @@ -3,7 +3,7 @@ #if defined __cplusplus extern "C" { -#endif // defined __cplusplus +#endif struct tgsi_token; struct x86_function; @@ -19,8 +19,8 @@ tgsi_emit_sse2_fs( struct x86_function *function ); #if defined __cplusplus -} // extern "C" -#endif // defined __cplusplus +} +#endif -#endif // !defined TGSI_SSE2_H +#endif /* TGSI_SSE2_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.h b/src/gallium/auxiliary/tgsi/util/tgsi_build.h index 116c78abf3..607860e7fc 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.h @@ -3,7 +3,7 @@ #if defined __cplusplus extern "C" { -#endif // defined __cplusplus +#endif /* * version @@ -313,8 +313,8 @@ tgsi_build_dst_register_ext_modulate( struct tgsi_header *header ); #if defined __cplusplus -} // extern "C" -#endif // defined __cplusplus +} +#endif -#endif // !defined TGSI_BUILD_H +#endif /* TGSI_BUILD_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index b5c54847e0..59be14a748 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -299,7 +299,8 @@ static const char *TGSI_SEMANTICS[] = "SEMANTIC_BCOLOR", "SEMANTIC_FOG", "SEMANTIC_PSIZE", - "SEMANTIC_GENERIC," + "SEMANTIC_GENERIC", + "SEMANTIC_NORMAL" }; static const char *TGSI_SEMANTICS_SHORT[] = @@ -310,6 +311,7 @@ static const char *TGSI_SEMANTICS_SHORT[] = "FOG", "PSIZE", "GENERIC", + "NORMAL" }; static const char *TGSI_IMMS[] = @@ -662,6 +664,19 @@ static const char *TGSI_TEXTURES[] = "TEXTURE_SHADOWRECT" }; +static const char *TGSI_TEXTURES_SHORT[] = +{ + "UNKNOWN", + "1D", + "2D", + "3D", + "CUBE", + "RECT", + "SHADOW1D", + "SHADOW2D", + "SHADOWRECT" +}; + static const char *TGSI_SRC_REGISTER_EXTS[] = { "SRC_REGISTER_EXT_TYPE_SWZ", @@ -1035,6 +1050,11 @@ dump_instruction_short( first_reg = FALSE; } + if (inst->InstructionExtTexture.Texture != TGSI_TEXTURE_UNKNOWN) { + TXT( ", " ); + ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES_SHORT ); + } + switch( inst->Instruction.Opcode ) { case TGSI_OPCODE_IF: case TGSI_OPCODE_ELSE: diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h index 1adc9db251..b983b38226 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h @@ -3,7 +3,7 @@ #if defined __cplusplus extern "C" { -#endif // defined __cplusplus +#endif #define TGSI_DUMP_VERBOSE 1 #define TGSI_DUMP_NO_IGNORED 2 @@ -21,8 +21,8 @@ tgsi_dump_str( unsigned flags ); #if defined __cplusplus -} // extern "C" -#endif // defined __cplusplus +} +#endif -#endif // !defined TGSI_DUMP_H +#endif /* TGSI_DUMP_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h index 9372da8d5d..5ccb5bfcf6 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h @@ -3,7 +3,7 @@ #if defined __cplusplus extern "C" { -#endif // defined __cplusplus +#endif struct tgsi_full_version { @@ -114,8 +114,8 @@ tgsi_parse_token( struct tgsi_parse_context *ctx ); #if defined __cplusplus -} // extern "C" -#endif // defined __cplusplus +} +#endif -#endif // !defined TGSI_PARSE_H +#endif /* TGSI_PARSE_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c new file mode 100644 index 0000000000..ea4a72967d --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c @@ -0,0 +1,144 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI program scan utility. + * Used to determine which registers and instructions are used by a shader. + * + * Authors: Brian Paul + */ + + +#include "tgsi_scan.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_build.h" + +#include "pipe/p_util.h" + + + +/** + */ +void +tgsi_scan_shader(const struct tgsi_token *tokens, + struct tgsi_shader_info *info) +{ + uint procType, i; + struct tgsi_parse_context parse; + + memset(info, 0, sizeof(*info)); + for (i = 0; i < TGSI_FILE_COUNT; i++) + info->file_max[i] = -1; + + /** + ** Setup to begin parsing input shader + **/ + if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) { + debug_printf("tgsi_parse_init() failed in tgsi_scan_shader()!\n"); + return; + } + procType = parse.FullHeader.Processor.Processor; + assert(procType == TGSI_PROCESSOR_FRAGMENT || + procType == TGSI_PROCESSOR_VERTEX || + procType == TGSI_PROCESSOR_GEOMETRY); + + + /** + ** Loop over incoming program tokens/instructions + */ + while( !tgsi_parse_end_of_tokens( &parse ) ) { + + info->num_tokens++; + + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + struct tgsi_full_instruction *fullinst + = &parse.FullToken.FullInstruction; + + assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST); + info->opcode_count[fullinst->Instruction.Opcode]++; + } + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + { + struct tgsi_full_declaration *fulldecl + = &parse.FullToken.FullDeclaration; + uint file = fulldecl->Declaration.File; + uint i; + for (i = fulldecl->u.DeclarationRange.First; + i <= fulldecl->u.DeclarationRange.Last; + i++) { + + /* only first 32 regs will appear in this bitfield */ + info->file_mask[file] |= (1 << i); + info->file_count[file]++; + info->file_max[file] = MAX2(info->file_max[file], (int)i); + + if (file == TGSI_FILE_INPUT) { + info->input_semantic_name[info->num_inputs] + = (ubyte)fulldecl->Semantic.SemanticName; + info->input_semantic_index[info->num_inputs] + = (ubyte)fulldecl->Semantic.SemanticIndex; + info->num_inputs++; + } + + if (file == TGSI_FILE_OUTPUT) { + info->output_semantic_name[info->num_outputs] + = (ubyte)fulldecl->Semantic.SemanticName; + info->output_semantic_index[info->num_outputs] + = (ubyte)fulldecl->Semantic.SemanticIndex; + info->num_outputs++; + } + + /* special case */ + if (procType == TGSI_PROCESSOR_FRAGMENT && + file == TGSI_FILE_OUTPUT && + fulldecl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) { + info->writes_z = TRUE; + } + } + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + info->immediate_count++; + break; + + default: + assert( 0 ); + } + } + + info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] || + info->opcode_count[TGSI_OPCODE_KILP]); + + tgsi_parse_free (&parse); +} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h new file mode 100644 index 0000000000..0530bc6b51 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h @@ -0,0 +1,70 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_SCAN_H +#define TGSI_SCAN_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" + + +/** + * Shader summary info + */ +struct tgsi_shader_info +{ + uint num_tokens; + + /* XXX eventually remove the corresponding fields from pipe_shader_state: */ + ubyte num_inputs; + ubyte num_outputs; + ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */ + ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; + ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ + ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; + + uint file_mask[TGSI_FILE_COUNT]; /**< bitmask of declared registers */ + uint file_count[TGSI_FILE_COUNT]; /**< number of declared registers */ + int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */ + + uint immediate_count; /**< number of immediates declared */ + + uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */ + + boolean writes_z; /**< does fragment shader write Z value? */ + boolean uses_kill; /**< KIL or KILP instruction used? */ +}; + + +extern void +tgsi_scan_shader(const struct tgsi_token *tokens, + struct tgsi_shader_info *info); + + +#endif /* TGSI_SCAN_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.h b/src/gallium/auxiliary/tgsi/util/tgsi_util.h index ef14446f0e..45f5f0be0e 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_util.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_util.h @@ -3,7 +3,7 @@ #if defined __cplusplus extern "C" { -#endif // defined __cplusplus +#endif void * tgsi_align_128bit( @@ -63,8 +63,8 @@ tgsi_util_set_full_src_register_sign_mode( unsigned sign_mode ); #if defined __cplusplus -} // extern "C" -#endif // defined __cplusplus +} +#endif -#endif // !defined TGSI_UTIL_H +#endif /* TGSI_UTIL_H */ diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile new file mode 100644 index 0000000000..906a46d6b4 --- /dev/null +++ b/src/gallium/auxiliary/util/Makefile @@ -0,0 +1,15 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = util + +C_SOURCES = \ + p_debug.c \ + p_tile.c \ + p_util.c \ + u_mm.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript new file mode 100644 index 0000000000..4717941434 --- /dev/null +++ b/src/gallium/auxiliary/util/SConscript @@ -0,0 +1,12 @@ +Import('*') + +util = env.ConvenienceLibrary( + target = 'util', + source = [ + 'p_debug.c', + 'p_tile.c', + 'p_util.c', + 'u_mm.c', + ]) + +auxiliaries.insert(0, util) diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index 3f795a3898..287d783981 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -697,3 +697,127 @@ pipe_put_tile_rgba(struct pipe_context *pipe, FREE(packed); } + + +/** + * Get a block of Z values, converted to 32-bit range. + */ +void +pipe_get_tile_z(struct pipe_context *pipe, + struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + uint *z) +{ + const uint dstStride = w; + uint *pDest = z; + uint i, j; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + switch (ps->format) { + case PIPE_FORMAT_Z32_UNORM: + { + const uint *pSrc + = (const uint *) pipe_surface_map(ps) + (y * ps->pitch + x); + for (i = 0; i < h; i++) { + memcpy(pDest, pSrc, 4 * w); + pDest += dstStride; + pSrc += ps->pitch; + } + } + break; + case PIPE_FORMAT_S8Z24_UNORM: + { + const uint *pSrc + = (const uint *) pipe_surface_map(ps) + (y * ps->pitch + x); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 24-bit Z to 32-bit Z */ + pDest[j] = (pSrc[j] << 8) | (pSrc[j] & 0xff); + } + pDest += dstStride; + pSrc += ps->pitch; + } + } + break; + case PIPE_FORMAT_Z16_UNORM: + { + const ushort *pSrc + = (const ushort *) pipe_surface_map(ps) + (y * ps->pitch + x); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 16-bit Z to 32-bit Z */ + pDest[j] = (pSrc[j] << 16) | pSrc[j]; + } + pDest += dstStride; + pSrc += ps->pitch; + } + } + break; + default: + assert(0); + } + + pipe_surface_unmap(ps); +} + + +void +pipe_put_tile_z(struct pipe_context *pipe, + struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const uint *zSrc) +{ + const uint srcStride = w; + const uint *pSrc = zSrc; + uint i, j; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + switch (ps->format) { + case PIPE_FORMAT_Z32_UNORM: + { + uint *pDest = (uint *) pipe_surface_map(ps) + (y * ps->pitch + x); + for (i = 0; i < h; i++) { + memcpy(pDest, pSrc, 4 * w); + pDest += ps->pitch; + pSrc += srcStride; + } + } + break; + case PIPE_FORMAT_S8Z24_UNORM: + { + uint *pDest = (uint *) pipe_surface_map(ps) + (y * ps->pitch + x); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 32-bit Z to 24-bit Z (0 stencil) */ + pDest[j] = pSrc[j] >> 8; + } + pDest += ps->pitch; + pSrc += srcStride; + } + } + break; + case PIPE_FORMAT_Z16_UNORM: + { + ushort *pDest = (ushort *) pipe_surface_map(ps) + (y * ps->pitch + x); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 32-bit Z to 16-bit Z */ + pDest[j] = pSrc[j] >> 16; + } + pDest += ps->pitch; + pSrc += srcStride; + } + } + break; + default: + assert(0); + } + + pipe_surface_unmap(ps); +} + + diff --git a/src/gallium/auxiliary/util/p_tile.h b/src/gallium/auxiliary/util/p_tile.h index cd8124bf11..318b6d11a6 100644 --- a/src/gallium/auxiliary/util/p_tile.h +++ b/src/gallium/auxiliary/util/p_tile.h @@ -78,4 +78,18 @@ pipe_put_tile_rgba(struct pipe_context *pipe, uint x, uint y, uint w, uint h, const float *p); + +extern void +pipe_get_tile_z(struct pipe_context *pipe, + struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + uint *z); + +extern void +pipe_put_tile_z(struct pipe_context *pipe, + struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const uint *z); + + #endif diff --git a/src/gallium/auxiliary/pipebuffer/linked_list.h b/src/gallium/auxiliary/util/u_double_list.h index e99817fb13..8cb10c9820 100644 --- a/src/gallium/auxiliary/pipebuffer/linked_list.h +++ b/src/gallium/auxiliary/util/u_double_list.h @@ -34,8 +34,8 @@ * be protected using an external mutex. */ -#ifndef LINKED_LIST_H_ -#define LINKED_LIST_H_ +#ifndef _U_DOUBLE_LIST_H_ +#define _U_DOUBLE_LIST_H_ #include <stddef.h> @@ -88,4 +88,4 @@ struct list_head ((__type *)(((char *)(__item)) - offsetof(__type, __field))) -#endif /*LINKED_LIST_H_*/ +#endif /*_U_DOUBLE_LIST_H_*/ diff --git a/src/gallium/auxiliary/util/u_mm.c b/src/gallium/auxiliary/util/u_mm.c new file mode 100644 index 0000000000..b49ae074e0 --- /dev/null +++ b/src/gallium/auxiliary/util/u_mm.c @@ -0,0 +1,283 @@ +/************************************************************************** + * + * Copyright (C) 1999 Wittawat Yamwong + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * WITTAWAT YAMWONG, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_compiler.h" +#include "pipe/p_util.h" +#include "pipe/p_debug.h" + +#include "util/u_mm.h" + + +void +mmDumpMemInfo(const struct mem_block *heap) +{ + debug_printf("Memory heap %p:\n", (void *)heap); + if (heap == 0) { + debug_printf(" heap == 0\n"); + } else { + const struct mem_block *p; + + for(p = heap->next; p != heap; p = p->next) { + debug_printf(" Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, + p->free ? 'F':'.', + p->reserved ? 'R':'.'); + } + + debug_printf("\nFree list:\n"); + + for(p = heap->next_free; p != heap; p = p->next_free) { + debug_printf(" FREE Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, + p->free ? 'F':'.', + p->reserved ? 'R':'.'); + } + + } + debug_printf("End of memory blocks\n"); +} + +struct mem_block * +mmInit(int ofs, int size) +{ + struct mem_block *heap, *block; + + if (size <= 0) + return NULL; + + heap = CALLOC_STRUCT(mem_block); + if (!heap) + return NULL; + + block = CALLOC_STRUCT(mem_block); + if (!block) { + FREE(heap); + return NULL; + } + + heap->next = block; + heap->prev = block; + heap->next_free = block; + heap->prev_free = block; + + block->heap = heap; + block->next = heap; + block->prev = heap; + block->next_free = heap; + block->prev_free = heap; + + block->ofs = ofs; + block->size = size; + block->free = 1; + + return heap; +} + + +static struct mem_block * +SliceBlock(struct mem_block *p, + int startofs, int size, + int reserved, int alignment) +{ + struct mem_block *newblock; + + /* break left [p, newblock, p->next], then p = newblock */ + if (startofs > p->ofs) { + newblock = CALLOC_STRUCT(mem_block); + if (!newblock) + return NULL; + newblock->ofs = startofs; + newblock->size = p->size - (startofs - p->ofs); + newblock->free = 1; + newblock->heap = p->heap; + + newblock->next = p->next; + newblock->prev = p; + p->next->prev = newblock; + p->next = newblock; + + newblock->next_free = p->next_free; + newblock->prev_free = p; + p->next_free->prev_free = newblock; + p->next_free = newblock; + + p->size -= newblock->size; + p = newblock; + } + + /* break right, also [p, newblock, p->next] */ + if (size < p->size) { + newblock = CALLOC_STRUCT(mem_block); + if (!newblock) + return NULL; + newblock->ofs = startofs + size; + newblock->size = p->size - size; + newblock->free = 1; + newblock->heap = p->heap; + + newblock->next = p->next; + newblock->prev = p; + p->next->prev = newblock; + p->next = newblock; + + newblock->next_free = p->next_free; + newblock->prev_free = p; + p->next_free->prev_free = newblock; + p->next_free = newblock; + + p->size = size; + } + + /* p = middle block */ + p->free = 0; + + /* Remove p from the free list: + */ + p->next_free->prev_free = p->prev_free; + p->prev_free->next_free = p->next_free; + + p->next_free = 0; + p->prev_free = 0; + + p->reserved = reserved; + return p; +} + + +struct mem_block * +mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) +{ + struct mem_block *p; + const int mask = (1 << align2)-1; + int startofs = 0; + int endofs; + + if (!heap || align2 < 0 || size <= 0) + return NULL; + + for (p = heap->next_free; p != heap; p = p->next_free) { + assert(p->free); + + startofs = (p->ofs + mask) & ~mask; + if ( startofs < startSearch ) { + startofs = startSearch; + } + endofs = startofs+size; + if (endofs <= (p->ofs+p->size)) + break; + } + + if (p == heap) + return NULL; + + assert(p->free); + p = SliceBlock(p,startofs,size,0,mask+1); + + return p; +} + + +struct mem_block * +mmFindBlock(struct mem_block *heap, int start) +{ + struct mem_block *p; + + for (p = heap->next; p != heap; p = p->next) { + if (p->ofs == start) + return p; + } + + return NULL; +} + + +static INLINE int +Join2Blocks(struct mem_block *p) +{ + /* XXX there should be some assertions here */ + + /* NOTE: heap->free == 0 */ + + if (p->free && p->next->free) { + struct mem_block *q = p->next; + + assert(p->ofs + p->size == q->ofs); + p->size += q->size; + + p->next = q->next; + q->next->prev = p; + + q->next_free->prev_free = q->prev_free; + q->prev_free->next_free = q->next_free; + + FREE(q); + return 1; + } + return 0; +} + +int +mmFreeMem(struct mem_block *b) +{ + if (!b) + return 0; + + if (b->free) { + debug_printf("block already free\n"); + return -1; + } + if (b->reserved) { + debug_printf("block is reserved\n"); + return -1; + } + + b->free = 1; + b->next_free = b->heap->next_free; + b->prev_free = b->heap; + b->next_free->prev_free = b; + b->prev_free->next_free = b; + + Join2Blocks(b); + if (b->prev != b->heap) + Join2Blocks(b->prev); + + return 0; +} + + +void +mmDestroy(struct mem_block *heap) +{ + struct mem_block *p; + + if (!heap) + return; + + for (p = heap->next; p != heap; ) { + struct mem_block *next = p->next; + FREE(p); + p = next; + } + + FREE(heap); +} diff --git a/src/gallium/auxiliary/util/u_mm.h b/src/gallium/auxiliary/util/u_mm.h new file mode 100644 index 0000000000..b226b101cb --- /dev/null +++ b/src/gallium/auxiliary/util/u_mm.h @@ -0,0 +1,91 @@ +/************************************************************************** + * + * Copyright (C) 1999 Wittawat Yamwong + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * KEITH WHITWELL, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Memory manager code. Primarily used by device drivers to manage texture + * heaps, etc. + */ + + +#ifndef _U_MM_H_ +#define _U_MM_H_ + + +struct mem_block { + struct mem_block *next, *prev; + struct mem_block *next_free, *prev_free; + struct mem_block *heap; + int ofs,size; + unsigned int free:1; + unsigned int reserved:1; +}; + + + +/** + * input: total size in bytes + * return: a heap pointer if OK, NULL if error + */ +extern struct mem_block *mmInit(int ofs, int size); + +/** + * Allocate 'size' bytes with 2^align2 bytes alignment, + * restrict the search to free memory after 'startSearch' + * depth and back buffers should be in different 4mb banks + * to get better page hits if possible + * input: size = size of block + * align2 = 2^align2 bytes alignment + * startSearch = linear offset from start of heap to begin search + * return: pointer to the allocated block, 0 if error + */ +extern struct mem_block *mmAllocMem(struct mem_block *heap, int size, int align2, + int startSearch); + +/** + * Free block starts at offset + * input: pointer to a block + * return: 0 if OK, -1 if error + */ +extern int mmFreeMem(struct mem_block *b); + +/** + * Free block starts at offset + * input: pointer to a heap, start offset + * return: pointer to a block + */ +extern struct mem_block *mmFindBlock(struct mem_block *heap, int start); + +/** + * destroy MM + */ +extern void mmDestroy(struct mem_block *mmInit); + +/** + * For debuging purpose. + */ +extern void mmDumpMemInfo(const struct mem_block *mmInit); + +#endif diff --git a/src/gallium/auxiliary/util/u_simple_list.h b/src/gallium/auxiliary/util/u_simple_list.h new file mode 100644 index 0000000000..f5f43b0faa --- /dev/null +++ b/src/gallium/auxiliary/util/u_simple_list.h @@ -0,0 +1,197 @@ +/** + * \file simple_list.h + * Simple macros for type-safe, intrusive lists. + * + * Intended to work with a list sentinal which is created as an empty + * list. Insert & delete are O(1). + * + * \author + * (C) 1997, Keith Whitwell + */ + +/* + * Mesa 3-D graphics library + * Version: 3.5 + * + * Copyright (C) 1999-2001 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#ifndef _U_SIMPLE_LIST_H_ +#define _U_SIMPLE_LIST_H_ + +/** + * Remove an element from list. + * + * \param elem element to remove. + */ +#define remove_from_list(elem) \ +do { \ + (elem)->next->prev = (elem)->prev; \ + (elem)->prev->next = (elem)->next; \ +} while (0) + +/** + * Insert an element to the list head. + * + * \param list list. + * \param elem element to insert. + */ +#define insert_at_head(list, elem) \ +do { \ + (elem)->prev = list; \ + (elem)->next = (list)->next; \ + (list)->next->prev = elem; \ + (list)->next = elem; \ +} while(0) + +/** + * Insert an element to the list tail. + * + * \param list list. + * \param elem element to insert. + */ +#define insert_at_tail(list, elem) \ +do { \ + (elem)->next = list; \ + (elem)->prev = (list)->prev; \ + (list)->prev->next = elem; \ + (list)->prev = elem; \ +} while(0) + +/** + * Move an element to the list head. + * + * \param list list. + * \param elem element to move. + */ +#define move_to_head(list, elem) \ +do { \ + remove_from_list(elem); \ + insert_at_head(list, elem); \ +} while (0) + +/** + * Move an element to the list tail. + * + * \param list list. + * \param elem element to move. + */ +#define move_to_tail(list, elem) \ +do { \ + remove_from_list(elem); \ + insert_at_tail(list, elem); \ +} while (0) + +/** + * Make a empty list empty. + * + * \param sentinal list (sentinal element). + */ +#define make_empty_list(sentinal) \ +do { \ + (sentinal)->next = sentinal; \ + (sentinal)->prev = sentinal; \ +} while (0) + +/** + * Get list first element. + * + * \param list list. + * + * \return pointer to first element. + */ +#define first_elem(list) ((list)->next) + +/** + * Get list last element. + * + * \param list list. + * + * \return pointer to last element. + */ +#define last_elem(list) ((list)->prev) + +/** + * Get next element. + * + * \param elem element. + * + * \return pointer to next element. + */ +#define next_elem(elem) ((elem)->next) + +/** + * Get previous element. + * + * \param elem element. + * + * \return pointer to previous element. + */ +#define prev_elem(elem) ((elem)->prev) + +/** + * Test whether element is at end of the list. + * + * \param list list. + * \param elem element. + * + * \return non-zero if element is at end of list, or zero otherwise. + */ +#define at_end(list, elem) ((elem) == (list)) + +/** + * Test if a list is empty. + * + * \param list list. + * + * \return non-zero if list empty, or zero otherwise. + */ +#define is_empty_list(list) ((list)->next == (list)) + +/** + * Walk through the elements of a list. + * + * \param ptr pointer to the current element. + * \param list list. + * + * \note It should be followed by a { } block or a single statement, as in a \c + * for loop. + */ +#define foreach(ptr, list) \ + for( ptr=(list)->next ; ptr!=list ; ptr=(ptr)->next ) + +/** + * Walk through the elements of a list. + * + * Same as #foreach but lets you unlink the current value during a list + * traversal. Useful for freeing a list, element by element. + * + * \param ptr pointer to the current element. + * \param t temporary pointer. + * \param list list. + * + * \note It should be followed by a { } block or a single statement, as in a \c + * for loop. + */ +#define foreach_s(ptr, t, list) \ + for(ptr=(list)->next,t=(ptr)->next; list != ptr; ptr=t, t=(t)->next) + +#endif /* _U_SIMPLE_LIST_H_ */ diff --git a/src/gallium/drivers/Makefile b/src/gallium/drivers/Makefile index 58df6c5009..6161cb6ff8 100644 --- a/src/gallium/drivers/Makefile +++ b/src/gallium/drivers/Makefile @@ -2,12 +2,7 @@ TOP = ../../.. include $(TOP)/configs/current -ifeq ($(CONFIG_NAME), linux-cell) -CELL_DIR = cell -endif - -SUBDIRS = softpipe i915simple i965simple nv30 nv40 nv50 \ - failover pipebuffer $(CELL_DIR) +SUBDIRS = $(GALLIUM_DRIVER_DIRS) default: subdirs diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 74b131fbef..9a4004535e 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -87,10 +87,13 @@ #define CELL_CMD_STATE_TEXTURE 13 #define CELL_CMD_STATE_VERTEX_INFO 14 #define CELL_CMD_STATE_VIEWPORT 15 -#define CELL_CMD_STATE_VS_ARRAY_INFO 16 -#define CELL_CMD_STATE_BLEND 17 -#define CELL_CMD_VS_EXECUTE 18 -#define CELL_CMD_STATE_ATTRIB_FETCH 19 +#define CELL_CMD_STATE_UNIFORMS 16 +#define CELL_CMD_STATE_VS_ARRAY_INFO 17 +#define CELL_CMD_STATE_BIND_VS 18 +#define CELL_CMD_STATE_BLEND 19 +#define CELL_CMD_STATE_ATTRIB_FETCH 20 +#define CELL_CMD_VS_EXECUTE 21 +#define CELL_CMD_FLUSH_BUFFER_RANGE 22 #define CELL_NUM_BUFFERS 4 @@ -134,7 +137,7 @@ struct cell_array_info uint pitch; /**< Byte pitch from one entry to the next. */ uint size; uint function_offset; -} ALIGN16_ATTRIB; +}; struct cell_attribute_fetch_code { @@ -142,32 +145,37 @@ struct cell_attribute_fetch_code { uint size; }; + +struct cell_buffer_range { + uint64_t base; + unsigned size; +}; + + struct cell_shader_info { - unsigned num_outputs; - uint64_t declarations; - unsigned num_declarations; uint64_t instructions; - unsigned num_instructions; - uint64_t uniforms; uint64_t immediates; + + unsigned num_outputs; + unsigned num_declarations; + unsigned num_instructions; unsigned num_immediates; -} ALIGN16_ATTRIB; +}; #define SPU_VERTS_PER_BATCH 64 struct cell_command_vs { uint64_t opcode; /**< CELL_CMD_VS_EXECUTE */ - struct cell_shader_info shader; + uint64_t vOut[SPU_VERTS_PER_BATCH]; unsigned num_elts; unsigned elts[SPU_VERTS_PER_BATCH]; - uint64_t vOut[SPU_VERTS_PER_BATCH]; float plane[12][4]; unsigned nr_planes; unsigned nr_attrs; -} ALIGN16_ATTRIB; +}; struct cell_command_render diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index 196ab777f5..d38fa6ce07 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -1,6 +1,6 @@ # Gallium3D Cell driver: PPU code -# This makefile builds the g3dcell.a library which gets pulled into +# This makefile builds the libcell.a library which gets pulled into # the main libGL.so library @@ -8,10 +8,14 @@ TOP = ../../../../.. include $(TOP)/configs/linux-cell -#PROG = gl4 +# This is the "top-level" cell PPU driver code, will get pulled into libGL.so +# by the winsys Makefile. +CELL_LIB = ../libcell.a -CELL_LIB = libcell.a +# This is the SPU code. We'd like to be able to put this into the libcell.a +# archive with the PPU code, but nesting .a libs doesn't seem to work. +# So, it's pulled into libGL.so in gallium/winsys/xlib/Makefile SPU_CODE_MODULE = ../spu/g3d_spu.a @@ -21,14 +25,11 @@ SOURCES = \ cell_context.c \ cell_draw_arrays.c \ cell_flush.c \ - cell_state_blend.c \ - cell_state_clip.c \ cell_state_derived.c \ cell_state_emit.c \ - cell_state_fs.c \ - cell_state_rasterizer.c \ - cell_state_sampler.c \ - cell_state_surface.c \ + cell_state_shader.c \ + cell_pipe_state.c \ + cell_screen.c \ cell_state_vertex.c \ cell_spu.c \ cell_surface.c \ @@ -56,7 +57,7 @@ default: $(CELL_LIB) $(CELL_LIB): $(OBJECTS) $(SPU_CODE_MODULE) -# ar -ru $(CELL_LIB) $(OBJECTS) $(SPU_CODE_MODULE) +# ar -ru $(CELL_LIB) $(OBJECTS) $(SPU_CODE_MODULE) # doesn't work ar -ru $(CELL_LIB) $(OBJECTS) #$(PROG): $(PPU_OBJECTS) diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c index e588a30d5b..3ffe09add6 100644 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -41,6 +41,7 @@ #include "cell_batch.h" #include "cell_flush.h" #include "cell_spu.h" +#include "cell_state.h" void diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index e1eb22f468..ccbbd1d331 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -37,9 +37,12 @@ #include "pipe/p_format.h" #include "pipe/p_util.h" #include "pipe/p_winsys.h" -#include "cell/common.h" +#include "pipe/p_screen.h" + #include "draw/draw_context.h" #include "draw/draw_private.h" + +#include "cell/common.h" #include "cell_clear.h" #include "cell_context.h" #include "cell_draw_arrays.h" @@ -48,104 +51,12 @@ #include "cell_state.h" #include "cell_surface.h" #include "cell_spu.h" +#include "cell_pipe_state.h" #include "cell_texture.h" #include "cell_vbuf.h" -static boolean -cell_is_format_supported( struct pipe_context *pipe, - enum pipe_format format, uint type ) -{ - /*struct cell_context *cell = cell_context( pipe );*/ - - switch (type) { - case PIPE_TEXTURE: - /* cell supports all texture formats, XXX for now anyway */ - return TRUE; - case PIPE_SURFACE: - /* cell supports all (off-screen) surface formats, XXX for now */ - return TRUE; - default: - assert(0); - return FALSE; - } -} - - -static int cell_get_param(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 8; - case PIPE_CAP_NPOT_TEXTURES: - return 1; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 1; - case PIPE_CAP_S3TC: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; - case PIPE_CAP_POINT_SPRITE: - return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; - case PIPE_CAP_OCCLUSION_QUERY: - return 1; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 12; /* max 2Kx2K */ - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 8; /* max 128x128x128 */ - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 12; /* max 2Kx2K */ - default: - return 0; - } -} - -static float cell_get_paramf(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 255.0; /* arbitrary */ - - case PIPE_CAP_MAX_POINT_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 255.0; /* arbitrary */ - - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 0.0; - - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0; /* arbitrary */ - - default: - return 0; - } -} - - -static const char * -cell_get_name( struct pipe_context *pipe ) -{ - return "Cell"; -} - -static const char * -cell_get_vendor( struct pipe_context *pipe ) -{ - return "Tungsten Graphics, Inc."; -} - - - static void cell_destroy_context( struct pipe_context *pipe ) { @@ -173,7 +84,8 @@ cell_draw_create(struct cell_context *cell) struct pipe_context * -cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws) +cell_create_context(struct pipe_screen *screen, + struct cell_winsys *cws) { struct cell_context *cell; uint spu, buf; @@ -186,52 +98,11 @@ cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws) memset(cell, 0, sizeof(*cell)); cell->winsys = cws; - cell->pipe.winsys = winsys; + cell->pipe.winsys = screen->winsys; + cell->pipe.screen = screen; cell->pipe.destroy = cell_destroy_context; - /* queries */ - cell->pipe.is_format_supported = cell_is_format_supported; - cell->pipe.get_name = cell_get_name; - cell->pipe.get_vendor = cell_get_vendor; - cell->pipe.get_param = cell_get_param; - cell->pipe.get_paramf = cell_get_paramf; - - /* state setters */ - cell->pipe.create_blend_state = cell_create_blend_state; - cell->pipe.bind_blend_state = cell_bind_blend_state; - cell->pipe.delete_blend_state = cell_delete_blend_state; - - cell->pipe.create_sampler_state = cell_create_sampler_state; - cell->pipe.bind_sampler_state = cell_bind_sampler_state; - cell->pipe.delete_sampler_state = cell_delete_sampler_state; - - cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state; - cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state; - cell->pipe.delete_depth_stencil_alpha_state = cell_delete_depth_stencil_alpha_state; - - cell->pipe.create_rasterizer_state = cell_create_rasterizer_state; - cell->pipe.bind_rasterizer_state = cell_bind_rasterizer_state; - cell->pipe.delete_rasterizer_state = cell_delete_rasterizer_state; - - cell->pipe.create_fs_state = cell_create_fs_state; - cell->pipe.bind_fs_state = cell_bind_fs_state; - cell->pipe.delete_fs_state = cell_delete_fs_state; - - cell->pipe.create_vs_state = cell_create_vs_state; - cell->pipe.bind_vs_state = cell_bind_vs_state; - cell->pipe.delete_vs_state = cell_delete_vs_state; - - cell->pipe.set_blend_color = cell_set_blend_color; - cell->pipe.set_clip_state = cell_set_clip_state; - cell->pipe.set_constant_buffer = cell_set_constant_buffer; - - cell->pipe.set_framebuffer_state = cell_set_framebuffer_state; - - cell->pipe.set_polygon_stipple = cell_set_polygon_stipple; - cell->pipe.set_scissor_state = cell_set_scissor_state; - cell->pipe.set_viewport_state = cell_set_viewport_state; - cell->pipe.set_vertex_buffer = cell_set_vertex_buffer; cell->pipe.set_vertex_element = cell_set_vertex_element; @@ -241,30 +112,33 @@ cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws) cell->pipe.clear = cell_clear_surface; cell->pipe.flush = cell_flush; - /* textures */ - cell->pipe.texture_create = cell_texture_create; - cell->pipe.texture_release = cell_texture_release; - cell->pipe.get_tex_surface = cell_get_tex_surface; - - cell->pipe.set_sampler_texture = cell_set_sampler_texture; - #if 0 cell->pipe.begin_query = cell_begin_query; cell->pipe.end_query = cell_end_query; cell->pipe.wait_query = cell_wait_query; #endif + cell_init_state_functions(cell); + cell_init_shader_functions(cell); cell_init_surface_functions(cell); + cell_init_texture_functions(cell); cell->draw = cell_draw_create(cell); cell_init_vbuf(cell); draw_set_rasterize_stage(cell->draw, cell->vbuf); + /* convert all points/lines to tris for the time being */ + draw_wide_point_threshold(cell->draw, 0.0); + draw_wide_line_threshold(cell->draw, 0.0); + /* * SPU stuff */ cell->num_spus = 6; + /* XXX is this in SDK 3.0 only? + cell->num_spus = spe_cpu_info_get(SPE_COUNT_PHYSICAL_SPES, -1); + */ cell_start_spus(cell); diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 91f8e542a2..bf27289f3f 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -36,7 +36,7 @@ #include "draw/draw_vbuf.h" #include "cell_winsys.h" #include "cell/common.h" -#include "ppc/rtasm/spe_asm.h" +#include "rtasm/rtasm_ppc_spe.h" struct cell_vbuf_render; @@ -128,12 +128,14 @@ cell_context(struct pipe_context *pipe) extern struct pipe_context * -cell_create_context(struct pipe_winsys *ws, struct cell_winsys *cws); +cell_create_context(struct pipe_screen *screen, struct cell_winsys *cws); extern void cell_vertex_shader_queue_flush(struct draw_context *draw); +/* XXX find a better home for this */ +extern void cell_update_vertex_fetch(struct draw_context *draw); #endif /* CELL_CONTEXT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index f12613649b..c839fb4d12 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -38,6 +38,7 @@ #include "cell_context.h" #include "cell_draw_arrays.h" #include "cell_state.h" +#include "cell_flush.h" #include "draw/draw_context.h" @@ -49,9 +50,12 @@ cell_map_constant_buffers(struct cell_context *sp) struct pipe_winsys *ws = sp->pipe.winsys; uint i; for (i = 0; i < 2; i++) { - if (sp->constants[i].size) + if (sp->constants[i].size) { sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); + cell_flush_buffer_range(sp, sp->mapped_constants[i], + sp->constants[i].buffer->size); + } } draw_set_mapped_constant_buffer(sp->draw, @@ -124,6 +128,7 @@ cell_draw_elements(struct pipe_context *pipe, void *buf = pipe->winsys->buffer_map(pipe->winsys, sp->vertex_buffer[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); + cell_flush_buffer_range(sp, buf, sp->vertex_buffer[i].buffer->size); draw_set_mapped_vertex_buffer(draw, i, buf); } } diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c index 20f27531fc..66a5627d84 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.c +++ b/src/gallium/drivers/cell/ppu/cell_flush.c @@ -82,3 +82,17 @@ cell_flush_int(struct pipe_context *pipe, unsigned flags) flushing = FALSE; } + + +void +cell_flush_buffer_range(struct cell_context *cell, void *ptr, + unsigned size) +{ + uint64_t batch[1 + (ROUNDUP8(sizeof(struct cell_buffer_range)) / 8)]; + struct cell_buffer_range *br = (struct cell_buffer_range *) & batch[1]; + + batch[0] = CELL_CMD_FLUSH_BUFFER_RANGE; + br->base = (uintptr_t) ptr; + br->size = size; + cell_batch_append(cell, batch, sizeof(batch)); +} diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h index eda351b1cb..7f940ae76b 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.h +++ b/src/gallium/drivers/cell/ppu/cell_flush.h @@ -35,4 +35,8 @@ cell_flush(struct pipe_context *pipe, unsigned flags); extern void cell_flush_int(struct pipe_context *pipe, unsigned flags); +extern void +cell_flush_buffer_range(struct cell_context *cell, void *ptr, + unsigned size); + #endif diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c new file mode 100644 index 0000000000..075e0a0c47 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -0,0 +1,325 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "draw/draw_context.h" +#include "cell_context.h" +#include "cell_state.h" +#include "cell_texture.h" + + + +static void * +cell_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + return mem_dup(blend, sizeof(*blend)); +} + + +static void +cell_bind_blend_state(struct pipe_context *pipe, void *blend) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->blend = (const struct pipe_blend_state *)blend; + + cell->dirty |= CELL_NEW_BLEND; +} + + +static void +cell_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + FREE(blend); +} + + +static void +cell_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *blend_color) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->blend_color = *blend_color; + + cell->dirty |= CELL_NEW_BLEND; +} + + + + +static void * +cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + return mem_dup(depth_stencil, sizeof(*depth_stencil)); +} + + +static void +cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->depth_stencil + = (const struct pipe_depth_stencil_alpha_state *) depth_stencil; + + cell->dirty |= CELL_NEW_DEPTH_STENCIL; +} + + +static void +cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth) +{ + FREE(depth); +} + + +static void cell_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct cell_context *cell = cell_context(pipe); + + /* pass the clip state to the draw module */ + draw_set_clip_state(cell->draw, clip); +} + + + +/* Called when driver state tracker notices changes to the viewport + * matrix: + */ +static void +cell_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct cell_context *cell = cell_context(pipe); + + cell->viewport = *viewport; /* struct copy */ + cell->dirty |= CELL_NEW_VIEWPORT; + + /* pass the viewport info to the draw module */ + draw_set_viewport_state(cell->draw, viewport); + + /* Using tnl/ and vf/ modules is temporary while getting started. + * Full pipe will have vertex shader, vertex fetch of its own. + */ +} + + +static void +cell_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct cell_context *cell = cell_context(pipe); + + memcpy( &cell->scissor, scissor, sizeof(*scissor) ); + cell->dirty |= CELL_NEW_SCISSOR; +} + + +static void +cell_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ + struct cell_context *cell = cell_context(pipe); + + memcpy( &cell->poly_stipple, stipple, sizeof(*stipple) ); + cell->dirty |= CELL_NEW_STIPPLE; +} + + + +static void * +cell_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *setup) +{ + struct pipe_rasterizer_state *state + = MALLOC(sizeof(struct pipe_rasterizer_state)); + memcpy(state, setup, sizeof(struct pipe_rasterizer_state)); + return state; +} + + +static void +cell_bind_rasterizer_state(struct pipe_context *pipe, void *setup) +{ + struct cell_context *cell = cell_context(pipe); + + /* pass-through to draw module */ + draw_set_rasterizer_state(cell->draw, setup); + + cell->rasterizer = (struct pipe_rasterizer_state *)setup; + + cell->dirty |= CELL_NEW_RASTERIZER; +} + + +static void +cell_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer) +{ + FREE(rasterizer); +} + + + +static void * +cell_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + return mem_dup(sampler, sizeof(*sampler)); +} + + +static void +cell_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + assert(unit < PIPE_MAX_SAMPLERS); + cell->sampler[unit] = (struct pipe_sampler_state *)sampler; + + cell->dirty |= CELL_NEW_SAMPLER; +} + + +static void +cell_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + FREE( sampler ); +} + + + +static void +cell_set_sampler_texture(struct pipe_context *pipe, + unsigned sampler, + struct pipe_texture *texture) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + pipe_texture_reference((struct pipe_texture **) &cell->texture[sampler], + texture); + + cell_update_texture_mapping(cell); + + cell->dirty |= CELL_NEW_TEXTURE; +} + + + +static void +cell_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct cell_context *cell = cell_context(pipe); + + if (1 /*memcmp(&cell->framebuffer, fb, sizeof(*fb))*/) { + struct pipe_surface *csurf = fb->cbufs[0]; + struct pipe_surface *zsurf = fb->zsbuf; + uint i; + + /* unmap old surfaces */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (cell->framebuffer.cbufs[i] && cell->cbuf_map[i]) { + pipe_surface_unmap(cell->framebuffer.cbufs[i]); + cell->cbuf_map[i] = NULL; + } + } + + if (cell->framebuffer.zsbuf && cell->zsbuf_map) { + pipe_surface_unmap(cell->framebuffer.zsbuf); + cell->zsbuf_map = NULL; + } + + /* update my state */ + cell->framebuffer = *fb; + + /* map new surfaces */ + if (csurf) + cell->cbuf_map[0] = pipe_surface_map(csurf); + + if (zsurf) + cell->zsbuf_map = pipe_surface_map(zsurf); + + cell->dirty |= CELL_NEW_FRAMEBUFFER; + } +} + + + +void +cell_init_state_functions(struct cell_context *cell) +{ + cell->pipe.create_blend_state = cell_create_blend_state; + cell->pipe.bind_blend_state = cell_bind_blend_state; + cell->pipe.delete_blend_state = cell_delete_blend_state; + + cell->pipe.create_sampler_state = cell_create_sampler_state; + cell->pipe.bind_sampler_state = cell_bind_sampler_state; + cell->pipe.delete_sampler_state = cell_delete_sampler_state; + + cell->pipe.set_sampler_texture = cell_set_sampler_texture; + + cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state; + cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state; + cell->pipe.delete_depth_stencil_alpha_state = cell_delete_depth_stencil_alpha_state; + + cell->pipe.create_rasterizer_state = cell_create_rasterizer_state; + cell->pipe.bind_rasterizer_state = cell_bind_rasterizer_state; + cell->pipe.delete_rasterizer_state = cell_delete_rasterizer_state; + + cell->pipe.set_blend_color = cell_set_blend_color; + cell->pipe.set_clip_state = cell_set_clip_state; + + cell->pipe.set_framebuffer_state = cell_set_framebuffer_state; + + cell->pipe.set_polygon_stipple = cell_set_polygon_stipple; + cell->pipe.set_scissor_state = cell_set_scissor_state; + cell->pipe.set_viewport_state = cell_set_viewport_state; +} + diff --git a/src/gallium/drivers/cell/ppu/cell_state_sampler.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.h index a33421a4ad..1889bd52ff 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_sampler.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,60 +25,15 @@ * **************************************************************************/ -/* Authors: - * Brian Paul - */ -#include "pipe/p_util.h" -#include "draw/draw_context.h" -#include "cell_context.h" -#include "cell_state.h" -#include "cell_texture.h" +#ifndef CELL_PIPE_STATE_H +#define CELL_PIPE_STATE_H -void * -cell_create_sampler_state(struct pipe_context *pipe, - const struct pipe_sampler_state *sampler) -{ - return mem_dup(sampler, sizeof(*sampler)); -} +struct cell_context; -void -cell_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) -{ - struct cell_context *cell = cell_context(pipe); +extern void +cell_init_state_functions(struct cell_context *cell); - draw_flush(cell->draw); - assert(unit < PIPE_MAX_SAMPLERS); - cell->sampler[unit] = (struct pipe_sampler_state *)sampler; - - cell->dirty |= CELL_NEW_SAMPLER; -} - - -void -cell_delete_sampler_state(struct pipe_context *pipe, - void *sampler) -{ - FREE( sampler ); -} - - - -void -cell_set_sampler_texture(struct pipe_context *pipe, - unsigned sampler, - struct pipe_texture *texture) -{ - struct cell_context *cell = cell_context(pipe); - - draw_flush(cell->draw); - - cell->texture[sampler] = texture; - - cell_update_texture_mapping(cell); - - cell->dirty |= CELL_NEW_TEXTURE; -} +#endif /* CELL_PIPE_STATE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c new file mode 100644 index 0000000000..124670df25 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -0,0 +1,166 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" + +#include "cell_screen.h" +#include "cell_texture.h" +#include "cell_winsys.h" + + +static const char * +cell_get_vendor(struct pipe_screen *screen) +{ + return "Tungsten Graphics, Inc."; +} + + +static const char * +cell_get_name(struct pipe_screen *screen) +{ + return "Cell"; +} + + +static int +cell_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 1; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 12; /* max 2Kx2K */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 12; /* max 2Kx2K */ + default: + return 0; + } +} + + +static float +cell_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 255.0; /* arbitrary */ + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; /* arbitrary */ + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 0.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; /* arbitrary */ + + default: + return 0; + } +} + + +static boolean +cell_is_format_supported( struct pipe_screen *screen, + enum pipe_format format, uint type ) +{ + switch (type) { + case PIPE_TEXTURE: + /* cell supports all texture formats, XXX for now anyway */ + return TRUE; + case PIPE_SURFACE: + /* cell supports all (off-screen) surface formats, XXX for now */ + return TRUE; + default: + assert(0); + return FALSE; + } +} + + +static void +cell_destroy_screen( struct pipe_screen *screen ) +{ + FREE(screen); +} + + +/** + * Create a new pipe_screen object + * Note: we're not presently subclassing pipe_screen (no cell_screen) but + * that would be the place to put SPU thread/context info... + */ +struct pipe_screen * +cell_create_screen(struct pipe_winsys *winsys) +{ + struct pipe_screen *screen = CALLOC_STRUCT(pipe_screen); + + if (!screen) + return NULL; + + screen->winsys = winsys; + + screen->destroy = cell_destroy_screen; + + screen->get_name = cell_get_name; + screen->get_vendor = cell_get_vendor; + screen->get_param = cell_get_param; + screen->get_paramf = cell_get_paramf; + screen->is_format_supported = cell_is_format_supported; + + cell_init_screen_texture_funcs(screen); + + return screen; +} diff --git a/src/gallium/drivers/cell/ppu/cell_screen.h b/src/gallium/drivers/cell/ppu/cell_screen.h new file mode 100644 index 0000000000..c7e15889d6 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_screen.h @@ -0,0 +1,41 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_SCREEN_H +#define CELL_SCREEN_H + + +struct pipe_screen; +struct pipe_winsys; + + +extern struct pipe_screen * +cell_create_screen(struct pipe_winsys *winsys); + + +#endif /* CELL_SCREEN_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c index 419e74dc40..973c0b1aa1 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -97,8 +97,18 @@ static void *cell_thread_function(void *arg) void cell_start_spus(struct cell_context *cell) { + static boolean one_time_init = FALSE; uint i, j; + + if (one_time_init) { + fprintf(stderr, "PPU: Multiple rendering contexts not yet supported " + "on Cell.\n"); + abort(); + } + + one_time_init = TRUE; + assert(cell->num_spus <= MAX_SPUS); ASSERT_ALIGN16(&cell_global.command[0]); diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h index 3a71ba14fa..31ce505e21 100644 --- a/src/gallium/drivers/cell/ppu/cell_state.h +++ b/src/gallium/drivers/cell/ppu/cell_state.h @@ -1,3 +1,29 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ #ifndef CELL_STATE_H @@ -22,82 +48,6 @@ #define CELL_NEW_VERTEX_INFO 0x8000 - -extern void -cell_set_framebuffer_state( struct pipe_context *, - const struct pipe_framebuffer_state * ); - - - -extern void * -cell_create_blend_state(struct pipe_context *, const struct pipe_blend_state *); -extern void cell_bind_blend_state(struct pipe_context *, void *); -extern void cell_delete_blend_state(struct pipe_context *, void *); - -extern void cell_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ); - - -void * -cell_create_sampler_state(struct pipe_context *, - const struct pipe_sampler_state *); - -extern void -cell_bind_sampler_state(struct pipe_context *, unsigned, void *); - -extern void -cell_delete_sampler_state(struct pipe_context *, void *); - - -extern void * -cell_create_depth_stencil_alpha_state(struct pipe_context *, - const struct pipe_depth_stencil_alpha_state *); - -extern void -cell_bind_depth_stencil_alpha_state(struct pipe_context *, void *); - -extern void -cell_delete_depth_stencil_alpha_state(struct pipe_context *, void *); - - -void *cell_create_fs_state(struct pipe_context *, - const struct pipe_shader_state *); -void cell_bind_fs_state(struct pipe_context *, void *); -void cell_delete_fs_state(struct pipe_context *, void *); -void *cell_create_vs_state(struct pipe_context *, - const struct pipe_shader_state *); -void cell_bind_vs_state(struct pipe_context *, void *); -void cell_delete_vs_state(struct pipe_context *, void *); - - -void * -cell_create_rasterizer_state(struct pipe_context *, - const struct pipe_rasterizer_state *); -void cell_bind_rasterizer_state(struct pipe_context *, void *); -void cell_delete_rasterizer_state(struct pipe_context *, void *); - - -void cell_set_clip_state( struct pipe_context *, - const struct pipe_clip_state * ); - -void cell_set_constant_buffer(struct pipe_context *pipe, - uint shader, uint index, - const struct pipe_constant_buffer *buf); - -void cell_set_polygon_stipple( struct pipe_context *, - const struct pipe_poly_stipple * ); - -void -cell_set_sampler_texture(struct pipe_context *pipe, - unsigned sampler, - struct pipe_texture *texture); - -void cell_set_scissor_state( struct pipe_context *, - const struct pipe_scissor_state * ); - -void cell_set_texture_state( struct pipe_context *, - unsigned unit, struct pipe_texture * ); - void cell_set_vertex_element(struct pipe_context *, unsigned index, const struct pipe_vertex_element *); @@ -106,10 +56,11 @@ void cell_set_vertex_buffer(struct pipe_context *, unsigned index, const struct pipe_vertex_buffer *); -void cell_set_viewport_state( struct pipe_context *, - const struct pipe_viewport_state * ); +void cell_update_derived( struct cell_context *softpipe ); -void cell_update_derived( struct cell_context *softpipe ); +void +cell_init_shader_functions(struct cell_context *cell); + +#endif /* CELL_STATE_H */ -#endif diff --git a/src/gallium/drivers/cell/ppu/cell_state_blend.c b/src/gallium/drivers/cell/ppu/cell_state_blend.c deleted file mode 100644 index b6d6d71f0c..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_blend.c +++ /dev/null @@ -1,109 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "pipe/p_util.h" -#include "draw/draw_context.h" -#include "cell_context.h" -#include "cell_state.h" - - - -void * -cell_create_blend_state(struct pipe_context *pipe, - const struct pipe_blend_state *blend) -{ - return mem_dup(blend, sizeof(*blend)); -} - - -void -cell_bind_blend_state(struct pipe_context *pipe, void *blend) -{ - struct cell_context *cell = cell_context(pipe); - - draw_flush(cell->draw); - - cell->blend = (const struct pipe_blend_state *)blend; - - cell->dirty |= CELL_NEW_BLEND; -} - - -void -cell_delete_blend_state(struct pipe_context *pipe, void *blend) -{ - FREE(blend); -} - - -void -cell_set_blend_color(struct pipe_context *pipe, - const struct pipe_blend_color *blend_color) -{ - struct cell_context *cell = cell_context(pipe); - - draw_flush(cell->draw); - - cell->blend_color = *blend_color; - - cell->dirty |= CELL_NEW_BLEND; -} - - - - -void * -cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *depth_stencil) -{ - return mem_dup(depth_stencil, sizeof(*depth_stencil)); -} - - -void -cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, - void *depth_stencil) -{ - struct cell_context *cell = cell_context(pipe); - - draw_flush(cell->draw); - - cell->depth_stencil - = (const struct pipe_depth_stencil_alpha_state *) depth_stencil; - - cell->dirty |= CELL_NEW_DEPTH_STENCIL; -} - - -void -cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth) -{ - FREE(depth); -} diff --git a/src/gallium/drivers/cell/ppu/cell_state_clip.c b/src/gallium/drivers/cell/ppu/cell_state_clip.c deleted file mode 100644 index 0482f87e88..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_clip.c +++ /dev/null @@ -1,84 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "cell_context.h" -#include "cell_state.h" -#include "draw/draw_context.h" - - -void cell_set_clip_state( struct pipe_context *pipe, - const struct pipe_clip_state *clip ) -{ - struct cell_context *cell = cell_context(pipe); - - /* pass the clip state to the draw module */ - draw_set_clip_state(cell->draw, clip); -} - - - -/* Called when driver state tracker notices changes to the viewport - * matrix: - */ -void cell_set_viewport_state( struct pipe_context *pipe, - const struct pipe_viewport_state *viewport ) -{ - struct cell_context *cell = cell_context(pipe); - - cell->viewport = *viewport; /* struct copy */ - cell->dirty |= CELL_NEW_VIEWPORT; - - /* pass the viewport info to the draw module */ - draw_set_viewport_state(cell->draw, viewport); - - /* Using tnl/ and vf/ modules is temporary while getting started. - * Full pipe will have vertex shader, vertex fetch of its own. - */ -} - - -void cell_set_scissor_state( struct pipe_context *pipe, - const struct pipe_scissor_state *scissor ) -{ - struct cell_context *cell = cell_context(pipe); - - memcpy( &cell->scissor, scissor, sizeof(*scissor) ); - cell->dirty |= CELL_NEW_SCISSOR; -} - - -void cell_set_polygon_stipple( struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple ) -{ - struct cell_context *cell = cell_context(pipe); - - memcpy( &cell->poly_stipple, stipple, sizeof(*stipple) ); - cell->dirty |= CELL_NEW_STIPPLE; -} diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 5d2a786449..49c0d130c5 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -31,6 +31,8 @@ #include "cell_state_emit.h" #include "cell_batch.h" #include "cell_texture.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" static void @@ -100,4 +102,20 @@ cell_emit_state(struct cell_context *cell) emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO, &cell->vertex_info, sizeof(struct vertex_info)); } + + if (cell->dirty & CELL_NEW_VS) { + const struct draw_context *const draw = cell->draw; + struct cell_shader_info info; + + info.num_outputs = draw->num_vs_outputs; + info.declarations = (uintptr_t) draw->machine.Declarations; + info.num_declarations = draw->machine.NumDeclarations; + info.instructions = (uintptr_t) draw->machine.Instructions; + info.num_instructions = draw->machine.NumInstructions; + info.immediates = (uintptr_t) draw->machine.Imms; + info.num_immediates = draw->machine.ImmLimit / 4; + + emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, + & info, sizeof(info)); + } } diff --git a/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c b/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c deleted file mode 100644 index 7eca5b5765..0000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c +++ /dev/null @@ -1,106 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_defines.h" -#include "pipe/p_util.h" -#include "draw/draw_context.h" -#include "cell_context.h" -#include "cell_state.h" - - - -struct spu_rasterizer_state -{ - unsigned flatshade:1; -#if 0 - unsigned light_twoside:1; - unsigned front_winding:2; /**< PIPE_WINDING_x */ - unsigned cull_mode:2; /**< PIPE_WINDING_x */ - unsigned fill_cw:2; /**< PIPE_POLYGON_MODE_x */ - unsigned fill_ccw:2; /**< PIPE_POLYGON_MODE_x */ - unsigned offset_cw:1; - unsigned offset_ccw:1; -#endif - unsigned scissor:1; - unsigned poly_smooth:1; - unsigned poly_stipple_enable:1; - unsigned point_smooth:1; -#if 0 - unsigned point_sprite:1; - unsigned point_size_per_vertex:1; /**< size computed in vertex shader */ -#endif - unsigned multisample:1; /* XXX maybe more ms state in future */ - unsigned line_smooth:1; - unsigned line_stipple_enable:1; - unsigned line_stipple_factor:8; /**< [1..256] actually */ - unsigned line_stipple_pattern:16; -#if 0 - unsigned bypass_clipping:1; -#endif - unsigned origin_lower_left:1; /**< Is (0,0) the lower-left corner? */ - - float line_width; - float point_size; /**< used when no per-vertex size */ -#if 0 - float offset_units; - float offset_scale; - ubyte sprite_coord_mode[PIPE_MAX_SHADER_OUTPUTS]; /**< PIPE_SPRITE_COORD_ */ -#endif -}; - - - -void * -cell_create_rasterizer_state(struct pipe_context *pipe, - const struct pipe_rasterizer_state *setup) -{ - struct pipe_rasterizer_state *state - = MALLOC(sizeof(struct pipe_rasterizer_state)); - memcpy(state, setup, sizeof(struct pipe_rasterizer_state)); - return state; -} - - -void -cell_bind_rasterizer_state(struct pipe_context *pipe, void *setup) -{ - struct cell_context *cell = cell_context(pipe); - - /* pass-through to draw module */ - draw_set_rasterizer_state(cell->draw, setup); - - cell->rasterizer = (struct pipe_rasterizer_state *)setup; - - cell->dirty |= CELL_NEW_RASTERIZER; -} - - -void -cell_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer) -{ - FREE(rasterizer); -} diff --git a/src/gallium/drivers/cell/ppu/cell_state_fs.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index b2ed699a5b..935501441b 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_fs.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -32,7 +32,7 @@ #include "draw/draw_context.h" #if 0 #include "pipe/p_shader_tokens.h" -#include "llvm/gallivm.h" +#include "gallivm/gallivm.h" #include "tgsi/util/tgsi_dump.h" #include "tgsi/exec/tgsi_sse2.h" #endif @@ -41,7 +41,7 @@ #include "cell_state.h" -void * +static void * cell_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { @@ -80,7 +80,7 @@ cell_create_fs_state(struct pipe_context *pipe, } -void +static void cell_bind_fs_state(struct pipe_context *pipe, void *fs) { struct cell_context *cell = cell_context(pipe); @@ -91,7 +91,7 @@ cell_bind_fs_state(struct pipe_context *pipe, void *fs) } -void +static void cell_delete_fs_state(struct pipe_context *pipe, void *fs) { struct cell_fragment_shader_state *state = @@ -101,7 +101,7 @@ cell_delete_fs_state(struct pipe_context *pipe, void *fs) } -void * +static void * cell_create_vs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { @@ -124,7 +124,7 @@ cell_create_vs_state(struct pipe_context *pipe, } -void +static void cell_bind_vs_state(struct pipe_context *pipe, void *vs) { struct cell_context *cell = cell_context(pipe); @@ -137,7 +137,7 @@ cell_bind_vs_state(struct pipe_context *pipe, void *vs) } -void +static void cell_delete_vs_state(struct pipe_context *pipe, void *vs) { struct cell_context *cell = cell_context(pipe); @@ -150,7 +150,7 @@ cell_delete_vs_state(struct pipe_context *pipe, void *vs) } -void +static void cell_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, const struct pipe_constant_buffer *buf) @@ -169,3 +169,18 @@ cell_set_constant_buffer(struct pipe_context *pipe, cell->dirty |= CELL_NEW_CONSTANTS; } + + +void +cell_init_shader_functions(struct cell_context *cell) +{ + cell->pipe.create_fs_state = cell_create_fs_state; + cell->pipe.bind_fs_state = cell_bind_fs_state; + cell->pipe.delete_fs_state = cell_delete_fs_state; + + cell->pipe.create_vs_state = cell_create_vs_state; + cell->pipe.bind_vs_state = cell_bind_vs_state; + cell->pipe.delete_vs_state = cell_delete_vs_state; + + cell->pipe.set_constant_buffer = cell_set_constant_buffer; +} diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index c8ef36002f..e235421107 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -79,20 +79,24 @@ cell_texture_layout(struct cell_texture * spt) } -struct pipe_texture * -cell_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat) +static struct pipe_texture * +cell_texture_create_screen(struct pipe_screen *screen, + const struct pipe_texture *templat) { + struct pipe_winsys *ws = screen->winsys; struct cell_texture *spt = CALLOC_STRUCT(cell_texture); if (!spt) return NULL; spt->base = *templat; + spt->base.refcount = 1; + spt->base.screen = screen; cell_texture_layout(spt); - spt->buffer = pipe->winsys->buffer_create(pipe->winsys, 32, - PIPE_BUFFER_USAGE_PIXEL, - spt->buffer_size); + spt->buffer = ws->buffer_create(ws, 32, + PIPE_BUFFER_USAGE_PIXEL, + spt->buffer_size); if (!spt->buffer) { FREE(spt); @@ -103,8 +107,9 @@ cell_texture_create(struct pipe_context *pipe, const struct pipe_texture *templa } -void -cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) +static void +cell_texture_release_screen(struct pipe_screen *screen, + struct pipe_texture **pt) { if (!*pt) return; @@ -120,7 +125,7 @@ cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); */ - pipe_buffer_reference(pipe->winsys, &spt->buffer, NULL); + pipe_buffer_reference(screen->winsys, &spt->buffer, NULL); FREE(spt); } @@ -128,22 +133,28 @@ cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) } -/** - * Called via pipe->get_tex_surface() - */ -struct pipe_surface * -cell_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) +static void +cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) +{ + /* XXX TO DO: re-tile the texture data ... */ + +} + + +static struct pipe_surface * +cell_get_tex_surface_screen(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) { + struct pipe_winsys *ws = screen->winsys; struct cell_texture *spt = cell_texture(pt); struct pipe_surface *ps; - ps = pipe->winsys->surface_alloc(pipe->winsys); + ps = ws->surface_alloc(ws); if (ps) { assert(ps->refcount); assert(ps->winsys); - pipe_buffer_reference(pipe->winsys, &ps->buffer, spt->buffer); + pipe_buffer_reference(ws, &ps->buffer, spt->buffer); ps->format = pt->format; ps->cpp = pt->cpp; ps->width = pt->width[level]; @@ -198,6 +209,7 @@ static void cell_tile_texture(struct cell_context *cell, struct cell_texture *texture) { + struct pipe_screen *screen = cell->pipe.screen; uint face = 0, level = 0, zslice = 0; struct pipe_surface *surf; const uint w = texture->base.width[0], h = texture->base.height[0]; @@ -209,7 +221,7 @@ cell_tile_texture(struct cell_context *cell, assert(w % TILE_SIZE == 0); assert(h % TILE_SIZE == 0); - surf = cell_get_tex_surface(&cell->pipe, &texture->base, face, level, zslice); + surf = screen->get_tex_surface(screen, &texture->base, face, level, zslice); ASSERT(surf); src = (const uint *) pipe_surface_map(surf); @@ -250,3 +262,18 @@ cell_update_texture_mapping(struct cell_context *cell) cell->tex_map = pipe_surface_map(cell->tex_surf); #endif } + + +void +cell_init_texture_functions(struct cell_context *cell) +{ + cell->pipe.texture_update = cell_texture_update; +} + +void +cell_init_screen_texture_funcs(struct pipe_screen *screen) +{ + screen->texture_create = cell_texture_create_screen; + screen->texture_release = cell_texture_release_screen; + screen->get_tex_surface = cell_get_tex_surface_screen; +} diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h index 0264fed88e..fcee069d05 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.h +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -29,7 +29,7 @@ #define CELL_TEXTURE_H -struct pipe_context; +struct cell_context; struct pipe_texture; @@ -60,21 +60,16 @@ cell_texture(struct pipe_texture *pt) -extern struct pipe_texture * -cell_texture_create(struct pipe_context *pipe, - const struct pipe_texture *templat); - extern void -cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); +cell_update_texture_mapping(struct cell_context *cell); -extern struct pipe_surface * -cell_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice); + +extern void +cell_init_texture_functions(struct cell_context *cell); extern void -cell_update_texture_mapping(struct cell_context *cell); +cell_init_screen_texture_funcs(struct pipe_screen *screen); -#endif /* CELL_TEXTURE */ +#endif /* CELL_TEXTURE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index f10689a959..9cf74bab47 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -31,7 +31,7 @@ #include "../auxiliary/draw/draw_private.h" #include "cell_context.h" -#include "ppc/rtasm/spe_asm.h" +#include "rtasm/rtasm_ppc_spe.h" typedef uint64_t register_mask; diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index 6a1d3bc20a..f5c27852c1 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -35,6 +35,7 @@ #include "cell_context.h" #include "cell_draw_arrays.h" +#include "cell_flush.h" #include "cell_spu.h" #include "cell_batch.h" @@ -100,17 +101,17 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) (void) memcpy(&batch[1], &draw->viewport, sizeof(struct pipe_viewport_state)); + { + uint64_t uniforms = (uintptr_t) draw->user.constants; + + batch = cell_batch_alloc(cell, 2 *sizeof(batch[0])); + batch[0] = CELL_CMD_STATE_UNIFORMS; + batch[1] = uniforms; + } + cell_batch_flush(cell); vs->opcode = CELL_CMD_VS_EXECUTE; - vs->shader.num_outputs = draw->num_vs_outputs; - vs->shader.declarations = (uintptr_t) draw->machine.Declarations; - vs->shader.num_declarations = draw->machine.NumDeclarations; - vs->shader.instructions = (uintptr_t) draw->machine.Instructions; - vs->shader.num_instructions = draw->machine.NumInstructions; - vs->shader.uniforms = (uintptr_t) draw->user.constants; - vs->shader.immediates = (uintptr_t) draw->machine.Imms; - vs->shader.num_immediates = draw->machine.ImmLimit / 4; vs->nr_attrs = draw->vertex_fetch.nr_attrs; (void) memcpy(vs->plane, draw->plane, sizeof(draw->plane)); @@ -121,12 +122,12 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) for (j = 0; j < n; j++) { vs->elts[j] = draw->vs.queue[i + j].elt; - vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].dest; + vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex; } for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) { vs->elts[j] = vs->elts[0]; - vs->vOut[j] = vs->vOut[0]; + vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex; } vs->num_elts = n; @@ -135,5 +136,6 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) cell_flush_int(& cell->pipe, PIPE_FLUSH_WAIT); } + draw->vs.post_nr = draw->vs.queue_nr; draw->vs.queue_nr = 0; } diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index 30ef2450ec..c071de1900 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -18,6 +18,7 @@ PROG_SPU_EMBED_O = $(PROG)_spu-embed.o SOURCES = \ spu_main.c \ spu_blend.c \ + spu_dcache.c \ spu_render.c \ spu_texture.c \ spu_tile.c \ diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c new file mode 100644 index 0000000000..a1701d80d1 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_dcache.c @@ -0,0 +1,125 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "cell/common.h" +#include "spu_main.h" +#include "spu_dcache.h" + +#define CACHELINE_LOG2SIZE 7 +#define LINE_SIZE (1U << 7) +#define ALIGN_MASK (~(LINE_SIZE - 1)) + +#define CACHE_NAME data +#define CACHED_TYPE qword +#define CACHE_TYPE CACHE_TYPE_RO +#define CACHE_SET_TAGID(set) (((set) & 0x03) + TAG_DCACHE0) +#define CACHE_LOG2NNWAY 2 +#define CACHE_LOG2NSETS 6 +#include <cache-api.h> + +/* Yes folks, this is ugly. + */ +#undef CACHE_NWAY +#undef CACHE_NSETS +#define CACHE_NAME data +#define CACHE_NWAY 4 +#define CACHE_NSETS (1U << 6) + + +/** + * Fetch between arbitrary number of bytes from an unaligned address + * + * \param dst Destination data buffer + * \param ea Main memory effective address of source data + * \param size Number of bytes to read + * + * \warning + * As is hinted by the type of the \c dst pointer, this function writes + * multiples of 16-bytes. + */ +void +spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size) +{ + const int shift = ea & 0x0f; + const unsigned read_size = ROUNDUP16(size + shift); + const unsigned last_read = ROUNDUP16(ea + size); + const qword *const last_write = dst + (ROUNDUP16(size) / 16); + unsigned i; + + + if (shift == 0) { + /* Data is already aligned. Fetch directly into the destination buffer. + */ + for (i = 0; i < size; i += 16) { + *(dst++) = cache_rd(data, ea + i); + } + } else { + qword hi; + + + /* Please exercise extreme caution when modifying this code. This code + * must not read past the end of the page containing the source data, + * and it must not write more than ((size + 15) / 16) qwords to the + * destination buffer. + */ + ea &= ~0x0f; + hi = cache_rd(data, ea); + for (i = 16; i < read_size; i += 16) { + qword lo = cache_rd(data, ea + i); + + *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), + (qword) spu_rlmaskqwbyte(lo, shift - 16)); + hi = lo; + } + + if (dst != last_write) { + *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), si_il(0)); + } + } + + ASSERT((ea + i) == last_read); + ASSERT(dst == last_write); +} + + +/** + * Notify the cache that a range of main memory may have been modified + */ +void +spu_dcache_mark_dirty(unsigned ea, unsigned size) +{ + unsigned i; + const unsigned aligned_start = (ea & ALIGN_MASK); + const unsigned aligned_end = (ea + size + (LINE_SIZE - 1)) + & ALIGN_MASK; + + + for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) { + const unsigned entry = __cache_dir[i]; + const unsigned addr = entry & ~0x0f; + + __cache_dir[i] = ((addr >= aligned_start) && (addr < aligned_end)) + ? (entry & ~CACHELINE_VALID) : entry; + } +} diff --git a/src/gallium/drivers/cell/spu/spu_dcache.h b/src/gallium/drivers/cell/spu/spu_dcache.h new file mode 100644 index 0000000000..7a06b8c25a --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_dcache.h @@ -0,0 +1,34 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SPU_DCACHE_H +#define SPU_DCACHE_H + +extern void +spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size); + +extern void +spu_dcache_mark_dirty(unsigned ea, unsigned size); + +#endif /* SPU_DCACHE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 109540b1f7..1560c0f157 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -50,8 +50,6 @@ * Brian Paul */ -#include <libmisc.h> -#include <spu_mfcio.h> #include <transpose_matrix4x4.h> #include <simdmath/ceilf4.h> #include <simdmath/cosf4.h> @@ -72,6 +70,8 @@ #include "spu_exec.h" #include "spu_main.h" #include "spu_vertex_shader.h" +#include "spu_dcache.h" +#include "cell/common.h" #define TILE_TOP_LEFT 0 #define TILE_TOP_RIGHT 1 @@ -146,17 +146,14 @@ spu_exec_machine_init(struct spu_exec_machine *mach, struct spu_sampler *samplers, unsigned processor) { - qword zero; - qword not_zero; - uint i; + const qword zero = si_il(0); + const qword not_zero = si_il(~0); + (void) numSamplers; mach->Samplers = samplers; mach->Processor = processor; mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; - zero = si_xor(zero, zero); - not_zero = si_xori(zero, 0xff); - /* Setup constants. */ mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero; mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero; @@ -356,19 +353,17 @@ fetch_src_file_channel( case TGSI_EXTSWIZZLE_W: switch( file ) { case TGSI_FILE_CONSTANT: { - unsigned char buffer[32] ALIGN16_ATTRIB; unsigned i; for (i = 0; i < 4; i++) { const float *ptr = mach->Consts[index->i[i]]; - const uint64_t addr = (uint64_t)(uintptr_t) ptr; - const unsigned size = ((addr & 0x0f) == 0) ? 16 : 32; + float tmp[4]; - mfc_get(buffer, addr & ~0x0f, size, TAG_VERTEX_BUFFER, 0, 0); - wait_on_mask(1 << TAG_VERTEX_BUFFER); + spu_dcache_fetch_unaligned((qword *) tmp, + (uintptr_t)(ptr + swizzle), + sizeof(float)); - (void) memcpy(& chan->f[i], &buffer[(addr & 0x0f) - + (sizeof(float) * swizzle)], sizeof(float)); + chan->f[i] = tmp[0]; } break; } @@ -663,9 +658,10 @@ fetch_texel( struct spu_sampler *sampler, qword rgba[4]; qword out[4]; - sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, (float *) rgba); + sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, + (float (*)[4]) rgba); - _transpose_matrix4x4(out, rgba); + _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba); r->q = out[0]; g->q = out[1]; b->q = out[2]; @@ -1903,32 +1899,28 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute declarations (interpolants) */ if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { for (i = 0; i < mach->NumDeclarations; i++) { - uint8_t buffer[sizeof(struct tgsi_full_declaration) + 32] ALIGN16_ATTRIB; - struct tgsi_full_declaration decl; - unsigned long decl_addr = (unsigned long) (mach->Declarations+i); - unsigned size = ((sizeof(decl) + (decl_addr & 0x0f) + 0x0f) & ~0x0f); + union { + struct tgsi_full_declaration decl; + qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; + } d ALIGN16_ATTRIB; + unsigned ea = (unsigned) (mach->Declarations + pc); - mfc_get(buffer, decl_addr & ~0x0f, size, TAG_INSTRUCTION_FETCH, 0, 0); - wait_on_mask(1 << TAG_INSTRUCTION_FETCH); + spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); - memcpy(& decl, buffer + (decl_addr & 0x0f), sizeof(decl)); - exec_declaration( mach, &decl ); + exec_declaration( mach, &d.decl ); } } /* execute instructions, until pc is set to -1 */ while (pc != -1) { - uint8_t buffer[sizeof(struct tgsi_full_instruction) + 32] ALIGN16_ATTRIB; - struct tgsi_full_instruction inst; - unsigned long inst_addr = (unsigned long) (mach->Instructions + pc); - unsigned size = ((sizeof(inst) + (inst_addr & 0x0f) + 0x0f) & ~0x0f); - - assert(pc < mach->NumInstructions); - mfc_get(buffer, inst_addr & ~0x0f, size, TAG_INSTRUCTION_FETCH, 0, 0); - wait_on_mask(1 << TAG_INSTRUCTION_FETCH); - - memcpy(& inst, buffer + (inst_addr & 0x0f), sizeof(inst)); - exec_instruction( mach, & inst, &pc ); + union { + struct tgsi_full_instruction inst; + qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; + } i ALIGN16_ATTRIB; + unsigned ea = (unsigned) (mach->Instructions + pc); + + spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst)); + exec_instruction( mach, & i.inst, &pc ); } #if 0 diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index fcbf0f841e..59300028d4 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -38,6 +38,7 @@ #include "spu_tile.h" //#include "spu_test.h" #include "spu_vertex_shader.h" +#include "spu_dcache.h" #include "cell/common.h" #include "pipe/p_defines.h" @@ -285,6 +286,8 @@ cmd_state_texture(const struct cell_command_texture *texture) { spu.texture.width, spu.texture.height, 0.0, 0.0}; spu.tex_size_mask = (vector unsigned int) { spu.texture.width - 1, spu.texture.height - 1, 0, 0 }; + spu.tex_size_x_mask = spu_splats(spu.texture.width - 1); + spu.tex_size_y_mask = spu_splats(spu.texture.height - 1); } @@ -433,10 +436,19 @@ cmd_batch(uint opcode) sizeof(struct pipe_viewport_state)); pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8); break; + case CELL_CMD_STATE_UNIFORMS: + draw.constants = (const float (*)[4]) (uintptr_t) buffer[pos + 1]; + pos += 2; + break; case CELL_CMD_STATE_VS_ARRAY_INFO: cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); break; + case CELL_CMD_STATE_BIND_VS: + spu_bind_vertex_shader(&draw, + (struct cell_shader_info *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8); + break; case CELL_CMD_STATE_ATTRIB_FETCH: { struct cell_attribute_fetch_code *code = (struct cell_attribute_fetch_code *) &buffer[pos+1]; @@ -453,6 +465,14 @@ cmd_batch(uint opcode) pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8); break; } + case CELL_CMD_FLUSH_BUFFER_RANGE: { + struct cell_buffer_range *br = (struct cell_buffer_range *) + &buffer[pos+1]; + + spu_dcache_mark_dirty((unsigned) br->base, br->size); + pos += (1 + ROUNDUP8(sizeof(struct cell_buffer_range)) / 8); + break; + } default: printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]); ASSERT(0); @@ -566,7 +586,7 @@ main(main_param_t speid, main_param_t argp) one_time_init(); if (Debug) - printf("SPU: main() speid=%lu\n", speid); + printf("SPU: main() speid=%lu\n", (unsigned long) speid); mfc_get(&spu.init, /* dest */ (unsigned int) argp, /* src */ diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 5c95d112ac..a13edd1702 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -107,6 +107,8 @@ struct spu_global vector float tex_size; vector unsigned int tex_size_mask; /**< == int(size - 1) */ + vector unsigned int tex_size_x_mask; /**< == int(size - 1) */ + vector unsigned int tex_size_y_mask; /**< == int(size - 1) */ vector float (*sample_texture)(vector float texcoord); @@ -130,8 +132,10 @@ extern boolean Debug; #define TAG_INDEX_BUFFER 16 #define TAG_BATCH_BUFFER 17 #define TAG_MISC 18 -#define TAG_TEXTURE_TILE 19 -#define TAG_INSTRUCTION_FETCH 20 +#define TAG_DCACHE0 20 +#define TAG_DCACHE1 21 +#define TAG_DCACHE2 22 +#define TAG_DCACHE3 23 diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 3962aaa4a9..67eb08196a 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -31,19 +31,7 @@ #include "spu_texture.h" #include "spu_tile.h" #include "spu_colorpack.h" - - -/** - * Number of texture tiles to cache. - * Note that this will probably be the largest consumer of SPU local store/ - * memory for this driver! - */ -#define CACHE_SIZE 16 - -static tile_t tex_tiles[CACHE_SIZE] ALIGN16_ATTRIB; - -static vector unsigned int tex_tile_xy[CACHE_SIZE]; - +#include "spu_dcache.h" /** @@ -52,78 +40,60 @@ static vector unsigned int tex_tile_xy[CACHE_SIZE]; void invalidate_tex_cache(void) { - /* XXX memset? */ - uint i; - for (i = 0; i < CACHE_SIZE; i++) { - tex_tile_xy[i] = ((vector unsigned int) { ~0U, ~0U, ~0U, ~0U }); - } + spu_dcache_mark_dirty((unsigned) spu.texture.start, + 4 * spu.texture.width * spu.texture.height); } -/** - * Return the cache pos/index which corresponds to tile (tx,ty) - */ -static INLINE uint -cache_pos(vector unsigned int txty) +static uint +get_texel(vec_uint4 coordinate) { - uint pos = (spu_extract(txty,0) + spu_extract(txty,1) * 4) % CACHE_SIZE; - return pos; + vec_uint4 tmp; + unsigned x = spu_extract(coordinate, 0); + unsigned y = spu_extract(coordinate, 1); + const unsigned tiles_per_row = spu.texture.width / TILE_SIZE; + unsigned tile_offset = sizeof(tile_t) * ((y / TILE_SIZE * tiles_per_row) + + (x / TILE_SIZE)); + unsigned texel_offset = 4 * (((y % TILE_SIZE) * TILE_SIZE) + + (x % TILE_SIZE)); + + spu_dcache_fetch_unaligned((qword *) & tmp, + spu.texture.start + tile_offset + texel_offset, + 4); + return spu_extract(tmp, 0); } -/** - * Make sure the tile for texel (i,j) is present, return its position/index - * in the cache. - */ -static uint -get_tex_tile(vector unsigned int ij) +static void +get_four_texels(vec_uint4 x, vec_uint4 y, vec_uint4 *texels) { - /* tile address: tx,ty */ - const vector unsigned int txty = spu_rlmask(ij, -5); /* divide by 32 */ - const uint pos = cache_pos(txty); - - if ((spu_extract(tex_tile_xy[pos], 0) != spu_extract(txty, 0)) || - (spu_extract(tex_tile_xy[pos], 1) != spu_extract(txty, 1))) { - - /* texture cache miss, fetch tile from main memory */ - const uint tiles_per_row = spu.texture.width / TILE_SIZE; - const uint bytes_per_tile = sizeof(tile_t); - const void *src = (const ubyte *) spu.texture.start - + (spu_extract(txty,1) * tiles_per_row + spu_extract(txty,0)) * bytes_per_tile; - - printf("SPU %u: tex cache miss at %d, %d pos=%u old=%d,%d\n", - spu.init.id, - spu_extract(txty,0), - spu_extract(txty,1), - pos, - spu_extract(tex_tile_xy[pos],0), - spu_extract(tex_tile_xy[pos],1)); - - ASSERT_ALIGN16(tex_tiles[pos].ui); - ASSERT_ALIGN16(src); - - mfc_get(tex_tiles[pos].ui, /* dest */ - (unsigned int) src, - bytes_per_tile, /* size */ - TAG_TEXTURE_TILE, - 0, /* tid */ - 0 /* rid */); - - wait_on_mask(1 << TAG_TEXTURE_TILE); - - tex_tile_xy[pos] = txty; - } - else { -#if 0 - printf("SPU %u: tex cache HIT at %d, %d\n", - spu.init.id, tx, ty); -#endif - } - - return pos; + const unsigned texture_ea = (uintptr_t) spu.texture.start; + vec_uint4 tile_x = spu_rlmask(x, -5); + vec_uint4 tile_y = spu_rlmask(y, -5); + const qword offset_x = si_andi((qword) x, 0x1f); + const qword offset_y = si_andi((qword) y, 0x1f); + + const qword tiles_per_row = (qword) spu_splats(spu.texture.width / TILE_SIZE); + const qword tile_size = (qword) spu_splats(sizeof(tile_t)); + + qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x); + tile_offset = si_mpy((qword) tile_offset, tile_size); + + qword texel_offset = si_a(si_mpyui(offset_y, 32), offset_x); + texel_offset = si_mpyui(texel_offset, 4); + + vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset); + + spu_dcache_fetch_unaligned((qword *) & texels[0], + texture_ea + spu_extract(offset, 0), 4); + spu_dcache_fetch_unaligned((qword *) & texels[1], + texture_ea + spu_extract(offset, 1), 4); + spu_dcache_fetch_unaligned((qword *) & texels[2], + texture_ea + spu_extract(offset, 2), 4); + spu_dcache_fetch_unaligned((qword *) & texels[3], + texture_ea + spu_extract(offset, 3), 4); } - /** * Get texture sample at texcoord. * XXX this is extremely primitive for now. @@ -134,9 +104,7 @@ sample_texture_nearest(vector float texcoord) vector float tc = spu_mul(texcoord, spu.tex_size); vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */ itc = spu_and(itc, spu.tex_size_mask); /* mask (GL_REPEAT) */ - vector unsigned int ij = spu_and(itc, TILE_SIZE-1); /* intra tile addr */ - uint pos = get_tex_tile(itc); - uint texel = tex_tiles[pos].ui[spu_extract(ij, 1)][spu_extract(ij, 0)]; + uint texel = get_texel(itc); return spu_unpack_A8R8G8B8(texel); } @@ -144,49 +112,33 @@ sample_texture_nearest(vector float texcoord) vector float sample_texture_bilinear(vector float texcoord) { - static const vector unsigned int offset10 = {1, 0, 0, 0}; - static const vector unsigned int offset01 = {0, 1, 0, 0}; + static const vec_uint4 offset_x = {0, 0, 1, 1}; + static const vec_uint4 offset_y = {0, 1, 0, 1}; vector float tc = spu_mul(texcoord, spu.tex_size); tc = spu_add(tc, spu_splats(-0.5f)); /* half texel bias */ /* integer texcoords S,T: */ - vector unsigned int itc00 = spu_convtu(tc, 0); /* convert to int */ - vector unsigned int itc01 = spu_add(itc00, offset01); - vector unsigned int itc10 = spu_add(itc00, offset10); - vector unsigned int itc11 = spu_add(itc10, offset01); - - /* mask (GL_REPEAT) */ - itc00 = spu_and(itc00, spu.tex_size_mask); - itc01 = spu_and(itc01, spu.tex_size_mask); - itc10 = spu_and(itc10, spu.tex_size_mask); - itc11 = spu_and(itc11, spu.tex_size_mask); - - /* intra tile addr */ - vector unsigned int ij00 = spu_and(itc00, TILE_SIZE-1); - vector unsigned int ij01 = spu_and(itc01, TILE_SIZE-1); - vector unsigned int ij10 = spu_and(itc10, TILE_SIZE-1); - vector unsigned int ij11 = spu_and(itc11, TILE_SIZE-1); - - /* get tile cache positions */ - uint pos00 = get_tex_tile(itc00); - uint pos01, pos10, pos11; - if ((spu_extract(ij00, 0) < TILE_SIZE-1) && - (spu_extract(ij00, 1) < TILE_SIZE-1)) { - /* all texels are in the same tile */ - pos01 = pos10 = pos11 = pos00; - } - else { - pos01 = get_tex_tile(itc01); - pos10 = get_tex_tile(itc10); - pos11 = get_tex_tile(itc11); - } - - /* get texels from tiles and convert to float[4] */ - vector float texel00 = spu_unpack_A8R8G8B8(tex_tiles[pos00].ui[spu_extract(ij00, 1)][spu_extract(ij00, 0)]); - vector float texel01 = spu_unpack_A8R8G8B8(tex_tiles[pos01].ui[spu_extract(ij01, 1)][spu_extract(ij01, 0)]); - vector float texel10 = spu_unpack_A8R8G8B8(tex_tiles[pos10].ui[spu_extract(ij10, 1)][spu_extract(ij10, 0)]); - vector float texel11 = spu_unpack_A8R8G8B8(tex_tiles[pos11].ui[spu_extract(ij11, 1)][spu_extract(ij11, 0)]); + vec_uint4 itc = spu_convtu(tc, 0); /* convert to int */ + + vec_uint4 texels[4]; + + vec_uint4 x = spu_splats(spu_extract(itc, 0)); + vec_uint4 y = spu_splats(spu_extract(itc, 1)); + + x = spu_add(x, offset_x); + y = spu_add(y, offset_y); + + x = spu_and(x, spu.tex_size_x_mask); + y = spu_and(y, spu.tex_size_y_mask); + + get_four_texels(x, y, texels); + + vector float texel00 = spu_unpack_A8R8G8B8(spu_extract(texels[0], 0)); + vector float texel01 = spu_unpack_A8R8G8B8(spu_extract(texels[1], 0)); + vector float texel10 = spu_unpack_A8R8G8B8(spu_extract(texels[2], 0)); + vector float texel11 = spu_unpack_A8R8G8B8(spu_extract(texels[3], 0)); + /* Compute weighting factors in [0,1] * Multiply texcoord by 1024, AND with 1023, convert back to float. diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index 55c6c28717..219fd90cc0 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -32,39 +32,19 @@ * Ian Romanick <idr@us.ibm.com> */ -#include <spu_mfcio.h> - #include "pipe/p_util.h" #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" #include "spu_exec.h" #include "spu_vertex_shader.h" #include "spu_main.h" - -#define CACHE_NAME attribute -#define CACHED_TYPE qword -#define CACHE_TYPE CACHE_TYPE_RO -#define CACHE_SET_TAGID(set) TAG_VERTEX_BUFFER -#define CACHE_LOG2NNWAY 2 -#define CACHE_LOG2NSETS 6 -#include <cache-api.h> - -/* Yes folks, this is ugly. - */ -#undef CACHE_NWAY -#undef CACHE_NSETS -#define CACHE_NAME attribute -#define CACHE_NWAY 4 -#define CACHE_NSETS (1U << 6) - - -#define DRAW_DBG 0 +#include "spu_dcache.h" typedef void (*spu_fetch_func)(qword *out, const qword *in, const qword *shuffle_data); -static const qword fetch_shuffle_data[] = { +static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = { /* Shuffle used by CVT_64_FLOAT */ { @@ -103,44 +83,6 @@ static const qword fetch_shuffle_data[] = { /** - * Fetch between 1 and 32 bytes from an unaligned address - */ -static INLINE void -fetch_unaligned(qword *dst, unsigned ea, unsigned size) -{ - qword tmp[4]; - const int shift = ea & 0x0f; - const unsigned aligned_start_ea = ea & ~0x0f; - const unsigned aligned_end_ea = (ea + size) & ~0x0f; - const unsigned num_entries = ((aligned_end_ea - aligned_start_ea) / 16) + 1; - unsigned i; - - - if (shift == 0) { - /* Data is already aligned. Fetch directly into the destination buffer. - */ - for (i = 0; i < num_entries; i++) { - dst[i] = cache_rd(attribute, (ea & ~0x0f) + (i * 16)); - } - } else { - /* Fetch data from the cache to the local buffer. - */ - for (i = 0; i < num_entries; i++) { - tmp[i] = cache_rd(attribute, (ea & ~0x0f) + (i * 16)); - } - - - /* Fix the alignment of the data and write to the destination buffer. - */ - for (i = 0; i < ((size + 15) / 16); i++) { - dst[i] = si_or((qword) spu_slqwbyte(tmp[i], shift), - (qword) spu_rlmaskqwbyte(tmp[i + 1], shift - 16)); - } - } -} - - -/** * Fetch vertex attributes for 'count' vertices. */ static void generic_vertex_fetch(struct spu_vs_context *draw, @@ -169,7 +111,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, unsigned idx; const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; - qword in[2 * 4]; + qword in[2 * 4] ALIGN16_ATTRIB; /* Fetch four attributes for four vertices. @@ -182,7 +124,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, printf("SPU: fetching = 0x%llx\n", addr); #endif - fetch_unaligned(& in[idx], addr, bytes_per_entry); + spu_dcache_fetch_unaligned(& in[idx], addr, bytes_per_entry); idx += quads_per_entry; } @@ -200,15 +142,5 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, void spu_update_vertex_fetch( struct spu_vs_context *draw ) { - unsigned i; - - - /* Invalidate the vertex cache. - */ - for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) { - CACHELINE_CLEARVALID(i); - } - - draw->vertex_fetch.fetch_func = generic_vertex_fetch; } diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index 3f5bf41aa2..8363efeeb6 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -165,63 +165,55 @@ run_vertex_program(struct spu_vs_context *draw, } -static void -spu_bind_vertex_shader(struct spu_vs_context *draw, - void *uniforms, - void *planes, - unsigned nr_planes, - unsigned num_outputs - ) -{ - draw->constants = (float (*)[4]) uniforms; - - (void) memcpy(draw->plane, planes, sizeof(float) * 4 * nr_planes); - draw->nr_planes = nr_planes; - draw->num_vs_outputs = num_outputs; - - /* specify the shader to interpret/execute */ - spu_exec_machine_init(&draw->machine, - PIPE_MAX_SAMPLERS, - NULL /*samplers*/, - PIPE_SHADER_VERTEX); -} - - unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32] ALIGN16_ATTRIB; + void -spu_execute_vertex_shader(struct spu_vs_context *draw, - const struct cell_command_vs *vs) +spu_bind_vertex_shader(struct spu_vs_context *draw, + struct cell_shader_info *vs) { - unsigned i; - - const uint64_t immediate_addr = vs->shader.immediates; + const unsigned immediate_addr = vs->immediates; const unsigned immediate_size = - ROUNDUP16((sizeof(float) * 4 * vs->shader.num_immediates) - + (immediate_addr & 0x0f)); + ROUNDUP16((sizeof(float) * 4 * vs->num_immediates) + + (immediate_addr & 0x0f)); + mfc_get(immediates, immediate_addr & ~0x0f, immediate_size, TAG_VERTEX_BUFFER, 0, 0); draw->machine.Instructions = (struct tgsi_full_instruction *) - vs->shader.instructions; - draw->machine.NumInstructions = vs->shader.num_instructions; + vs->instructions; + draw->machine.NumInstructions = vs->num_instructions; draw->machine.Declarations = (struct tgsi_full_declaration *) - vs->shader.declarations; - draw->machine.NumDeclarations = vs->shader.num_declarations; + vs->declarations; + draw->machine.NumDeclarations = vs->num_declarations; - draw->vertex_fetch.nr_attrs = vs->nr_attrs; + draw->num_vs_outputs = vs->num_outputs; + + /* specify the shader to interpret/execute */ + spu_exec_machine_init(&draw->machine, + PIPE_MAX_SAMPLERS, + NULL /*samplers*/, + PIPE_SHADER_VERTEX); wait_on_mask(1 << TAG_VERTEX_BUFFER); (void) memcpy(& draw->machine.Imms, &immediates[immediate_addr & 0x0f], - sizeof(float) * 4 * vs->shader.num_immediates); + sizeof(float) * 4 * vs->num_immediates); +} - spu_bind_vertex_shader(draw, vs->shader.uniforms, - vs->plane, vs->nr_planes, - vs->shader.num_outputs); + +void +spu_execute_vertex_shader(struct spu_vs_context *draw, + const struct cell_command_vs *vs) +{ + unsigned i; + + (void) memcpy(draw->plane, vs->plane, sizeof(float) * 4 * vs->nr_planes); + draw->nr_planes = vs->nr_planes; + draw->vertex_fetch.nr_attrs = vs->nr_attrs; for (i = 0; i < vs->num_elts; i += 4) { const unsigned batch_size = MIN2(vs->num_elts - i, 4); diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.h b/src/gallium/drivers/cell/spu/spu_vertex_shader.h index 0fb0bc28d0..54a4b8d9b9 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.h +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.h @@ -1,6 +1,7 @@ #ifndef SPU_VERTEX_SHADER_H #define SPU_VERTEX_SHADER_H +#include "cell/common.h" #include "pipe/p_format.h" #include "spu_exec.h" @@ -55,6 +56,10 @@ static INLINE void spu_vertex_fetch(struct spu_vs_context *draw, struct cell_command_vs; extern void +spu_bind_vertex_shader(struct spu_vs_context *draw, + struct cell_shader_info *vs); + +extern void spu_execute_vertex_shader(struct spu_vs_context *draw, const struct cell_command_vs *vs); diff --git a/src/gallium/drivers/failover/Makefile b/src/gallium/drivers/failover/Makefile index 14389bd055..f08b8df07a 100644 --- a/src/gallium/drivers/failover/Makefile +++ b/src/gallium/drivers/failover/Makefile @@ -1,20 +1,13 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = failover -DRIVER_SOURCES = \ +C_SOURCES = \ fo_state.c \ fo_state_emit.c \ fo_context.c -C_SOURCES = \ - $(COMMON_SOURCES) \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/failover/SConscript b/src/gallium/drivers/failover/SConscript new file mode 100644 index 0000000000..f8e9b1b491 --- /dev/null +++ b/src/gallium/drivers/failover/SConscript @@ -0,0 +1,13 @@ +Import('*') + +env = env.Clone() + +failover = env.ConvenienceLibrary( + target = 'failover', + source = [ + 'fo_state.c', + 'fo_state_emit.c', + 'fo_context.c', + ]) + +Export('failover') diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 7ce4a7df17..afc0d7eb1e 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -117,12 +117,15 @@ struct pipe_context *failover_create( struct pipe_context *hw, failover->hw = hw; failover->sw = sw; failover->pipe.winsys = hw->winsys; + failover->pipe.screen = hw->screen; failover->pipe.destroy = failover_destroy; +#if 0 failover->pipe.is_format_supported = hw->is_format_supported; failover->pipe.get_name = hw->get_name; failover->pipe.get_vendor = hw->get_vendor; failover->pipe.get_param = hw->get_param; failover->pipe.get_paramf = hw->get_paramf; +#endif failover->pipe.draw_arrays = failover_draw_arrays; failover->pipe.draw_elements = failover_draw_elements; @@ -137,15 +140,16 @@ struct pipe_context *failover_create( struct pipe_context *hw, failover_init_state_functions( failover ); -#if 0 - failover->pipe.surface_alloc = hw->surface_alloc; -#endif - failover->pipe.get_tex_surface = hw->get_tex_surface; - failover->pipe.surface_copy = hw->surface_copy; failover->pipe.surface_fill = hw->surface_fill; + +#if 0 failover->pipe.texture_create = hw->texture_create; failover->pipe.texture_release = hw->texture_release; + failover->pipe.get_tex_surface = hw->get_tex_surface; +#endif + failover->pipe.texture_update = hw->texture_update; + failover->pipe.flush = hw->flush; failover->dirty = 0; diff --git a/src/gallium/drivers/i915simple/Makefile b/src/gallium/drivers/i915simple/Makefile index ee22ba86f9..41a61a0020 100644 --- a/src/gallium/drivers/i915simple/Makefile +++ b/src/gallium/drivers/i915simple/Makefile @@ -1,10 +1,9 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = i915simple -DRIVER_SOURCES = \ +C_SOURCES = \ i915_blit.c \ i915_clear.c \ i915_flush.c \ @@ -18,7 +17,7 @@ DRIVER_SOURCES = \ i915_state_derived.c \ i915_state_emit.c \ i915_state_sampler.c \ - i915_strings.c \ + i915_screen.c \ i915_prim_emit.c \ i915_prim_vbuf.c \ i915_texture.c \ @@ -26,12 +25,6 @@ DRIVER_SOURCES = \ i915_fpc_translate.c \ i915_surface.c -C_SOURCES = \ - $(COMMON_SOURCES) \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/i915simple/SConscript b/src/gallium/drivers/i915simple/SConscript index f5fb96b995..2366e1247f 100644 --- a/src/gallium/drivers/i915simple/SConscript +++ b/src/gallium/drivers/i915simple/SConscript @@ -15,13 +15,13 @@ i915simple = env.ConvenienceLibrary( 'i915_fpc_translate.c', 'i915_prim_emit.c', 'i915_prim_vbuf.c', + 'i915_screen.c', 'i915_state.c', 'i915_state_derived.c', 'i915_state_dynamic.c', 'i915_state_emit.c', 'i915_state_immediate.c', 'i915_state_sampler.c', - 'i915_strings.c', 'i915_surface.c', 'i915_texture.c', ]) diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 7f71f8fd4f..15ff2360b7 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -36,120 +36,7 @@ #include "pipe/p_defines.h" #include "pipe/p_winsys.h" #include "pipe/p_util.h" - - -/** - * Query format support for creating a texture, drawing surface, etc. - * \param format the format to test - * \param type one of PIPE_TEXTURE, PIPE_SURFACE - */ -static boolean -i915_is_format_supported( struct pipe_context *pipe, - enum pipe_format format, uint type ) -{ - static const enum pipe_format tex_supported[] = { - PIPE_FORMAT_R8G8B8A8_UNORM, - PIPE_FORMAT_A8R8G8B8_UNORM, - PIPE_FORMAT_R5G6B5_UNORM, - PIPE_FORMAT_U_L8, - PIPE_FORMAT_U_A8, - PIPE_FORMAT_U_I8, - PIPE_FORMAT_U_A8_L8, - PIPE_FORMAT_YCBCR, - PIPE_FORMAT_YCBCR_REV, - PIPE_FORMAT_S8Z24_UNORM, - PIPE_FORMAT_NONE /* list terminator */ - }; - static const enum pipe_format surface_supported[] = { - PIPE_FORMAT_A8R8G8B8_UNORM, - PIPE_FORMAT_R5G6B5_UNORM, - PIPE_FORMAT_S8Z24_UNORM, - /*PIPE_FORMAT_R16G16B16A16_SNORM,*/ - PIPE_FORMAT_NONE /* list terminator */ - }; - const enum pipe_format *list; - uint i; - - switch (type) { - case PIPE_TEXTURE: - list = tex_supported; - break; - case PIPE_SURFACE: - list = surface_supported; - break; - default: - assert(0); - } - - for (i = 0; list[i] != PIPE_FORMAT_NONE; i++) { - if (list[i] == format) - return TRUE; - } - - return FALSE; -} - - -static int -i915_get_param(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 8; - case PIPE_CAP_NPOT_TEXTURES: - return 1; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_S3TC: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; - case PIPE_CAP_POINT_SPRITE: - return 0; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; - case PIPE_CAP_OCCLUSION_QUERY: - return 0; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 11; /* max 1024x1024 */ - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 8; /* max 128x128x128 */ - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 11; /* max 1024x1024 */ - default: - return 0; - } -} - - -static float -i915_get_paramf(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 7.5; - - case PIPE_CAP_MAX_POINT_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 255.0; - - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 4.0; - - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0; - - default: - return 0; - } -} +#include "pipe/p_screen.h" static void i915_destroy( struct pipe_context *pipe ) @@ -162,8 +49,6 @@ static void i915_destroy( struct pipe_context *pipe ) } - - static boolean i915_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, @@ -234,33 +119,11 @@ static boolean i915_draw_arrays( struct pipe_context *pipe, -struct pipe_context *i915_create( struct pipe_winsys *pipe_winsys, - struct i915_winsys *i915_winsys, - unsigned pci_id ) +struct pipe_context *i915_create_context( struct pipe_screen *screen, + struct pipe_winsys *pipe_winsys, + struct i915_winsys *i915_winsys ) { struct i915_context *i915; - unsigned is_i945 = 0; - - switch (pci_id) { - case PCI_CHIP_I915_G: - case PCI_CHIP_I915_GM: - break; - - case PCI_CHIP_I945_G: - case PCI_CHIP_I945_GM: - case PCI_CHIP_I945_GME: - case PCI_CHIP_G33_G: - case PCI_CHIP_Q33_G: - case PCI_CHIP_Q35_G: - is_i945 = 1; - break; - - default: - pipe_winsys->printf(pipe_winsys, - "%s: unknown pci id 0x%x, cannot create context\n", - __FUNCTION__, pci_id); - return NULL; - } i915 = CALLOC_STRUCT(i915_context); if (i915 == NULL) @@ -268,11 +131,9 @@ struct pipe_context *i915_create( struct pipe_winsys *pipe_winsys, i915->winsys = i915_winsys; i915->pipe.winsys = pipe_winsys; + i915->pipe.screen = screen; i915->pipe.destroy = i915_destroy; - i915->pipe.is_format_supported = i915_is_format_supported; - i915->pipe.get_param = i915_get_param; - i915->pipe.get_paramf = i915_get_paramf; i915->pipe.clear = i915_clear; @@ -295,13 +156,10 @@ struct pipe_context *i915_create( struct pipe_winsys *pipe_winsys, i915_init_surface_functions(i915); i915_init_state_functions(i915); i915_init_flush_functions(i915); - i915_init_string_functions(i915); - - i915->pci_id = pci_id; - i915->flags.is_i945 = is_i945; + i915_init_texture_functions(i915); - i915->pipe.texture_create = i915_texture_create; - i915->pipe.texture_release = i915_texture_release; + draw_install_aaline_stage(i915->draw, &i915->pipe); + draw_install_aapoint_stage(i915->draw, &i915->pipe); i915->dirty = ~0; i915->hardware_dirty = ~0; @@ -310,11 +168,6 @@ struct pipe_context *i915_create( struct pipe_winsys *pipe_winsys, */ i915->batch_start = NULL; - /* - * XXX we could plug GL selection/feedback into the drawing pipeline - * by specifying a different setup/render stage. - */ - return &i915->pipe; } diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 2d876925b2..6401112f83 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -35,6 +35,8 @@ #include "draw/draw_vertex.h" +#include "tgsi/util/tgsi_scan.h" + #define I915_TEX_UNITS 8 @@ -79,6 +81,43 @@ #define I915_MAX_CONSTANT 32 +/** See constant_flags[] below */ +#define I915_CONSTFLAG_USER 0x1f + + +/** + * Subclass of pipe_shader_state + */ +struct i915_fragment_shader +{ + struct pipe_shader_state state; + + struct tgsi_shader_info info; + + uint *program; + uint program_len; + + /** + * constants introduced during translation. + * These are placed at the end of the constant buffer and grow toward + * the beginning (eg: slot 31, 30 29, ...) + * User-provided constants start at 0. + * This allows both types of constants to co-exist (until there's too many) + * and doesn't require regenerating/changing the fragment program to + * shuffle constants around. + */ + uint num_constants; + float constants[I915_MAX_CONSTANT][4]; + + /** + * Status of each constant + * if I915_CONSTFLAG_PARAM, the value must be taken from the corresponding + * slot of the user's constant buffer. (set by pipe->set_constant_buffer()) + * Else, the bitmask indicates which components are occupied by immediates. + */ + ubyte constant_flags[I915_MAX_CONSTANT]; +}; + struct i915_cache_context; @@ -93,11 +132,6 @@ struct i915_state float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4]; /** number of constants passed in through a constant buffer */ uint num_user_constants[PIPE_SHADER_TYPES]; - /** user constants, plus extra constants from shader translation */ - uint num_constants[PIPE_SHADER_TYPES]; - - uint *program; - uint program_len; /* texture sampler state */ unsigned sampler[I915_TEX_UNITS][3]; @@ -187,7 +221,8 @@ struct i915_context const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS]; const struct i915_depth_stencil_state *depth_stencil; const struct i915_rasterizer_state *rasterizer; - const struct pipe_shader_state *fs; + + struct i915_fragment_shader *fs; struct pipe_blend_color blend_color; struct pipe_clip_state clip; @@ -210,11 +245,6 @@ struct i915_context unsigned hardware_dirty; unsigned debug; - unsigned pci_id; - - struct { - unsigned is_i945:1; - } flags; }; /* A flag for each state_tracker state object: @@ -233,6 +263,7 @@ struct i915_context #define I915_NEW_TEXTURE 0x800 #define I915_NEW_CONSTANTS 0x1000 #define I915_NEW_VBO 0x2000 +#define I915_NEW_VS 0x4000 /* Driver's internally generated state flags: @@ -289,6 +320,7 @@ void i915_init_string_functions( struct i915_context *i915 ); + /*********************************************************************** * Inline conversion functions. These are better-typed than the * macros used previously: diff --git a/src/gallium/drivers/i915simple/i915_debug.c b/src/gallium/drivers/i915simple/i915_debug.c index 94db44e1aa..78102dbac2 100644 --- a/src/gallium/drivers/i915simple/i915_debug.c +++ b/src/gallium/drivers/i915simple/i915_debug.c @@ -25,8 +25,6 @@ * **************************************************************************/ -//#include "imports.h" - #include "i915_reg.h" #include "i915_context.h" #include "i915_winsys.h" diff --git a/src/gallium/drivers/i915simple/i915_flush.c b/src/gallium/drivers/i915simple/i915_flush.c index 3c2069b827..96a54281f1 100644 --- a/src/gallium/drivers/i915simple/i915_flush.c +++ b/src/gallium/drivers/i915simple/i915_flush.c @@ -31,6 +31,7 @@ #include "pipe/p_defines.h" +#include "draw/draw_context.h" #include "i915_context.h" #include "i915_reg.h" #include "i915_batch.h" @@ -44,6 +45,8 @@ static void i915_flush( struct pipe_context *pipe, { struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + /* Do we need to emit an MI_FLUSH command to flush the hardware * caches? */ diff --git a/src/gallium/drivers/i915simple/i915_fpc.h b/src/gallium/drivers/i915simple/i915_fpc.h index 8c7b68aefb..80a9576304 100644 --- a/src/gallium/drivers/i915simple/i915_fpc.h +++ b/src/gallium/drivers/i915simple/i915_fpc.h @@ -44,23 +44,19 @@ * Program translation state */ struct i915_fp_compile { - const struct pipe_shader_state *shader; + struct i915_fragment_shader *shader; /* the shader we're compiling */ - struct vertex_info *vertex_info; + boolean used_constants[I915_MAX_CONSTANT]; - uint declarations[I915_PROGRAM_SIZE]; - uint program[I915_PROGRAM_SIZE]; + /** maps TGSI immediate index to constant slot */ + uint num_immediates; + uint immediates_map[I915_MAX_CONSTANT]; + float immediates[I915_MAX_CONSTANT][4]; - uint input_semantic_name[PIPE_MAX_SHADER_INPUTS]; - uint input_semantic_index[PIPE_MAX_SHADER_INPUTS]; + boolean first_instruction; - uint output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; - uint output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; - - /** points into the i915->current.constants array: */ - float (*constants)[4]; - uint num_constants; - uint constant_flags[I915_MAX_CONSTANT]; /**< status of each constant */ + uint declarations[I915_PROGRAM_SIZE]; + uint program[I915_PROGRAM_SIZE]; uint *csr; /**< Cursor, points into program. */ @@ -155,7 +151,9 @@ swizzle(int reg, uint x, uint y, uint z, uint w) /*********************************************************************** * Public interface for the compiler */ -extern void i915_translate_fragment_program( struct i915_context *i915 ); +extern void +i915_translate_fragment_program( struct i915_context *i915, + struct i915_fragment_shader *fs); @@ -206,8 +204,5 @@ extern void i915_disassemble_program(const uint * program, uint sz); extern void i915_program_error(struct i915_fp_compile *p, const char *msg, ...); -extern void -i915_translate_fragment_program(struct i915_context *i915); - #endif diff --git a/src/gallium/drivers/i915simple/i915_fpc_emit.c b/src/gallium/drivers/i915simple/i915_fpc_emit.c index 74924ff0a1..4bdeefb449 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_emit.c +++ b/src/gallium/drivers/i915simple/i915_fpc_emit.c @@ -61,8 +61,6 @@ (REG_NR_MASK << UREG_NR_SHIFT)) -#define I915_CONSTFLAG_PARAM 0x1f - uint i915_get_temp(struct i915_fp_compile *p) { @@ -73,10 +71,21 @@ i915_get_temp(struct i915_fp_compile *p) } p->temp_flag |= 1 << (bit - 1); - return UREG(REG_TYPE_R, (bit - 1)); + return bit - 1; +} + + +static void +i915_release_temp(struct i915_fp_compile *p, int reg) +{ + p->temp_flag &= ~(1 << reg); } +/** + * Get unpreserved temporary, a temp whose value is not preserved between + * PS program phases. + */ uint i915_get_utemp(struct i915_fp_compile * p) { @@ -185,41 +194,62 @@ i915_emit_arith(struct i915_fp_compile * p, return dest; } + +/** + * Emit a texture load or texkill instruction. + * \param dest the dest i915 register + * \param destmask the dest register writemask + * \param sampler the i915 sampler register + * \param coord the i915 source texcoord operand + * \param opcode the instruction opcode + */ uint i915_emit_texld( struct i915_fp_compile *p, uint dest, uint destmask, uint sampler, uint coord, - uint op ) + uint opcode ) { - uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord)); + const uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord)); + int temp = -1; + if (coord != k) { - /* No real way to work around this in the general case - need to - * allocate and declare a new temporary register (a utemp won't - * do). Will fallback for now. + /* texcoord is swizzled or negated. Need to allocate a new temporary + * register (a utemp / unpreserved temp) won't do. */ - i915_program_error(p, "Can't (yet) swizzle TEX arguments"); - assert(0); - return 0; + uint tempReg; + + temp = i915_get_temp(p); /* get temp reg index */ + tempReg = UREG(REG_TYPE_R, temp); /* make i915 register */ + + i915_emit_arith( p, A0_MOV, + tempReg, A0_DEST_CHANNEL_ALL, /* dest reg, writemask */ + 0, /* saturate */ + coord, 0, 0 ); /* src0, src1, src2 */ + + /* new src texcoord is tempReg */ + coord = tempReg; } /* Don't worry about saturate as we only support */ if (destmask != A0_DEST_CHANNEL_ALL) { + /* if not writing to XYZW... */ uint tmp = i915_get_utemp(p); - i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op ); + i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode ); i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 ); - return dest; + /* XXX release utemp here? */ } else { assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); + /* is the sampler coord a texcoord input reg? */ if (GET_UREG_TYPE(coord) != REG_TYPE_T) { p->nr_tex_indirect++; } - *(p->csr++) = (op | + *(p->csr++) = (opcode | T0_DEST( dest ) | T0_SAMPLER( sampler )); @@ -227,14 +257,19 @@ uint i915_emit_texld( struct i915_fp_compile *p, *(p->csr++) = T2_MBZ; p->nr_tex_insn++; - return dest; } + + if (temp >= 0) + i915_release_temp(p, temp); + + return dest; } uint i915_emit_const1f(struct i915_fp_compile * p, float c0) { + struct i915_fragment_shader *ifs = p->shader; unsigned reg, idx; if (c0 == 0.0) @@ -243,15 +278,15 @@ i915_emit_const1f(struct i915_fp_compile * p, float c0) return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE); for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { - if (p->constant_flags[reg] == I915_CONSTFLAG_PARAM) + if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER) continue; for (idx = 0; idx < 4; idx++) { - if (!(p->constant_flags[reg] & (1 << idx)) || - p->constants[reg][idx] == c0) { - p->constants[reg][idx] = c0; - p->constant_flags[reg] |= 1 << idx; - if (reg + 1 > p->num_constants) - p->num_constants = reg + 1; + if (!(ifs->constant_flags[reg] & (1 << idx)) || + ifs->constants[reg][idx] == c0) { + ifs->constants[reg][idx] = c0; + ifs->constant_flags[reg] |= 1 << idx; + if (reg + 1 > ifs->num_constants) + ifs->num_constants = reg + 1; return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE); } } @@ -264,6 +299,7 @@ i915_emit_const1f(struct i915_fp_compile * p, float c0) uint i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) { + struct i915_fragment_shader *ifs = p->shader; unsigned reg, idx; if (c0 == 0.0) @@ -277,16 +313,16 @@ i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { - if (p->constant_flags[reg] == 0xf || - p->constant_flags[reg] == I915_CONSTFLAG_PARAM) + if (ifs->constant_flags[reg] == 0xf || + ifs->constant_flags[reg] == I915_CONSTFLAG_USER) continue; for (idx = 0; idx < 3; idx++) { - if (!(p->constant_flags[reg] & (3 << idx))) { - p->constants[reg][idx + 0] = c0; - p->constants[reg][idx + 1] = c1; - p->constant_flags[reg] |= 3 << idx; - if (reg + 1 > p->num_constants) - p->num_constants = reg + 1; + if (!(ifs->constant_flags[reg] & (3 << idx))) { + ifs->constants[reg][idx + 0] = c0; + ifs->constants[reg][idx + 1] = c1; + ifs->constant_flags[reg] |= 3 << idx; + if (reg + 1 > ifs->num_constants) + ifs->num_constants = reg + 1; return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE); } } @@ -302,25 +338,26 @@ uint i915_emit_const4f(struct i915_fp_compile * p, float c0, float c1, float c2, float c3) { + struct i915_fragment_shader *ifs = p->shader; unsigned reg; for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { - if (p->constant_flags[reg] == 0xf && - p->constants[reg][0] == c0 && - p->constants[reg][1] == c1 && - p->constants[reg][2] == c2 && - p->constants[reg][3] == c3) { + if (ifs->constant_flags[reg] == 0xf && + ifs->constants[reg][0] == c0 && + ifs->constants[reg][1] == c1 && + ifs->constants[reg][2] == c2 && + ifs->constants[reg][3] == c3) { return UREG(REG_TYPE_CONST, reg); } - else if (p->constant_flags[reg] == 0) { - - p->constants[reg][0] = c0; - p->constants[reg][1] = c1; - p->constants[reg][2] = c2; - p->constants[reg][3] = c3; - p->constant_flags[reg] = 0xf; - if (reg + 1 > p->num_constants) - p->num_constants = reg + 1; + else if (ifs->constant_flags[reg] == 0) { + + ifs->constants[reg][0] = c0; + ifs->constants[reg][1] = c1; + ifs->constants[reg][2] = c2; + ifs->constants[reg][3] = c3; + ifs->constant_flags[reg] = 0xf; + if (reg + 1 > ifs->num_constants) + ifs->num_constants = reg + 1; return UREG(REG_TYPE_CONST, reg); } } @@ -335,41 +372,3 @@ i915_emit_const4fv(struct i915_fp_compile * p, const float * c) { return i915_emit_const4f(p, c[0], c[1], c[2], c[3]); } - - -#if 00000/*UNUSED*/ -/* Reserve a slot in the constant file for a Mesa state parameter. - * These will later need to be tracked on statechanges, but that is - * done elsewhere. - */ -uint -i915_emit_param4fv(struct i915_fp_compile * p, const float * values) -{ - struct i915_fragment_program *fp = p->fp; - int i; - - for (i = 0; i < fp->nr_params; i++) { - if (fp->param[i].values == values) - return UREG(REG_TYPE_CONST, fp->param[i].reg); - } - - if (p->constants->nr_constants == I915_MAX_CONSTANT || - fp->nr_params == I915_MAX_CONSTANT) { - i915_program_error(p, "i915_emit_param4fv: out of constants\n"); - return 0; - } - - { - int reg = p->constants->nr_constants++; - int i = fp->nr_params++; - - assert (p->constant_flags[reg] == 0); - p->constant_flags[reg] = I915_CONSTFLAG_PARAM; - - fp->param[i].values = values; - fp->param[i].reg = reg; - - return UREG(REG_TYPE_CONST, reg); - } -} -#endif diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 6c1524c768..c2d9a93c6c 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -34,6 +34,7 @@ #include "pipe/p_shader_tokens.h" #include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_dump.h" #include "draw/draw_vertex.h" @@ -97,19 +98,19 @@ negate(int reg, int x, int y, int z, int w) } +/** + * In the event of a translation failure, we'll generate a simple color + * pass-through program. + */ static void -i915_use_passthrough_shader(struct i915_context *i915) +i915_use_passthrough_shader(struct i915_fragment_shader *fs) { - debug_printf("**** Using i915 pass-through fragment shader\n"); - - i915->current.program = (uint *) MALLOC(sizeof(passthrough)); - if (i915->current.program) { - memcpy(i915->current.program, passthrough, sizeof(passthrough)); - i915->current.program_len = Elements(passthrough); + fs->program = (uint *) MALLOC(sizeof(passthrough)); + if (fs->program) { + memcpy(fs->program, passthrough, sizeof(passthrough)); + fs->program_len = Elements(passthrough); } - - i915->current.num_constants[PIPE_SHADER_FRAGMENT] = 0; - i915->current.num_user_constants[PIPE_SHADER_FRAGMENT] = 0; + fs->num_constants = 0; } @@ -161,11 +162,8 @@ src_vector(struct i915_fp_compile *p, * We also use a texture coordinate to pass wpos when possible. */ - /* use vertex format info to map a slot number to a VF attrib */ - assert(index < p->vertex_info->num_attribs); - - sem_name = p->input_semantic_name[index]; - sem_ind = p->input_semantic_index[index]; + sem_name = p->shader->info.input_semantic_name[index]; + sem_ind = p->shader->info.input_semantic_index[index]; switch (sem_name) { case TGSI_SEMANTIC_POSITION: @@ -201,7 +199,8 @@ src_vector(struct i915_fp_compile *p, break; case TGSI_FILE_IMMEDIATE: - /* XXX unfinished - need to append immediates onto const buffer */ + assert(index < p->num_immediates); + index = p->immediates_map[index]; /* fall-through */ case TGSI_FILE_CONSTANT: src = UREG(REG_TYPE_CONST, index); @@ -266,7 +265,7 @@ get_result_vector(struct i915_fp_compile *p, switch (dest->DstRegister.File) { case TGSI_FILE_OUTPUT: { - uint sem_name = p->output_semantic_name[dest->DstRegister.Index]; + uint sem_name = p->shader->info.output_semantic_name[dest->DstRegister.Index]; switch (sem_name) { case TGSI_SEMANTIC_POSITION: return UREG(REG_TYPE_OD, 0); @@ -386,6 +385,26 @@ emit_simple_arith(struct i915_fp_compile *p, arg3 ); } + +/** As above, but swap the first two src regs */ +static void +emit_simple_arith_swap2(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst, + uint opcode, uint numArgs) +{ + struct tgsi_full_instruction inst2; + + assert(numArgs == 2); + + /* transpose first two registers */ + inst2 = *inst; + inst2.FullSrcRegisters[0] = inst->FullSrcRegisters[1]; + inst2.FullSrcRegisters[1] = inst->FullSrcRegisters[0]; + + emit_simple_arith(p, &inst2, opcode, numArgs); +} + + #ifndef M_PI #define M_PI 3.14159265358979323846 #endif @@ -556,8 +575,12 @@ i915_translate_instruction(struct i915_fp_compile *p, src0 = src_vector(p, &inst->FullSrcRegisters[0]); tmp = i915_get_utemp(p); - i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */ - 0, src0, T0_TEXKILL); + i915_emit_texld(p, + tmp, /* dest reg: a dummy reg */ + A0_DEST_CHANNEL_ALL, /* dest writemask */ + 0, /* sampler */ + src0, /* coord*/ + T0_TEXKILL); /* opcode */ break; case TGSI_OPCODE_LG2: @@ -773,6 +796,11 @@ i915_translate_instruction(struct i915_fp_compile *p, emit_simple_arith(p, inst, A0_SGE, 2); break; + case TGSI_OPCODE_SLE: + /* like SGE, but swap reg0, reg1 */ + emit_simple_arith_swap2(p, inst, A0_SGE, 2); + break; + case TGSI_OPCODE_SIN: src0 = src_vector(p, &inst->FullSrcRegisters[0]); tmp = i915_get_utemp(p); @@ -827,6 +855,11 @@ i915_translate_instruction(struct i915_fp_compile *p, emit_simple_arith(p, inst, A0_SLT, 2); break; + case TGSI_OPCODE_SGT: + /* like SLT, but swap reg0, reg1 */ + emit_simple_arith_swap2(p, inst, A0_SLT, 2); + break; + case TGSI_OPCODE_SUB: src0 = src_vector(p, &inst->FullSrcRegisters[0]); src1 = src_vector(p, &inst->FullSrcRegisters[1]); @@ -880,6 +913,7 @@ i915_translate_instruction(struct i915_fp_compile *p, default: i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode); + p->error = 1; return; } @@ -896,6 +930,7 @@ static void i915_translate_instructions(struct i915_fp_compile *p, const struct tgsi_token *tokens) { + struct i915_fragment_shader *ifs = p->shader; struct tgsi_parse_context parse; tgsi_parse_init( &parse, tokens ); @@ -907,34 +942,64 @@ i915_translate_instructions(struct i915_fp_compile *p, switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: if (parse.FullToken.FullDeclaration.Declaration.File - == TGSI_FILE_INPUT) { - /* save input register info for use in src_vector() */ - uint ind, sem, semi; - ind = parse.FullToken.FullDeclaration.u.DeclarationRange.First; - sem = parse.FullToken.FullDeclaration.Semantic.SemanticName; - semi = parse.FullToken.FullDeclaration.Semantic.SemanticIndex; - /*debug_printf("FS Input DECL [%u] sem %u\n", ind, sem);*/ - p->input_semantic_name[ind] = sem; - p->input_semantic_index[ind] = semi; + == TGSI_FILE_CONSTANT) { + uint i; + for (i = parse.FullToken.FullDeclaration.u.DeclarationRange.First; + i <= parse.FullToken.FullDeclaration.u.DeclarationRange.Last; + i++) { + assert(ifs->constant_flags[i] == 0x0); + ifs->constant_flags[i] = I915_CONSTFLAG_USER; + ifs->num_constants = MAX2(ifs->num_constants, i + 1); + } } else if (parse.FullToken.FullDeclaration.Declaration.File - == TGSI_FILE_OUTPUT) { - /* save output register info for use in get_result_vector() */ - uint ind, sem, semi; - ind = parse.FullToken.FullDeclaration.u.DeclarationRange.First; - sem = parse.FullToken.FullDeclaration.Semantic.SemanticName; - semi = parse.FullToken.FullDeclaration.Semantic.SemanticIndex; - /*debug_printf("FS Output DECL [%u] sem %u\n", ind, sem);*/ - p->output_semantic_name[ind] = sem; - p->output_semantic_index[ind] = semi; + == TGSI_FILE_TEMPORARY) { + uint i; + for (i = parse.FullToken.FullDeclaration.u.DeclarationRange.First; + i <= parse.FullToken.FullDeclaration.u.DeclarationRange.Last; + i++) { + assert(i < I915_MAX_TEMPORARY); + /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ + p->temp_flag |= (1 << i); /* mark temp as used */ + } } break; case TGSI_TOKEN_TYPE_IMMEDIATE: - /* XXX append the immediate to the const buffer... */ + { + const struct tgsi_full_immediate *imm + = &parse.FullToken.FullImmediate; + const uint pos = p->num_immediates++; + uint j; + for (j = 0; j < imm->Immediate.Size; j++) { + p->immediates[pos][j] = imm->u.ImmediateFloat32[j].Float; + } + } break; case TGSI_TOKEN_TYPE_INSTRUCTION: + if (p->first_instruction) { + /* resolve location of immediates */ + uint i, j; + for (i = 0; i < p->num_immediates; i++) { + /* find constant slot for this immediate */ + for (j = 0; j < I915_MAX_CONSTANT; j++) { + if (ifs->constant_flags[j] == 0x0) { + memcpy(ifs->constants[j], + p->immediates[i], + 4 * sizeof(float)); + /*printf("immediate %d maps to const %d\n", i, j);*/ + ifs->constant_flags[j] = 0xf; /* all four comps used */ + p->immediates_map[i] = j; + ifs->num_constants = MAX2(ifs->num_constants, j + 1); + break; + } + } + } + + p->first_instruction = FALSE; + } + i915_translate_instruction(p, &parse.FullToken.FullInstruction); break; @@ -950,32 +1015,33 @@ i915_translate_instructions(struct i915_fp_compile *p, static struct i915_fp_compile * i915_init_compile(struct i915_context *i915, - const struct pipe_shader_state *fs) + struct i915_fragment_shader *ifs) { struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); - p->shader = i915->fs; - - p->vertex_info = &i915->current.vertex_info; + p->shader = ifs; - /* new constants found during translation get appended after the - * user-provided constants. + /* Put new constants at end of const buffer, growing downward. + * The problem is we don't know how many user-defined constants might + * be specified with pipe->set_constant_buffer(). + * Should pre-scan the user's program to determine the highest-numbered + * constant referenced. */ - p->constants = i915->current.constants[PIPE_SHADER_FRAGMENT]; - p->num_constants = i915->current.num_user_constants[PIPE_SHADER_FRAGMENT]; + ifs->num_constants = 0; + memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); + + p->first_instruction = TRUE; p->nr_tex_indirect = 1; /* correct? */ p->nr_tex_insn = 0; p->nr_alu_insn = 0; p->nr_decl_insn = 0; - memset(p->constant_flags, 0, sizeof(p->constant_flags)); - p->csr = p->program; p->decl = p->declarations; p->decl_s = 0; p->decl_t = 0; - p->temp_flag = 0xffff000; + p->temp_flag = ~0x0 << I915_MAX_TEMPORARY; p->utemp_flag = ~0x7; p->wpos_tex = -1; @@ -993,6 +1059,7 @@ i915_init_compile(struct i915_context *i915, static void i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) { + struct i915_fragment_shader *ifs = p->shader; unsigned long program_size = (unsigned long) (p->csr - p->program); unsigned long decl_size = (unsigned long) (p->decl - p->declarations); @@ -1008,19 +1075,13 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) if (p->nr_decl_insn > I915_MAX_DECL_INSN) i915_program_error(p, "Exceeded max DECL instructions"); - /* free old program, if present */ - if (i915->current.program) { - FREE(i915->current.program); - i915->current.program_len = 0; - } - if (p->error) { p->NumNativeInstructions = 0; p->NumNativeAluInstructions = 0; p->NumNativeTexInstructions = 0; p->NumNativeTexIndirections = 0; - i915_use_passthrough_shader(i915); + i915_use_passthrough_shader(ifs); } else { p->NumNativeInstructions @@ -1034,24 +1095,20 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) /* Copy compilation results to fragment program struct: */ - i915->current.program + assert(!ifs->program); + ifs->program = (uint *) MALLOC((program_size + decl_size) * sizeof(uint)); - if (i915->current.program) { - i915->current.program_len = program_size + decl_size; + if (ifs->program) { + ifs->program_len = program_size + decl_size; - memcpy(i915->current.program, + memcpy(ifs->program, p->declarations, decl_size * sizeof(uint)); - memcpy(i915->current.program + decl_size, + memcpy(ifs->program + decl_size, p->program, program_size * sizeof(uint)); } - - /* update number of constants */ - i915->current.num_constants[PIPE_SHADER_FRAGMENT] = p->num_constants; - assert(i915->current.num_constants[PIPE_SHADER_FRAGMENT] - >= i915->current.num_user_constants[PIPE_SHADER_FRAGMENT]); } /* Release the compilation struct: @@ -1085,7 +1142,7 @@ i915_find_wpos_space(struct i915_fp_compile *p) i915_program_error(p, "No free texcoord for wpos value"); } #else - if (p->shader->input_semantic_name[0] == TGSI_SEMANTIC_POSITION) { + if (p->shader->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) { /* frag shader using the fragment position input */ #if 0 assert(0); @@ -1106,7 +1163,7 @@ static void i915_fixup_depth_write(struct i915_fp_compile *p) { /* XXX assuming pos/depth is always in output[0] */ - if (p->shader->output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { + if (p->shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { const uint depth = UREG(REG_TYPE_OD, 0); i915_emit_arith(p, @@ -1121,13 +1178,18 @@ i915_fixup_depth_write(struct i915_fp_compile *p) void -i915_translate_fragment_program( struct i915_context *i915 ) +i915_translate_fragment_program( struct i915_context *i915, + struct i915_fragment_shader *fs) { - struct i915_fp_compile *p = i915_init_compile(i915, i915->fs); - const struct tgsi_token *tokens = i915->fs->tokens; + struct i915_fp_compile *p = i915_init_compile(i915, fs); + const struct tgsi_token *tokens = fs->state.tokens; i915_find_wpos_space(p); +#if 0 + tgsi_dump(tokens, 0); +#endif + i915_translate_instructions(p, tokens); i915_fixup_depth_write(p); diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c index 44c4325936..d8de5178f6 100644 --- a/src/gallium/drivers/i915simple/i915_prim_emit.c +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -72,38 +72,42 @@ emit_hw_vertex( struct i915_context *i915, uint i; uint count = 0; /* for debug/sanity */ + assert(!i915->dirty); + for (i = 0; i < vinfo->num_attribs; i++) { + const uint j = vinfo->src_index[i]; + const float *attrib = vertex->data[j]; switch (vinfo->emit[i]) { case EMIT_OMIT: /* no-op */ break; case EMIT_1F: - OUT_BATCH( fui(vertex->data[i][0]) ); + OUT_BATCH( fui(attrib[0]) ); count++; break; case EMIT_2F: - OUT_BATCH( fui(vertex->data[i][0]) ); - OUT_BATCH( fui(vertex->data[i][1]) ); + OUT_BATCH( fui(attrib[0]) ); + OUT_BATCH( fui(attrib[1]) ); count += 2; break; case EMIT_3F: - OUT_BATCH( fui(vertex->data[i][0]) ); - OUT_BATCH( fui(vertex->data[i][1]) ); - OUT_BATCH( fui(vertex->data[i][2]) ); + OUT_BATCH( fui(attrib[0]) ); + OUT_BATCH( fui(attrib[1]) ); + OUT_BATCH( fui(attrib[2]) ); count += 3; break; case EMIT_4F: - OUT_BATCH( fui(vertex->data[i][0]) ); - OUT_BATCH( fui(vertex->data[i][1]) ); - OUT_BATCH( fui(vertex->data[i][2]) ); - OUT_BATCH( fui(vertex->data[i][3]) ); + OUT_BATCH( fui(attrib[0]) ); + OUT_BATCH( fui(attrib[1]) ); + OUT_BATCH( fui(attrib[2]) ); + OUT_BATCH( fui(attrib[3]) ); count += 4; break; case EMIT_4UB: - OUT_BATCH( pack_ub4(float_to_ubyte( vertex->data[i][2] ), - float_to_ubyte( vertex->data[i][1] ), - float_to_ubyte( vertex->data[i][0] ), - float_to_ubyte( vertex->data[i][3] )) ); + OUT_BATCH( pack_ub4(float_to_ubyte( attrib[2] ), + float_to_ubyte( attrib[1] ), + float_to_ubyte( attrib[0] ), + float_to_ubyte( attrib[3] )) ); count += 1; break; default: @@ -122,17 +126,19 @@ emit_prim( struct draw_stage *stage, unsigned nr ) { struct i915_context *i915 = setup_stage(stage)->i915; - unsigned vertex_size = i915->current.vertex_info.size * 4; /* in bytes */ + unsigned vertex_size; unsigned i; - assert(vertex_size >= 12); /* never smaller than 12 bytes */ - if (i915->dirty) i915_update_derived( i915 ); if (i915->hardware_dirty) i915_emit_hardware_state( i915 ); + /* need to do this after validation! */ + vertex_size = i915->current.vertex_info.size * 4; /* in bytes */ + assert(vertex_size >= 12); /* never smaller than 12 bytes */ + if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { FLUSH_BATCH(); diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index c5bf6174f6..9d5f609220 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -83,6 +83,12 @@ i915_vbuf_render_get_vertex_info( struct vbuf_render *render ) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; + + if (i915->dirty) { + /* make sure we have up to date vertex layout */ + i915_update_derived( i915 ); + } + return &i915->current.vertex_info; } @@ -143,7 +149,8 @@ i915_vbuf_render_draw( struct vbuf_render *render, assert(nr_indices); - assert((i915->dirty & ~I915_NEW_VBO) == 0); + /* this seems to be bogus, since we validate state right after this */ + /*assert((i915->dirty & ~I915_NEW_VBO) == 0);*/ if (i915->dirty) i915_update_derived( i915 ); diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c new file mode 100644 index 0000000000..8d7bf0b33e --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -0,0 +1,250 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_screen.h" +#include "i915_texture.h" + + +static const char * +i915_get_vendor( struct pipe_screen *pscreen ) +{ + return "Tungsten Graphics, Inc."; +} + + +static const char * +i915_get_name( struct pipe_screen *pscreen ) +{ + static char buffer[128]; + const char *chipset; + + switch (i915_screen(pscreen)->pci_id) { + case PCI_CHIP_I915_G: + chipset = "915G"; + break; + case PCI_CHIP_I915_GM: + chipset = "915GM"; + break; + case PCI_CHIP_I945_G: + chipset = "945G"; + break; + case PCI_CHIP_I945_GM: + chipset = "945GM"; + break; + case PCI_CHIP_I945_GME: + chipset = "945GME"; + break; + case PCI_CHIP_G33_G: + chipset = "G33"; + break; + case PCI_CHIP_Q35_G: + chipset = "Q35"; + break; + case PCI_CHIP_Q33_G: + chipset = "Q33"; + break; + default: + chipset = "unknown"; + break; + } + + sprintf(buffer, "i915 (chipset: %s)", chipset); + return buffer; +} + + +static int +i915_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 11; /* max 1024x1024 */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 11; /* max 1024x1024 */ + default: + return 0; + } +} + + +static float +i915_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 7.5; + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 4.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + + default: + return 0; + } +} + + +static boolean +i915_is_format_supported( struct pipe_screen *screen, + enum pipe_format format, uint type ) +{ + static const enum pipe_format tex_supported[] = { + PIPE_FORMAT_R8G8B8A8_UNORM, + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_U_L8, + PIPE_FORMAT_U_A8, + PIPE_FORMAT_U_I8, + PIPE_FORMAT_U_A8_L8, + PIPE_FORMAT_YCBCR, + PIPE_FORMAT_YCBCR_REV, + PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_NONE /* list terminator */ + }; + static const enum pipe_format surface_supported[] = { + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_S8Z24_UNORM, + /*PIPE_FORMAT_R16G16B16A16_SNORM,*/ + PIPE_FORMAT_NONE /* list terminator */ + }; + const enum pipe_format *list; + uint i; + + switch (type) { + case PIPE_TEXTURE: + list = tex_supported; + break; + case PIPE_SURFACE: + list = surface_supported; + break; + default: + assert(0); + } + + for (i = 0; list[i] != PIPE_FORMAT_NONE; i++) { + if (list[i] == format) + return TRUE; + } + + return FALSE; +} + + +static void +i915_destroy_screen( struct pipe_screen *screen ) +{ + FREE(screen); +} + + +/** + * Create a new i915_screen object + */ +struct pipe_screen * +i915_create_screen(struct pipe_winsys *winsys, uint pci_id) +{ + struct i915_screen *i915screen = CALLOC_STRUCT(i915_screen); + + if (!i915screen) + return NULL; + + switch (pci_id) { + case PCI_CHIP_I915_G: + case PCI_CHIP_I915_GM: + i915screen->is_i945 = FALSE; + break; + + case PCI_CHIP_I945_G: + case PCI_CHIP_I945_GM: + case PCI_CHIP_I945_GME: + case PCI_CHIP_G33_G: + case PCI_CHIP_Q33_G: + case PCI_CHIP_Q35_G: + i915screen->is_i945 = TRUE; + break; + + default: + winsys->printf(winsys, + "%s: unknown pci id 0x%x, cannot create screen\n", + __FUNCTION__, pci_id); + return NULL; + } + + i915screen->pci_id = pci_id; + + i915screen->screen.winsys = winsys; + + i915screen->screen.destroy = i915_destroy_screen; + + i915screen->screen.get_name = i915_get_name; + i915screen->screen.get_vendor = i915_get_vendor; + i915screen->screen.get_param = i915_get_param; + i915screen->screen.get_paramf = i915_get_paramf; + i915screen->screen.is_format_supported = i915_is_format_supported; + + i915_init_screen_texture_functions(&i915screen->screen); + + return &i915screen->screen; +} diff --git a/src/gallium/drivers/i965simple/brw_strings.c b/src/gallium/drivers/i915simple/i915_screen.h index 3d9c50961f..73b0ff05ce 100644 --- a/src/gallium/drivers/i965simple/brw_strings.c +++ b/src/gallium/drivers/i915simple/i915_screen.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,48 +25,45 @@ * **************************************************************************/ -#include "brw_context.h" -#include "brw_reg.h" +#ifndef I915_SCREEN_H +#define I915_SCREEN_H -static const char *brw_get_vendor( struct pipe_context *pipe ) + +#include "pipe/p_screen.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Subclass of pipe_screen + */ +struct i915_screen { - return "Tungsten Graphics, Inc."; -} + struct pipe_screen screen; + boolean is_i945; + uint pci_id; +}; -static const char *brw_get_name( struct pipe_context *pipe ) + +/** cast wrapper */ +static INLINE struct i915_screen * +i915_screen(struct pipe_screen *pscreen) { - static char buffer[128]; - const char *chipset; - - switch (brw_context(pipe)->pci_id) { - case PCI_CHIP_I965_Q: - chipset = "Intel(R) 965Q"; - break; - case PCI_CHIP_I965_G: - case PCI_CHIP_I965_G_1: - chipset = "Intel(R) 965G"; - break; - case PCI_CHIP_I965_GM: - chipset = "Intel(R) 965GM"; - break; - case PCI_CHIP_I965_GME: - chipset = "Intel(R) 965GME/GLE"; - break; - default: - chipset = "unknown"; - break; - } - - sprintf(buffer, "i965 (chipset: %s)", chipset); - return buffer; + return (struct i915_screen *) pscreen; } -void -brw_init_string_functions(struct brw_context *brw) -{ - brw->pipe.get_name = brw_get_name; - brw->pipe.get_vendor = brw_get_vendor; +extern struct pipe_screen * +i915_create_screen(struct pipe_winsys *winsys, uint pci_id); + + +#ifdef __cplusplus } +#endif + +#endif /* I915_SCREEN_H */ diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index 294e6fad03..27af46bea0 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -32,11 +32,13 @@ #include "draw/draw_context.h" #include "pipe/p_winsys.h" #include "pipe/p_util.h" +#include "pipe/p_inlines.h" #include "i915_context.h" #include "i915_reg.h" #include "i915_state.h" #include "i915_state_inlines.h" +#include "i915_fpc.h" /* The i915 (and related graphics cores) do not support GL_CLAMP. The @@ -415,26 +417,49 @@ static void i915_set_polygon_stipple( struct pipe_context *pipe, } -static void * i915_create_fs_state(struct pipe_context *pipe, - const struct pipe_shader_state *templ) + +static void * +i915_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) { - return 0; + struct i915_context *i915 = i915_context(pipe); + struct i915_fragment_shader *ifs = CALLOC_STRUCT(i915_fragment_shader); + if (!ifs) + return NULL; + + ifs->state = *templ; + + tgsi_scan_shader(templ->tokens, &ifs->info); + + /* The shader's compiled to i915 instructions here */ + i915_translate_fragment_program(i915, ifs); + + return ifs; } -static void i915_bind_fs_state(struct pipe_context *pipe, void *fs) +static void +i915_bind_fs_state(struct pipe_context *pipe, void *shader) { struct i915_context *i915 = i915_context(pipe); - i915->fs = (struct pipe_shader_state *)fs; + i915->fs = (struct i915_fragment_shader*) shader; i915->dirty |= I915_NEW_FS; } -static void i915_delete_fs_state(struct pipe_context *pipe, void *shader) +static +void i915_delete_fs_state(struct pipe_context *pipe, void *shader) { - /*do nothing*/ + struct i915_fragment_shader *ifs = (struct i915_fragment_shader *) shader; + + if (ifs->program) + FREE(ifs->program); + ifs->program_len = 0; + + FREE(ifs); } + static void * i915_create_vs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) @@ -451,6 +476,8 @@ static void i915_bind_vs_state(struct pipe_context *pipe, void *shader) /* just pass-through to draw module */ draw_bind_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader); + + i915->dirty |= I915_NEW_VS; } static void i915_delete_vs_state(struct pipe_context *pipe, void *shader) @@ -505,7 +532,8 @@ static void i915_set_sampler_texture(struct pipe_context *pipe, { struct i915_context *i915 = i915_context(pipe); - i915->texture[sampler] = (struct i915_texture*)texture; /* ptr, not struct */ + pipe_texture_reference((struct pipe_texture **) &i915->texture[sampler], + texture); i915->dirty |= I915_NEW_TEXTURE; } diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c index 4767584fc6..4daccec6e0 100644 --- a/src/gallium/drivers/i915simple/i915_state_derived.c +++ b/src/gallium/drivers/i915simple/i915_state_derived.c @@ -27,104 +27,111 @@ #include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "draw/draw_vertex.h" #include "i915_context.h" #include "i915_state.h" #include "i915_reg.h" #include "i915_fpc.h" -#include "pipe/p_shader_tokens.h" + /** - * Determine which post-transform / pre-rasterization vertex attributes - * we need. - * Derived from: fs, setup states. + * Determine the hardware vertex layout. + * Depends on vertex/fragment shader state. */ static void calculate_vertex_layout( struct i915_context *i915 ) { - const struct pipe_shader_state *fs = i915->fs; + const struct i915_fragment_shader *fs = i915->fs; const enum interp_mode colorInterp = i915->rasterizer->color_interp; struct vertex_info vinfo; - uint front0 = 0, back0 = 0, front1 = 0, back1 = 0; - boolean needW = 0; + boolean texCoords[8], colors[2], fog, needW; uint i; - boolean texCoords[8]; - uint src = 0; + int src; memset(texCoords, 0, sizeof(texCoords)); + colors[0] = colors[1] = fog = needW = FALSE; memset(&vinfo, 0, sizeof(vinfo)); - /* pos */ - draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src++); - /* Note: we'll set the S4_VFMT_XYZ[W] bits below */ - - for (i = 0; i < fs->num_inputs; i++) { - switch (fs->input_semantic_name[i]) { + /* Determine which fragment program inputs are needed. Setup HW vertex + * layout below, in the HW-specific attribute order. + */ + for (i = 0; i < fs->info.num_inputs; i++) { + switch (fs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: break; case TGSI_SEMANTIC_COLOR: - if (fs->input_semantic_index[i] == 0) { - front0 = draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src++); - vinfo.hwfmt[0] |= S4_VFMT_COLOR; - } - else { - assert(fs->input_semantic_index[i] == 1); - front1 = draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src++); - vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG; - } + assert(fs->info.input_semantic_index[i] < 2); + colors[fs->info.input_semantic_index[i]] = TRUE; break; case TGSI_SEMANTIC_GENERIC: /* usually a texcoord */ { - const uint unit = fs->input_semantic_index[i]; - uint hwtc; + const uint unit = fs->info.input_semantic_index[i]; + assert(unit < 8); texCoords[unit] = TRUE; - draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); - hwtc = TEXCOORDFMT_4D; needW = TRUE; - vinfo.hwfmt[1] |= hwtc << (unit * 4); } break; case TGSI_SEMANTIC_FOG: - debug_printf("i915 fogcoord not implemented yet\n"); - draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src++); + fog = TRUE; break; default: assert(0); } - } - /* finish up texcoord fields */ - for (i = 0; i < 8; i++) { - if (!texCoords[i]) { - const uint hwtc = TEXCOORDFMT_NOT_PRESENT; - vinfo.hwfmt[1] |= hwtc << (i* 4); - } - } - - /* go back and fill in the vertex position info now that we have needW */ + + /* pos */ + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_POSITION, 0); if (needW) { + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src); vinfo.hwfmt[0] |= S4_VFMT_XYZW; vinfo.emit[0] = EMIT_4F; } else { + draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src); vinfo.hwfmt[0] |= S4_VFMT_XYZ; vinfo.emit[0] = EMIT_3F; } - /* Additional attributes required for setup: Just twosided - * lighting. Edgeflag is dealt with specially by setting bits in - * the vertex header. - */ - if (i915->rasterizer->light_twoside) { - if (front0) { - back0 = draw_emit_vertex_attr(&vinfo, EMIT_OMIT, colorInterp, src++); + /* hardware point size */ + /* XXX todo */ + + /* primary color */ + if (colors[0]) { + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 0); + draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); + vinfo.hwfmt[0] |= S4_VFMT_COLOR; + } + + /* secondary color */ + if (colors[1]) { + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 1); + draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); + vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG; + } + + /* fog coord, not fog blend factor */ + if (fog) { + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_FOG, 0); + draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); + vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM; + } + + /* texcoords */ + for (i = 0; i < 8; i++) { + uint hwtc; + if (texCoords[i]) { + hwtc = TEXCOORDFMT_4D; + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_GENERIC, i); + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); } - if (back0) { - back1 = draw_emit_vertex_attr(&vinfo, EMIT_OMIT, colorInterp, src++); + else { + hwtc = TEXCOORDFMT_NOT_PRESENT; } + vinfo.hwfmt[1] |= hwtc << (i * 4); } draw_compute_vertex_size(&vinfo); @@ -148,7 +155,7 @@ static void calculate_vertex_layout( struct i915_context *i915 ) */ void i915_update_derived( struct i915_context *i915 ) { - if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS)) + if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS)) calculate_vertex_layout( i915 ); if (i915->dirty & (I915_NEW_SAMPLER | I915_NEW_TEXTURE)) @@ -164,7 +171,6 @@ void i915_update_derived( struct i915_context *i915 ) i915_update_dynamic( i915 ); if (i915->dirty & I915_NEW_FS) { - i915_translate_fragment_program(i915); i915->hardware_dirty |= I915_HW_PROGRAM; /* XXX right? */ } diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c index 3339287f49..6bbaac4e34 100644 --- a/src/gallium/drivers/i915simple/i915_state_emit.c +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -99,7 +99,11 @@ i915_emit_hardware_state(struct i915_context *i915 ) 2 + I915_TEX_UNITS*3 + 2 + I915_TEX_UNITS*3 + 2 + I915_MAX_CONSTANT*4 + +#if 0 i915->current.program_len + +#else + i915->fs->program_len + +#endif 6 ) * 3/2; /* plus 50% margin */ const unsigned relocs = ( I915_TEX_UNITS + @@ -325,15 +329,34 @@ i915_emit_hardware_state(struct i915_context *i915 ) /* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */ if (i915->hardware_dirty & I915_HW_PROGRAM) { - const uint nr = i915->current.num_constants[PIPE_SHADER_FRAGMENT]; - assert(nr <= I915_MAX_CONSTANT); - if (nr > 0) { - const uint *c - = (const uint *) i915->current.constants[PIPE_SHADER_FRAGMENT]; + /* Collate the user-defined constants with the fragment shader's + * immediates according to the constant_flags[] array. + */ + const uint nr = i915->fs->num_constants; + if (nr) { uint i; + OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) ); + for (i = 0; i < nr; i++) { + const uint *c; + if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { + /* grab user-defined constant */ + c = (uint *) i915->current.constants[PIPE_SHADER_FRAGMENT][i]; + } + else { + /* emit program constant */ + c = (uint *) i915->fs->constants[i]; + } +#if 0 /* debug */ + { + float *f = (float *) c; + printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], + (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER + ? "user" : "immediate")); + } +#endif OUT_BATCH(*c++); OUT_BATCH(*c++); OUT_BATCH(*c++); @@ -348,9 +371,9 @@ i915_emit_hardware_state(struct i915_context *i915 ) { uint i; /* we should always have, at least, a pass-through program */ - assert(i915->current.program_len > 0); - for (i = 0; i < i915->current.program_len; i++) { - OUT_BATCH(i915->current.program[i]); + assert(i915->fs->program_len > 0); + for (i = 0; i < i915->fs->program_len; i++) { + OUT_BATCH(i915->fs->program[i]); } } diff --git a/src/gallium/drivers/i915simple/i915_state_immediate.c b/src/gallium/drivers/i915simple/i915_state_immediate.c index 07031fc6c5..dfbbcab624 100644 --- a/src/gallium/drivers/i915simple/i915_state_immediate.c +++ b/src/gallium/drivers/i915simple/i915_state_immediate.c @@ -33,7 +33,7 @@ #include "i915_context.h" #include "i915_state.h" #include "i915_reg.h" -#include "p_util.h" +#include "pipe/p_util.h" /* All state expressable with the LOAD_STATE_IMMEDIATE_1 packet. diff --git a/src/gallium/drivers/i915simple/i915_state_inlines.h b/src/gallium/drivers/i915simple/i915_state_inlines.h index 0934ac79a4..378de8f9c4 100644 --- a/src/gallium/drivers/i915simple/i915_state_inlines.h +++ b/src/gallium/drivers/i915simple/i915_state_inlines.h @@ -28,8 +28,8 @@ #ifndef I915_STATE_INLINES_H #define I915_STATE_INLINES_H -#include "p_compiler.h" -#include "p_defines.h" +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" #include "i915_reg.h" diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index 17fd27895a..f4fbedbe9b 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -36,48 +36,6 @@ #include "util/p_tile.h" -/* - * XXX note: same as code in sp_surface.c - */ -static struct pipe_surface * -i915_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - struct i915_texture *tex = (struct i915_texture *)pt; - struct pipe_surface *ps; - unsigned offset; /* in bytes */ - - offset = tex->level_offset[level]; - - if (pt->target == PIPE_TEXTURE_CUBE) { - offset += tex->image_offset[level][face] * pt->cpp; - } - else if (pt->target == PIPE_TEXTURE_3D) { - offset += tex->image_offset[level][zslice] * pt->cpp; - } - else { - assert(face == 0); - assert(zslice == 0); - } - - ps = pipe->winsys->surface_alloc(pipe->winsys); - if (ps) { - assert(ps->refcount); - assert(ps->winsys); - pipe_buffer_reference(pipe->winsys, &ps->buffer, tex->buffer); - ps->format = pt->format; - ps->cpp = pt->cpp; - ps->width = pt->width[level]; - ps->height = pt->height[level]; - ps->pitch = tex->pitch; - ps->offset = offset; - } - return ps; -} - - - /* Assumes all values are within bounds -- no checking at this level - * do it higher up if required. */ @@ -115,6 +73,7 @@ i915_surface_copy(struct pipe_context *pipe, } } + /* Fill a rectangular sub-region. Need better logic about when to * push buffers into AGP - will currently do so whenever possible. */ @@ -184,8 +143,6 @@ i915_surface_fill(struct pipe_context *pipe, void i915_init_surface_functions(struct i915_context *i915) { - i915->pipe.get_tex_surface = i915_get_tex_surface; - i915->pipe.surface_copy = i915_surface_copy; i915->pipe.surface_fill = i915_surface_fill; } diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 6d37ae3d74..ef5adff550 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -40,6 +40,7 @@ #include "i915_context.h" #include "i915_texture.h" #include "i915_debug.h" +#include "i915_screen.h" static unsigned minify( unsigned d ) @@ -187,7 +188,7 @@ static const int step_offsets[6][2] = { static boolean -i915_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) +i915_miptree_layout(struct i915_texture * tex) { struct pipe_texture *pt = &tex->base; unsigned level; @@ -311,7 +312,7 @@ i915_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) static boolean -i945_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) +i945_miptree_layout(struct i915_texture * tex) { struct pipe_texture *pt = &tex->base; unsigned level; @@ -478,23 +479,26 @@ i945_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) } -struct pipe_texture * -i915_texture_create(struct pipe_context *pipe, - const struct pipe_texture *templat) +static struct pipe_texture * +i915_texture_create_screen(struct pipe_screen *screen, + const struct pipe_texture *templat) { struct i915_texture *tex = CALLOC_STRUCT(i915_texture); if (tex) { - struct i915_context *i915 = i915_context(pipe); + struct i915_screen *i915screen = i915_screen(screen); + struct pipe_winsys *ws = screen->winsys; tex->base = *templat; + tex->base.refcount = 1; + tex->base.screen = screen; - if (i915->flags.is_i945 ? i945_miptree_layout(pipe, tex) : - i915_miptree_layout(pipe, tex)) - tex->buffer = pipe->winsys->buffer_create(pipe->winsys, 64, - PIPE_BUFFER_USAGE_PIXEL, - tex->pitch * tex->base.cpp * - tex->total_height); + if (i915screen->is_i945 ? i945_miptree_layout(tex) : + i915_miptree_layout(tex)) + tex->buffer = ws->buffer_create(ws, 64, + PIPE_BUFFER_USAGE_PIXEL, + tex->pitch * tex->base.cpp * + tex->total_height); if (!tex->buffer) { FREE(tex); @@ -506,8 +510,9 @@ i915_texture_create(struct pipe_context *pipe, } -void -i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) +static void +i915_texture_release_screen(struct pipe_screen *screen, + struct pipe_texture **pt) { if (!*pt) return; @@ -524,7 +529,7 @@ i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); */ - pipe_buffer_reference(pipe->winsys, &tex->buffer, NULL); + pipe_buffer_reference(screen->winsys, &tex->buffer, NULL); for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) if (tex->image_offset[i]) @@ -534,3 +539,68 @@ i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) } *pt = NULL; } + + +static void +i915_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) +{ + /* no-op? */ +} + + +/* + * XXX note: same as code in sp_surface.c + */ +static struct pipe_surface * +i915_get_tex_surface_screen(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct i915_texture *tex = (struct i915_texture *)pt; + struct pipe_winsys *ws = screen->winsys; + struct pipe_surface *ps; + unsigned offset; /* in bytes */ + + offset = tex->level_offset[level]; + + if (pt->target == PIPE_TEXTURE_CUBE) { + offset += tex->image_offset[level][face] * pt->cpp; + } + else if (pt->target == PIPE_TEXTURE_3D) { + offset += tex->image_offset[level][zslice] * pt->cpp; + } + else { + assert(face == 0); + assert(zslice == 0); + } + + ps = ws->surface_alloc(ws); + if (ps) { + assert(ps->refcount); + assert(ps->winsys); + pipe_buffer_reference(ws, &ps->buffer, tex->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = tex->pitch; + ps->offset = offset; + } + return ps; +} + + +void +i915_init_texture_functions(struct i915_context *i915) +{ + i915->pipe.texture_update = i915_texture_update; +} + + +void +i915_init_screen_texture_functions(struct pipe_screen *screen) +{ + screen->texture_create = i915_texture_create_screen; + screen->texture_release = i915_texture_release_screen; + screen->get_tex_surface = i915_get_tex_surface_screen; +} diff --git a/src/gallium/drivers/i915simple/i915_texture.h b/src/gallium/drivers/i915simple/i915_texture.h index 330d111dc7..7225016a9f 100644 --- a/src/gallium/drivers/i915simple/i915_texture.h +++ b/src/gallium/drivers/i915simple/i915_texture.h @@ -1,17 +1,43 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ #ifndef I915_TEXTURE_H #define I915_TEXTURE_H -struct pipe_context; -struct pipe_texture; +struct i915_context; +struct pipe_screen; -struct pipe_texture * -i915_texture_create(struct pipe_context *pipe, - const struct pipe_texture *templat); +extern void +i915_init_texture_functions(struct i915_context *i915); + extern void -i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); +i915_init_screen_texture_functions(struct pipe_screen *screen); #endif /* I915_TEXTURE_H */ diff --git a/src/gallium/drivers/i915simple/i915_winsys.h b/src/gallium/drivers/i915simple/i915_winsys.h index fe49710852..aea3003281 100644 --- a/src/gallium/drivers/i915simple/i915_winsys.h +++ b/src/gallium/drivers/i915simple/i915_winsys.h @@ -40,6 +40,11 @@ #include "pipe/p_defines.h" +#ifdef __cplusplus +extern "C" { +#endif + + /* Pipe drivers are (meant to be!) independent of both GL and the * window system. The window system provides a buffer manager and a * set of additional hooks for things like command buffer submission, @@ -52,6 +57,7 @@ struct pipe_buffer; struct pipe_winsys; +struct pipe_screen; /** @@ -107,9 +113,12 @@ struct i915_winsys { #define I915_BUFFER_USAGE_LIT_VERTEX (PIPE_BUFFER_USAGE_CUSTOM << 0) -struct pipe_context *i915_create( struct pipe_winsys *, - struct i915_winsys *, - unsigned pci_id ); +struct pipe_context *i915_create_context( struct pipe_screen *, + struct pipe_winsys *, + struct i915_winsys * ); +#ifdef __cplusplus +} +#endif #endif diff --git a/src/gallium/drivers/i965simple/Makefile b/src/gallium/drivers/i965simple/Makefile index 1dec1f9749..e97146e57c 100644 --- a/src/gallium/drivers/i965simple/Makefile +++ b/src/gallium/drivers/i965simple/Makefile @@ -1,14 +1,13 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = i965simple -DRIVER_SOURCES = \ - brw_blit.c \ - brw_flush.c \ - brw_strings.c \ - brw_surface.c \ +C_SOURCES = \ + brw_blit.c \ + brw_flush.c \ + brw_screen.c \ + brw_surface.c \ brw_cc.c \ brw_clip.c \ brw_clip_line.c \ @@ -31,8 +30,7 @@ DRIVER_SOURCES = \ brw_sf.c \ brw_sf_emit.c \ brw_sf_state.c \ - brw_shader_info.c \ - brw_state.c \ + brw_state.c \ brw_state_batch.c \ brw_state_cache.c \ brw_state_pool.c \ @@ -51,16 +49,6 @@ DRIVER_SOURCES = \ brw_wm_state.c \ brw_wm_surface_state.c -C_SOURCES = \ - $(COMMON_SOURCES) \ - $(COMMON_BM_SOURCES) \ - $(MINIGLX_SOURCES) \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - -DRIVER_DEFINES = -I. - include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/i965simple/SConscript b/src/gallium/drivers/i965simple/SConscript index 74621de84c..c0825c4de3 100644 --- a/src/gallium/drivers/i965simple/SConscript +++ b/src/gallium/drivers/i965simple/SConscript @@ -29,13 +29,11 @@ i965simple = env.ConvenienceLibrary( 'brw_sf.c', 'brw_sf_emit.c', 'brw_sf_state.c', - 'brw_shader_info.c', 'brw_state.c', 'brw_state_batch.c', 'brw_state_cache.c', 'brw_state_pool.c', 'brw_state_upload.c', - 'brw_strings.c', 'brw_surface.c', 'brw_tex_layout.c', 'brw_urb.c', diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c index 5e58701e91..7c908da672 100644 --- a/src/gallium/drivers/i965simple/brw_context.c +++ b/src/gallium/drivers/i965simple/brw_context.c @@ -40,15 +40,14 @@ #include "pipe/p_winsys.h" #include "pipe/p_context.h" #include "pipe/p_util.h" +#include "pipe/p_screen.h" -/*************************************** - * Mesa's Driver Functions - ***************************************/ #ifndef BRW_DEBUG int BRW_DEBUG = (0); #endif + static void brw_destroy(struct pipe_context *pipe) { struct brw_context *brw = brw_context(pipe); @@ -56,6 +55,7 @@ static void brw_destroy(struct pipe_context *pipe) FREE(brw); } + static void brw_clear(struct pipe_context *pipe, struct pipe_surface *ps, unsigned clearValue) { @@ -71,164 +71,31 @@ static void brw_clear(struct pipe_context *pipe, struct pipe_surface *ps, } -static int -brw_get_param(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 8; - case PIPE_CAP_NPOT_TEXTURES: - return 1; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_S3TC: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; - case PIPE_CAP_POINT_SPRITE: - return 0; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; - case PIPE_CAP_OCCLUSION_QUERY: - return 0; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 11; /* max 1024x1024 */ - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 8; /* max 128x128x128 */ - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 11; /* max 1024x1024 */ - default: - return 0; - } -} - - -static float -brw_get_paramf(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 7.5; - - case PIPE_CAP_MAX_POINT_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 255.0; - - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 4.0; - - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0; - - default: - return 0; - } -} - -static boolean -brw_is_format_supported( struct pipe_context *pipe, - enum pipe_format format, uint type ) -{ -#if 0 - /* XXX: This is broken -- rewrite if still needed. */ - static const unsigned tex_supported[] = { - PIPE_FORMAT_U_R8_G8_B8_A8, - PIPE_FORMAT_U_A8_R8_G8_B8, - PIPE_FORMAT_U_R5_G6_B5, - PIPE_FORMAT_U_L8, - PIPE_FORMAT_U_A8, - PIPE_FORMAT_U_I8, - PIPE_FORMAT_U_L8_A8, - PIPE_FORMAT_YCBCR, - PIPE_FORMAT_YCBCR_REV, - PIPE_FORMAT_S8_Z24, - }; - - - /* Actually a lot more than this - add later: - */ - static const unsigned render_supported[] = { - PIPE_FORMAT_U_A8_R8_G8_B8, - PIPE_FORMAT_U_R5_G6_B5, - }; - - /* - */ - static const unsigned z_stencil_supported[] = { - PIPE_FORMAT_U_Z16, - PIPE_FORMAT_U_Z32, - PIPE_FORMAT_S8_Z24, - }; - - switch (type) { - case PIPE_RENDER_FORMAT: - *numFormats = Elements(render_supported); - return render_supported; - - case PIPE_TEX_FORMAT: - *numFormats = Elements(tex_supported); - return render_supported; - - case PIPE_Z_STENCIL_FORMAT: - *numFormats = Elements(render_supported); - return render_supported; - - default: - *numFormats = 0; - return NULL; - } -#else - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: - return TRUE; - default: - return FALSE; - }; - return FALSE; -#endif -} - - - - -struct pipe_context *brw_create(struct pipe_winsys *pipe_winsys, +struct pipe_context *brw_create(struct pipe_screen *screen, struct brw_winsys *brw_winsys, unsigned pci_id) { struct brw_context *brw; - pipe_winsys->printf(pipe_winsys, - "%s: creating brw_context with pci id 0x%x\n", - __FUNCTION__, pci_id); + screen->winsys->printf(screen->winsys, + "%s: creating brw_context with pci id 0x%x\n", + __FUNCTION__, pci_id); brw = CALLOC_STRUCT(brw_context); if (brw == NULL) return NULL; brw->winsys = brw_winsys; - brw->pipe.winsys = pipe_winsys; + brw->pipe.winsys = screen->winsys; + brw->pipe.screen = screen; brw->pipe.destroy = brw_destroy; - brw->pipe.is_format_supported = brw_is_format_supported; - brw->pipe.get_param = brw_get_param; - brw->pipe.get_paramf = brw_get_paramf; brw->pipe.clear = brw_clear; - brw->pipe.texture_create = brw_texture_create; - brw->pipe.texture_release = brw_texture_release; brw_init_surface_functions(brw); + brw_init_texture_functions(brw); brw_init_state_functions(brw); brw_init_flush_functions(brw); - brw_init_string_functions(brw); brw_init_draw_functions( brw ); diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h index 65664d853d..4da3a8cffc 100644 --- a/src/gallium/drivers/i965simple/brw_context.h +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -38,6 +38,8 @@ #include "pipe/p_defines.h" #include "pipe/p_state.h" +#include "tgsi/util/tgsi_scan.h" + #include "brw_structs.h" #include "brw_winsys.h" @@ -195,33 +197,22 @@ struct brw_state_flags { }; -struct brw_shader_info { - int nr_regs[8]; /* TGSI_FILE_* */ -}; - - - struct brw_vertex_program { struct pipe_shader_state program; - struct brw_shader_info info; + struct tgsi_shader_info info; int id; }; - struct brw_fragment_program { struct pipe_shader_state program; - struct brw_shader_info info; + struct tgsi_shader_info info; - boolean UsesDepth; - boolean UsesKill; - boolean ComputesDepth; + boolean UsesDepth; /* XXX add this to tgsi_shader_info? */ int id; }; - - struct pipe_setup_linkage { struct { unsigned vp_output:5; @@ -502,7 +493,7 @@ struct brw_context /* Arrays with buffer objects to copy non-bufferobj arrays into * for upload: */ - struct pipe_vertex_buffer *vbo_array[PIPE_ATTRIB_MAX]; + const struct pipe_vertex_buffer *vbo_array[PIPE_ATTRIB_MAX]; struct brw_vertex_element_state inputs[PIPE_ATTRIB_MAX]; diff --git a/src/gallium/drivers/i965simple/brw_draw_upload.c b/src/gallium/drivers/i965simple/brw_draw_upload.c index aa85d93866..9c0c78c236 100644 --- a/src/gallium/drivers/i965simple/brw_draw_upload.c +++ b/src/gallium/drivers/i965simple/brw_draw_upload.c @@ -256,7 +256,7 @@ boolean brw_upload_vertex_elements( struct brw_context *brw ) struct brw_vertex_element_packet vep; unsigned i; - unsigned nr_enabled = brw->attribs.VertexProgram->program.num_inputs; + unsigned nr_enabled = brw->attribs.VertexProgram->info.num_inputs; memset(&vep, 0, sizeof(vep)); diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c new file mode 100644 index 0000000000..5be369fe52 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -0,0 +1,235 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" + +#include "brw_context.h" +#include "brw_screen.h" +#include "brw_tex_layout.h" + + +static const char * +brw_get_vendor( struct pipe_screen *screen ) +{ + return "Tungsten Graphics, Inc."; +} + + +static const char * +brw_get_name( struct pipe_screen *screen ) +{ + static char buffer[128]; + const char *chipset; + + switch (brw_screen(screen)->pci_id) { + case PCI_CHIP_I965_Q: + chipset = "Intel(R) 965Q"; + break; + case PCI_CHIP_I965_G: + case PCI_CHIP_I965_G_1: + chipset = "Intel(R) 965G"; + break; + case PCI_CHIP_I965_GM: + chipset = "Intel(R) 965GM"; + break; + case PCI_CHIP_I965_GME: + chipset = "Intel(R) 965GME/GLE"; + break; + default: + chipset = "unknown"; + break; + } + + sprintf(buffer, "i965 (chipset: %s)", chipset); + return buffer; +} + + +static int +brw_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 11; /* max 1024x1024 */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 11; /* max 1024x1024 */ + default: + return 0; + } +} + + +static float +brw_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 7.5; + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 4.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + + default: + return 0; + } +} + + +static boolean +brw_is_format_supported( struct pipe_screen *screen, + enum pipe_format format, uint type ) +{ +#if 0 + /* XXX: This is broken -- rewrite if still needed. */ + static const unsigned tex_supported[] = { + PIPE_FORMAT_U_R8_G8_B8_A8, + PIPE_FORMAT_U_A8_R8_G8_B8, + PIPE_FORMAT_U_R5_G6_B5, + PIPE_FORMAT_U_L8, + PIPE_FORMAT_U_A8, + PIPE_FORMAT_U_I8, + PIPE_FORMAT_U_L8_A8, + PIPE_FORMAT_YCBCR, + PIPE_FORMAT_YCBCR_REV, + PIPE_FORMAT_S8_Z24, + }; + + + /* Actually a lot more than this - add later: + */ + static const unsigned render_supported[] = { + PIPE_FORMAT_U_A8_R8_G8_B8, + PIPE_FORMAT_U_R5_G6_B5, + }; + + /* + */ + static const unsigned z_stencil_supported[] = { + PIPE_FORMAT_U_Z16, + PIPE_FORMAT_U_Z32, + PIPE_FORMAT_S8_Z24, + }; + + switch (type) { + case PIPE_RENDER_FORMAT: + *numFormats = Elements(render_supported); + return render_supported; + + case PIPE_TEX_FORMAT: + *numFormats = Elements(tex_supported); + return render_supported; + + case PIPE_Z_STENCIL_FORMAT: + *numFormats = Elements(render_supported); + return render_supported; + + default: + *numFormats = 0; + return NULL; + } +#else + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + return TRUE; + default: + return FALSE; + }; + return FALSE; +#endif +} + + +static void +brw_destroy_screen( struct pipe_screen *screen ) +{ + FREE(screen); +} + + +/** + * Create a new brw_screen object + */ +struct pipe_screen * +brw_create_screen(struct pipe_winsys *winsys, uint pci_id) +{ + struct brw_screen *brwscreen = CALLOC_STRUCT(brw_screen); + + if (!brwscreen) + return NULL; + + brwscreen->pci_id = pci_id; + + brwscreen->screen.winsys = winsys; + + brwscreen->screen.destroy = brw_destroy_screen; + + brwscreen->screen.get_name = brw_get_name; + brwscreen->screen.get_vendor = brw_get_vendor; + brwscreen->screen.get_param = brw_get_param; + brwscreen->screen.get_paramf = brw_get_paramf; + brwscreen->screen.is_format_supported = brw_is_format_supported; + + brw_init_screen_texture_funcs(&brwscreen->screen); + + return &brwscreen->screen; +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_surface.c b/src/gallium/drivers/i965simple/brw_screen.h index 287610b76b..d3c70387e6 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_surface.c +++ b/src/gallium/drivers/i965simple/brw_screen.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -26,46 +26,43 @@ **************************************************************************/ -#include "pipe/p_inlines.h" -#include "cell_context.h" -#include "cell_state.h" +#ifndef BRW_SCREEN_H +#define BRW_SCREEN_H -void -cell_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) +#include "pipe/p_screen.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Subclass of pipe_screen + */ +struct brw_screen { - struct cell_context *cell = cell_context(pipe); - - if (1 /*memcmp(&cell->framebuffer, fb, sizeof(*fb))*/) { - struct pipe_surface *csurf = fb->cbufs[0]; - struct pipe_surface *zsurf = fb->zsbuf; - uint i; - - /* unmap old surfaces */ - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - if (cell->framebuffer.cbufs[i] && cell->cbuf_map[i]) { - pipe_surface_unmap(cell->framebuffer.cbufs[i]); - cell->cbuf_map[i] = NULL; - } - } - - if (cell->framebuffer.zsbuf && cell->zsbuf_map) { - pipe_surface_unmap(cell->framebuffer.zsbuf); - cell->zsbuf_map = NULL; - } - - /* update my state */ - cell->framebuffer = *fb; - - /* map new surfaces */ - if (csurf) - cell->cbuf_map[0] = pipe_surface_map(csurf); - - if (zsurf) - cell->zsbuf_map = pipe_surface_map(zsurf); - - cell->dirty |= CELL_NEW_FRAMEBUFFER; - } + struct pipe_screen screen; + + uint pci_id; +}; + + +/** cast wrapper */ +static INLINE struct brw_screen * +brw_screen(struct pipe_screen *pscreen) +{ + return (struct brw_screen *) pscreen; +} + + +extern struct pipe_screen * +brw_create_screen(struct pipe_winsys *winsys, uint pci_id); + + +#ifdef __cplusplus } +#endif +#endif /* BRW_SCREEN_H */ diff --git a/src/gallium/drivers/i965simple/brw_sf.c b/src/gallium/drivers/i965simple/brw_sf.c index 7c83b81c85..c3b815a82b 100644 --- a/src/gallium/drivers/i965simple/brw_sf.c +++ b/src/gallium/drivers/i965simple/brw_sf.c @@ -133,7 +133,7 @@ static void upload_sf_prog( struct brw_context *brw ) key.vp_output_count = brw->vs.prog_data->outputs_written; /* BRW_NEW_FS */ - key.fp_input_count = brw->attribs.FragmentProgram->info.nr_regs[TGSI_FILE_INPUT]; + key.fp_input_count = brw->attribs.FragmentProgram->info.file_max[TGSI_FILE_INPUT] + 1; /* BRW_NEW_REDUCED_PRIMITIVE */ diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c index a762a870fe..f4694a4433 100644 --- a/src/gallium/drivers/i965simple/brw_shader_info.c +++ b/src/gallium/drivers/i965simple/brw_shader_info.c @@ -6,8 +6,9 @@ #include "tgsi/util/tgsi_parse.h" - - +/** + * XXX this obsolete new and no longer compiled. + */ void brw_shader_info(const struct tgsi_token *tokens, struct brw_shader_info *info ) { diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index f746d1cc57..6744a8aa4f 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -32,6 +32,7 @@ #include "pipe/p_winsys.h" #include "pipe/p_util.h" +#include "pipe/p_inlines.h" #include "pipe/p_shader_tokens.h" #include "tgsi/util/tgsi_dump.h" @@ -174,8 +175,12 @@ static void * brw_create_fs_state(struct pipe_context *pipe, brw_fp->program = *shader; brw_fp->id = brw_context(pipe)->program_id++; + tgsi_scan_shader(shader->tokens, &brw_fp->info); + +#if 0 brw_shader_info(shader->tokens, - &brw_fp->info); + &brw_fp->info2); +#endif tgsi_dump(shader->tokens, 0); @@ -210,9 +215,13 @@ static void *brw_create_vs_state(struct pipe_context *pipe, */ brw_vp->program = *shader; brw_vp->id = brw_context(pipe)->program_id++; - brw_shader_info(shader->tokens, - &brw_vp->info); + tgsi_scan_shader(shader->tokens, &brw_vp->info); + +#if 0 + brw_shader_info(shader->tokens, + &brw_vp->info2); +#endif tgsi_dump(shader->tokens, 0); return (void *)brw_vp; @@ -327,7 +336,8 @@ static void brw_set_sampler_texture(struct pipe_context *pipe, { struct brw_context *brw = brw_context(pipe); - brw->attribs.Texture[unit] = (struct brw_texture*)texture; /* ptr, not struct */ + pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[unit], + texture); brw->state.dirty.brw |= BRW_NEW_TEXTURE; } diff --git a/src/gallium/drivers/i965simple/brw_state.h b/src/gallium/drivers/i965simple/brw_state.h index 258e9a556e..de0a6371b8 100644 --- a/src/gallium/drivers/i965simple/brw_state.h +++ b/src/gallium/drivers/i965simple/brw_state.h @@ -148,11 +148,4 @@ void brw_invalidate_pools( struct brw_context *brw ); void brw_clear_batch_cache_flush( struct brw_context *brw ); -/* brw_shader_info.c - */ - -void brw_shader_info(const struct tgsi_token *tokens, - struct brw_shader_info *info ); - - #endif diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index 376a42b1a6..c99a91dcf7 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -35,47 +35,6 @@ #include "util/p_tile.h" -/* - * XXX note: same as code in sp_surface.c - */ -static struct pipe_surface * -brw_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - struct brw_texture *tex = (struct brw_texture *)pt; - struct pipe_surface *ps; - unsigned offset; /* in bytes */ - - offset = tex->level_offset[level]; - - if (pt->target == PIPE_TEXTURE_CUBE) { - offset += tex->image_offset[level][face] * pt->cpp; - } - else if (pt->target == PIPE_TEXTURE_3D) { - offset += tex->image_offset[level][zslice] * pt->cpp; - } - else { - assert(face == 0); - assert(zslice == 0); - } - - ps = pipe->winsys->surface_alloc(pipe->winsys); - if (ps) { - assert(ps->format); - assert(ps->refcount); - pipe_buffer_reference(pipe->winsys, &ps->buffer, tex->buffer); - ps->format = pt->format; - ps->cpp = pt->cpp; - ps->width = pt->width[level]; - ps->height = pt->height[level]; - ps->pitch = tex->pitch; - ps->offset = offset; - } - return ps; -} - - /* Upload data to a rectangular sub-region. Lots of choices how to do this: * * - memcpy by span to current destination @@ -201,10 +160,11 @@ brw_surface_fill(struct pipe_context *pipe, } } + void brw_init_surface_functions(struct brw_context *brw) { - brw->pipe.get_tex_surface = brw_get_tex_surface; + (void) brw_surface_data; /* silence warning */ brw->pipe.surface_copy = brw_surface_copy; brw->pipe.surface_fill = brw_surface_fill; } diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 90561f1307..b24ac87c37 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -71,8 +71,6 @@ static unsigned minify( unsigned d ) } -static boolean brw_miptree_layout(struct pipe_context *, struct brw_texture *); - static void intel_miptree_set_image_offset(struct brw_texture *tex, unsigned level, unsigned img, @@ -199,7 +197,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex) } } -static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture *tex) +static boolean brw_miptree_layout(struct brw_texture *tex) { struct pipe_texture *pt = &tex->base; /* XXX: these vary depending on image format: @@ -300,19 +298,22 @@ static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture } -struct pipe_texture * -brw_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat) +static struct pipe_texture * +brw_texture_create_screen(struct pipe_screen *screen, + const struct pipe_texture *templat) { + struct pipe_winsys *ws = screen->winsys; struct brw_texture *tex = CALLOC_STRUCT(brw_texture); if (tex) { tex->base = *templat; + tex->base.refcount = 1; - if (brw_miptree_layout(pipe, tex)) - tex->buffer = pipe->winsys->buffer_create(pipe->winsys, 64, - PIPE_BUFFER_USAGE_PIXEL, - tex->pitch * tex->base.cpp * - tex->total_height); + if (brw_miptree_layout(tex)) + tex->buffer = ws->buffer_create(ws, 64, + PIPE_BUFFER_USAGE_PIXEL, + tex->pitch * tex->base.cpp * + tex->total_height); if (!tex->buffer) { FREE(tex); @@ -323,8 +324,10 @@ brw_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat return &tex->base; } -void -brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) + +static void +brw_texture_release_screen(struct pipe_screen *screen, + struct pipe_texture **pt) { if (!*pt) return; @@ -334,6 +337,7 @@ brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); */ if (--(*pt)->refcount <= 0) { + struct pipe_winsys *ws = screen->winsys; struct brw_texture *tex = (struct brw_texture *)*pt; uint i; @@ -341,7 +345,7 @@ brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); */ - pipe_buffer_reference(pipe->winsys, &tex->buffer, NULL); + pipe_buffer_reference(ws, &tex->buffer, NULL); for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) if (tex->image_offset[i]) @@ -351,3 +355,66 @@ brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) } *pt = NULL; } + + +static void +brw_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) +{ + /* no-op? */ +} + + +static struct pipe_surface * +brw_get_tex_surface_screen(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct pipe_winsys *ws = screen->winsys; + struct brw_texture *tex = (struct brw_texture *)pt; + struct pipe_surface *ps; + unsigned offset; /* in bytes */ + + offset = tex->level_offset[level]; + + if (pt->target == PIPE_TEXTURE_CUBE) { + offset += tex->image_offset[level][face] * pt->cpp; + } + else if (pt->target == PIPE_TEXTURE_3D) { + offset += tex->image_offset[level][zslice] * pt->cpp; + } + else { + assert(face == 0); + assert(zslice == 0); + } + + ps = ws->surface_alloc(ws); + if (ps) { + assert(ps->format); + assert(ps->refcount); + pipe_buffer_reference(ws, &ps->buffer, tex->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = tex->pitch; + ps->offset = offset; + } + return ps; +} + + +void +brw_init_texture_functions(struct brw_context *brw) +{ + brw->pipe.texture_update = brw_texture_update; +} + + +void +brw_init_screen_texture_funcs(struct pipe_screen *screen) +{ + screen->texture_create = brw_texture_create_screen; + screen->texture_release = brw_texture_release_screen; + screen->get_tex_surface = brw_get_tex_surface_screen; +} + diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.h b/src/gallium/drivers/i965simple/brw_tex_layout.h index cfd6b1ef3a..a6b6ba8146 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.h +++ b/src/gallium/drivers/i965simple/brw_tex_layout.h @@ -1,15 +1,44 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + + #ifndef BRW_TEX_LAYOUT_H #define BRW_TEX_LAYOUT_H -#include "pipe/p_compiler.h" -struct pipe_context; -struct pipe_texture; +struct brw_context; +struct pipe_screen; + -extern struct pipe_texture * -brw_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat); +extern void +brw_init_texture_functions(struct brw_context *brw); extern void -brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); +brw_init_screen_texture_funcs(struct pipe_screen *screen); + #endif diff --git a/src/gallium/drivers/i965simple/brw_vs.c b/src/gallium/drivers/i965simple/brw_vs.c index 738c6346d5..92327e896d 100644 --- a/src/gallium/drivers/i965simple/brw_vs.c +++ b/src/gallium/drivers/i965simple/brw_vs.c @@ -50,8 +50,8 @@ static void do_vs_prog( struct brw_context *brw, brw_init_compile(&c.func); c.vp = vp; - c.prog_data.outputs_written = vp->program.num_outputs; - c.prog_data.inputs_read = vp->program.num_inputs; + c.prog_data.outputs_written = vp->info.num_outputs; + c.prog_data.inputs_read = vp->info.num_inputs; #if 0 if (c.key.copy_edgeflag) { diff --git a/src/gallium/drivers/i965simple/brw_vs.h b/src/gallium/drivers/i965simple/brw_vs.h index 0e58f043b0..070f9dfcae 100644 --- a/src/gallium/drivers/i965simple/brw_vs.h +++ b/src/gallium/drivers/i965simple/brw_vs.h @@ -52,7 +52,7 @@ struct brw_vs_compile { struct brw_vs_prog_key key; struct brw_vs_prog_data prog_data; - struct brw_vertex_program *vp; + const struct brw_vertex_program *vp; unsigned nr_inputs; diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c index 05df4860ed..9020fcc001 100644 --- a/src/gallium/drivers/i965simple/brw_vs_emit.c +++ b/src/gallium/drivers/i965simple/brw_vs_emit.c @@ -86,7 +86,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c, /* Allocate input regs: */ - c->nr_inputs = c->vp->program.num_inputs; + c->nr_inputs = c->vp->info.num_inputs; for (i = 0; i < c->nr_inputs; i++) { c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0); reg++; @@ -99,7 +99,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c, c->nr_outputs = 0; c->first_output = reg; mrf = 4; - for (i = 0; i < c->vp->program.num_outputs; i++) { + for (i = 0; i < c->vp->info.num_outputs; i++) { c->nr_outputs++; #if 0 if (i == VERT_RESULT_HPOS) { @@ -1051,7 +1051,7 @@ static void process_instruction(struct brw_vs_compile *c, { struct brw_reg args[3], dst; struct brw_compile *p = &c->func; - struct brw_indirect stack_index = brw_indirect(0, 0); + /*struct brw_indirect stack_index = brw_indirect(0, 0);*/ unsigned i; unsigned index; unsigned file; diff --git a/src/gallium/drivers/i965simple/brw_winsys.h b/src/gallium/drivers/i965simple/brw_winsys.h index 3523a58614..b67bd73750 100644 --- a/src/gallium/drivers/i965simple/brw_winsys.h +++ b/src/gallium/drivers/i965simple/brw_winsys.h @@ -53,6 +53,8 @@ struct pipe_buffer; struct pipe_fence_handle; struct pipe_winsys; +struct pipe_screen; + /* The pipe driver currently understands the following chipsets: */ @@ -181,7 +183,7 @@ struct brw_winsys { #define BRW_BUFFER_USAGE_LIT_VERTEX (PIPE_BUFFER_USAGE_CUSTOM << 0) -struct pipe_context *brw_create(struct pipe_winsys *, +struct pipe_context *brw_create(struct pipe_screen *, struct brw_winsys *, unsigned pci_id); diff --git a/src/gallium/drivers/i965simple/brw_wm.c b/src/gallium/drivers/i965simple/brw_wm.c index 539b170744..1c4b5b5ede 100644 --- a/src/gallium/drivers/i965simple/brw_wm.c +++ b/src/gallium/drivers/i965simple/brw_wm.c @@ -94,11 +94,11 @@ static void brw_wm_populate_key( struct brw_context *brw, /* Build the index for table lookup */ /* BRW_NEW_DEPTH_STENCIL */ - if (fp->UsesKill || + if (fp->info.uses_kill || brw->attribs.DepthStencil->alpha.enabled) lookup |= IZ_PS_KILL_ALPHATEST_BIT; - if (fp->ComputesDepth) + if (fp->info.writes_z) lookup |= IZ_PS_COMPUTES_DEPTH_BIT; if (brw->attribs.DepthStencil->depth.enabled) diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c index 97418a52e7..74ccfd494a 100644 --- a/src/gallium/drivers/i965simple/brw_wm_decl.c +++ b/src/gallium/drivers/i965simple/brw_wm_decl.c @@ -259,9 +259,12 @@ static void prealloc_reg(struct brw_wm_compile *c) /* Then a copy of our part of the CURBE entry: */ { - int nr_constants = c->fp->info.nr_regs[TGSI_FILE_CONSTANT]; + int nr_constants = c->fp->info.file_max[TGSI_FILE_CONSTANT] + 1; int index = 0; + /* XXX number of constants, or highest numbered constant? */ + assert(nr_constants == c->fp->info.file_count[TGSI_FILE_CONSTANT]); + c->prog_data.max_const = 4*nr_constants; for (i = 0; i < nr_constants; i++) { for (j = 0; j < 4; j++, index++) @@ -282,13 +285,14 @@ static void prealloc_reg(struct brw_wm_compile *c) /* Next we receive the plane coefficients for parameter * interpolation: */ - for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_INPUT]; i++) { + assert(c->fp->info.file_max[TGSI_FILE_INPUT] == c->fp->info.num_inputs); + for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++) { c->payload_coef[i] = brw_vec8_grf(c->reg_index, 0); c->reg_index += 2; } c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; - c->prog_data.urb_read_length = (c->fp->program.num_inputs + 1) * 2; + c->prog_data.urb_read_length = (c->fp->info.num_inputs + 1) * 2; c->prog_data.curb_read_length = nr_curbe_regs; /* That's the end of the payload, now we can start allocating registers. @@ -302,11 +306,17 @@ static void prealloc_reg(struct brw_wm_compile *c) /* Now allocate room for the interpolated inputs and staging * registers for the outputs: */ - for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_INPUT]; i++) + /* XXX do we want to loop over the _number_ of inputs/outputs or loop + * to the highest input/output index that's used? + * Probably the same, actually. + */ + assert(c->fp->info.file_max[TGSI_FILE_INPUT] + 1 == c->fp->info.num_inputs); + assert(c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1 == c->fp->info.num_outputs); + for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++) for (j = 0; j < 4; j++) c->wm_regs[TGSI_FILE_INPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); - for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_OUTPUT]; i++) + for (i = 0; i < c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1; i++) for (j = 0; j < 4; j++) c->wm_regs[TGSI_FILE_OUTPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); diff --git a/src/gallium/drivers/i965simple/brw_wm_state.c b/src/gallium/drivers/i965simple/brw_wm_state.c index 5ccd488842..f3aa36b07f 100644 --- a/src/gallium/drivers/i965simple/brw_wm_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_state.c @@ -117,11 +117,11 @@ static void upload_wm_unit(struct brw_context *brw ) if (fp->UsesDepth) wm.wm5.program_uses_depth = 1; /* as far as we can tell */ - if (fp->ComputesDepth) + if (fp->info.writes_z) wm.wm5.program_computes_depth = 1; /* BRW_NEW_ALPHA_TEST */ - if (fp->UsesKill || + if (fp->info.uses_kill || brw->attribs.DepthStencil->alpha.enabled) wm.wm5.program_uses_killpixel = 1; diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h index 5998945677..c80461038b 100644 --- a/src/gallium/drivers/nouveau/nouveau_class.h +++ b/src/gallium/drivers/nouveau/nouveau_class.h @@ -4926,9 +4926,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV40TCL_VIEWPORT_SCALE_Y 0x00000a34 #define NV40TCL_VIEWPORT_SCALE_Z 0x00000a38 #define NV40TCL_VIEWPORT_SCALE_W 0x00000a3c -#define NV40TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000a60 +#define NV40TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000a60 #define NV40TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000a64 -#define NV40TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000a68 +#define NV40TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000a68 #define NV40TCL_DEPTH_FUNC 0x00000a6c #define NV40TCL_DEPTH_FUNC_NEVER 0x00000200 #define NV40TCL_DEPTH_FUNC_LESS 0x00000201 diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h index 07c31b014a..439c7e4734 100644 --- a/src/gallium/drivers/nouveau/nouveau_stateobj.h +++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h @@ -32,7 +32,7 @@ so_new(unsigned push, unsigned reloc) struct nouveau_stateobj *so; so = MALLOC(sizeof(struct nouveau_stateobj)); - so->refcount = 1; + so->refcount = 0; so->push = MALLOC(sizeof(unsigned) * push); so->reloc = MALLOC(sizeof(struct nouveau_stateobj_reloc) * reloc); @@ -45,22 +45,19 @@ so_new(unsigned push, unsigned reloc) static INLINE void so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso) { - struct nouveau_stateobj *so; - - so = *pso; - if (so) { - if (--so->refcount <= 0) { - free(so->push); - free(so->reloc); - free(so); - } - *pso = NULL; - } + struct nouveau_stateobj *so = *pso; if (ref) { ref->refcount++; - *pso = ref; } + + if (so && --so->refcount <= 0) { + free(so->push); + free(so->reloc); + free(so); + } + + *pso = ref; } static INLINE void diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index b5e470cfaa..44c8bb919b 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -49,13 +49,25 @@ struct nouveau_winsys { unsigned, unsigned, unsigned, unsigned, unsigned); }; +extern struct pipe_screen * +nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *, + unsigned chipset); + extern struct pipe_context * -nv30_create(struct pipe_winsys *, struct nouveau_winsys *, unsigned chipset); +nv30_create(struct pipe_screen *, unsigned pctx_id); + +extern struct pipe_screen * +nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *, + unsigned chipset); extern struct pipe_context * -nv40_create(struct pipe_winsys *, struct nouveau_winsys *, unsigned chipset); +nv40_create(struct pipe_screen *, unsigned pctx_id); + +extern struct pipe_screen * +nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *, + unsigned chipset); extern struct pipe_context * -nv50_create(struct pipe_winsys *, struct nouveau_winsys *, unsigned chipset); +nv50_create(struct pipe_screen *, unsigned pctx_id); #endif diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index b7c252fc98..3f80fb87c9 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -11,6 +11,7 @@ DRIVER_SOURCES = \ nv30_fragtex.c \ nv30_miptree.c \ nv30_query.c \ + nv30_screen.c \ nv30_state.c \ nv30_state_emit.c \ nv30_surface.c \ diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index e9afeb8017..28d3f057cc 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -4,80 +4,7 @@ #include "pipe/p_util.h" #include "nv30_context.h" - -static const char * -nv30_get_name(struct pipe_context *pipe) -{ - struct nv30_context *nv30 = nv30_context(pipe); - static char buffer[128]; - - snprintf(buffer, sizeof(buffer), "NV%02X", nv30->chipset); - return buffer; -} - -static const char * -nv30_get_vendor(struct pipe_context *pipe) -{ - return "nouveau"; -} - -static int -nv30_get_param(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 16; - case PIPE_CAP_NPOT_TEXTURES: - return 0; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_S3TC: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 1; - case PIPE_CAP_POINT_SPRITE: - return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 2; - case PIPE_CAP_OCCLUSION_QUERY: - return 1; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 13; - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 10; - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 13; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0; - } -} - -static float -nv30_get_paramf(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 10.0; - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 64.0; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 4.0; - case PIPE_CAP_BITMAP_TEXCOORD_BIAS: - return 0.0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0.0; - } -} +#include "nv30_screen.h" static void nv30_flush(struct pipe_context *pipe, unsigned flags) @@ -338,9 +265,11 @@ nv30_init_hwctx(struct nv30_context *nv30, int rankine_class) #define NV35TCL_CHIPSET_3X_MASK 0x000001e0 struct pipe_context * -nv30_create(struct pipe_winsys *pipe_winsys, struct nouveau_winsys *nvws, - unsigned chipset) +nv30_create(struct pipe_screen *screen, unsigned pctx_id) { + struct pipe_winsys *pipe_winsys = screen->winsys; + struct nouveau_winsys *nvws = nv30_screen(screen)->nvws; + unsigned chipset = nv30_screen(screen)->chipset; struct nv30_context *nv30; int rankine_class = 0, ret; @@ -404,12 +333,9 @@ nv30_create(struct pipe_winsys *pipe_winsys, struct nouveau_winsys *nvws, /* Pipe context setup */ nv30->pipe.winsys = pipe_winsys; + nv30->pipe.screen = screen; nv30->pipe.destroy = nv30_destroy; - nv30->pipe.get_name = nv30_get_name; - nv30->pipe.get_vendor = nv30_get_vendor; - nv30->pipe.get_param = nv30_get_param; - nv30->pipe.get_paramf = nv30_get_paramf; nv30->pipe.draw_arrays = nv30_draw_arrays; nv30->pipe.draw_elements = nv30_draw_elements; @@ -420,7 +346,6 @@ nv30_create(struct pipe_winsys *pipe_winsys, struct nouveau_winsys *nvws, nv30_init_query_functions(nv30); nv30_init_surface_functions(nv30); nv30_init_state_functions(nv30); - nv30_init_miptree_functions(nv30); nv30->draw = draw_create(); assert(nv30->draw); diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index d6d16ee868..c63847a087 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -91,7 +91,7 @@ nv30_context(struct pipe_context *pipe) extern void nv30_init_state_functions(struct nv30_context *nv30); extern void nv30_init_surface_functions(struct nv30_context *nv30); -extern void nv30_init_miptree_functions(struct nv30_context *nv30); +extern void nv30_init_miptree_functions(struct pipe_screen *screen); extern void nv30_init_query_functions(struct nv30_context *nv30); /* nv30_draw.c */ diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index 5fb89f4cfd..23bcef08eb 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -4,6 +4,7 @@ #include "pipe/p_inlines.h" #include "nv30_context.h" +#include "nv30_screen.h" static void nv30_miptree_layout(struct nv30_miptree *nv30mt) @@ -54,9 +55,9 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt) } static void -nv30_miptree_create(struct pipe_context *pipe, struct pipe_texture **pt) +nv30_miptree_create(struct pipe_screen *screen, struct pipe_texture **pt) { - struct pipe_winsys *ws = pipe->winsys; + struct pipe_winsys *ws = screen->winsys; struct nv30_miptree *nv30mt; nv30mt = realloc(*pt, sizeof(struct nv30_miptree)); @@ -77,9 +78,9 @@ nv30_miptree_create(struct pipe_context *pipe, struct pipe_texture **pt) } static void -nv30_miptree_release(struct pipe_context *pipe, struct pipe_texture **pt) +nv30_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt) { - struct pipe_winsys *ws = pipe->winsys; + struct pipe_winsys *ws = screen->winsys; struct pipe_texture *mt = *pt; *pt = NULL; @@ -96,10 +97,42 @@ nv30_miptree_release(struct pipe_context *pipe, struct pipe_texture **pt) } } +static struct pipe_surface * +nv30_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct pipe_winsys *ws = screen->winsys; + struct nv30_miptree *nv30mt = (struct nv30_miptree *)pt; + struct pipe_surface *ps; + + ps = ws->surface_alloc(ws); + if (!ps) + return NULL; + pipe_buffer_reference(ws, &ps->buffer, nv30mt->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = nv30mt->level[level].pitch / ps->cpp; + + if (pt->target == PIPE_TEXTURE_CUBE) { + ps->offset = nv30mt->level[level].image_offset[face]; + } else + if (pt->target == PIPE_TEXTURE_3D) { + ps->offset = nv30mt->level[level].image_offset[zslice]; + } else { + ps->offset = nv30mt->level[level].image_offset[0]; + } + + return ps; +} void -nv30_init_miptree_functions(struct nv30_context *nv30) +nv30_init_miptree_functions(struct pipe_screen *screen) { - nv30->pipe.texture_create = nv30_miptree_create; - nv30->pipe.texture_release = nv30_miptree_release; + struct nv30_screen *nv30screen = nv30_screen(screen); + + nv30screen->screen.texture_create = nv30_miptree_create; + nv30screen->screen.texture_release = nv30_miptree_release; + nv30screen->screen.get_tex_surface = nv30_miptree_surface_get; } diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c new file mode 100644 index 0000000000..39f2ac1af5 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -0,0 +1,153 @@ +#include "pipe/p_screen.h" +#include "pipe/p_util.h" + +#include "nv30_context.h" +#include "nv30_screen.h" + +static const char * +nv30_screen_get_name(struct pipe_screen *screen) +{ + struct nv30_screen *nv30screen = nv30_screen(screen); + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", nv30screen->chipset); + return buffer; +} + +static const char * +nv30_screen_get_vendor(struct pipe_screen *screen) +{ + return "nouveau"; +} + +static int +nv30_screen_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 16; + case PIPE_CAP_NPOT_TEXTURES: + return 0; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 2; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv30_screen_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 4.0; + case PIPE_CAP_BITMAP_TEXCOORD_BIAS: + return 0.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static boolean +nv30_screen_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, uint type) +{ + switch (type) { + case PIPE_SURFACE: + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + break; + case PIPE_TEXTURE: + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_U_L8: + case PIPE_FORMAT_U_A8: + case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return TRUE; + default: + break; + } + break; + default: + assert(0); + }; + + return FALSE; +} + +static void +nv30_screen_destroy(struct pipe_screen *screen) +{ + FREE(screen); +} + +struct pipe_screen * +nv30_screen_create(struct pipe_winsys *winsys, struct nouveau_winsys *nvws, + unsigned chipset) +{ + struct nv30_screen *nv30screen = CALLOC_STRUCT(nv30_screen); + + if (!nv30screen) + return NULL; + + nv30screen->chipset = chipset; + nv30screen->nvws = nvws; + + nv30screen->screen.winsys = winsys; + + nv30screen->screen.destroy = nv30_screen_destroy; + + nv30screen->screen.get_name = nv30_screen_get_name; + nv30screen->screen.get_vendor = nv30_screen_get_vendor; + nv30screen->screen.get_param = nv30_screen_get_param; + nv30screen->screen.get_paramf = nv30_screen_get_paramf; + nv30screen->screen.is_format_supported = + nv30_screen_is_format_supported; + + nv30_init_miptree_functions(&nv30screen->screen); + return &nv30screen->screen; +} + diff --git a/src/gallium/drivers/nv30/nv30_screen.h b/src/gallium/drivers/nv30/nv30_screen.h new file mode 100644 index 0000000000..f878f81e11 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_screen.h @@ -0,0 +1,19 @@ +#ifndef __NV30_SCREEN_H__ +#define __NV30_SCREEN_H__ + +#include "pipe/p_screen.h" + +struct nv30_screen { + struct pipe_screen screen; + + struct nouveau_winsys *nvws; + unsigned chipset; +}; + +static INLINE struct nv30_screen * +nv30_screen(struct pipe_screen *screen) +{ + return (struct nv30_screen *)screen; +} + +#endif diff --git a/src/gallium/drivers/nv30/nv30_surface.c b/src/gallium/drivers/nv30/nv30_surface.c index 974965679f..b20a3dd4c1 100644 --- a/src/gallium/drivers/nv30/nv30_surface.c +++ b/src/gallium/drivers/nv30/nv30_surface.c @@ -33,76 +33,6 @@ #include "pipe/p_inlines.h" #include "util/p_tile.h" -static boolean -nv30_surface_format_supported(struct pipe_context *pipe, - enum pipe_format format, uint type) -{ - switch (type) { - case PIPE_SURFACE: - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z16_UNORM: - return TRUE; - default: - break; - } - break; - case PIPE_TEXTURE: - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_A1R5G5B5_UNORM: - case PIPE_FORMAT_A4R4G4B4_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_U_L8: - case PIPE_FORMAT_U_A8: - case PIPE_FORMAT_U_I8: - case PIPE_FORMAT_U_A8_L8: - case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - return TRUE; - default: - break; - } - break; - default: - assert(0); - }; - - return FALSE; -} - -static struct pipe_surface * -nv30_get_tex_surface(struct pipe_context *pipe, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - struct pipe_winsys *ws = pipe->winsys; - struct nv30_miptree *nv30mt = (struct nv30_miptree *)pt; - struct pipe_surface *ps; - - ps = ws->surface_alloc(ws); - if (!ps) - return NULL; - pipe_buffer_reference(ws, &ps->buffer, nv30mt->buffer); - ps->format = pt->format; - ps->cpp = pt->cpp; - ps->width = pt->width[level]; - ps->height = pt->height[level]; - ps->pitch = nv30mt->level[level].pitch / ps->cpp; - - if (pt->target == PIPE_TEXTURE_CUBE) { - ps->offset = nv30mt->level[level].image_offset[face]; - } else - if (pt->target == PIPE_TEXTURE_3D) { - ps->offset = nv30mt->level[level].image_offset[zslice]; - } else { - ps->offset = nv30mt->level[level].image_offset[0]; - } - - return ps; -} - static void nv30_surface_copy(struct pipe_context *pipe, unsigned do_flip, struct pipe_surface *dest, unsigned destx, unsigned desty, @@ -130,8 +60,6 @@ nv30_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, void nv30_init_surface_functions(struct nv30_context *nv30) { - nv30->pipe.is_format_supported = nv30_surface_format_supported; - nv30->pipe.get_tex_surface = nv30_get_tex_surface; nv30->pipe.surface_copy = nv30_surface_copy; nv30->pipe.surface_fill = nv30_surface_fill; } diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index 2a9de4a2dc..3369a21574 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -11,8 +11,17 @@ DRIVER_SOURCES = \ nv40_fragtex.c \ nv40_miptree.c \ nv40_query.c \ + nv40_screen.c \ nv40_state.c \ + nv40_state_blend.c \ + nv40_state_clip.c \ nv40_state_emit.c \ + nv40_state_fb.c \ + nv40_state_rasterizer.c \ + nv40_state_scissor.c \ + nv40_state_stipple.c \ + nv40_state_viewport.c \ + nv40_state_zsa.c \ nv40_surface.c \ nv40_vbo.c \ nv40_vertprog.c diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index 8b5cc693de..203c843a01 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -4,84 +4,7 @@ #include "pipe/p_util.h" #include "nv40_context.h" - -#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf -#define NV4X_GRCLASS4497_CHIPSETS 0x00005450 -#define NV6X_GRCLASS4497_CHIPSETS 0x00000088 - -static const char * -nv40_get_name(struct pipe_context *pipe) -{ - struct nv40_context *nv40 = nv40_context(pipe); - static char buffer[128]; - - snprintf(buffer, sizeof(buffer), "NV%02X", nv40->chipset); - return buffer; -} - -static const char * -nv40_get_vendor(struct pipe_context *pipe) -{ - return "nouveau"; -} - -static int -nv40_get_param(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 16; - case PIPE_CAP_NPOT_TEXTURES: - return 1; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_S3TC: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 1; - case PIPE_CAP_POINT_SPRITE: - return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 4; - case PIPE_CAP_OCCLUSION_QUERY: - return 1; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 13; - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 10; - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 13; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0; - } -} - -static float -nv40_get_paramf(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 10.0; - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 64.0; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0; - case PIPE_CAP_BITMAP_TEXCOORD_BIAS: - return 0.0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0.0; - } -} +#include "nv40_screen.h" static void nv40_flush(struct pipe_context *pipe, unsigned flags) @@ -97,7 +20,7 @@ nv40_flush(struct pipe_context *pipe, unsigned flags) } if (flags & PIPE_FLUSH_WAIT) { - nvws->notifier_reset(nv40->hw->sync, 0); + nvws->notifier_reset(nv40->screen->sync, 0); BEGIN_RING(curie, 0x104, 1); OUT_RING (0); BEGIN_RING(curie, 0x100, 1); @@ -107,149 +30,7 @@ nv40_flush(struct pipe_context *pipe, unsigned flags) FIRE_RING(); if (flags & PIPE_FLUSH_WAIT) - nvws->notifier_wait(nv40->hw->sync, 0, 0, 2000); -} - -static void -nv40_channel_takedown(struct nv40_channel_context *cnv40) -{ - struct nouveau_winsys *nvws = cnv40->nvws; - - nvws->res_free(&cnv40->vp_exec_heap); - nvws->res_free(&cnv40->vp_data_heap); - nvws->res_free(&cnv40->query_heap); - nvws->notifier_free(&cnv40->query); - nvws->notifier_free(&cnv40->sync); - nvws->grobj_free(&cnv40->curie); - free(cnv40); -} - -static struct nv40_channel_context * -nv40_channel_init(struct pipe_winsys *ws, struct nouveau_winsys *nvws, - unsigned chipset) -{ - struct nv40_channel_context *cnv40 = NULL; - struct nouveau_stateobj *so; - unsigned curie_class = 0; - int ret; - - switch (chipset & 0xf0) { - case 0x40: - if (NV4X_GRCLASS4097_CHIPSETS & (1 << (chipset & 0x0f))) - curie_class = NV40TCL; - else - if (NV4X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) - curie_class = NV44TCL; - break; - case 0x60: - if (NV6X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) - curie_class = NV44TCL; - break; - default: - break; - } - - if (!curie_class) { - NOUVEAU_ERR("Unknown nv4x chipset: nv%02x\n", chipset); - return NULL; - } - - cnv40 = CALLOC(1, sizeof(struct nv40_channel_context)); - if (!cnv40) - return NULL; - cnv40->chipset = chipset; - cnv40->nvws = nvws; - - /* Notifier for sync purposes */ - ret = nvws->notifier_alloc(nvws, 1, &cnv40->sync); - if (ret) { - NOUVEAU_ERR("Error creating notifier object: %d\n", ret); - nv40_channel_takedown(cnv40); - return NULL; - } - - /* Query objects */ - ret = nvws->notifier_alloc(nvws, 32, &cnv40->query); - if (ret) { - NOUVEAU_ERR("Error initialising query objects: %d\n", ret); - nv40_channel_takedown(cnv40); - return NULL; - } - - ret = nvws->res_init(&cnv40->query_heap, 0, 32); - if (ret) { - NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); - nv40_channel_takedown(cnv40); - return NULL; - } - - /* Vtxprog resources */ - if (nvws->res_init(&cnv40->vp_exec_heap, 0, 512) || - nvws->res_init(&cnv40->vp_data_heap, 0, 256)) { - nv40_channel_takedown(cnv40); - return NULL; - } - - /* 3D object */ - ret = nvws->grobj_alloc(nvws, curie_class, &cnv40->curie); - if (ret) { - NOUVEAU_ERR("Error creating 3D object: %d\n", ret); - return FALSE; - } - - /* Static curie initialisation */ - so = so_new(128, 0); - so_method(so, cnv40->curie, NV40TCL_DMA_NOTIFY, 1); - so_data (so, cnv40->sync->handle); - so_method(so, cnv40->curie, NV40TCL_DMA_TEXTURE0, 2); - so_data (so, nvws->channel->vram->handle); - so_data (so, nvws->channel->gart->handle); - so_method(so, cnv40->curie, NV40TCL_DMA_COLOR1, 1); - so_data (so, nvws->channel->vram->handle); - so_method(so, cnv40->curie, NV40TCL_DMA_COLOR0, 2); - so_data (so, nvws->channel->vram->handle); - so_data (so, nvws->channel->vram->handle); - so_method(so, cnv40->curie, NV40TCL_DMA_VTXBUF0, 2); - so_data (so, nvws->channel->vram->handle); - so_data (so, nvws->channel->gart->handle); - so_method(so, cnv40->curie, NV40TCL_DMA_FENCE, 2); - so_data (so, 0); - so_data (so, cnv40->query->handle); - so_method(so, cnv40->curie, NV40TCL_DMA_UNK01AC, 2); - so_data (so, nvws->channel->vram->handle); - so_data (so, nvws->channel->vram->handle); - so_method(so, cnv40->curie, NV40TCL_DMA_COLOR2, 2); - so_data (so, nvws->channel->vram->handle); - so_data (so, nvws->channel->vram->handle); - - so_method(so, cnv40->curie, 0x1ea4, 3); - so_data (so, 0x00000010); - so_data (so, 0x01000100); - so_data (so, 0xff800006); - - /* vtxprog output routing */ - so_method(so, cnv40->curie, 0x1fc4, 1); - so_data (so, 0x06144321); - so_method(so, cnv40->curie, 0x1fc8, 2); - so_data (so, 0xedcba987); - so_data (so, 0x00000021); - so_method(so, cnv40->curie, 0x1fd0, 1); - so_data (so, 0x00171615); - so_method(so, cnv40->curie, 0x1fd4, 1); - so_data (so, 0x001b1a19); - - so_method(so, cnv40->curie, 0x1ef8, 1); - so_data (so, 0x0020ffff); - so_method(so, cnv40->curie, 0x1d64, 1); - so_data (so, 0x00d30000); - so_method(so, cnv40->curie, 0x1e94, 1); - so_data (so, 0x00000001); - - so_emit(nvws, so); - so_ref(NULL, &so); - nvws->push_flush(nvws->channel, 0); - - return cnv40; + nvws->notifier_wait(nv40->screen->sync, 0, 0, 2000); } static void @@ -259,40 +40,30 @@ nv40_destroy(struct pipe_context *pipe) if (nv40->draw) draw_destroy(nv40->draw); - - if (nv40->hw) { - if (--nv40->hw->refcount == 0) - nv40_channel_takedown(nv40->hw); - } - free(nv40); } struct pipe_context * -nv40_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, - unsigned chipset) +nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) { + struct nv40_screen *screen = nv40_screen(pscreen); + struct pipe_winsys *ws = pscreen->winsys; struct nv40_context *nv40; + unsigned chipset = screen->chipset; + struct nouveau_winsys *nvws = screen->nvws; nv40 = CALLOC(1, sizeof(struct nv40_context)); if (!nv40) return NULL; - - nv40->hw = nv40_channel_init(ws, nvws, chipset); - if (!nv40->hw) { - nv40_destroy(&nv40->pipe); - return NULL; - } + nv40->screen = screen; + nv40->pctx_id = pctx_id; nv40->chipset = chipset; nv40->nvws = nvws; nv40->pipe.winsys = ws; + nv40->pipe.screen = pscreen; nv40->pipe.destroy = nv40_destroy; - nv40->pipe.get_name = nv40_get_name; - nv40->pipe.get_vendor = nv40_get_vendor; - nv40->pipe.get_param = nv40_get_param; - nv40->pipe.get_paramf = nv40_get_paramf; nv40->pipe.draw_arrays = nv40_draw_arrays; nv40->pipe.draw_elements = nv40_draw_elements; nv40->pipe.clear = nv40_clear; diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index f511759e3b..e118776306 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -11,7 +11,7 @@ #include "nouveau/nouveau_gldefs.h" #define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv40_channel_context *ctx = nv40->hw + struct nv40_screen *ctx = nv40->screen #include "nouveau/nouveau_push.h" #include "nouveau/nouveau_stateobj.h" @@ -22,6 +22,45 @@ #define NOUVEAU_MSG(fmt, args...) \ fprintf(stderr, "nouveau: "fmt, ##args); +enum nv40_state_index { + NV40_STATE_FB = 0, + NV40_STATE_VIEWPORT = 1, + NV40_STATE_BLEND = 2, + NV40_STATE_RAST = 3, + NV40_STATE_ZSA = 4, + NV40_STATE_BCOL = 5, + NV40_STATE_CLIP = 6, + NV40_STATE_SCISSOR = 7, + NV40_STATE_STIPPLE = 8, + NV40_STATE_FRAGPROG = 9, + NV40_STATE_VERTPROG = 10, + NV40_STATE_FRAGTEX0 = 11, + NV40_STATE_FRAGTEX1 = 12, + NV40_STATE_FRAGTEX2 = 13, + NV40_STATE_FRAGTEX3 = 14, + NV40_STATE_FRAGTEX4 = 15, + NV40_STATE_FRAGTEX5 = 16, + NV40_STATE_FRAGTEX6 = 17, + NV40_STATE_FRAGTEX7 = 18, + NV40_STATE_FRAGTEX8 = 19, + NV40_STATE_FRAGTEX9 = 20, + NV40_STATE_FRAGTEX10 = 21, + NV40_STATE_FRAGTEX11 = 22, + NV40_STATE_FRAGTEX12 = 23, + NV40_STATE_FRAGTEX13 = 24, + NV40_STATE_FRAGTEX14 = 25, + NV40_STATE_FRAGTEX15 = 26, + NV40_STATE_VERTTEX0 = 27, + NV40_STATE_VERTTEX1 = 28, + NV40_STATE_VERTTEX2 = 29, + NV40_STATE_VERTTEX3 = 30, + NV40_STATE_VTXBUF = 31, + NV40_STATE_VTXFMT = 32, + NV40_STATE_MAX = 33 +}; + +#include "nv40_screen.h" + #define NV40_NEW_BLEND (1 << 0) #define NV40_NEW_RAST (1 << 1) #define NV40_NEW_ZSA (1 << 2) @@ -34,68 +73,70 @@ #define NV40_NEW_VERTPROG (1 << 9) #define NV40_NEW_FRAGPROG (1 << 10) #define NV40_NEW_ARRAYS (1 << 11) +#define NV40_NEW_UCP (1 << 12) -struct nv40_channel_context { - struct nouveau_winsys *nvws; - unsigned refcount; +#define NV40_FALLBACK_TNL (1 << 0) +#define NV40_FALLBACK_RAST (1 << 1) + +struct nv40_rasterizer_state { + struct pipe_rasterizer_state pipe; + struct nouveau_stateobj *so; +}; - unsigned chipset; +struct nv40_zsa_state { + struct pipe_depth_stencil_alpha_state pipe; + struct nouveau_stateobj *so; +}; + +struct nv40_blend_state { + struct pipe_blend_state pipe; + struct nouveau_stateobj *so; +}; - /* HW graphics objects */ - struct nouveau_grobj *curie; - struct nouveau_notifier *sync; - /* Query object resources */ - struct nouveau_notifier *query; - struct nouveau_resource *query_heap; +struct nv40_state { + unsigned scissor_enabled; + unsigned stipple_enabled; + unsigned fp_samplers; - /* Vtxprog resources */ - struct nouveau_resource *vp_exec_heap; - struct nouveau_resource *vp_data_heap; + uint64_t dirty; + struct nouveau_stateobj *hw[NV40_STATE_MAX]; }; struct nv40_context { struct pipe_context pipe; + struct nouveau_winsys *nvws; + struct nv40_screen *screen; + unsigned pctx_id; - struct nv40_channel_context *hw; struct draw_context *draw; int chipset; - uint32_t dirty; - + /* HW state derived from pipe states */ + struct nv40_state state; + unsigned fallback; + + /* Context state */ + unsigned dirty; + struct pipe_scissor_state scissor; + unsigned stipple[32]; + struct pipe_clip_state clip; + struct nv40_vertex_program *vertprog; + struct nv40_fragment_program *fragprog; + struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + struct nv40_rasterizer_state *rasterizer; + struct nv40_zsa_state *zsa; + struct nv40_blend_state *blend; + struct pipe_blend_color blend_colour; + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state framebuffer; + struct pipe_buffer *idxbuf; + unsigned idxbuf_format; struct nv40_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; struct nv40_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; unsigned dirty_samplers; - unsigned fp_samplers; - unsigned vp_samplers; - - struct nouveau_stateobj *so_framebuffer; - struct nouveau_stateobj *so_fragtex[16]; - struct nouveau_stateobj *so_vtxbuf; - struct nouveau_stateobj *so_blend; - struct nouveau_stateobj *so_rast; - struct nouveau_stateobj *so_zsa; - struct nouveau_stateobj *so_bcol; - struct nouveau_stateobj *so_scissor; - struct nouveau_stateobj *so_viewport; - struct nouveau_stateobj *so_stipple; - - struct { - struct nv40_vertex_program *active; - - struct nv40_vertex_program *current; - struct pipe_buffer *constant_buf; - } vertprog; - - struct { - struct nv40_fragment_program *active; - - struct nv40_fragment_program *current; - struct pipe_buffer *constant_buf; - } fragprog; - struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX]; struct pipe_vertex_element vtxelt[PIPE_ATTRIB_MAX]; }; @@ -106,27 +147,29 @@ nv40_context(struct pipe_context *pipe) return (struct nv40_context *)pipe; } +struct nv40_state_entry { + boolean (*validate)(struct nv40_context *nv40); + struct { + unsigned pipe; + unsigned hw; + } dirty; +}; + extern void nv40_init_state_functions(struct nv40_context *nv40); extern void nv40_init_surface_functions(struct nv40_context *nv40); extern void nv40_init_miptree_functions(struct nv40_context *nv40); extern void nv40_init_query_functions(struct nv40_context *nv40); +extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen); + /* nv40_draw.c */ extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40); /* nv40_vertprog.c */ -extern void nv40_vertprog_translate(struct nv40_context *, - struct nv40_vertex_program *); -extern void nv40_vertprog_bind(struct nv40_context *, - struct nv40_vertex_program *); extern void nv40_vertprog_destroy(struct nv40_context *, struct nv40_vertex_program *); /* nv40_fragprog.c */ -extern void nv40_fragprog_translate(struct nv40_context *, - struct nv40_fragment_program *); -extern void nv40_fragprog_bind(struct nv40_context *, - struct nv40_fragment_program *); extern void nv40_fragprog_destroy(struct nv40_context *, struct nv40_fragment_program *); @@ -136,6 +179,19 @@ extern void nv40_fragtex_bind(struct nv40_context *); /* nv40_state.c and friends */ extern void nv40_emit_hw_state(struct nv40_context *nv40); extern void nv40_state_tex_update(struct nv40_context *nv40); +extern struct nv40_state_entry nv40_state_clip; +extern struct nv40_state_entry nv40_state_rasterizer; +extern struct nv40_state_entry nv40_state_scissor; +extern struct nv40_state_entry nv40_state_stipple; +extern struct nv40_state_entry nv40_state_fragprog; +extern struct nv40_state_entry nv40_state_vertprog; +extern struct nv40_state_entry nv40_state_blend; +extern struct nv40_state_entry nv40_state_blend_colour; +extern struct nv40_state_entry nv40_state_zsa; +extern struct nv40_state_entry nv40_state_viewport; +extern struct nv40_state_entry nv40_state_framebuffer; +extern struct nv40_state_entry nv40_state_fragtex; +extern struct nv40_state_entry nv40_state_vbo; /* nv40_vbo.c */ extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 07a418c1e9..3c4ea7e99e 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -668,7 +668,7 @@ nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, return TRUE; } -void +static void nv40_fragprog_translate(struct nv40_context *nv40, struct nv40_fragment_program *fp) { @@ -750,28 +750,80 @@ nv40_fragprog_translate(struct nv40_context *nv40, fp->insn[fpc->inst_offset + 3] = 0x00000000; fp->translated = TRUE; - fp->on_hw = FALSE; out_err: tgsi_parse_free(&parse); free(fpc); } -void -nv40_fragprog_bind(struct nv40_context *nv40, struct nv40_fragment_program *fp) +static void +nv40_fragprog_upload(struct nv40_context *nv40, + struct nv40_fragment_program *fp) +{ + struct pipe_winsys *ws = nv40->pipe.winsys; + const uint32_t le = 1; + uint32_t *map; + int i; + + map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + +#if 0 + for (i = 0; i < fp->insn_len; i++) { + NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); + } +#endif + + if ((*(const uint8_t *)&le)) { + for (i = 0; i < fp->insn_len; i++) { + map[i] = fp->insn[i]; + } + } else { + /* Weird swapping for big-endian chips */ + for (i = 0; i < fp->insn_len; i++) { + map[i] = ((fp->insn[i] & 0xffff) << 16) | + ((fp->insn[i] >> 16) & 0xffff); + } + } + + ws->buffer_unmap(ws, fp->buffer); +} + +static boolean +nv40_fragprog_validate(struct nv40_context *nv40) { + struct nv40_fragment_program *fp = nv40->fragprog; + struct pipe_buffer *constbuf = + nv40->constbuf[PIPE_SHADER_FRAGMENT]; struct pipe_winsys *ws = nv40->pipe.winsys; struct nouveau_stateobj *so; int i; + if (fp->translated) + goto update_constants; + + nv40_fragprog_translate(nv40, fp); if (!fp->translated) { - nv40_fragprog_translate(nv40, fp); - if (!fp->translated) - assert(0); + nv40->fallback |= NV40_FALLBACK_RAST; + return FALSE; } + fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4); + nv40_fragprog_upload(nv40, fp); + + so = so_new(4, 1); + so_method(so, nv40->screen->curie, NV40TCL_FP_ADDRESS, 1); + so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1); + so_method(so, nv40->screen->curie, NV40TCL_FP_CONTROL, 1); + so_data (so, fp->fp_control); + so_ref(so, &fp->so); + +update_constants: if (fp->nr_consts) { - float *map = ws->buffer_map(ws, nv40->fragprog.constant_buf, - PIPE_BUFFER_USAGE_CPU_READ); + boolean new_consts = FALSE; + float *map; + + map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); for (i = 0; i < fp->nr_consts; i++) { struct nv40_fragment_program_data *fpd = &fp->consts[i]; uint32_t *p = &fp->insn[fpd->offset]; @@ -780,56 +832,20 @@ nv40_fragprog_bind(struct nv40_context *nv40, struct nv40_fragment_program *fp) if (!memcmp(p, cb, 4 * sizeof(float))) continue; memcpy(p, cb, 4 * sizeof(float)); - fp->on_hw = 0; + new_consts = TRUE; } - ws->buffer_unmap(ws, nv40->fragprog.constant_buf); - } + ws->buffer_unmap(ws, constbuf); - if (!fp->on_hw) { - const uint32_t le = 1; - uint32_t *map; - - if (!fp->buffer) - fp->buffer = ws->buffer_create(ws, 0x100, 0, - fp->insn_len * 4); - map = ws->buffer_map(ws, fp->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE); - -#if 0 - for (i = 0; i < fp->insn_len; i++) { - NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); - } -#endif - - if ((*(const uint8_t *)&le)) { - for (i = 0; i < fp->insn_len; i++) { - map[i] = fp->insn[i]; - } - } else { - /* Weird swapping for big-endian chips */ - for (i = 0; i < fp->insn_len; i++) { - map[i] = ((fp->insn[i] & 0xffff) << 16) | - ((fp->insn[i] >> 16) & 0xffff); - } - } - - ws->buffer_unmap(ws, fp->buffer); - fp->on_hw = TRUE; + if (new_consts) + nv40_fragprog_upload(nv40, fp); } - so = so_new(4, 1); - so_method(so, nv40->hw->curie, NV40TCL_FP_ADDRESS, 1); - so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | - NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, - NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1); - so_method(so, nv40->hw->curie, NV40TCL_FP_CONTROL, 1); - so_data (so, fp->fp_control); - - so_emit(nv40->nvws, so); - so_ref(so, &fp->so); - so_ref(NULL, &so); + if (fp->so != nv40->state.hw[NV40_STATE_FRAGPROG]) { + so_ref(fp->so, &nv40->state.hw[NV40_STATE_FRAGPROG]); + return TRUE; + } - nv40->fragprog.active = fp; + return FALSE; } void @@ -840,3 +856,11 @@ nv40_fragprog_destroy(struct nv40_context *nv40, free(fp->insn); } +struct nv40_state_entry nv40_state_fragprog = { + .validate = nv40_fragprog_validate, + .dirty = { + .pipe = NV40_NEW_FRAGPROG, + .hw = NV40_STATE_FRAGPROG + } +}; + diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index 5af5fbe746..436f954cec 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -41,6 +41,7 @@ static struct nv40_texture_format * nv40_fragtex_format(uint pipe_format) { struct nv40_texture_format *tf = nv40_texture_formats; + char fs[128]; while (tf->defined) { if (tf->pipe == pipe_format) @@ -48,11 +49,13 @@ nv40_fragtex_format(uint pipe_format) tf++; } + pf_sprint_name(fs, pipe_format); + NOUVEAU_ERR("unknown texture format %s\n", fs); return NULL; } -static void +static struct nouveau_stateobj * nv40_fragtex_build(struct nv40_context *nv40, int unit) { struct nv40_sampler_state *ps = nv40->tex_sampler[unit]; @@ -90,7 +93,7 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) break; default: NOUVEAU_ERR("Unknown target %d\n", pt->target); - return; + return NULL; } if (swizzled) { @@ -103,7 +106,7 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) txs = tf->swizzle; so = so_new(16, 2); - so_method(so, nv40->hw->curie, NV40TCL_TEX_OFFSET(unit), 8); + so_method(so, nv40->screen->curie, NV40TCL_TEX_OFFSET(unit), 8); so_reloc (so, nv40mt->buffer, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); so_reloc (so, nv40mt->buffer, txf, tex_flags | NOUVEAU_BO_OR, NV40TCL_TEX_FORMAT_DMA0, NV40TCL_TEX_FORMAT_DMA1); @@ -114,28 +117,30 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) so_data (so, (pt->width[0] << NV40TCL_TEX_SIZE0_W_SHIFT) | pt->height[0]); so_data (so, ps->bcol); - so_method(so, nv40->hw->curie, NV40TCL_TEX_SIZE1(unit), 1); + so_method(so, nv40->screen->curie, NV40TCL_TEX_SIZE1(unit), 1); so_data (so, (pt->depth[0] << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); - so_emit(nv40->nvws, so); - so_ref (so, &nv40->so_fragtex[unit]); - so_ref (NULL, &so); + return so; } -void -nv40_fragtex_bind(struct nv40_context *nv40) +static boolean +nv40_fragtex_validate(struct nv40_context *nv40) { - struct nv40_fragment_program *fp = nv40->fragprog.active; + struct nv40_fragment_program *fp = nv40->fragprog; + struct nv40_state *state = &nv40->state; + struct nouveau_stateobj *so; unsigned samplers, unit; - samplers = nv40->fp_samplers & ~fp->samplers; + samplers = state->fp_samplers & ~fp->samplers; while (samplers) { unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - so_ref(NULL, &nv40->so_fragtex[unit]); - BEGIN_RING(curie, NV40TCL_TEX_ENABLE(unit), 1); - OUT_RING (0); + so = so_new(2, 0); + so_method(so, nv40->screen->curie, NV40TCL_TEX_ENABLE(unit), 1); + so_data (so, 0); + so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); + state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit)); } samplers = nv40->dirty_samplers & fp->samplers; @@ -143,9 +148,20 @@ nv40_fragtex_bind(struct nv40_context *nv40) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - nv40_fragtex_build(nv40, unit); + so = nv40_fragtex_build(nv40, unit); + so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); + state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit)); } - nv40->fp_samplers = fp->samplers; + nv40->state.fp_samplers = fp->samplers; + return FALSE; } +struct nv40_state_entry nv40_state_fragtex = { + .validate = nv40_fragtex_validate, + .dirty = { + .pipe = NV40_NEW_SAMPLER | NV40_NEW_FRAGPROG, + .hw = 0 + } +}; + diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index 92e6b3a43d..94ba05b710 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -54,15 +54,18 @@ nv40_miptree_layout(struct nv40_miptree *nv40mt) } static struct pipe_texture * -nv40_miptree_create(struct pipe_context *pipe, const struct pipe_texture *pt) +nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) { - struct pipe_winsys *ws = pipe->winsys; + struct pipe_winsys *ws = pscreen->winsys; struct nv40_miptree *mt; mt = MALLOC(sizeof(struct nv40_miptree)); if (!mt) return NULL; mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = pscreen; + nv40_miptree_layout(mt); mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, @@ -76,9 +79,9 @@ nv40_miptree_create(struct pipe_context *pipe, const struct pipe_texture *pt) } static void -nv40_miptree_release(struct pipe_context *pipe, struct pipe_texture **pt) +nv40_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **pt) { - struct pipe_winsys *ws = pipe->winsys; + struct pipe_winsys *ws = pscreen->winsys; struct pipe_texture *mt = *pt; *pt = NULL; @@ -95,10 +98,52 @@ nv40_miptree_release(struct pipe_context *pipe, struct pipe_texture **pt) } } +static void +nv40_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt) +{ +} + +static struct pipe_surface * +nv40_miptree_surface(struct pipe_screen *pscreen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct pipe_winsys *ws = pscreen->winsys; + struct nv40_miptree *nv40mt = (struct nv40_miptree *)pt; + struct pipe_surface *ps; + + ps = ws->surface_alloc(ws); + if (!ps) + return NULL; + pipe_buffer_reference(ws, &ps->buffer, nv40mt->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = nv40mt->level[level].pitch / ps->cpp; + + if (pt->target == PIPE_TEXTURE_CUBE) { + ps->offset = nv40mt->level[level].image_offset[face]; + } else + if (pt->target == PIPE_TEXTURE_3D) { + ps->offset = nv40mt->level[level].image_offset[zslice]; + } else { + ps->offset = nv40mt->level[level].image_offset[0]; + } + + return ps; +} + void nv40_init_miptree_functions(struct nv40_context *nv40) { - nv40->pipe.texture_create = nv40_miptree_create; - nv40->pipe.texture_release = nv40_miptree_release; + nv40->pipe.texture_update = nv40_miptree_update; +} + +void +nv40_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ + pscreen->texture_create = nv40_miptree_create; + pscreen->texture_release = nv40_miptree_release; + pscreen->get_tex_surface = nv40_miptree_surface; } diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c index 8bca2788b9..0317845624 100644 --- a/src/gallium/drivers/nv40/nv40_query.c +++ b/src/gallium/drivers/nv40/nv40_query.c @@ -45,9 +45,9 @@ nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); - if (nv40->nvws->res_alloc(nv40->hw->query_heap, 1, NULL, &q->object)) + if (nv40->nvws->res_alloc(nv40->screen->query_heap, 1, NULL, &q->object)) assert(0); - nv40->nvws->notifier_reset(nv40->hw->query, q->object->start); + nv40->nvws->notifier_reset(nv40->screen->query, q->object->start); BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1); OUT_RING (1); @@ -82,17 +82,17 @@ nv40_query_result(struct pipe_context *pipe, struct pipe_query *pq, if (!q->ready) { unsigned status; - status = nvws->notifier_status(nv40->hw->query, + status = nvws->notifier_status(nv40->screen->query, q->object->start); if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { if (wait == FALSE) return FALSE; - nvws->notifier_wait(nv40->hw->query, q->object->start, + nvws->notifier_wait(nv40->screen->query, q->object->start, NV_NOTIFY_STATE_STATUS_COMPLETED, 0); } - q->result = nvws->notifier_retval(nv40->hw->query, + q->result = nvws->notifier_retval(nv40->screen->query, q->object->start); q->ready = TRUE; nvws->res_free(&q->object); diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c new file mode 100644 index 0000000000..268ca83ce0 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -0,0 +1,279 @@ +#include "pipe/p_screen.h" +#include "pipe/p_util.h" + +#include "nv40_context.h" +#include "nv40_screen.h" + +#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf +#define NV4X_GRCLASS4497_CHIPSETS 0x00005450 +#define NV6X_GRCLASS4497_CHIPSETS 0x00000088 + +static const char * +nv40_screen_get_name(struct pipe_screen *pscreen) +{ + struct nv40_screen *screen = nv40_screen(pscreen); + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", screen->chipset); + return buffer; +} + +static const char * +nv40_screen_get_vendor(struct pipe_screen *pscreen) +{ + return "nouveau"; +} + +static int +nv40_screen_get_param(struct pipe_screen *pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 16; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 4; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv40_screen_get_paramf(struct pipe_screen *pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + case PIPE_CAP_BITMAP_TEXCOORD_BIAS: + return 0.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static boolean +nv40_screen_surface_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, uint type) +{ + switch (type) { + case PIPE_SURFACE: + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + break; + case PIPE_TEXTURE: + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_U_L8: + case PIPE_FORMAT_U_A8: + case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return TRUE; + default: + break; + } + break; + default: + assert(0); + }; + + return FALSE; +} + +static void +nv40_screen_destroy(struct pipe_screen *pscreen) +{ + struct nv40_screen *screen = nv40_screen(pscreen); + struct nouveau_winsys *nvws = screen->nvws; + + nvws->res_free(&screen->vp_exec_heap); + nvws->res_free(&screen->vp_data_heap); + nvws->res_free(&screen->query_heap); + nvws->notifier_free(&screen->query); + nvws->notifier_free(&screen->sync); + nvws->grobj_free(&screen->curie); + + FREE(pscreen); +} + +struct pipe_screen * +nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, + unsigned chipset) +{ + struct nv40_screen *screen = CALLOC_STRUCT(nv40_screen); + struct nouveau_stateobj *so; + unsigned curie_class; + int ret; + + if (!screen) + return NULL; + screen->chipset = chipset; + screen->nvws = nvws; + + /* 3D object */ + switch (chipset & 0xf0) { + case 0x40: + if (NV4X_GRCLASS4097_CHIPSETS & (1 << (chipset & 0x0f))) + curie_class = NV40TCL; + else + if (NV4X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) + curie_class = NV44TCL; + break; + case 0x60: + if (NV6X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) + curie_class = NV44TCL; + break; + default: + break; + } + + if (!curie_class) { + NOUVEAU_ERR("Unknown nv4x chipset: nv%02x\n", chipset); + return NULL; + } + + ret = nvws->grobj_alloc(nvws, curie_class, &screen->curie); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return FALSE; + } + + /* Notifier for sync purposes */ + ret = nvws->notifier_alloc(nvws, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv40_screen_destroy(&screen->pipe); + return NULL; + } + + /* Query objects */ + ret = nvws->notifier_alloc(nvws, 32, &screen->query); + if (ret) { + NOUVEAU_ERR("Error initialising query objects: %d\n", ret); + nv40_screen_destroy(&screen->pipe); + return NULL; + } + + ret = nvws->res_init(&screen->query_heap, 0, 32); + if (ret) { + NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); + nv40_screen_destroy(&screen->pipe); + return NULL; + } + + /* Vtxprog resources */ + if (nvws->res_init(&screen->vp_exec_heap, 0, 512) || + nvws->res_init(&screen->vp_data_heap, 0, 256)) { + nv40_screen_destroy(&screen->pipe); + return NULL; + } + + /* Static curie initialisation */ + so = so_new(128, 0); + so_method(so, screen->curie, NV40TCL_DMA_NOTIFY, 1); + so_data (so, screen->sync->handle); + so_method(so, screen->curie, NV40TCL_DMA_TEXTURE0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->gart->handle); + so_method(so, screen->curie, NV40TCL_DMA_COLOR1, 1); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->curie, NV40TCL_DMA_COLOR0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->curie, NV40TCL_DMA_VTXBUF0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->gart->handle); + so_method(so, screen->curie, NV40TCL_DMA_FENCE, 2); + so_data (so, 0); + so_data (so, screen->query->handle); + so_method(so, screen->curie, NV40TCL_DMA_UNK01AC, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->curie, NV40TCL_DMA_COLOR2, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->vram->handle); + + so_method(so, screen->curie, 0x1ea4, 3); + so_data (so, 0x00000010); + so_data (so, 0x01000100); + so_data (so, 0xff800006); + + /* vtxprog output routing */ + so_method(so, screen->curie, 0x1fc4, 1); + so_data (so, 0x06144321); + so_method(so, screen->curie, 0x1fc8, 2); + so_data (so, 0xedcba987); + so_data (so, 0x00000021); + so_method(so, screen->curie, 0x1fd0, 1); + so_data (so, 0x00171615); + so_method(so, screen->curie, 0x1fd4, 1); + so_data (so, 0x001b1a19); + + so_method(so, screen->curie, 0x1ef8, 1); + so_data (so, 0x0020ffff); + so_method(so, screen->curie, 0x1d64, 1); + so_data (so, 0x00d30000); + so_method(so, screen->curie, 0x1e94, 1); + so_data (so, 0x00000001); + + so_emit(nvws, so); + so_ref(NULL, &so); + nvws->push_flush(nvws->channel, 0); + + screen->pipe.winsys = ws; + screen->pipe.destroy = nv40_screen_destroy; + + screen->pipe.get_name = nv40_screen_get_name; + screen->pipe.get_vendor = nv40_screen_get_vendor; + screen->pipe.get_param = nv40_screen_get_param; + screen->pipe.get_paramf = nv40_screen_get_paramf; + + screen->pipe.is_format_supported = nv40_screen_surface_format_supported; + + nv40_screen_init_miptree_functions(&screen->pipe); + + return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv40/nv40_screen.h b/src/gallium/drivers/nv40/nv40_screen.h new file mode 100644 index 0000000000..3ea78aadfd --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_screen.h @@ -0,0 +1,36 @@ +#ifndef __NV40_SCREEN_H__ +#define __NV40_SCREEN_H__ + +#include "pipe/p_screen.h" + +struct nv40_screen { + struct pipe_screen pipe; + + struct nouveau_winsys *nvws; + unsigned chipset; + + unsigned cur_pctx; + + /* HW graphics objects */ + struct nouveau_grobj *curie; + struct nouveau_notifier *sync; + + /* Query object resources */ + struct nouveau_notifier *query; + struct nouveau_resource *query_heap; + + /* Vtxprog resources */ + struct nouveau_resource *vp_exec_heap; + struct nouveau_resource *vp_data_heap; + + /* Current 3D state of channel */ + struct nouveau_stateobj *state[NV40_STATE_MAX]; +}; + +static INLINE struct nv40_screen * +nv40_screen(struct pipe_screen *screen) +{ + return (struct nv40_screen *)screen; +} + +#endif diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 713f31dbb1..caa2f9df0c 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -10,7 +10,8 @@ nv40_blend_state_create(struct pipe_context *pipe, const struct pipe_blend_state *cso) { struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_grobj *curie = nv40->hw->curie; + struct nouveau_grobj *curie = nv40->screen->curie; + struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso)); struct nouveau_stateobj *so = so_new(16, 0); if (cso->blend_enable) { @@ -46,7 +47,9 @@ nv40_blend_state_create(struct pipe_context *pipe, so_method(so, curie, NV40TCL_DITHER_ENABLE, 1); so_data (so, cso->dither ? 1 : 0); - return (void *)so; + so_ref(so, &bso->so); + bso->pipe = *cso; + return (void *)bso; } static void @@ -54,16 +57,17 @@ nv40_blend_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv40_context *nv40 = nv40_context(pipe); - so_ref(hwcso, &nv40->so_blend); + nv40->blend = hwcso; nv40->dirty |= NV40_NEW_BLEND; } static void nv40_blend_state_delete(struct pipe_context *pipe, void *hwcso) { - struct nouveau_stateobj *so = hwcso; + struct nv40_blend_state *bso = hwcso; - so_ref(NULL, &so); + so_ref(NULL, &bso->so); + FREE(bso); } @@ -255,12 +259,13 @@ nv40_sampler_state_bind(struct pipe_context *pipe, unsigned unit, nv40->tex_sampler[unit] = ps; nv40->dirty_samplers |= (1 << unit); + nv40->dirty |= NV40_NEW_SAMPLER; } static void nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso) { - free(hwcso); + FREE(hwcso); } static void @@ -271,6 +276,7 @@ nv40_set_sampler_texture(struct pipe_context *pipe, unsigned unit, nv40->tex_miptree[unit] = (struct nv40_miptree *)miptree; nv40->dirty_samplers |= (1 << unit); + nv40->dirty |= NV40_NEW_SAMPLER; } static void * @@ -278,33 +284,32 @@ nv40_rasterizer_state_create(struct pipe_context *pipe, const struct pipe_rasterizer_state *cso) { struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_grobj *curie = nv40->screen->curie; /*XXX: ignored: * light_twoside - * offset_cw/ccw -nohw - * scissor * point_smooth -nohw * multisample - * offset_units / offset_scale */ - so_method(so, nv40->hw->curie, NV40TCL_SHADE_MODEL, 1); + so_method(so, curie, NV40TCL_SHADE_MODEL, 1); so_data (so, cso->flatshade ? NV40TCL_SHADE_MODEL_FLAT : NV40TCL_SHADE_MODEL_SMOOTH); - so_method(so, nv40->hw->curie, NV40TCL_LINE_WIDTH, 2); + so_method(so, curie, NV40TCL_LINE_WIDTH, 2); so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff); so_data (so, cso->line_smooth ? 1 : 0); - so_method(so, nv40->hw->curie, NV40TCL_LINE_STIPPLE_ENABLE, 2); + so_method(so, curie, NV40TCL_LINE_STIPPLE_ENABLE, 2); so_data (so, cso->line_stipple_enable ? 1 : 0); so_data (so, (cso->line_stipple_pattern << 16) | cso->line_stipple_factor); - so_method(so, nv40->hw->curie, NV40TCL_POINT_SIZE, 1); + so_method(so, curie, NV40TCL_POINT_SIZE, 1); so_data (so, fui(cso->point_size)); - so_method(so, nv40->hw->curie, NV40TCL_POLYGON_MODE_FRONT, 6); + so_method(so, curie, NV40TCL_POLYGON_MODE_FRONT, 6); if (cso->front_winding == PIPE_WINDING_CCW) { so_data(so, nvgl_polygon_mode(cso->fill_ccw)); so_data(so, nvgl_polygon_mode(cso->fill_cw)); @@ -345,10 +350,32 @@ nv40_rasterizer_state_create(struct pipe_context *pipe, so_data(so, cso->poly_smooth ? 1 : 0); so_data(so, cso->cull_mode != PIPE_WINDING_NONE ? 1 : 0); - so_method(so, nv40->hw->curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); + so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, cso->poly_stipple_enable ? 1 : 0); - so_method(so, nv40->hw->curie, NV40TCL_POINT_SPRITE, 1); + so_method(so, curie, NV40TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) + so_data(so, 1); + else + so_data(so, 0); + if (cso->offset_cw || cso->offset_ccw) { + so_method(so, curie, NV40TCL_POLYGON_OFFSET_FACTOR, 2); + so_data (so, fui(cso->offset_scale)); + so_data (so, fui(cso->offset_units * 2)); + } + + so_method(so, curie, NV40TCL_POINT_SPRITE, 1); if (cso->point_sprite) { unsigned psctl = (1 << 0), i; @@ -362,7 +389,9 @@ nv40_rasterizer_state_create(struct pipe_context *pipe, so_data(so, 0); } - return (void *)so; + so_ref(so, &rsso->so); + rsso->pipe = *cso; + return (void *)rsso; } static void @@ -370,16 +399,17 @@ nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv40_context *nv40 = nv40_context(pipe); - so_ref(hwcso, &nv40->so_rast); + nv40->rasterizer = hwcso; nv40->dirty |= NV40_NEW_RAST; } static void nv40_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) { - struct nouveau_stateobj *so = hwcso; + struct nv40_rasterizer_state *rsso = hwcso; - so_ref(NULL, &so); + so_ref(NULL, &rsso->so); + FREE(rsso); } static void * @@ -387,20 +417,21 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, const struct pipe_depth_stencil_alpha_state *cso) { struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); struct nouveau_stateobj *so = so_new(32, 0); - so_method(so, nv40->hw->curie, NV40TCL_DEPTH_FUNC, 3); + so_method(so, nv40->screen->curie, NV40TCL_DEPTH_FUNC, 3); so_data (so, nvgl_comparison_op(cso->depth.func)); so_data (so, cso->depth.writemask ? 1 : 0); so_data (so, cso->depth.enabled ? 1 : 0); - so_method(so, nv40->hw->curie, NV40TCL_ALPHA_TEST_ENABLE, 3); + so_method(so, nv40->screen->curie, NV40TCL_ALPHA_TEST_ENABLE, 3); so_data (so, cso->alpha.enabled ? 1 : 0); so_data (so, nvgl_comparison_op(cso->alpha.func)); so_data (so, float_to_ubyte(cso->alpha.ref)); if (cso->stencil[0].enabled) { - so_method(so, nv40->hw->curie, NV40TCL_STENCIL_FRONT_ENABLE, 8); + so_method(so, nv40->screen->curie, NV40TCL_STENCIL_FRONT_ENABLE, 8); so_data (so, cso->stencil[0].enabled ? 1 : 0); so_data (so, cso->stencil[0].write_mask); so_data (so, nvgl_comparison_op(cso->stencil[0].func)); @@ -410,12 +441,12 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); } else { - so_method(so, nv40->hw->curie, NV40TCL_STENCIL_FRONT_ENABLE, 1); + so_method(so, nv40->screen->curie, NV40TCL_STENCIL_FRONT_ENABLE, 1); so_data (so, 0); } if (cso->stencil[1].enabled) { - so_method(so, nv40->hw->curie, NV40TCL_STENCIL_BACK_ENABLE, 8); + so_method(so, nv40->screen->curie, NV40TCL_STENCIL_BACK_ENABLE, 8); so_data (so, cso->stencil[1].enabled ? 1 : 0); so_data (so, cso->stencil[1].write_mask); so_data (so, nvgl_comparison_op(cso->stencil[1].func)); @@ -425,11 +456,13 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); } else { - so_method(so, nv40->hw->curie, NV40TCL_STENCIL_BACK_ENABLE, 1); + so_method(so, nv40->screen->curie, NV40TCL_STENCIL_BACK_ENABLE, 1); so_data (so, 0); } - return (void *)so; + so_ref(so, &zsaso->so); + zsaso->pipe = *cso; + return (void *)zsaso; } static void @@ -437,16 +470,17 @@ nv40_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv40_context *nv40 = nv40_context(pipe); - so_ref(hwcso, &nv40->so_zsa); + nv40->zsa = hwcso; nv40->dirty |= NV40_NEW_ZSA; } static void nv40_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) { - struct nouveau_stateobj *so = hwcso; + struct nv40_zsa_state *zsaso = hwcso; - so_ref(NULL, &so); + so_ref(NULL, &zsaso->so); + FREE(zsaso); } static void * @@ -465,9 +499,8 @@ static void nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_vertex_program *vp = hwcso; - nv40->vertprog.current = vp; + nv40->vertprog = hwcso; nv40->dirty |= NV40_NEW_VERTPROG; } @@ -478,7 +511,7 @@ nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv40_vertex_program *vp = hwcso; nv40_vertprog_destroy(nv40, vp); - free(vp); + FREE(vp); } static void * @@ -497,9 +530,8 @@ static void nv40_fp_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_fragment_program *fp = hwcso; - nv40->fragprog.current = fp; + nv40->fragprog = hwcso; nv40->dirty |= NV40_NEW_FRAGPROG; } @@ -510,7 +542,7 @@ nv40_fp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv40_fragment_program *fp = hwcso; nv40_fragprog_destroy(nv40, fp); - free(fp); + FREE(fp); } static void @@ -518,16 +550,8 @@ nv40_set_blend_color(struct pipe_context *pipe, const struct pipe_blend_color *bcol) { struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_stateobj *so = so_new(2, 0); - - so_method(so, nv40->hw->curie, NV40TCL_BLEND_COLOR, 1); - so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | - (float_to_ubyte(bcol->color[0]) << 16) | - (float_to_ubyte(bcol->color[1]) << 8) | - (float_to_ubyte(bcol->color[2]) << 0))); - so_ref(so, &nv40->so_bcol); - so_ref(NULL, &so); + nv40->blend_colour = *bcol; nv40->dirty |= NV40_NEW_BCOL; } @@ -535,6 +559,10 @@ static void nv40_set_clip_state(struct pipe_context *pipe, const struct pipe_clip_state *clip) { + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->clip = *clip; + nv40->dirty |= NV40_NEW_UCP; } static void @@ -544,11 +572,11 @@ nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, struct nv40_context *nv40 = nv40_context(pipe); if (shader == PIPE_SHADER_VERTEX) { - nv40->vertprog.constant_buf = buf->buffer; + nv40->constbuf[PIPE_SHADER_VERTEX] = buf->buffer; nv40->dirty |= NV40_NEW_VERTPROG; } else if (shader == PIPE_SHADER_FRAGMENT) { - nv40->fragprog.constant_buf = buf->buffer; + nv40->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer; nv40->dirty |= NV40_NEW_FRAGPROG; } } @@ -558,146 +586,8 @@ nv40_set_framebuffer_state(struct pipe_context *pipe, const struct pipe_framebuffer_state *fb) { struct nv40_context *nv40 = nv40_context(pipe); - struct pipe_surface *rt[4], *zeta; - uint32_t rt_enable, rt_format, w, h; - int i, colour_format = 0, zeta_format = 0; - struct nouveau_stateobj *so = so_new(64, 10); - unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; - - rt_enable = 0; - for (i = 0; i < 4; i++) { - if (!fb->cbufs[i]) - continue; - - if (colour_format) { - assert(w == fb->cbufs[i]->width); - assert(h == fb->cbufs[i]->height); - assert(colour_format == fb->cbufs[i]->format); - } else { - w = fb->cbufs[i]->width; - h = fb->cbufs[i]->height; - colour_format = fb->cbufs[i]->format; - rt_enable |= (NV40TCL_RT_ENABLE_COLOR0 << i); - rt[i] = fb->cbufs[i]; - } - } - - if (rt_enable & (NV40TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 | - NV40TCL_RT_ENABLE_COLOR3)) - rt_enable |= NV40TCL_RT_ENABLE_MRT; - - if (fb->zsbuf) { - if (colour_format) { - assert(w == fb->zsbuf->width); - assert(h == fb->zsbuf->height); - } else { - w = fb->zsbuf->width; - h = fb->zsbuf->height; - } - zeta_format = fb->zsbuf->format; - zeta = fb->zsbuf; - } - - rt_format = NV40TCL_RT_FORMAT_TYPE_LINEAR; - - switch (colour_format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case 0: - rt_format |= NV40TCL_RT_FORMAT_COLOR_A8R8G8B8; - break; - case PIPE_FORMAT_R5G6B5_UNORM: - rt_format |= NV40TCL_RT_FORMAT_COLOR_R5G6B5; - break; - default: - assert(0); - } - - switch (zeta_format) { - case PIPE_FORMAT_Z16_UNORM: - rt_format |= NV40TCL_RT_FORMAT_ZETA_Z16; - break; - case PIPE_FORMAT_Z24S8_UNORM: - case 0: - rt_format |= NV40TCL_RT_FORMAT_ZETA_Z24S8; - break; - default: - assert(0); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) { - so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR0, 1); - so_reloc (so, rt[0]->buffer, 0, rt_flags | NOUVEAU_BO_OR, - nv40->nvws->channel->vram->handle, - nv40->nvws->channel->gart->handle); - so_method(so, nv40->hw->curie, NV40TCL_COLOR0_PITCH, 2); - so_data (so, rt[0]->pitch * rt[0]->cpp); - so_reloc (so, rt[0]->buffer, rt[0]->offset, rt_flags | - NOUVEAU_BO_LOW, 0, 0); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) { - so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR1, 1); - so_reloc (so, rt[1]->buffer, 0, rt_flags | NOUVEAU_BO_OR, - nv40->nvws->channel->vram->handle, - nv40->nvws->channel->gart->handle); - so_method(so, nv40->hw->curie, NV40TCL_COLOR1_OFFSET, 2); - so_reloc (so, rt[1]->buffer, rt[1]->offset, rt_flags | - NOUVEAU_BO_LOW, 0, 0); - so_data (so, rt[1]->pitch * rt[1]->cpp); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { - so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR2, 1); - so_reloc (so, rt[2]->buffer, 0, rt_flags | NOUVEAU_BO_OR, - nv40->nvws->channel->vram->handle, - nv40->nvws->channel->gart->handle); - so_method(so, nv40->hw->curie, NV40TCL_COLOR2_OFFSET, 1); - so_reloc (so, rt[2]->buffer, rt[2]->offset, rt_flags | - NOUVEAU_BO_LOW, 0, 0); - so_method(so, nv40->hw->curie, NV40TCL_COLOR2_PITCH, 1); - so_data (so, rt[2]->pitch * rt[2]->cpp); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { - so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR3, 1); - so_reloc (so, rt[3]->buffer, 0, rt_flags | NOUVEAU_BO_OR, - nv40->nvws->channel->vram->handle, - nv40->nvws->channel->gart->handle); - so_method(so, nv40->hw->curie, NV40TCL_COLOR3_OFFSET, 1); - so_reloc (so, rt[3]->buffer, rt[3]->offset, rt_flags | - NOUVEAU_BO_LOW, 0, 0); - so_method(so, nv40->hw->curie, NV40TCL_COLOR3_PITCH, 1); - so_data (so, rt[3]->pitch * rt[3]->cpp); - } - - if (zeta_format) { - so_method(so, nv40->hw->curie, NV40TCL_DMA_ZETA, 1); - so_reloc (so, zeta->buffer, 0, rt_flags | NOUVEAU_BO_OR, - nv40->nvws->channel->vram->handle, - nv40->nvws->channel->gart->handle); - so_method(so, nv40->hw->curie, NV40TCL_ZETA_OFFSET, 1); - so_reloc (so, zeta->buffer, zeta->offset, rt_flags | - NOUVEAU_BO_LOW, 0, 0); - so_method(so, nv40->hw->curie, NV40TCL_ZETA_PITCH, 1); - so_data (so, zeta->pitch * zeta->cpp); - } - - so_method(so, nv40->hw->curie, NV40TCL_RT_ENABLE, 1); - so_data (so, rt_enable); - so_method(so, nv40->hw->curie, NV40TCL_RT_HORIZ, 3); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_data (so, rt_format); - so_method(so, nv40->hw->curie, NV40TCL_VIEWPORT_HORIZ, 2); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_method(so, nv40->hw->curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2); - so_data (so, ((w - 1) << 16) | 0); - so_data (so, ((h - 1) << 16) | 0); - - so_ref(so, &nv40->so_framebuffer); - so_ref(NULL, &so); + nv40->framebuffer = *fb; nv40->dirty |= NV40_NEW_FB; } @@ -706,15 +596,8 @@ nv40_set_polygon_stipple(struct pipe_context *pipe, const struct pipe_poly_stipple *stipple) { struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_stateobj *so = so_new(33, 0); - unsigned i; - so_method(so, nv40->hw->curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32); - for (i = 0; i < 32; i++) - so_data(so, stipple->stipple[i]); - - so_ref(so, &nv40->so_stipple); - so_ref(NULL, &so); + memcpy(nv40->stipple, stipple->stipple, 4 * 32); nv40->dirty |= NV40_NEW_STIPPLE; } @@ -723,14 +606,8 @@ nv40_set_scissor_state(struct pipe_context *pipe, const struct pipe_scissor_state *s) { struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_stateobj *so = so_new(3, 0); - - so_method(so, nv40->hw->curie, NV40TCL_SCISSOR_HORIZ, 2); - so_data (so, ((s->maxx - s->minx) << 16) | s->minx); - so_data (so, ((s->maxy - s->miny) << 16) | s->miny); - so_ref(so, &nv40->so_scissor); - so_ref(NULL, &so); + nv40->scissor = *s; nv40->dirty |= NV40_NEW_SCISSOR; } @@ -739,20 +616,8 @@ nv40_set_viewport_state(struct pipe_context *pipe, const struct pipe_viewport_state *vpt) { struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_stateobj *so = so_new(9, 0); - - so_method(so, nv40->hw->curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8); - so_data (so, fui(vpt->translate[0])); - so_data (so, fui(vpt->translate[1])); - so_data (so, fui(vpt->translate[2])); - so_data (so, fui(vpt->translate[3])); - so_data (so, fui(vpt->scale[0])); - so_data (so, fui(vpt->scale[1])); - so_data (so, fui(vpt->scale[2])); - so_data (so, fui(vpt->scale[3])); - - so_ref(so, &nv40->so_viewport); - so_ref(NULL, &so); + + nv40->viewport = *vpt; nv40->dirty |= NV40_NEW_VIEWPORT; } @@ -763,7 +628,6 @@ nv40_set_vertex_buffer(struct pipe_context *pipe, unsigned index, struct nv40_context *nv40 = nv40_context(pipe); nv40->vtxbuf[index] = *vb; - nv40->dirty |= NV40_NEW_ARRAYS; } @@ -774,7 +638,6 @@ nv40_set_vertex_element(struct pipe_context *pipe, unsigned index, struct nv40_context *nv40 = nv40_context(pipe); nv40->vtxelt[index] = *ve; - nv40->dirty |= NV40_NEW_ARRAYS; } diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h index e82ab9de98..e5217fe91c 100644 --- a/src/gallium/drivers/nv40/nv40_state.h +++ b/src/gallium/drivers/nv40/nv40_state.h @@ -39,6 +39,7 @@ struct nv40_vertex_program { uint32_t ir; uint32_t or; + struct nouveau_stateobj *so; }; struct nv40_fragment_program_data { @@ -50,7 +51,6 @@ struct nv40_fragment_program { const struct pipe_shader_state *pipe; boolean translated; - boolean on_hw; unsigned samplers; uint32_t *insn; diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c new file mode 100644 index 0000000000..95e6d7394f --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_blend.c @@ -0,0 +1,40 @@ +#include "nv40_context.h" + +static boolean +nv40_state_blend_validate(struct nv40_context *nv40) +{ + so_ref(nv40->blend->so, &nv40->state.hw[NV40_STATE_BLEND]); + return TRUE; +} + +struct nv40_state_entry nv40_state_blend = { + .validate = nv40_state_blend_validate, + .dirty = { + .pipe = NV40_NEW_BLEND, + .hw = NV40_STATE_BLEND + } +}; + +static boolean +nv40_state_blend_colour_validate(struct nv40_context *nv40) +{ + struct nouveau_stateobj *so = so_new(2, 0); + struct pipe_blend_color *bcol = &nv40->blend_colour; + + so_method(so, nv40->screen->curie, NV40TCL_BLEND_COLOR, 1); + so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | + (float_to_ubyte(bcol->color[0]) << 16) | + (float_to_ubyte(bcol->color[1]) << 8) | + (float_to_ubyte(bcol->color[2]) << 0))); + + so_ref(so, &nv40->state.hw[NV40_STATE_BCOL]); + return TRUE; +} + +struct nv40_state_entry nv40_state_blend_colour = { + .validate = nv40_state_blend_colour_validate, + .dirty = { + .pipe = NV40_NEW_BCOL, + .hw = NV40_STATE_BCOL + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_clip.c b/src/gallium/drivers/nv40/nv40_state_clip.c new file mode 100644 index 0000000000..93e690161f --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_clip.c @@ -0,0 +1,18 @@ +#include "nv40_context.h" + +static boolean +nv40_state_clip_validate(struct nv40_context *nv40) +{ + if (nv40->clip.nr) + nv40->fallback |= NV40_FALLBACK_TNL; + + return FALSE; +} + +struct nv40_state_entry nv40_state_clip = { + .validate = nv40_state_clip_validate, + .dirty = { + .pipe = NV40_NEW_UCP, + .hw = 0 + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index a10c995548..9f268640e0 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -1,77 +1,108 @@ #include "nv40_context.h" #include "nv40_state.h" -/* Emit relocs for every referenced buffer. - * - * This is to ensure the bufmgr has an accurate idea of how - * the buffer is used. These relocs appear in the push buffer as - * NOPs, and will only be turned into state changes if a buffer - * actually moves. - */ +static struct nv40_state_entry *render_states[] = { + &nv40_state_framebuffer, + &nv40_state_rasterizer, + &nv40_state_clip, + &nv40_state_scissor, + &nv40_state_stipple, + &nv40_state_fragprog, + &nv40_state_fragtex, + &nv40_state_vertprog, + &nv40_state_blend, + &nv40_state_blend_colour, + &nv40_state_zsa, + &nv40_state_viewport, + &nv40_state_vbo, + NULL +}; + static void -nv40_state_emit_dummy_relocs(struct nv40_context *nv40) +nv40_state_validate(struct nv40_context *nv40) { - unsigned i; - - so_emit_reloc_markers(nv40->nvws, nv40->so_framebuffer); - for (i = 0; i < 16; i++) { - if (!(nv40->fp_samplers & (1 << i))) - continue; - so_emit_reloc_markers(nv40->nvws, nv40->so_fragtex[i]); - } - so_emit_reloc_markers(nv40->nvws, nv40->fragprog.active->so); -} + struct nv40_state_entry **states = render_states; + unsigned last_fallback; -void -nv40_emit_hw_state(struct nv40_context *nv40) -{ - if (nv40->dirty & NV40_NEW_FB) - so_emit(nv40->nvws, nv40->so_framebuffer); + last_fallback = nv40->fallback; + nv40->fallback = 0; - if (nv40->dirty & NV40_NEW_BLEND) - so_emit(nv40->nvws, nv40->so_blend); + while (*states) { + struct nv40_state_entry *e = *states; - if (nv40->dirty & NV40_NEW_RAST) - so_emit(nv40->nvws, nv40->so_rast); + if (nv40->dirty & e->dirty.pipe) { + if (e->validate(nv40)) + nv40->state.dirty |= (1ULL << e->dirty.hw); + } - if (nv40->dirty & NV40_NEW_ZSA) - so_emit(nv40->nvws, nv40->so_zsa); + states++; + } + nv40->dirty = 0; - if (nv40->dirty & NV40_NEW_BCOL) - so_emit(nv40->nvws, nv40->so_bcol); + if (nv40->fallback & NV40_FALLBACK_TNL && + !(last_fallback & NV40_FALLBACK_TNL)) { + NOUVEAU_ERR("XXX: hwtnl->swtnl\n"); + } else + if (last_fallback & NV40_FALLBACK_TNL && + !(nv40->fallback & NV40_FALLBACK_TNL)) { + NOUVEAU_ERR("XXX: swtnl->hwtnl\n"); + } - if (nv40->dirty & NV40_NEW_SCISSOR) - so_emit(nv40->nvws, nv40->so_scissor); + if (nv40->fallback & NV40_FALLBACK_RAST && + !(last_fallback & NV40_FALLBACK_RAST)) { + NOUVEAU_ERR("XXX: hwrast->swrast\n"); + } else + if (last_fallback & NV40_FALLBACK_RAST && + !(nv40->fallback & NV40_FALLBACK_RAST)) { + NOUVEAU_ERR("XXX: swrast->hwrast\n"); + } +} - if (nv40->dirty & NV40_NEW_VIEWPORT) - so_emit(nv40->nvws, nv40->so_viewport); +static void +nv40_state_emit(struct nv40_context *nv40) +{ + struct nv40_state *state = &nv40->state; + struct nv40_screen *screen = nv40->screen; + unsigned i, samplers; - if (nv40->dirty & NV40_NEW_STIPPLE) - so_emit(nv40->nvws, nv40->so_stipple); + if (nv40->pctx_id != screen->cur_pctx) { + for (i = 0; i < NV40_STATE_MAX; i++) { + if (state->hw[i] && screen->state[i] != state->hw[i]) + state->dirty |= (1ULL << i); + } - if (nv40->dirty & NV40_NEW_FRAGPROG) { - nv40_fragprog_bind(nv40, nv40->fragprog.current); - /*XXX: clear NV40_NEW_FRAGPROG if no new program uploaded */ + screen->cur_pctx = nv40->pctx_id; } - if (nv40->dirty_samplers || (nv40->dirty & NV40_NEW_FRAGPROG)) { - nv40_fragtex_bind(nv40); + while (state->dirty) { + unsigned idx = ffsll(state->dirty) - 1; - BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (2); - BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (1); - nv40->dirty &= ~NV40_NEW_FRAGPROG; + so_ref (state->hw[idx], &nv40->screen->state[idx]); + so_emit(nv40->nvws, nv40->screen->state[idx]); + state->dirty &= ~(1ULL << idx); } - if (nv40->dirty & NV40_NEW_VERTPROG) { - nv40_vertprog_bind(nv40, nv40->vertprog.current); - nv40->dirty &= ~NV40_NEW_VERTPROG; + so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FB]); + for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { + if (!(samplers & (1 << i))) + continue; + so_emit_reloc_markers(nv40->nvws, + state->hw[NV40_STATE_FRAGTEX0+i]); + samplers &= ~(1ULL << i); } + so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FRAGPROG]); + so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_VTXBUF]); +} - nv40->dirty_samplers = 0; - nv40->dirty = 0; +void +nv40_emit_hw_state(struct nv40_context *nv40) +{ + nv40_state_validate(nv40); + nv40_state_emit(nv40); - nv40_state_emit_dummy_relocs(nv40); + BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (2); + BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (1); } diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c new file mode 100644 index 0000000000..71795ab182 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -0,0 +1,155 @@ +#include "nv40_context.h" + +static boolean +nv40_state_framebuffer_validate(struct nv40_context *nv40) +{ + struct pipe_framebuffer_state *fb = &nv40->framebuffer; + struct pipe_surface *rt[4], *zeta; + uint32_t rt_enable, rt_format, w, h; + int i, colour_format = 0, zeta_format = 0; + struct nouveau_stateobj *so = so_new(64, 10); + unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; + + rt_enable = 0; + for (i = 0; i < 4; i++) { + if (!fb->cbufs[i]) + continue; + + if (colour_format) { + assert(w == fb->cbufs[i]->width); + assert(h == fb->cbufs[i]->height); + assert(colour_format == fb->cbufs[i]->format); + } else { + w = fb->cbufs[i]->width; + h = fb->cbufs[i]->height; + colour_format = fb->cbufs[i]->format; + rt_enable |= (NV40TCL_RT_ENABLE_COLOR0 << i); + rt[i] = fb->cbufs[i]; + } + } + + if (rt_enable & (NV40TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 | + NV40TCL_RT_ENABLE_COLOR3)) + rt_enable |= NV40TCL_RT_ENABLE_MRT; + + if (fb->zsbuf) { + if (colour_format) { + assert(w == fb->zsbuf->width); + assert(h == fb->zsbuf->height); + } else { + w = fb->zsbuf->width; + h = fb->zsbuf->height; + } + + zeta_format = fb->zsbuf->format; + zeta = fb->zsbuf; + } + + rt_format = NV40TCL_RT_FORMAT_TYPE_LINEAR; + + switch (colour_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case 0: + rt_format |= NV40TCL_RT_FORMAT_COLOR_A8R8G8B8; + break; + case PIPE_FORMAT_R5G6B5_UNORM: + rt_format |= NV40TCL_RT_FORMAT_COLOR_R5G6B5; + break; + default: + assert(0); + } + + switch (zeta_format) { + case PIPE_FORMAT_Z16_UNORM: + rt_format |= NV40TCL_RT_FORMAT_ZETA_Z16; + break; + case PIPE_FORMAT_Z24S8_UNORM: + case 0: + rt_format |= NV40TCL_RT_FORMAT_ZETA_Z24S8; + break; + default: + assert(0); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) { + so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR0, 1); + so_reloc (so, rt[0]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->screen->curie, NV40TCL_COLOR0_PITCH, 2); + so_data (so, rt[0]->pitch * rt[0]->cpp); + so_reloc (so, rt[0]->buffer, rt[0]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) { + so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR1, 1); + so_reloc (so, rt[1]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->screen->curie, NV40TCL_COLOR1_OFFSET, 2); + so_reloc (so, rt[1]->buffer, rt[1]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_data (so, rt[1]->pitch * rt[1]->cpp); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { + so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR2, 1); + so_reloc (so, rt[2]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->screen->curie, NV40TCL_COLOR2_OFFSET, 1); + so_reloc (so, rt[2]->buffer, rt[2]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, nv40->screen->curie, NV40TCL_COLOR2_PITCH, 1); + so_data (so, rt[2]->pitch * rt[2]->cpp); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { + so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR3, 1); + so_reloc (so, rt[3]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->screen->curie, NV40TCL_COLOR3_OFFSET, 1); + so_reloc (so, rt[3]->buffer, rt[3]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, nv40->screen->curie, NV40TCL_COLOR3_PITCH, 1); + so_data (so, rt[3]->pitch * rt[3]->cpp); + } + + if (zeta_format) { + so_method(so, nv40->screen->curie, NV40TCL_DMA_ZETA, 1); + so_reloc (so, zeta->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->screen->curie, NV40TCL_ZETA_OFFSET, 1); + so_reloc (so, zeta->buffer, zeta->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, nv40->screen->curie, NV40TCL_ZETA_PITCH, 1); + so_data (so, zeta->pitch * zeta->cpp); + } + + so_method(so, nv40->screen->curie, NV40TCL_RT_ENABLE, 1); + so_data (so, rt_enable); + so_method(so, nv40->screen->curie, NV40TCL_RT_HORIZ, 3); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_data (so, rt_format); + so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_HORIZ, 2); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2); + so_data (so, ((w - 1) << 16) | 0); + so_data (so, ((h - 1) << 16) | 0); + + so_ref(so, &nv40->state.hw[NV40_STATE_FB]); + return TRUE; +} + +struct nv40_state_entry nv40_state_framebuffer = { + .validate = nv40_state_framebuffer_validate, + .dirty = { + .pipe = NV40_NEW_FB, + .hw = NV40_STATE_FB + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_rasterizer.c b/src/gallium/drivers/nv40/nv40_state_rasterizer.c new file mode 100644 index 0000000000..9ecda5990f --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_rasterizer.c @@ -0,0 +1,17 @@ +#include "nv40_context.h" + +static boolean +nv40_state_rasterizer_validate(struct nv40_context *nv40) +{ + so_ref(nv40->rasterizer->so, + &nv40->state.hw[NV40_STATE_RAST]); + return TRUE; +} + +struct nv40_state_entry nv40_state_rasterizer = { + .validate = nv40_state_rasterizer_validate, + .dirty = { + .pipe = NV40_NEW_RAST, + .hw = NV40_STATE_RAST + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c new file mode 100644 index 0000000000..9e9eadc511 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_scissor.c @@ -0,0 +1,34 @@ +#include "nv40_context.h" + +static boolean +nv40_state_scissor_validate(struct nv40_context *nv40) +{ + struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; + struct pipe_scissor_state *s = &nv40->scissor; + struct nouveau_stateobj *so; + + if (nv40->state.hw[NV40_STATE_SCISSOR] && + (rast->scissor == 0 && nv40->state.scissor_enabled == 0)) + return FALSE; + + so = so_new(3, 0); + so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2); + if (rast->scissor) { + so_data (so, ((s->maxx - s->minx) << 16) | s->minx); + so_data (so, ((s->maxy - s->miny) << 16) | s->miny); + } else { + so_data (so, 4096 << 16); + so_data (so, 4096 << 16); + } + + so_ref(so, &nv40->state.hw[NV40_STATE_SCISSOR]); + return TRUE; +} + +struct nv40_state_entry nv40_state_scissor = { + .validate = nv40_state_scissor_validate, + .dirty = { + .pipe = NV40_NEW_SCISSOR | NV40_NEW_RAST, + .hw = NV40_STATE_SCISSOR + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c new file mode 100644 index 0000000000..b51024ad9b --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_stipple.c @@ -0,0 +1,39 @@ +#include "nv40_context.h" + +static boolean +nv40_state_stipple_validate(struct nv40_context *nv40) +{ + struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; + struct nouveau_grobj *curie = nv40->screen->curie; + struct nouveau_stateobj *so; + + if (nv40->state.hw[NV40_STATE_STIPPLE] && + (rast->poly_stipple_enable == 0 && nv40->state.stipple_enabled == 0)) + return FALSE; + + if (rast->poly_stipple_enable) { + unsigned i; + + so = so_new(35, 0); + so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 1); + so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32); + for (i = 0; i < 32; i++) + so_data(so, nv40->stipple[i]); + } else { + so = so_new(2, 0); + so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 0); + } + + so_ref(so, &nv40->state.hw[NV40_STATE_STIPPLE]); + return TRUE; +} + +struct nv40_state_entry nv40_state_stipple = { + .validate = nv40_state_stipple_validate, + .dirty = { + .pipe = NV40_NEW_STIPPLE | NV40_NEW_RAST, + .hw = NV40_STATE_STIPPLE, + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c new file mode 100644 index 0000000000..3a32533907 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_viewport.c @@ -0,0 +1,29 @@ +#include "nv40_context.h" + +static boolean +nv40_state_viewport_validate(struct nv40_context *nv40) +{ + struct nouveau_stateobj *so = so_new(9, 0); + struct pipe_viewport_state *vpt = &nv40->viewport; + + so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8); + so_data (so, fui(vpt->translate[0])); + so_data (so, fui(vpt->translate[1])); + so_data (so, fui(vpt->translate[2])); + so_data (so, fui(vpt->translate[3])); + so_data (so, fui(vpt->scale[0])); + so_data (so, fui(vpt->scale[1])); + so_data (so, fui(vpt->scale[2])); + so_data (so, fui(vpt->scale[3])); + + so_ref(so, &nv40->state.hw[NV40_STATE_VIEWPORT]); + return TRUE; +} + +struct nv40_state_entry nv40_state_viewport = { + .validate = nv40_state_viewport_validate, + .dirty = { + .pipe = NV40_NEW_VIEWPORT, + .hw = NV40_STATE_VIEWPORT + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_zsa.c b/src/gallium/drivers/nv40/nv40_state_zsa.c new file mode 100644 index 0000000000..fb760677c8 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_zsa.c @@ -0,0 +1,17 @@ +#include "nv40_context.h" + +static boolean +nv40_state_zsa_validate(struct nv40_context *nv40) +{ + so_ref(nv40->zsa->so, + &nv40->state.hw[NV40_STATE_ZSA]); + return TRUE; +} + +struct nv40_state_entry nv40_state_zsa = { + .validate = nv40_state_zsa_validate, + .dirty = { + .pipe = NV40_NEW_ZSA, + .hw = NV40_STATE_ZSA + } +}; diff --git a/src/gallium/drivers/nv40/nv40_surface.c b/src/gallium/drivers/nv40/nv40_surface.c index 9726ab4e4d..e8a6011696 100644 --- a/src/gallium/drivers/nv40/nv40_surface.c +++ b/src/gallium/drivers/nv40/nv40_surface.c @@ -33,76 +33,6 @@ #include "pipe/p_inlines.h" #include "util/p_tile.h" -static boolean -nv40_surface_format_supported(struct pipe_context *pipe, - enum pipe_format format, uint type) -{ - switch (type) { - case PIPE_SURFACE: - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z16_UNORM: - return TRUE; - default: - break; - } - break; - case PIPE_TEXTURE: - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_A1R5G5B5_UNORM: - case PIPE_FORMAT_A4R4G4B4_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_U_L8: - case PIPE_FORMAT_U_A8: - case PIPE_FORMAT_U_I8: - case PIPE_FORMAT_U_A8_L8: - case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - return TRUE; - default: - break; - } - break; - default: - assert(0); - }; - - return FALSE; -} - -static struct pipe_surface * -nv40_get_tex_surface(struct pipe_context *pipe, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - struct pipe_winsys *ws = pipe->winsys; - struct nv40_miptree *nv40mt = (struct nv40_miptree *)pt; - struct pipe_surface *ps; - - ps = ws->surface_alloc(ws); - if (!ps) - return NULL; - pipe_buffer_reference(ws, &ps->buffer, nv40mt->buffer); - ps->format = pt->format; - ps->cpp = pt->cpp; - ps->width = pt->width[level]; - ps->height = pt->height[level]; - ps->pitch = nv40mt->level[level].pitch / ps->cpp; - - if (pt->target == PIPE_TEXTURE_CUBE) { - ps->offset = nv40mt->level[level].image_offset[face]; - } else - if (pt->target == PIPE_TEXTURE_3D) { - ps->offset = nv40mt->level[level].image_offset[zslice]; - } else { - ps->offset = nv40mt->level[level].image_offset[0]; - } - - return ps; -} - static void nv40_surface_copy(struct pipe_context *pipe, unsigned do_flip, struct pipe_surface *dest, unsigned destx, unsigned desty, @@ -130,8 +60,6 @@ nv40_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, void nv40_init_surface_functions(struct nv40_context *nv40) { - nv40->pipe.is_format_supported = nv40_surface_format_supported; - nv40->pipe.get_tex_surface = nv40_get_tex_surface; nv40->pipe.surface_copy = nv40_surface_copy; nv40->pipe.surface_fill = nv40_surface_fill; } diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index fa827ef0c5..bedc8c6d4e 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -30,9 +30,50 @@ nv40_vbo_type(uint format) case PIPE_FORMAT_TYPE_UNORM: return NV40TCL_VTXFMT_TYPE_UBYTE; default: - NOUVEAU_ERR("Unknown format 0x%08x\n", format); + { + char fs[128]; + pf_sprint_name(fs, format); + NOUVEAU_ERR("Unknown format %s\n", fs); return NV40TCL_VTXFMT_TYPE_FLOAT; } + } +} + +static boolean +nv40_vbo_set_idxbuf(struct nv40_context *nv40, struct pipe_buffer *ib, + unsigned ib_size) +{ + unsigned type; + + if (!ib) { + nv40->idxbuf = NULL; + nv40->idxbuf_format = 0xdeadbeef; + return FALSE; + } + + /* No support for 8bit indices, no support at all on 0x4497 chips */ + if (nv40->screen->curie->grclass == NV44TCL || ib_size == 1) + return FALSE; + + switch (ib_size) { + case 2: + type = NV40TCL_IDXBUF_FORMAT_TYPE_U16; + break; + case 4: + type = NV40TCL_IDXBUF_FORMAT_TYPE_U32; + break; + default: + return FALSE; + } + + if (ib != nv40->idxbuf || + type != nv40->idxbuf_format) { + nv40->dirty |= NV40_NEW_ARRAYS; + nv40->idxbuf = ib; + nv40->idxbuf_format = type; + } + + return TRUE; } static boolean @@ -97,105 +138,15 @@ nv40_vbo_static_attrib(struct nv40_context *nv40, int attrib, return TRUE; } -static void -nv40_vbo_arrays_update(struct nv40_context *nv40, struct pipe_buffer *ib, - unsigned ib_format) -{ - struct nv40_vertex_program *vp = nv40->vertprog.active; - struct nouveau_stateobj *vtxbuf, *vtxfmt; - unsigned inputs, hw, num_hw; - unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; - - inputs = vp->ir; - for (hw = 0; hw < 16 && inputs; hw++) { - if (inputs & (1 << hw)) { - num_hw = hw; - inputs &= ~(1 << hw); - } - } - num_hw++; - - vtxbuf = so_new(20, 18); - so_method(vtxbuf, nv40->hw->curie, NV40TCL_VTXBUF_ADDRESS(0), num_hw); - vtxfmt = so_new(17, 0); - so_method(vtxfmt, nv40->hw->curie, NV40TCL_VTXFMT(0), num_hw); - - inputs = vp->ir; - for (hw = 0; hw < num_hw; hw++) { - struct pipe_vertex_element *ve; - struct pipe_vertex_buffer *vb; - - if (!(inputs & (1 << hw))) { - so_data(vtxbuf, 0); - so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT); - continue; - } - - ve = &nv40->vtxelt[hw]; - vb = &nv40->vtxbuf[ve->vertex_buffer_index]; - - if (!vb->pitch && nv40_vbo_static_attrib(nv40, hw, ve, vb)) { - so_data(vtxbuf, 0); - so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT); - continue; - } - - so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset, - vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, - 0, NV40TCL_VTXBUF_ADDRESS_DMA1); - so_data (vtxfmt, ((vb->pitch << NV40TCL_VTXFMT_STRIDE_SHIFT) | - (nv40_vbo_ncomp(ve->src_format) << - NV40TCL_VTXFMT_SIZE_SHIFT) | - nv40_vbo_type(ve->src_format))); - } - - if (ib) { - so_method(vtxbuf, nv40->hw->curie, NV40TCL_IDXBUF_ADDRESS, 2); - so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); - so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR, - 0, NV40TCL_IDXBUF_FORMAT_DMA1); - } - - so_emit(nv40->nvws, vtxfmt); - so_emit(nv40->nvws, vtxbuf); - so_ref (vtxbuf, &nv40->so_vtxbuf); - so_ref (NULL, &vtxbuf); - so_ref (NULL, &vtxfmt); -} - -static boolean -nv40_vbo_validate_state(struct nv40_context *nv40, - struct pipe_buffer *ib, unsigned ib_format) -{ - unsigned vdn = nv40->dirty & NV40_NEW_ARRAYS; - - nv40_emit_hw_state(nv40); - if (vdn || ib) { - nv40_vbo_arrays_update(nv40, ib, ib_format); - nv40->dirty &= ~NV40_NEW_ARRAYS; - } - - so_emit_reloc_markers(nv40->nvws, nv40->so_vtxbuf); - - BEGIN_RING(curie, 0x1710, 1); - OUT_RING (0); /* vtx cache flush */ - - return TRUE; -} - boolean nv40_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv40_context *nv40 = nv40_context(pipe); unsigned nr; - boolean ret; - ret = nv40_vbo_validate_state(nv40, NULL, 0); - if (!ret) { - NOUVEAU_ERR("state validate failed\n"); - return FALSE; - } + nv40_vbo_set_idxbuf(nv40, NULL, 0); + nv40_emit_hw_state(nv40); BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); OUT_RING (nvgl_primitive(mode)); @@ -302,14 +253,9 @@ nv40_draw_elements_inline(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); struct pipe_winsys *ws = pipe->winsys; - boolean ret; void *map; - ret = nv40_vbo_validate_state(nv40, NULL, 0); - if (!ret) { - NOUVEAU_ERR("state validate failed\n"); - return FALSE; - } + nv40_emit_hw_state(nv40); map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); if (!ib) { @@ -345,30 +291,12 @@ nv40_draw_elements_inline(struct pipe_context *pipe, static boolean nv40_draw_elements_vbo(struct pipe_context *pipe, - struct pipe_buffer *ib, unsigned ib_size, unsigned mode, unsigned start, unsigned count) { struct nv40_context *nv40 = nv40_context(pipe); - unsigned nr, type; - boolean ret; - - switch (ib_size) { - case 2: - type = NV40TCL_IDXBUF_FORMAT_TYPE_U16; - break; - case 4: - type = NV40TCL_IDXBUF_FORMAT_TYPE_U32; - break; - default: - NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); - return FALSE; - } + unsigned nr; - ret = nv40_vbo_validate_state(nv40, ib, type); - if (!ret) { - NOUVEAU_ERR("failed state validation\n"); - return FALSE; - } + nv40_emit_hw_state(nv40); BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); OUT_RING (nvgl_primitive(mode)); @@ -406,19 +334,92 @@ nv40_draw_elements(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); - /* 0x4497 doesn't support real index buffers, and there doesn't appear - * to be support on any chipset for 8-bit indices. - */ - if (nv40->hw->curie->grclass == NV44TCL || indexSize == 1) { + if (nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize)) { + nv40_draw_elements_vbo(pipe, mode, start, count); + } else { nv40_draw_elements_inline(pipe, indexBuffer, indexSize, mode, start, count); - } else { - nv40_draw_elements_vbo(pipe, indexBuffer, indexSize, - mode, start, count); } pipe->flush(pipe, 0); return TRUE; } +static boolean +nv40_vbo_validate(struct nv40_context *nv40) +{ + struct nv40_vertex_program *vp = nv40->vertprog; + struct nouveau_stateobj *vtxbuf, *vtxfmt; + struct pipe_buffer *ib = nv40->idxbuf; + unsigned ib_format = nv40->idxbuf_format; + unsigned inputs, hw, num_hw; + unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + + inputs = vp->ir; + for (hw = 0; hw < 16 && inputs; hw++) { + if (inputs & (1 << hw)) { + num_hw = hw; + inputs &= ~(1 << hw); + } + } + num_hw++; + + vtxbuf = so_new(20, 18); + so_method(vtxbuf, nv40->screen->curie, NV40TCL_VTXBUF_ADDRESS(0), num_hw); + vtxfmt = so_new(17, 0); + so_method(vtxfmt, nv40->screen->curie, NV40TCL_VTXFMT(0), num_hw); + + inputs = vp->ir; + for (hw = 0; hw < num_hw; hw++) { + struct pipe_vertex_element *ve; + struct pipe_vertex_buffer *vb; + + if (!(inputs & (1 << hw))) { + so_data(vtxbuf, 0); + so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT); + continue; + } + + ve = &nv40->vtxelt[hw]; + vb = &nv40->vtxbuf[ve->vertex_buffer_index]; + + if (!vb->pitch && nv40_vbo_static_attrib(nv40, hw, ve, vb)) { + so_data(vtxbuf, 0); + so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT); + continue; + } + + so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset, + vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 0, NV40TCL_VTXBUF_ADDRESS_DMA1); + so_data (vtxfmt, ((vb->pitch << NV40TCL_VTXFMT_STRIDE_SHIFT) | + (nv40_vbo_ncomp(ve->src_format) << + NV40TCL_VTXFMT_SIZE_SHIFT) | + nv40_vbo_type(ve->src_format))); + } + + if (ib) { + so_method(vtxbuf, nv40->screen->curie, NV40TCL_IDXBUF_ADDRESS, 2); + so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); + so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR, + 0, NV40TCL_IDXBUF_FORMAT_DMA1); + } + + so_method(vtxbuf, nv40->screen->curie, 0x1710, 1); + so_data (vtxbuf, 0); + + so_ref(vtxbuf, &nv40->state.hw[NV40_STATE_VTXBUF]); + nv40->state.dirty |= (1ULL << NV40_STATE_VTXBUF); + so_ref(vtxfmt, &nv40->state.hw[NV40_STATE_VTXFMT]); + nv40->state.dirty |= (1ULL << NV40_STATE_VTXFMT); + return FALSE; +} + +struct nv40_state_entry nv40_state_vbo = { + .validate = nv40_vbo_validate, + .dirty = { + .pipe = NV40_NEW_ARRAYS, + .hw = 0, + } +}; diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 9f4738b830..5b7a343e55 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -558,7 +558,7 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) return TRUE; } -void +static void nv40_vertprog_translate(struct nv40_context *nv40, struct nv40_vertex_program *vp) { @@ -631,24 +631,32 @@ out_err: free(vpc); } -void -nv40_vertprog_bind(struct nv40_context *nv40, struct nv40_vertex_program *vp) +static boolean +nv40_vertprog_validate(struct nv40_context *nv40) { + struct nv40_vertex_program *vp = nv40->vertprog; + struct pipe_buffer *constbuf = + nv40->constbuf[PIPE_SHADER_VERTEX]; struct nouveau_winsys *nvws = nv40->nvws; struct pipe_winsys *ws = nv40->pipe.winsys; boolean upload_code = FALSE, upload_data = FALSE; int i; /* Translate TGSI shader into hw bytecode */ + if (vp->translated) + goto check_gpu_resources; + + nv40_vertprog_translate(nv40, vp); if (!vp->translated) { - nv40_vertprog_translate(nv40, vp); - if (!vp->translated) - assert(0); + nv40->fallback |= NV40_FALLBACK_TNL; + return FALSE; } +check_gpu_resources: /* Allocate hw vtxprog exec slots */ if (!vp->exec) { - struct nouveau_resource *heap = nv40->hw->vp_exec_heap; + struct nouveau_resource *heap = nv40->screen->vp_exec_heap; + struct nouveau_stateobj *so; uint vplen = vp->nr_insns; if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) { @@ -663,12 +671,20 @@ nv40_vertprog_bind(struct nv40_context *nv40, struct nv40_vertex_program *vp) assert(0); } + so = so_new(5, 0); + so_method(so, nv40->screen->curie, NV40TCL_VP_START_FROM_ID, 1); + so_data (so, vp->exec->start); + so_method(so, nv40->screen->curie, NV40TCL_VP_ATTRIB_EN, 2); + so_data (so, vp->ir); + so_data (so, vp->or); + so_ref(so, &vp->so); + upload_code = TRUE; } /* Allocate hw vtxprog const slots */ if (vp->nr_consts && !vp->data) { - struct nouveau_resource *heap = nv40->hw->vp_data_heap; + struct nouveau_resource *heap = nv40->screen->vp_data_heap; if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) { while (heap->next && heap->size < vp->nr_consts) { @@ -725,8 +741,8 @@ nv40_vertprog_bind(struct nv40_context *nv40, struct nv40_vertex_program *vp) if (vp->nr_consts) { float *map = NULL; - if (nv40->vertprog.constant_buf) { - map = ws->buffer_map(ws, nv40->vertprog.constant_buf, + if (constbuf) { + map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); } @@ -747,9 +763,8 @@ nv40_vertprog_bind(struct nv40_context *nv40, struct nv40_vertex_program *vp) OUT_RINGp ((uint32_t *)vpd->value, 4); } - if (map) { - ws->buffer_unmap(ws, nv40->vertprog.constant_buf); - } + if (constbuf) + ws->buffer_unmap(ws, constbuf); } /* Upload vtxprog */ @@ -770,13 +785,12 @@ nv40_vertprog_bind(struct nv40_context *nv40, struct nv40_vertex_program *vp) } } - BEGIN_RING(curie, NV40TCL_VP_START_FROM_ID, 1); - OUT_RING (vp->exec->start); - BEGIN_RING(curie, NV40TCL_VP_ATTRIB_EN, 2); - OUT_RING (vp->ir); - OUT_RING (vp->or); + if (vp->so != nv40->state.hw[NV40_STATE_VERTPROG]) { + so_ref(vp->so, &nv40->state.hw[NV40_STATE_VERTPROG]); + return TRUE; + } - nv40->vertprog.active = vp; + return FALSE; } void @@ -788,3 +802,11 @@ nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp) free(vp->insns); } +struct nv40_state_entry nv40_state_vertprog = { + .validate = nv40_vertprog_validate, + .dirty = { + .pipe = NV40_NEW_VERTPROG, + .hw = NV40_STATE_VERTPROG, + } +}; + diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile index 68eb49ff2a..1c0b82887a 100644 --- a/src/gallium/drivers/nv50/Makefile +++ b/src/gallium/drivers/nv50/Makefile @@ -9,6 +9,7 @@ DRIVER_SOURCES = \ nv50_draw.c \ nv50_miptree.c \ nv50_query.c \ + nv50_screen.c \ nv50_state.c \ nv50_surface.c \ nv50_vbo.c diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 3c5a54bfd3..c937b8de6d 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -4,85 +4,7 @@ #include "pipe/p_util.h" #include "nv50_context.h" - -static boolean -nv50_is_format_supported(struct pipe_context *pipe, enum pipe_format format, - uint type) -{ - return FALSE; -} - -static const char * -nv50_get_name(struct pipe_context *pipe) -{ - struct nv50_context *nv50 = (struct nv50_context *)pipe; - static char buffer[128]; - - snprintf(buffer, sizeof(buffer), "NV%02X", nv50->chipset); - return buffer; -} - -static const char * -nv50_get_vendor(struct pipe_context *pipe) -{ - return "nouveau"; -} - -static int -nv50_get_param(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 32; - case PIPE_CAP_NPOT_TEXTURES: - return 0; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_S3TC: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; - case PIPE_CAP_POINT_SPRITE: - return 0; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 8; - case PIPE_CAP_OCCLUSION_QUERY: - return 0; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 0; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 13; - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 10; - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 13; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0; - } -} - -static float -nv50_get_paramf(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 10.0; - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 64.0; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 4.0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0.0; - } -} +#include "nv50_screen.h" static void nv50_flush(struct pipe_context *pipe, unsigned flags) @@ -134,9 +56,11 @@ nv50_init_hwctx(struct nv50_context *nv50, int tesla_class) #define GRCLASS5097_CHIPSETS 0x00000000 #define GRCLASS8297_CHIPSETS 0x00000010 struct pipe_context * -nv50_create(struct pipe_winsys *pipe_winsys, struct nouveau_winsys *nvws, - unsigned chipset) +nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) { + struct pipe_winsys *pipe_winsys = pscreen->winsys; + struct nouveau_winsys *nvws = nv50_screen(pscreen)->nvws; + unsigned chipset = nv50_screen(pscreen)->chipset; struct nv50_context *nv50; int tesla_class, ret; @@ -173,13 +97,9 @@ nv50_create(struct pipe_winsys *pipe_winsys, struct nouveau_winsys *nvws, } nv50->pipe.winsys = pipe_winsys; + nv50->pipe.screen = pscreen; nv50->pipe.destroy = nv50_destroy; - nv50->pipe.is_format_supported = nv50_is_format_supported; - nv50->pipe.get_name = nv50_get_name; - nv50->pipe.get_vendor = nv50_get_vendor; - nv50->pipe.get_param = nv50_get_param; - nv50->pipe.get_paramf = nv50_get_paramf; nv50->pipe.draw_arrays = nv50_draw_arrays; nv50->pipe.draw_elements = nv50_draw_elements; diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index b99254f619..a529bf3c3e 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -38,6 +38,8 @@ extern void nv50_init_surface_functions(struct nv50_context *nv50); extern void nv50_init_state_functions(struct nv50_context *nv50); extern void nv50_init_query_functions(struct nv50_context *nv50); +extern void nv50_screen_init_miptree_functions(struct pipe_screen *pscreen); + /* nv50_draw.c */ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50); diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 0c034ed438..720d33fda9 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -1,25 +1,46 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_util.h" +#include "pipe/p_screen.h" #include "nv50_context.h" static struct pipe_texture * -nv50_miptree_create(struct pipe_context *pipe, const struct pipe_texture *pt) +nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) { NOUVEAU_ERR("unimplemented\n"); return NULL; } static void -nv50_miptree_release(struct pipe_context *pipe, struct pipe_texture **pt) +nv50_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **pt) { NOUVEAU_ERR("unimplemented\n"); } +static struct pipe_surface * +nv50_miptree_surface(struct pipe_screen *pscreen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + NOUVEAU_ERR("unimplemented\n"); + return NULL; +} + +void +nv50_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ + pscreen->texture_create = nv50_miptree_create; + pscreen->texture_release = nv50_miptree_release; + pscreen->get_tex_surface = nv50_miptree_surface; +} + +static void +nv50_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt) +{ +} + void nv50_init_miptree_functions(struct nv50_context *nv50) { - nv50->pipe.texture_create = nv50_miptree_create; - nv50->pipe.texture_release = nv50_miptree_release; + nv50->pipe.texture_update = nv50_miptree_update; } diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c new file mode 100644 index 0000000000..f091779e3b --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -0,0 +1,119 @@ +#include "pipe/p_screen.h" +#include "pipe/p_util.h" + +#include "nv50_context.h" +#include "nv50_screen.h" + +static boolean +nv50_screen_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, uint type) +{ + return FALSE; +} + +static const char * +nv50_screen_get_name(struct pipe_screen *pscreen) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", screen->chipset); + return buffer; +} + +static const char * +nv50_screen_get_vendor(struct pipe_screen *pscreen) +{ + return "nouveau"; +} + +static int +nv50_screen_get_param(struct pipe_screen *pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 32; + case PIPE_CAP_NPOT_TEXTURES: + return 0; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 8; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 0; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv50_screen_get_paramf(struct pipe_screen *pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 4.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static void +nv50_screen_destroy(struct pipe_screen *pscreen) +{ + FREE(pscreen); +} + +struct pipe_screen * +nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, + unsigned chipset) +{ + struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen); + + if (!screen) + return NULL; + + screen->chipset = chipset; + screen->nvws = nvws; + + screen->pipe.winsys = ws; + + screen->pipe.destroy = nv50_screen_destroy; + + screen->pipe.get_name = nv50_screen_get_name; + screen->pipe.get_vendor = nv50_screen_get_vendor; + screen->pipe.get_param = nv50_screen_get_param; + screen->pipe.get_paramf = nv50_screen_get_paramf; + + screen->pipe.is_format_supported = nv50_screen_is_format_supported; + + nv50_screen_init_miptree_functions(&screen->pipe); + + return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h new file mode 100644 index 0000000000..d664816a03 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -0,0 +1,19 @@ +#ifndef __NV50_SCREEN_H__ +#define __NV50_SCREEN_H__ + +#include "pipe/p_screen.h" + +struct nv50_screen { + struct pipe_screen pipe; + + struct nouveau_winsys *nvws; + unsigned chipset; +}; + +static INLINE struct nv50_screen * +nv50_screen(struct pipe_screen *screen) +{ + return (struct nv50_screen *)screen; +} + +#endif diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index ca92ff02b8..39cf675a57 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -33,15 +33,6 @@ #include "pipe/p_inlines.h" #include "util/p_tile.h" -static struct pipe_surface * -nv50_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - NOUVEAU_ERR("unimplemented\n"); - return NULL; -} - static void nv50_surface_copy(struct pipe_context *pipe, unsigned flip, struct pipe_surface *dest, unsigned destx, unsigned desty, @@ -69,7 +60,6 @@ nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, void nv50_init_surface_functions(struct nv50_context *nv50) { - nv50->pipe.get_tex_surface = nv50_get_tex_surface; nv50->pipe.surface_copy = nv50_surface_copy; nv50->pipe.surface_fill = nv50_surface_fill; } diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 5479daf8ea..f32db35d58 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -1,10 +1,9 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = softpipe -DRIVER_SOURCES = \ +C_SOURCES = \ sp_fs_exec.c \ sp_fs_sse.c \ sp_fs_llvm.c \ @@ -28,6 +27,7 @@ DRIVER_SOURCES = \ sp_quad_output.c \ sp_quad_stencil.c \ sp_quad_stipple.c \ + sp_screen.c \ sp_state_blend.c \ sp_state_clip.c \ sp_state_derived.c \ @@ -41,12 +41,6 @@ DRIVER_SOURCES = \ sp_tile_cache.c \ sp_surface.c -C_SOURCES = \ - $(COMMON_SOURCES) \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index d581ee8d3c..88c21ee3b0 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -5,6 +5,9 @@ env = env.Clone() softpipe = env.ConvenienceLibrary( target = 'softpipe', source = [ + 'sp_fs_exec.c', + 'sp_fs_sse.c', + 'sp_fs_llvm.c', 'sp_clear.c', 'sp_context.c', 'sp_draw_arrays.c', @@ -25,6 +28,7 @@ softpipe = env.ConvenienceLibrary( 'sp_quad_stencil.c', 'sp_quad_stipple.c', 'sp_query.c', + 'sp_screen.c', 'sp_state_blend.c', 'sp_state_clip.c', 'sp_state_derived.c', diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 5e98f190bb..fa16ed94e8 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -48,29 +48,6 @@ /** - * Query format support for creating a texture, drawing surface, etc. - * \param format the format to test - * \param type one of PIPE_TEXTURE, PIPE_SURFACE - */ -static boolean -softpipe_is_format_supported( struct pipe_context *pipe, - enum pipe_format format, uint type ) -{ - switch (type) { - case PIPE_TEXTURE: - /* softpipe supports all texture formats */ - return TRUE; - case PIPE_SURFACE: - /* softpipe supports all (off-screen) surface formats */ - return TRUE; - default: - assert(0); - return FALSE; - } -} - - -/** * Map any drawing surfaces which aren't already mapped */ void @@ -143,76 +120,10 @@ static void softpipe_destroy( struct pipe_context *pipe ) } -static const char *softpipe_get_name( struct pipe_context *pipe ) -{ - return "softpipe"; -} - -static const char *softpipe_get_vendor( struct pipe_context *pipe ) -{ - return "Tungsten Graphics, Inc."; -} - -static int softpipe_get_param(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 8; - case PIPE_CAP_NPOT_TEXTURES: - return 1; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 1; - case PIPE_CAP_S3TC: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; - case PIPE_CAP_POINT_SPRITE: - return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; - case PIPE_CAP_OCCLUSION_QUERY: - return 1; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 12; /* max 2Kx2K */ - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 8; /* max 128x128x128 */ - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 12; /* max 2Kx2K */ - default: - return 0; - } -} - -static float softpipe_get_paramf(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 255.0; /* arbitrary */ - - case PIPE_CAP_MAX_POINT_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 255.0; /* arbitrary */ - - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 0.0; - - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0; /* arbitrary */ - - default: - return 0; - } -} - -struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys, - struct softpipe_winsys *softpipe_winsys ) +struct pipe_context * +softpipe_create( struct pipe_screen *screen, + struct pipe_winsys *pipe_winsys, + struct softpipe_winsys *softpipe_winsys ) { struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context); uint i; @@ -226,15 +137,9 @@ struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys, softpipe->dump_fs = GETENV( "GALLIUM_DUMP_FS" ) != NULL; softpipe->pipe.winsys = pipe_winsys; + softpipe->pipe.screen = screen; softpipe->pipe.destroy = softpipe_destroy; - /* queries */ - softpipe->pipe.is_format_supported = softpipe_is_format_supported; - softpipe->pipe.get_name = softpipe_get_name; - softpipe->pipe.get_vendor = softpipe_get_vendor; - softpipe->pipe.get_param = softpipe_get_param; - softpipe->pipe.get_paramf = softpipe_get_paramf; - /* state setters */ softpipe->pipe.create_blend_state = softpipe_create_blend_state; softpipe->pipe.bind_blend_state = softpipe_bind_blend_state; @@ -279,11 +184,7 @@ struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys, softpipe->pipe.flush = softpipe_flush; softpipe_init_query_funcs( softpipe ); - - /* textures */ - softpipe->pipe.texture_create = softpipe_texture_create; - softpipe->pipe.texture_release = softpipe_texture_release; - softpipe->pipe.get_tex_surface = softpipe_get_tex_surface; + softpipe_init_texture_funcs( softpipe ); /* * Alloc caches for accessing drawing surfaces and textures. @@ -327,6 +228,15 @@ struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys, draw_set_rasterize_stage(softpipe->draw, softpipe->setup); } + /* plug in AA line/point stages */ + draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); + draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe); + +#if USE_DRAW_STAGE_PSTIPPLE + /* Do polygon stipple w/ texture map + frag prog? */ + draw_install_pstipple_stage(softpipe->draw, &softpipe->pipe); +#endif + sp_init_surface_functions(softpipe); return &softpipe->pipe; diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index b70d4fea85..feeafc7084 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -39,6 +39,13 @@ #include "sp_quad.h" +/** + * This is a temporary variable for testing draw-stage polygon stipple. + * If zero, do stipple in sp_quad_stipple.c + */ +#define USE_DRAW_STAGE_PSTIPPLE 1 + + struct softpipe_winsys; struct softpipe_vbuf_render; struct draw_context; @@ -68,7 +75,7 @@ struct softpipe_context { struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; - struct softpipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 9ad30a7681..d5bd7a702f 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -44,9 +44,44 @@ struct sp_exec_fragment_shader { +/** + * Compute quad X,Y,Z,W for the four fragments in a quad. + * + * This should really be part of the compiled shader. + */ +void +sp_setup_pos_vector(const struct tgsi_interp_coef *coef, + float x, float y, + struct tgsi_exec_vector *quadpos) +{ + uint chan; + /* do X */ + quadpos->xyzw[0].f[0] = x; + quadpos->xyzw[0].f[1] = x + 1; + quadpos->xyzw[0].f[2] = x; + quadpos->xyzw[0].f[3] = x + 1; + + /* do Y */ + quadpos->xyzw[1].f[0] = y; + quadpos->xyzw[1].f[1] = y; + quadpos->xyzw[1].f[2] = y + 1; + quadpos->xyzw[1].f[3] = y + 1; + + /* do Z and W for all fragments in the quad */ + for (chan = 2; chan < 4; chan++) { + const float dadx = coef->dadx[chan]; + const float dady = coef->dady[chan]; + const float a0 = coef->a0[chan] + dadx * x + dady * y; + quadpos->xyzw[chan].f[0] = a0; + quadpos->xyzw[chan].f[1] = a0 + dadx; + quadpos->xyzw[chan].f[2] = a0 + dady; + quadpos->xyzw[chan].f[3] = a0 + dadx + dady; + } +} + static void -exec_prepare( struct sp_fragment_shader *base, +exec_prepare( const struct sp_fragment_shader *base, struct tgsi_exec_machine *machine, struct tgsi_sampler *samplers ) { @@ -63,7 +98,7 @@ exec_prepare( struct sp_fragment_shader *base, * interface: */ static unsigned -exec_run( struct sp_fragment_shader *base, +exec_run( const struct sp_fragment_shader *base, struct tgsi_exec_machine *machine, struct quad_header *quad ) { diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c index 22da471453..07d058155f 100644 --- a/src/gallium/drivers/softpipe/sp_fs_llvm.c +++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c @@ -96,7 +96,7 @@ shade_quad_llvm(struct quad_stage *qs, if (qss->colorOutSlot >= 0) { unsigned i; /* XXX need to handle multiple color outputs someday */ - allvmrt(qss->stage.softpipe->fs->shader.output_semantic_name[qss->colorOutSlot] + allvmrt(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] == TGSI_SEMANTIC_COLOR); for (i = 0; i < QUAD_SIZE; ++i) { quad->outputs.color[0][i] = dests[i][qss->colorOutSlot][0]; @@ -146,7 +146,7 @@ shade_quad_llvm(struct quad_stage *qs, unsigned -run_llvm_fs( struct sp_fragment_shader *base, +run_llvm_fs( const struct sp_fragment_shader *base, struct foo *machine ) { } diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index d90066e025..8095d662ee 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -42,7 +42,7 @@ #if defined(__i386__) || defined(__386__) -#include "x86/rtasm/x86sse.h" +#include "rtasm/rtasm_x86sse.h" /* Surely this should be defined somewhere in a tgsi header: */ @@ -63,41 +63,6 @@ struct sp_sse_fragment_shader { }; -/** - * Compute quad X,Y,Z,W for the four fragments in a quad. - * - * This should really be part of the compiled shader. - */ -void -sp_setup_pos_vector(const struct tgsi_interp_coef *coef, - float x, float y, - struct tgsi_exec_vector *quadpos) -{ - uint chan; - /* do X */ - quadpos->xyzw[0].f[0] = x; - quadpos->xyzw[0].f[1] = x + 1; - quadpos->xyzw[0].f[2] = x; - quadpos->xyzw[0].f[3] = x + 1; - - /* do Y */ - quadpos->xyzw[1].f[0] = y; - quadpos->xyzw[1].f[1] = y; - quadpos->xyzw[1].f[2] = y + 1; - quadpos->xyzw[1].f[3] = y + 1; - - /* do Z and W for all fragments in the quad */ - for (chan = 2; chan < 4; chan++) { - const float dadx = coef->dadx[chan]; - const float dady = coef->dady[chan]; - const float a0 = coef->a0[chan] + dadx * x + dady * y; - quadpos->xyzw[chan].f[0] = a0; - quadpos->xyzw[chan].f[1] = a0 + dadx; - quadpos->xyzw[chan].f[2] = a0 + dady; - quadpos->xyzw[chan].f[3] = a0 + dadx + dady; - } -} - static void fs_sse_prepare( struct sp_fragment_shader *base, @@ -124,6 +89,9 @@ fs_sse_run( struct sp_fragment_shader *base, (float)quad->x0, (float)quad->y0, machine->Temps); + /* init kill mask */ + machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] = 0x0; + shader->func( machine->Inputs, machine->Outputs, machine->Consts, diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index d73521ccbe..2feee5c485 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -476,33 +476,33 @@ static void tri_persp_coeff( struct setup_stage *setup, * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. */ static void -setup_fragcoord_coeff(struct setup_stage *setup) +setup_fragcoord_coeff(struct setup_stage *setup, uint slot) { /*X*/ - setup->coef[0].a0[0] = 0; - setup->coef[0].dadx[0] = 1.0; - setup->coef[0].dady[0] = 0.0; + setup->coef[slot].a0[0] = 0; + setup->coef[slot].dadx[0] = 1.0; + setup->coef[slot].dady[0] = 0.0; /*Y*/ if (setup->softpipe->rasterizer->origin_lower_left) { /* y=0=bottom */ const int winHeight = setup->softpipe->framebuffer.cbufs[0]->height; - setup->coef[0].a0[1] = (float) (winHeight - 1); - setup->coef[0].dady[1] = -1.0; + setup->coef[slot].a0[1] = (float) (winHeight - 1); + setup->coef[slot].dady[1] = -1.0; } else { /* y=0=top */ - setup->coef[0].a0[1] = 0.0; - setup->coef[0].dady[1] = 1.0; + setup->coef[slot].a0[1] = 0.0; + setup->coef[slot].dady[1] = 1.0; } - setup->coef[0].dadx[1] = 0.0; + setup->coef[slot].dadx[1] = 0.0; /*Z*/ - setup->coef[0].a0[2] = setup->posCoef.a0[2]; - setup->coef[0].dadx[2] = setup->posCoef.dadx[2]; - setup->coef[0].dady[2] = setup->posCoef.dady[2]; + setup->coef[slot].a0[2] = setup->posCoef.a0[2]; + setup->coef[slot].dadx[2] = setup->posCoef.dadx[2]; + setup->coef[slot].dady[2] = setup->posCoef.dady[2]; /*W*/ - setup->coef[0].a0[3] = setup->posCoef.a0[3]; - setup->coef[0].dadx[3] = setup->posCoef.dadx[3]; - setup->coef[0].dady[3] = setup->posCoef.dady[3]; + setup->coef[slot].a0[3] = setup->posCoef.a0[3]; + setup->coef[slot].dadx[3] = setup->posCoef.dadx[3]; + setup->coef[slot].dady[3] = setup->posCoef.dady[3]; } @@ -514,7 +514,7 @@ setup_fragcoord_coeff(struct setup_stage *setup) static void setup_tri_coefficients( struct setup_stage *setup ) { struct softpipe_context *softpipe = setup->softpipe; - const struct pipe_shader_state *fs = &softpipe->fs->shader; + const struct sp_fragment_shader *spfs = softpipe->fs; const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); uint fragSlot; @@ -525,7 +525,7 @@ static void setup_tri_coefficients( struct setup_stage *setup ) /* setup interpolation for all the remaining attributes: */ - for (fragSlot = 0; fragSlot < fs->num_inputs; fragSlot++) { + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { const uint vertSlot = vinfo->src_index[fragSlot]; uint j; @@ -543,14 +543,13 @@ static void setup_tri_coefficients( struct setup_stage *setup ) tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); break; case INTERP_POS: - assert(fragSlot == 0); - setup_fragcoord_coeff(setup); + setup_fragcoord_coeff(setup, fragSlot); break; default: assert(0); } - if (fs->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { /* FOG.y = front/back facing XXX fix this */ setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; setup->coef[fragSlot].dadx[1] = 0.0; @@ -758,7 +757,7 @@ static INLINE void setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim) { struct softpipe_context *softpipe = setup->softpipe; - const struct pipe_shader_state *fs = &setup->softpipe->fs->shader; + const struct sp_fragment_shader *spfs = softpipe->fs; const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); uint fragSlot; @@ -780,7 +779,7 @@ setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim) /* setup interpolation for all the remaining attributes: */ - for (fragSlot = 0; fragSlot < fs->num_inputs; fragSlot++) { + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { const uint vertSlot = vinfo->src_index[fragSlot]; uint j; @@ -798,15 +797,13 @@ setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim) line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); break; case INTERP_POS: - assert(fragSlot == 0); - assert(0); /* XXX fix this: */ - setup_fragcoord_coeff(setup); + setup_fragcoord_coeff(setup, fragSlot); break; default: assert(0); } - if (fs->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { /* FOG.y = front/back facing XXX fix this */ setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; setup->coef[fragSlot].dadx[1] = 0.0; @@ -970,7 +967,7 @@ setup_point(struct draw_stage *stage, struct prim_header *prim) { struct setup_stage *setup = setup_stage( stage ); struct softpipe_context *softpipe = setup->softpipe; - const struct pipe_shader_state *fs = &softpipe->fs->shader; + const struct sp_fragment_shader *spfs = softpipe->fs; const struct vertex_header *v0 = prim->v[0]; const int sizeAttr = setup->softpipe->psize_slot; const float size @@ -1005,7 +1002,7 @@ setup_point(struct draw_stage *stage, struct prim_header *prim) const_coeff(setup, &setup->posCoef, 0, 2); const_coeff(setup, &setup->posCoef, 0, 3); - for (fragSlot = 0; fragSlot < fs->num_inputs; fragSlot++) { + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { const uint vertSlot = vinfo->src_index[fragSlot]; uint j; @@ -1022,15 +1019,13 @@ setup_point(struct draw_stage *stage, struct prim_header *prim) &setup->coef[fragSlot], vertSlot, j); break; case INTERP_POS: - assert(fragSlot == 0); - assert(0); /* XXX fix this: */ - setup_fragcoord_coeff(setup); + setup_fragcoord_coeff(setup, fragSlot); break; default: assert(0); } - if (fs->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { /* FOG.y = front/back facing XXX fix this */ setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; setup->coef[fragSlot].dadx[1] = 0.0; @@ -1168,9 +1163,13 @@ static void setup_begin( struct draw_stage *stage ) { struct setup_stage *setup = setup_stage(stage); struct softpipe_context *sp = setup->softpipe; - const struct pipe_shader_state *fs = &setup->softpipe->fs->shader; + const struct sp_fragment_shader *fs = setup->softpipe->fs; - setup->quad.nr_attrs = fs->num_inputs; + if (sp->dirty) { + softpipe_update_derived(sp); + } + + setup->quad.nr_attrs = fs->info.num_inputs; sp->quad.first->begin(sp->quad.first); diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c index 6bd468a51c..8603c1a367 100644 --- a/src/gallium/drivers/softpipe/sp_quad.c +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -56,11 +56,12 @@ sp_build_depth_stencil( void sp_build_quad_pipeline(struct softpipe_context *sp) { - boolean early_depth_test = + boolean early_depth_test = sp->depth_stencil->depth.enabled && sp->framebuffer.zsbuf && !sp->depth_stencil->alpha.enabled && - sp->fs->shader.output_semantic_name[0] != TGSI_SEMANTIC_POSITION; + !sp->fs->info.uses_kill && + !sp->fs->info.writes_z; /* build up the pipeline in reverse order... */ @@ -112,7 +113,9 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp_push_quad_first( sp, sp->quad.earlyz ); } +#if !USE_DRAW_STAGE_PSTIPPLE if (sp->rasterizer->poly_stipple_enable) { sp_push_quad_first( sp, sp->quad.polygon_stipple ); } +#endif } diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index cf1b1eff75..1fbb2e38c4 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -91,7 +91,7 @@ shade_quad( /* store result color */ if (qss->colorOutSlot >= 0) { /* XXX need to handle multiple color outputs someday */ - assert(qss->stage.softpipe->fs->shader.output_semantic_name[qss->colorOutSlot] + assert(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] == TGSI_SEMANTIC_COLOR); memcpy( quad->outputs.color, @@ -142,14 +142,14 @@ static void shade_begin(struct quad_stage *qs) /* set TGSI sampler state that varies */ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { qss->samplers[i].state = softpipe->sampler[i]; - qss->samplers[i].texture = &softpipe->texture[i]->base; + qss->samplers[i].texture = softpipe->texture[i]; } /* find output slots for depth, color */ qss->colorOutSlot = -1; qss->depthOutSlot = -1; - for (i = 0; i < qss->stage.softpipe->fs->shader.num_outputs; i++) { - switch (qss->stage.softpipe->fs->shader.output_semantic_name[i]) { + for (i = 0; i < qss->stage.softpipe->fs->info.num_outputs; i++) { + switch (qss->stage.softpipe->fs->info.output_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: qss->depthOutSlot = i; break; diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c new file mode 100644 index 0000000000..1850a1ced3 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -0,0 +1,165 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" + +#include "sp_texture.h" +#include "sp_winsys.h" + + +static const char * +softpipe_get_vendor(struct pipe_screen *screen) +{ + return "Tungsten Graphics, Inc."; +} + + +static const char * +softpipe_get_name(struct pipe_screen *screen) +{ + return "softpipe"; +} + + +static int +softpipe_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 1; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 12; /* max 2Kx2K */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 12; /* max 2Kx2K */ + default: + return 0; + } +} + + +static float +softpipe_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 255.0; /* arbitrary */ + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; /* arbitrary */ + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 0.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; /* arbitrary */ + default: + return 0; + } +} + + +/** + * Query format support for creating a texture, drawing surface, etc. + * \param format the format to test + * \param type one of PIPE_TEXTURE, PIPE_SURFACE + */ +static boolean +softpipe_is_format_supported( struct pipe_screen *screen, + enum pipe_format format, uint type ) +{ + switch (type) { + case PIPE_TEXTURE: + /* softpipe supports all texture formats */ + return TRUE; + case PIPE_SURFACE: + /* softpipe supports all (off-screen) surface formats */ + return TRUE; + default: + assert(0); + return FALSE; + } +} + + +static void +softpipe_destroy_screen( struct pipe_screen *screen ) +{ + FREE(screen); +} + + +/** + * Create a new pipe_screen object + * Note: we're not presently subclassing pipe_screen (no softpipe_screen). + */ +struct pipe_screen * +softpipe_create_screen(struct pipe_winsys *winsys) +{ + struct pipe_screen *screen = CALLOC_STRUCT(pipe_screen); + + if (!screen) + return NULL; + + screen->winsys = winsys; + + screen->destroy = softpipe_destroy_screen; + + screen->get_name = softpipe_get_name; + screen->get_vendor = softpipe_get_vendor; + screen->get_param = softpipe_get_param; + screen->get_paramf = softpipe_get_paramf; + screen->is_format_supported = softpipe_is_format_supported; + + softpipe_init_screen_texture_funcs(screen); + + return screen; +} diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index ef8cf67d4c..3943d4ed2b 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -32,6 +32,7 @@ #define SP_STATE_H #include "pipe/p_state.h" +#include "tgsi/util/tgsi_scan.h" #define SP_NEW_VIEWPORT 0x1 @@ -52,7 +53,6 @@ struct tgsi_sampler; -struct tgsi_interp_coef; struct tgsi_exec_machine; @@ -61,16 +61,18 @@ struct tgsi_exec_machine; * This is starting to look an awful lot like a quad pipeline stage... */ struct sp_fragment_shader { - struct pipe_shader_state shader; + struct pipe_shader_state shader; + + struct tgsi_shader_info info; - void (*prepare)( struct sp_fragment_shader *shader, + void (*prepare)( const struct sp_fragment_shader *shader, struct tgsi_exec_machine *machine, struct tgsi_sampler *samplers); /* Run the shader - this interface will get cleaned up in the * future: */ - unsigned (*run)( struct sp_fragment_shader *shader, + unsigned (*run)( const struct sp_fragment_shader *shader, struct tgsi_exec_machine *machine, struct quad_header *quad ); diff --git a/src/gallium/drivers/softpipe/sp_state_clip.c b/src/gallium/drivers/softpipe/sp_state_clip.c index c797c0dd3b..4946c776e3 100644 --- a/src/gallium/drivers/softpipe/sp_state_clip.c +++ b/src/gallium/drivers/softpipe/sp_state_clip.c @@ -42,24 +42,16 @@ void softpipe_set_clip_state( struct pipe_context *pipe, } - -/* Called when driver state tracker notices changes to the viewport - * matrix: - */ void softpipe_set_viewport_state( struct pipe_context *pipe, const struct pipe_viewport_state *viewport ) { struct softpipe_context *softpipe = softpipe_context(pipe); - softpipe->viewport = *viewport; /* struct copy */ - softpipe->dirty |= SP_NEW_VIEWPORT; - /* pass the viewport info to the draw module */ draw_set_viewport_state(softpipe->draw, viewport); - /* Using tnl/ and vf/ modules is temporary while getting started. - * Full pipe will have vertex shader, vertex fetch of its own. - */ + softpipe->viewport = *viewport; /* struct copy */ + softpipe->dirty |= SP_NEW_VIEWPORT; } @@ -68,7 +60,9 @@ void softpipe_set_scissor_state( struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); - memcpy( &softpipe->scissor, scissor, sizeof(*scissor) ); + draw_flush(softpipe->draw); + + softpipe->scissor = *scissor; /* struct copy */ softpipe->dirty |= SP_NEW_SCISSOR; } @@ -78,6 +72,8 @@ void softpipe_set_polygon_stipple( struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); - memcpy( &softpipe->poly_stipple, stipple, sizeof(*stipple) ); + draw_flush(softpipe->draw); + + softpipe->poly_stipple = *stipple; /* struct copy */ softpipe->dirty |= SP_NEW_STIPPLE; } diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 9d8fd8b750..eafbaed4b9 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -35,30 +35,6 @@ /** - * Search vertex program's outputs to find a match for the given - * semantic name/index. Return the index of the output slot. - * - * Return 0 if not found. This will cause the fragment program to use - * vertex attrib 0 (position) in the cases where the fragment program - * attempts to use a missing vertex program output. This is an undefined - * condition that users shouldn't hit anyway. - */ -static int -find_vs_output(const struct pipe_shader_state *vs, - uint semantic_name, - uint semantic_index) -{ - uint i; - for (i = 0; i < vs->num_outputs; i++) { - if (vs->output_semantic_name[i] == semantic_name && - vs->output_semantic_index[i] == semantic_index) - return i; - } - return 0; -} - - -/** * Mark the current vertex layout as "invalid". * We'll validate the vertex layout later, when we start to actually * render a point or line or tri. @@ -85,8 +61,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) if (vinfo->num_attribs == 0) { /* compute vertex layout now */ - const struct pipe_shader_state *vs = &softpipe->vs->shader; - const struct pipe_shader_state *fs = &softpipe->fs->shader; + const struct sp_fragment_shader *spfs = softpipe->fs; const enum interp_mode colorInterp = softpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; uint i; @@ -98,7 +73,8 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; vinfo_vbuf->num_attribs = 0; draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0); - vinfo_vbuf->size = 4 * vs->num_outputs + /* size in dwords or floats */ + vinfo_vbuf->size = 4 * draw_num_vs_outputs(softpipe->draw) + sizeof(struct vertex_header) / 4; } @@ -107,29 +83,30 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) * from the vertex shader. */ vinfo->num_attribs = 0; - for (i = 0; i < fs->num_inputs; i++) { + for (i = 0; i < spfs->info.num_inputs; i++) { int src; - switch (fs->input_semantic_name[i]) { + switch (spfs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: - src = find_vs_output(vs, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_vs_output(softpipe->draw, + TGSI_SEMANTIC_POSITION, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); break; case TGSI_SEMANTIC_COLOR: - src = find_vs_output(vs, TGSI_SEMANTIC_COLOR, - fs->input_semantic_index[i]); + src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_COLOR, + spfs->info.input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); break; case TGSI_SEMANTIC_FOG: - src = find_vs_output(vs, TGSI_SEMANTIC_FOG, 0); + src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_FOG, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); break; case TGSI_SEMANTIC_GENERIC: /* this includes texcoords and varying vars */ - src = find_vs_output(vs, TGSI_SEMANTIC_GENERIC, - fs->input_semantic_index[i]); + src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_GENERIC, + spfs->info.input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); break; @@ -138,7 +115,8 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) } } - softpipe->psize_slot = find_vs_output(vs, TGSI_SEMANTIC_PSIZE, 0); + softpipe->psize_slot = draw_find_vs_output(softpipe->draw, + TGSI_SEMANTIC_PSIZE, 0); if (softpipe->psize_slot > 0) { draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, softpipe->psize_slot); diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index b0238f8173..eb641ed321 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -36,6 +36,7 @@ #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_scan.h" void * @@ -45,20 +46,24 @@ softpipe_create_fs_state(struct pipe_context *pipe, struct softpipe_context *softpipe = softpipe_context(pipe); struct sp_fragment_shader *state; + /* debug */ if (softpipe->dump_fs) tgsi_dump(templ->tokens, 0); + /* codegen */ state = softpipe_create_fs_llvm( softpipe, templ ); - if (state) - return state; - - state = softpipe_create_fs_sse( softpipe, templ ); - if (state) - return state; - - state = softpipe_create_fs_exec( softpipe, templ ); + if (!state) { + state = softpipe_create_fs_sse( softpipe, templ ); + if (!state) { + state = softpipe_create_fs_exec( softpipe, templ ); + } + } assert(state); + + /* get/save the summary info for this shader */ + tgsi_scan_shader(templ->tokens, &state->info); + return state; } diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 460adccec4..1d6dd17d1d 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -30,6 +30,7 @@ */ #include "pipe/p_util.h" +#include "pipe/p_inlines.h" #include "draw/draw_context.h" @@ -82,9 +83,9 @@ softpipe_set_sampler_texture(struct pipe_context *pipe, draw_flush(softpipe->draw); assert(unit < PIPE_MAX_SAMPLERS); - softpipe->texture[unit] = softpipe_texture(texture); /* ptr, not struct */ + pipe_texture_reference(&softpipe->texture[unit], texture); - sp_tile_cache_set_texture(softpipe->tex_cache[unit], texture); + sp_tile_cache_set_texture(pipe, softpipe->tex_cache[unit], texture); softpipe->dirty |= SP_NEW_TEXTURE; } diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index e2c6893e9f..124b18b708 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -27,7 +27,7 @@ /* Authors: Keith Whitwell <keith@tungstengraphics.com> */ -#include "p_inlines.h" +#include "pipe/p_inlines.h" #include "sp_context.h" #include "sp_state.h" diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 2f82fd6abe..0ced585c7f 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -332,6 +332,61 @@ linear_texcoord(unsigned wrapMode, float s, unsigned size, } +/** + * For RECT textures / unnormalized texcoords + * Only a subset of wrap modes supported. + */ +static INLINE int +nearest_texcoord_unnorm(unsigned wrapMode, float s, unsigned size) +{ + int i; + switch (wrapMode) { + case PIPE_TEX_WRAP_CLAMP: + i = ifloor(s); + return CLAMP(i, 0, (int) size-1); + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + /* fall-through */ + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return ifloor( CLAMP(s, 0.5F, (float) size - 0.5F) ); + default: + assert(0); + return 0; + } +} + + +/** + * For RECT textures / unnormalized texcoords. + * Only a subset of wrap modes supported. + */ +static INLINE void +linear_texcoord_unnorm(unsigned wrapMode, float s, unsigned size, + int *i0, int *i1, float *a) +{ + switch (wrapMode) { + case PIPE_TEX_WRAP_CLAMP: + /* Not exactly what the spec says, but it matches NVIDIA output */ + s = CLAMP(s - 0.5F, 0.0f, (float) size - 1.0f); + *i0 = ifloor(s); + *i1 = *i0 + 1; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + /* fall-through */ + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + s = CLAMP(s, 0.5F, (float) size - 0.5F); + s -= 0.5F; + *i0 = ifloor(s); + *i1 = *i0 + 1; + if (*i1 > (int) size - 1) + *i1 = size - 1; + break; + default: + assert(0); + } + *a = FRAC(s); +} + + static unsigned choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) { @@ -415,15 +470,15 @@ compute_lambda(struct tgsi_sampler *sampler, { float rho, lambda; + assert(sampler->state->normalized_coords); + assert(s); { float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]; float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; dsdx = FABSF(dsdx); dsdy = FABSF(dsdy); - rho = MAX2(dsdx, dsdy); - if (sampler->state->normalized_coords) - rho *= sampler->texture->width[0]; + rho = MAX2(dsdx, dsdy) * sampler->texture->width[0]; } if (t) { float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; @@ -431,9 +486,7 @@ compute_lambda(struct tgsi_sampler *sampler, float max; dtdx = FABSF(dtdx); dtdy = FABSF(dtdy); - max = MAX2(dtdx, dtdy); - if (sampler->state->normalized_coords) - max *= sampler->texture->height[0]; + max = MAX2(dtdx, dtdy) * sampler->texture->height[0]; rho = MAX2(rho, max); } if (p) { @@ -442,9 +495,7 @@ compute_lambda(struct tgsi_sampler *sampler, float max; dpdx = FABSF(dpdx); dpdy = FABSF(dpdy); - max = MAX2(dpdx, dpdy); - if (sampler->state->normalized_coords) - max *= sampler->texture->depth[0]; + max = MAX2(dpdx, dpdy) * sampler->texture->depth[0]; rho = MAX2(rho, max); } @@ -474,8 +525,24 @@ choose_mipmap_levels(struct tgsi_sampler *sampler, { if (sampler->state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { /* no mipmap selection needed */ - *imgFilter = sampler->state->mag_img_filter; - *level0 = *level1 = (int) sampler->state->min_lod; + *level0 = *level1 = CLAMP((int) sampler->state->min_lod, + 0, (int) sampler->texture->last_level); + + if (sampler->state->min_img_filter != sampler->state->mag_img_filter) { + /* non-mipmapped texture, but still need to determine if doing + * minification or magnification. + */ + float lambda = compute_lambda(sampler, s, t, p, lodbias); + if (lambda <= 0.0) { + *imgFilter = sampler->state->mag_img_filter; + } + else { + *imgFilter = sampler->state->min_img_filter; + } + } + else { + *imgFilter = sampler->state->mag_img_filter; + } } else { float lambda; @@ -487,7 +554,7 @@ choose_mipmap_levels(struct tgsi_sampler *sampler, /* vertex shader */ lambda = lodbias; /* not really a bias, but absolute LOD */ - if (lambda < 0.0) { /* XXX threshold depends on the filter */ + if (lambda <= 0.0) { /* XXX threshold depends on the filter */ /* magnifying */ *imgFilter = sampler->state->mag_img_filter; *level0 = *level1 = 0; @@ -612,13 +679,10 @@ sp_get_samples_2d_common(struct tgsi_sampler *sampler, choose_mipmap_levels(sampler, s, t, p, lodbias, &level0, &level1, &levelBlend, &imgFilter); - if (sampler->state->normalized_coords) { - width = sampler->texture->width[level0]; - height = sampler->texture->height[level0]; - } - else { - width = height = 1; - } + assert(sampler->state->normalized_coords); + + width = sampler->texture->width[level0]; + height = sampler->texture->height[level0]; assert(width > 0); @@ -749,14 +813,11 @@ sp_get_samples_3d(struct tgsi_sampler *sampler, choose_mipmap_levels(sampler, s, t, p, lodbias, &level0, &level1, &levelBlend, &imgFilter); - if (sampler->state->normalized_coords) { - width = sampler->texture->width[level0]; - height = sampler->texture->height[level0]; - depth = sampler->texture->depth[level0]; - } - else { - width = height = depth = 1; - } + assert(sampler->state->normalized_coords); + + width = sampler->texture->width[level0]; + height = sampler->texture->height[level0]; + depth = sampler->texture->depth[level0]; assert(width > 0); assert(height > 0); @@ -873,6 +934,73 @@ sp_get_samples_cube(struct tgsi_sampler *sampler, } +static void +sp_get_samples_rect(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + //sp_get_samples_2d_common(sampler, s, t, p, lodbias, rgba, faces); + static const uint face = 0; + const uint compare_func = sampler->state->compare_func; + unsigned level0, level1, j, imgFilter; + int width, height; + float levelBlend; + + choose_mipmap_levels(sampler, s, t, p, lodbias, + &level0, &level1, &levelBlend, &imgFilter); + + /* texture RECTS cannot be mipmapped */ + assert(level0 == level1); + + width = sampler->texture->width[level0]; + height = sampler->texture->height[level0]; + + assert(width > 0); + + switch (imgFilter) { + case PIPE_TEX_FILTER_NEAREST: + for (j = 0; j < QUAD_SIZE; j++) { + int x = nearest_texcoord_unnorm(sampler->state->wrap_s, s[j], width); + int y = nearest_texcoord_unnorm(sampler->state->wrap_t, t[j], height); + get_texel(sampler, face, level0, x, y, 0, rgba, j); + if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(compare_func, rgba, p, j); + } + } + break; + case PIPE_TEX_FILTER_LINEAR: + for (j = 0; j < QUAD_SIZE; j++) { + float tx[4][4], a, b; + int x0, y0, x1, y1, c; + linear_texcoord_unnorm(sampler->state->wrap_s, s[j], width, &x0, &x1, &a); + linear_texcoord_unnorm(sampler->state->wrap_t, t[j], height, &y0, &y1, &b); + get_texel(sampler, face, level0, x0, y0, 0, tx, 0); + get_texel(sampler, face, level0, x1, y0, 0, tx, 1); + get_texel(sampler, face, level0, x0, y1, 0, tx, 2); + get_texel(sampler, face, level0, x1, y1, 0, tx, 3); + if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(compare_func, tx, p, 0); + shadow_compare(compare_func, tx, p, 1); + shadow_compare(compare_func, tx, p, 2); + shadow_compare(compare_func, tx, p, 3); + } + + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(a, b, tx[c][0], tx[c][1], tx[c][2], tx[c][3]); + } + } + break; + default: + assert(0); + } +} + + + + /** * Called via tgsi_sampler::get_samples() * Use the sampler's state setting to get a filtered RGBA value @@ -898,15 +1026,21 @@ sp_get_samples(struct tgsi_sampler *sampler, switch (sampler->texture->target) { case PIPE_TEXTURE_1D: + assert(sampler->state->normalized_coords); sp_get_samples_1d(sampler, s, t, p, lodbias, rgba); break; case PIPE_TEXTURE_2D: - sp_get_samples_2d(sampler, s, t, p, lodbias, rgba); + if (sampler->state->normalized_coords) + sp_get_samples_2d(sampler, s, t, p, lodbias, rgba); + else + sp_get_samples_rect(sampler, s, t, p, lodbias, rgba); break; case PIPE_TEXTURE_3D: + assert(sampler->state->normalized_coords); sp_get_samples_3d(sampler, s, t, p, lodbias, rgba); break; case PIPE_TEXTURE_CUBE: + assert(sampler->state->normalized_coords); sp_get_samples_cube(sampler, s, t, p, lodbias, rgba); break; default: diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 6de7a9b543..c605ed925b 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -39,6 +39,7 @@ #include "sp_context.h" #include "sp_state.h" #include "sp_texture.h" +#include "sp_tile_cache.h" /* Simple, maximally packed layout. @@ -79,32 +80,38 @@ softpipe_texture_layout(struct softpipe_texture * spt) } -struct pipe_texture * -softpipe_texture_create(struct pipe_context *pipe, - const struct pipe_texture *templat) +static struct pipe_texture * +softpipe_texture_create_screen(struct pipe_screen *screen, + const struct pipe_texture *templat) { + struct pipe_winsys *ws = screen->winsys; struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture); if (!spt) return NULL; spt->base = *templat; + spt->base.refcount = 1; + spt->base.screen = screen; softpipe_texture_layout(spt); - spt->buffer = pipe->winsys->buffer_create(pipe->winsys, 32, - PIPE_BUFFER_USAGE_PIXEL, - spt->buffer_size); + spt->buffer = ws->buffer_create(ws, 32, + PIPE_BUFFER_USAGE_PIXEL, + spt->buffer_size); if (!spt->buffer) { FREE(spt); return NULL; } + assert(spt->base.refcount == 1); + return &spt->base; } -void -softpipe_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) +static void +softpipe_texture_release_screen(struct pipe_screen *screen, + struct pipe_texture **pt) { if (!*pt) return; @@ -120,7 +127,7 @@ softpipe_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); */ - pipe_buffer_reference(pipe->winsys, &spt->buffer, NULL); + pipe_buffer_reference(screen->winsys, &spt->buffer, NULL); FREE(spt); } @@ -128,24 +135,22 @@ softpipe_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) } -/** - * Called via pipe->get_tex_surface() - */ -struct pipe_surface * -softpipe_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) +static struct pipe_surface * +softpipe_get_tex_surface_screen(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) { + struct pipe_winsys *ws = screen->winsys; struct softpipe_texture *spt = softpipe_texture(pt); struct pipe_surface *ps; assert(level <= pt->last_level); - ps = pipe->winsys->surface_alloc(pipe->winsys); + ps = ws->surface_alloc(ws); if (ps) { assert(ps->refcount); assert(ps->winsys); - pipe_buffer_reference(pipe->winsys, &ps->buffer, spt->buffer); + pipe_buffer_reference(ws, &ps->buffer, spt->buffer); ps->format = pt->format; ps->cpp = pt->cpp; ps->width = pt->width[level]; @@ -157,10 +162,41 @@ softpipe_get_tex_surface(struct pipe_context *pipe, ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * (pt->compressed ? ps->height/4 : ps->height) * ps->width * ps->cpp; - } else { + } + else { assert(face == 0); assert(zslice == 0); } } return ps; } + + +static void +softpipe_texture_update(struct pipe_context *pipe, + struct pipe_texture *texture) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint unit; + for (unit = 0; unit < PIPE_MAX_SAMPLERS; unit++) { + if (softpipe->texture[unit] == texture) { + sp_flush_tile_cache(softpipe, softpipe->tex_cache[unit]); + } + } +} + + +void +softpipe_init_texture_funcs( struct softpipe_context *softpipe ) +{ + softpipe->pipe.texture_update = softpipe_texture_update; +} + + +void +softpipe_init_screen_texture_funcs(struct pipe_screen *screen) +{ + screen->texture_create = softpipe_texture_create_screen; + screen->texture_release = softpipe_texture_release_screen; + screen->get_tex_surface = softpipe_get_tex_surface_screen; +} diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index fa646c0de9..a7322144e6 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -29,8 +29,12 @@ #define SP_TEXTURE_H +#include "pipe/p_state.h" + + struct pipe_context; -struct pipe_texture; +struct pipe_screen; +struct softpipe_context; struct softpipe_texture @@ -54,18 +58,12 @@ softpipe_texture(struct pipe_texture *pt) } +extern void +softpipe_init_texture_funcs( struct softpipe_context *softpipe ); -extern struct pipe_texture * -softpipe_texture_create(struct pipe_context *pipe, - const struct pipe_texture *templat); extern void -softpipe_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); - -extern struct pipe_surface * -softpipe_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice); +softpipe_init_screen_texture_funcs(struct pipe_screen *screen); #endif /* SP_TEXTURE */ diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index dde3fabc81..4caf2dd3fc 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -212,14 +212,15 @@ sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc) * Specify the texture to cache. */ void -sp_tile_cache_set_texture(struct softpipe_tile_cache *tc, +sp_tile_cache_set_texture(struct pipe_context *pipe, + struct softpipe_tile_cache *tc, struct pipe_texture *texture) { uint i; assert(!tc->surface); - tc->texture = texture; + pipe_texture_reference(&tc->texture, texture); if (tc->tex_surf_map) { pipe_surface_unmap(tc->tex_surf); @@ -358,30 +359,37 @@ sp_flush_tile_cache(struct softpipe_context *softpipe, struct pipe_surface *ps = tc->surface; int inuse = 0, pos; - if (!ps || !ps->buffer) - return; - - for (pos = 0; pos < NUM_ENTRIES; pos++) { - struct softpipe_cached_tile *tile = tc->entries + pos; - if (tile->x >= 0) { - if (tc->depth_stencil) { - pipe_put_tile_raw(pipe, ps, - tile->x, tile->y, TILE_SIZE, TILE_SIZE, - tile->data.depth32, 0/*STRIDE*/); - } - else { - pipe_put_tile_rgba(pipe, ps, - tile->x, tile->y, TILE_SIZE, TILE_SIZE, - (float *) tile->data.color); + if (ps && ps->buffer) { + /* caching a drawing surface */ + for (pos = 0; pos < NUM_ENTRIES; pos++) { + struct softpipe_cached_tile *tile = tc->entries + pos; + if (tile->x >= 0) { + if (tc->depth_stencil) { + pipe_put_tile_raw(pipe, ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->data.depth32, 0/*STRIDE*/); + } + else { + pipe_put_tile_rgba(pipe, ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + (float *) tile->data.color); + } + tile->x = tile->y = -1; /* mark as empty */ + inuse++; } - tile->x = tile->y = -1; /* mark as empty */ - inuse++; } - } #if TILE_CLEAR_OPTIMIZATION - sp_tile_cache_flush_clear(&softpipe->pipe, tc); + sp_tile_cache_flush_clear(&softpipe->pipe, tc); #endif + } + else if (tc->texture) { + /* caching a texture, mark all entries as embpy */ + for (pos = 0; pos < NUM_ENTRIES; pos++) { + tc->entries[pos].x = -1; + } + tc->tex_face = -1; + } #if 0 debug_printf("flushed tiles in use: %d\n", inuse); @@ -481,6 +489,7 @@ sp_get_cached_tile_tex(struct pipe_context *pipe, struct softpipe_tile_cache *tc, int x, int y, int z, int face, int level) { + struct pipe_screen *screen = pipe->screen; /* tile pos in framebuffer: */ const int tile_x = x & ~(TILE_SIZE - 1); const int tile_y = y & ~(TILE_SIZE - 1); @@ -506,7 +515,7 @@ sp_get_cached_tile_tex(struct pipe_context *pipe, if (tc->tex_surf_map) pipe_surface_unmap(tc->tex_surf); - tc->tex_surf = pipe->get_tex_surface(pipe, tc->texture, face, level, z); + tc->tex_surf = screen->get_tex_surface(screen, tc->texture, face, level, z); tc->tex_surf_map = pipe_surface_map(tc->tex_surf); tc->tex_face = face; diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index 7fd1081286..2631e29a3a 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -80,7 +80,8 @@ extern void sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc); extern void -sp_tile_cache_set_texture(struct softpipe_tile_cache *tc, +sp_tile_cache_set_texture(struct pipe_context *pipe, + struct softpipe_tile_cache *tc, struct pipe_texture *texture); extern void diff --git a/src/gallium/drivers/softpipe/sp_winsys.h b/src/gallium/drivers/softpipe/sp_winsys.h index d6b379f58c..fc372dba27 100644 --- a/src/gallium/drivers/softpipe/sp_winsys.h +++ b/src/gallium/drivers/softpipe/sp_winsys.h @@ -46,12 +46,18 @@ struct softpipe_winsys { }; +struct pipe_screen; struct pipe_winsys; struct pipe_context; -struct pipe_context *softpipe_create( struct pipe_winsys *, +struct pipe_context *softpipe_create( struct pipe_screen *, + struct pipe_winsys *, struct softpipe_winsys * ); +struct pipe_screen * +softpipe_create_screen(struct pipe_winsys *); + + #endif /* SP_WINSYS_H */ diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index 30cd729c56..91f3d2ac2d 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -42,6 +42,14 @@ #endif +#if defined(__MSC__) + +/* Avoid 'expression is always true' warning */ +#pragma warning(disable: 4296) + +#endif /* __MSC__ */ + + typedef unsigned int uint; typedef unsigned char ubyte; typedef unsigned char boolean; @@ -61,8 +69,10 @@ typedef long long int64_t; typedef unsigned long long uint64_t; #if defined(_WIN64) +typedef __int64 intptr_t; typedef unsigned __int64 uintptr_t; #else +typedef int intptr_t; typedef unsigned int uintptr_t; #endif diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 39f95695fb..1501b52f3e 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -31,6 +31,13 @@ #include "p_state.h" +#ifdef __cplusplus +extern "C" { +#endif + + +struct pipe_screen; + struct pipe_state_cache; /* Opaque driver handles: @@ -46,27 +53,14 @@ struct pipe_query; */ struct pipe_context { struct pipe_winsys *winsys; + struct pipe_screen *screen; void *priv; /** context private data (for DRI for example) */ + void *draw; /** private, for draw module (temporary? */ void (*destroy)( struct pipe_context * ); /* - * Queries - */ - /** type is one of PIPE_SURFACE, PIPE_TEXTURE, etc. */ - boolean (*is_format_supported)( struct pipe_context *pipe, - enum pipe_format format, uint type ); - - const char *(*get_name)( struct pipe_context *pipe ); - - const char *(*get_vendor)( struct pipe_context *pipe ); - - int (*get_param)( struct pipe_context *pipe, int param ); - float (*get_paramf)( struct pipe_context *pipe, int param ); - - - /* * Drawing. * Return false on fallbacks (temporary??) */ @@ -196,21 +190,16 @@ struct pipe_context { struct pipe_surface *ps, unsigned clearValue); - - /* - * Texture functions + /** + * Called when texture data is changed. + * Note: we could pass some hints about which mip levels or cube faces + * have changed... + * XXX this may go away - could pass a 'write' flag to get_tex_surface() */ - struct pipe_texture * (*texture_create)(struct pipe_context *pipe, - const struct pipe_texture *templat); + void (*texture_update)(struct pipe_context *pipe, + struct pipe_texture *texture); - void (*texture_release)(struct pipe_context *pipe, - struct pipe_texture **pt); - /** Get a surface which is a "view" into a texture */ - struct pipe_surface *(*get_tex_surface)(struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level, - unsigned zslice); /* Flush rendering: */ @@ -218,4 +207,9 @@ struct pipe_context { unsigned flags ); }; + +#ifdef __cplusplus +} +#endif + #endif /* PIPE_CONTEXT_H */ diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 0bf53ecb79..0c662d6517 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -30,6 +30,10 @@ #include "p_format.h" +#ifdef __cplusplus +extern "C" { +#endif + #define PIPE_BLENDFACTOR_ONE 0x1 #define PIPE_BLENDFACTOR_SRC_COLOR 0x2 #define PIPE_BLENDFACTOR_SRC_ALPHA 0x3 @@ -148,7 +152,7 @@ enum pipe_texture_target { */ #define PIPE_TEX_FILTER_NEAREST 0 #define PIPE_TEX_FILTER_LINEAR 1 -//#define PIPE_TEX_FILTER_ANISO 2 +/* #define PIPE_TEX_FILTER_ANISO 2 */ #define PIPE_TEX_COMPARE_NONE 0 @@ -267,4 +271,8 @@ enum pipe_texture_target { #define PIPE_CAP_MAX_TEXTURE_LOD_BIAS 19 #define PIPE_CAP_BITMAP_TEXCOORD_BIAS 20 +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index c9ad324315..f90087b3c9 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -28,11 +28,15 @@ #ifndef PIPE_FORMAT_H #define PIPE_FORMAT_H -#include <stdio.h> // for sprintf +#include <stdio.h> /* for sprintf */ #include "p_compiler.h" #include "p_debug.h" +#ifdef __cplusplus +extern "C" { +#endif + /** * The PIPE_FORMAT is a 32-bit wide bitfield that encodes all the information * needed to uniquely describe a pixel format. @@ -418,4 +422,8 @@ static INLINE uint pf_get_size( enum pipe_format format ) { return pf_get_bits(format) / 8; } +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h index ebf6ed86bc..274f76a383 100644 --- a/src/gallium/include/pipe/p_inlines.h +++ b/src/gallium/include/pipe/p_inlines.h @@ -30,9 +30,15 @@ #include "p_context.h" #include "p_defines.h" +#include "p_screen.h" #include "p_winsys.h" +#ifdef __cplusplus +extern "C" { +#endif + + static INLINE void * pipe_surface_map(struct pipe_surface *surface) { @@ -92,7 +98,7 @@ pipe_buffer_reference(struct pipe_winsys *winsys, * \sa pipe_surface_reference */ static INLINE void -pipe_texture_reference(struct pipe_context *pipe, struct pipe_texture **ptr, +pipe_texture_reference(struct pipe_texture **ptr, struct pipe_texture *pt) { assert(ptr); @@ -101,7 +107,10 @@ pipe_texture_reference(struct pipe_context *pipe, struct pipe_texture **ptr, pt->refcount++; if (*ptr) { - pipe->texture_release(pipe, ptr); + struct pipe_screen *screen = (*ptr)->screen; + assert(screen); + screen->texture_release(screen, ptr); + assert(!*ptr); } @@ -109,4 +118,19 @@ pipe_texture_reference(struct pipe_context *pipe, struct pipe_texture **ptr, } +static INLINE void +pipe_texture_release(struct pipe_texture **ptr) +{ + struct pipe_screen *screen; + assert(ptr); + screen = (*ptr)->screen; + screen->texture_release(screen, ptr); + *ptr = NULL; +} + + +#ifdef __cplusplus +} +#endif + #endif /* P_INLINES_H */ diff --git a/src/gallium/drivers/i915simple/i915_strings.c b/src/gallium/include/pipe/p_pointer.h index 301fedea19..3a1e6be88e 100644 --- a/src/gallium/drivers/i915simple/i915_strings.c +++ b/src/gallium/include/pipe/p_pointer.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,59 +25,71 @@ * **************************************************************************/ -#include "i915_context.h" -#include "i915_reg.h" +#ifndef P_POINTER_H +#define P_POINTER_H +#include "p_compiler.h" -static const char *i915_get_vendor( struct pipe_context *pipe ) +#ifdef __cplusplus +extern "C" { +#endif + +static INLINE intptr_t +pointer_to_intptr( const void *p ) { - return "Tungsten Graphics, Inc."; + union { + const void *p; + intptr_t i; + } pi; + pi.p = p; + return pi.i; } - -static const char *i915_get_name( struct pipe_context *pipe ) +static INLINE void * +intptr_to_pointer( intptr_t i ) { - static char buffer[128]; - const char *chipset; - - switch (i915_context(pipe)->pci_id) { - case PCI_CHIP_I915_G: - chipset = "915G"; - break; - case PCI_CHIP_I915_GM: - chipset = "915GM"; - break; - case PCI_CHIP_I945_G: - chipset = "945G"; - break; - case PCI_CHIP_I945_GM: - chipset = "945GM"; - break; - case PCI_CHIP_I945_GME: - chipset = "945GME"; - break; - case PCI_CHIP_G33_G: - chipset = "G33"; - break; - case PCI_CHIP_Q35_G: - chipset = "Q35"; - break; - case PCI_CHIP_Q33_G: - chipset = "Q33"; - break; - default: - chipset = "unknown"; - break; - } + union { + void *p; + intptr_t i; + } pi; + pi.i = i; + return pi.p; +} - sprintf(buffer, "i915 (chipset: %s)", chipset); - return buffer; +static INLINE uintptr_t +pointer_to_uintptr( const void *ptr ) +{ + union { + const void *p; + uintptr_t u; + } pu; + pu.p = ptr; + return pu.u; } +static INLINE void * +uintptr_to_pointer( uintptr_t u ) +{ + union { + void *p; + uintptr_t u; + } pu; + pu.u = u; + return pu.p; +} -void -i915_init_string_functions(struct i915_context *i915) +/** + * Return a pointer aligned to next multiple of N bytes. + */ +static INLINE void * +align_pointer( const void *unaligned, uintptr_t alignment ) { - i915->pipe.get_name = i915_get_name; - i915->pipe.get_vendor = i915_get_vendor; + uintptr_t aligned = (pointer_to_uintptr( unaligned ) + alignment - 1) & ~(alignment - 1); + return uintptr_to_pointer( aligned ); } + +#ifdef __cplusplus +} +#endif + +#endif /* P_POINTER_H */ diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h new file mode 100644 index 0000000000..6be9a82b68 --- /dev/null +++ b/src/gallium/include/pipe/p_screen.h @@ -0,0 +1,98 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Screen, Adapter or GPU + * + * These are driver functions/facilities that are context independent. + */ + + +#ifndef P_SCREEN_H +#define P_SCREEN_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" + + + +#ifdef __cplusplus +extern "C" { +#endif + + + +/** + * Gallium screen/adapter context. Basically everything + * hardware-specific that doesn't actually require a rendering + * context. + */ +struct pipe_screen { + struct pipe_winsys *winsys; + + void (*destroy)( struct pipe_screen * ); + + + /* + * Capability queries + */ + const char *(*get_name)( struct pipe_screen * ); + + const char *(*get_vendor)( struct pipe_screen * ); + + int (*get_param)( struct pipe_screen *, int param ); + + float (*get_paramf)( struct pipe_screen *, int param ); + + boolean (*is_format_supported)( struct pipe_screen *, + enum pipe_format format, + uint type ); + + + /* + * Texture functions + */ + struct pipe_texture * (*texture_create)(struct pipe_screen *, + const struct pipe_texture *templat); + + void (*texture_release)(struct pipe_screen *, + struct pipe_texture **pt); + + /** Get a surface which is a "view" into a texture */ + struct pipe_surface *(*get_tex_surface)(struct pipe_screen *, + struct pipe_texture *texture, + unsigned face, unsigned level, + unsigned zslice); +}; + + +#ifdef __cplusplus +} +#endif + +#endif /* P_SCREEN_H */ diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 3ce35310f6..0a6145a6bf 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -1,9 +1,11 @@ #if !defined TGSI_TOKEN_H #define TGSI_TOKEN_H -#if defined __cplusplus +#ifdef __cplusplus extern "C" { -#endif // defined __cplusplus +#endif + +#include "p_compiler.h" struct tgsi_version { @@ -48,6 +50,8 @@ struct tgsi_token #define TGSI_FILE_SAMPLER 5 #define TGSI_FILE_ADDRESS 6 #define TGSI_FILE_IMMEDIATE 7 +#define TGSI_FILE_COUNT 8 /**< how many TGSI_FILE_ types */ + #define TGSI_DECLARE_RANGE 0 #define TGSI_DECLARE_MASK 1 @@ -798,9 +802,9 @@ struct tgsi_dst_register_ext_predicate }; -#if defined __cplusplus -} // extern "C" -#endif // defined __cplusplus +#ifdef __cplusplus +} +#endif -#endif // !defined TGSI_TOKEN_H +#endif /* TGSI_TOKEN_H */ diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 1082343e2f..5fab41acbd 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -42,6 +42,12 @@ #include "p_defines.h" #include "p_format.h" + +#ifdef __cplusplus +extern "C" { +#endif + + /** * Implementation limits */ @@ -57,6 +63,7 @@ /* fwd decls */ +struct pipe_screen; struct pipe_surface; struct pipe_winsys; @@ -66,7 +73,8 @@ struct pipe_winsys; * The driver will certainly subclass this to include actual memory * management information. */ -struct pipe_buffer { +struct pipe_buffer +{ unsigned alignment; unsigned usage; unsigned size; @@ -76,8 +84,6 @@ struct pipe_buffer { }; - - /** * Primitive (point/line/tri) rasterization info */ @@ -113,18 +119,21 @@ struct pipe_rasterizer_state }; -struct pipe_poly_stipple { +struct pipe_poly_stipple +{ unsigned stipple[32]; }; -struct pipe_viewport_state { +struct pipe_viewport_state +{ float scale[4]; float translate[4]; }; -struct pipe_scissor_state { +struct pipe_scissor_state +{ unsigned minx:16; unsigned miny:16; unsigned maxx:16; @@ -132,7 +141,8 @@ struct pipe_scissor_state { }; -struct pipe_clip_state { +struct pipe_clip_state +{ float ucp[PIPE_MAX_CLIP_PLANES][4]; unsigned nr; }; @@ -141,17 +151,19 @@ struct pipe_clip_state { /** * Constants for vertex/fragment shaders */ -struct pipe_constant_buffer { +struct pipe_constant_buffer +{ struct pipe_buffer *buffer; unsigned size; /** in bytes */ }; -struct pipe_shader_state { +struct pipe_shader_state +{ const struct tgsi_token *tokens; + /* XXX these are going away */ ubyte num_inputs; ubyte num_outputs; - ubyte input_map[PIPE_MAX_SHADER_INPUTS]; /* XXX this may be temporary */ ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */ ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ @@ -185,7 +197,8 @@ struct pipe_depth_stencil_alpha_state }; -struct pipe_blend_state { +struct pipe_blend_state +{ unsigned blend_enable:1; unsigned rgb_func:3; /**< PIPE_BLEND_x */ @@ -204,7 +217,8 @@ struct pipe_blend_state { }; -struct pipe_blend_color { +struct pipe_blend_color +{ float color[4]; }; @@ -224,19 +238,19 @@ struct pipe_framebuffer_state */ struct pipe_sampler_state { - unsigned wrap_s:3; /**< PIPE_TEX_WRAP_x */ - unsigned wrap_t:3; /**< PIPE_TEX_WRAP_x */ - unsigned wrap_r:3; /**< PIPE_TEX_WRAP_x */ + unsigned wrap_s:3; /**< PIPE_TEX_WRAP_x */ + unsigned wrap_t:3; /**< PIPE_TEX_WRAP_x */ + unsigned wrap_r:3; /**< PIPE_TEX_WRAP_x */ unsigned min_img_filter:2; /**< PIPE_TEX_FILTER_x */ unsigned min_mip_filter:2; /**< PIPE_TEX_MIPFILTER_x */ unsigned mag_img_filter:2; /**< PIPE_TEX_FILTER_x */ - unsigned compare:1; /**< shadow/depth compare enabled? */ - unsigned compare_mode:1; /**< PIPE_TEX_COMPARE_x */ - unsigned compare_func:3; /**< PIPE_FUNC_x */ - unsigned normalized_coords:1; /**< Are coords normalized to [0,1]? */ - float shadow_ambient; /**< shadow test fail color/intensity */ - float lod_bias; /**< LOD/lambda bias */ - float min_lod, max_lod; /**< LOD clamp range, after bias */ + unsigned compare:1; /**< shadow/depth compare enabled? */ + unsigned compare_mode:1; /**< PIPE_TEX_COMPARE_x */ + unsigned compare_func:3; /**< PIPE_FUNC_x */ + unsigned normalized_coords:1; /**< Are coords normalized to [0,1]? */ + float shadow_ambient; /**< shadow test fail color/intensity */ + float lod_bias; /**< LOD/lambda bias */ + float min_lod, max_lod; /**< LOD clamp range, after bias */ float border_color[4]; float max_anisotropy; }; @@ -248,10 +262,10 @@ struct pipe_sampler_state */ struct pipe_surface { - struct pipe_buffer *buffer; /**< driver private buffer handle */ + struct pipe_buffer *buffer; /**< surface's buffer/memory */ enum pipe_format format; /**< PIPE_FORMAT_x */ unsigned status; /**< PIPE_SURFACE_STATUS_x */ - unsigned clear_value; /**< may be temporary */ + unsigned clear_value; /**< XXX may be temporary */ unsigned cpp; /**< bytes per pixel */ unsigned width; unsigned height; @@ -263,8 +277,7 @@ struct pipe_surface /** - * Texture. Represents one or several texture images on one or several mipmap - * levels. + * Texture object. */ struct pipe_texture { @@ -284,6 +297,8 @@ struct pipe_texture /* These are also refcounted: */ unsigned refcount; + + struct pipe_screen *screen; /**< screen that this texture belongs to */ }; @@ -319,4 +334,8 @@ struct pipe_vertex_element }; +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/gallium/include/pipe/p_thread.h b/src/gallium/include/pipe/p_thread.h index cd432c547c..4325abc951 100644 --- a/src/gallium/include/pipe/p_thread.h +++ b/src/gallium/include/pipe/p_thread.h @@ -1,54 +1,317 @@ /************************************************************************** * + * Copyright 1999-2006 Brian Paul * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * **************************************************************************/ -#ifndef P_THREAD_H -#define P_THREAD_H +/** + * @file + * Thread + * + * Initial version by John Stone (j.stone@acm.org) (johns@cs.umr.edu) + * and Christoph Poliwoda (poliwoda@volumegraphics.com) + * Revised by Keith Whitwell + * Adapted for new gl dispatcher by Brian Paul + * + * + * + * DOCUMENTATION + * + * This thread module exports the following types: + * _glthread_TSD Thread-specific data area + * _glthread_Thread Thread datatype + * _glthread_Mutex Mutual exclusion lock + * + * Macros: + * _glthread_DECLARE_STATIC_MUTEX(name) Declare a non-local mutex + * _glthread_INIT_MUTEX(name) Initialize a mutex + * _glthread_LOCK_MUTEX(name) Lock a mutex + * _glthread_UNLOCK_MUTEX(name) Unlock a mutex + * + * Functions: + * _glthread_GetID(v) Get integer thread ID + * _glthread_InitTSD() Initialize thread-specific data + * _glthread_GetTSD() Get thread-specific data + * _glthread_SetTSD() Set thread-specific data + * + * If this file is accidentally included by a non-threaded build, + * it should not cause the build to fail, or otherwise cause problems. + * In general, it should only be included when needed however. + */ + +#ifndef _P_THREAD_H_ +#define _P_THREAD_H_ + + +#if defined(USE_MGL_NAMESPACE) +#define _glapi_Dispatch _mglapi_Dispatch +#endif + + + +#if (defined(PTHREADS) || defined(SOLARIS_THREADS) ||\ + defined(WIN32_THREADS) || defined(USE_XTHREADS) || defined(BEOS_THREADS)) \ + && !defined(THREADS) +# define THREADS +#endif + +#ifdef VMS +#include <GL/vms_x_fix.h> +#endif + +/* + * POSIX threads. This should be your choice in the Unix world + * whenever possible. When building with POSIX threads, be sure + * to enable any compiler flags which will cause the MT-safe + * libc (if one exists) to be used when linking, as well as any + * header macros for MT-safe errno, etc. For Solaris, this is the -mt + * compiler flag. On Solaris with gcc, use -D_REENTRANT to enable + * proper compiling for MT-safe libc etc. + */ +#if defined(PTHREADS) +#include <pthread.h> /* POSIX threads headers */ + +typedef struct { + pthread_key_t key; + int initMagic; +} _glthread_TSD; + +typedef pthread_t _glthread_Thread; + +typedef pthread_mutex_t _glthread_Mutex; + +#define _glthread_DECLARE_STATIC_MUTEX(name) \ + static _glthread_Mutex name = PTHREAD_MUTEX_INITIALIZER + +#define _glthread_INIT_MUTEX(name) \ + pthread_mutex_init(&(name), NULL) + +#define _glthread_DESTROY_MUTEX(name) \ + pthread_mutex_destroy(&(name)) + +#define _glthread_LOCK_MUTEX(name) \ + (void) pthread_mutex_lock(&(name)) + +#define _glthread_UNLOCK_MUTEX(name) \ + (void) pthread_mutex_unlock(&(name)) + +#endif /* PTHREADS */ + + + + +/* + * Solaris threads. Use only up to Solaris 2.4. + * Solaris 2.5 and higher provide POSIX threads. + * Be sure to compile with -mt on the Solaris compilers, or + * use -D_REENTRANT if using gcc. + */ +#ifdef SOLARIS_THREADS +#include <thread.h> + +typedef struct { + thread_key_t key; + mutex_t keylock; + int initMagic; +} _glthread_TSD; + +typedef thread_t _glthread_Thread; + +typedef mutex_t _glthread_Mutex; + +/* XXX need to really implement mutex-related macros */ +#define _glthread_DECLARE_STATIC_MUTEX(name) static _glthread_Mutex name = 0 +#define _glthread_INIT_MUTEX(name) (void) name +#define _glthread_DESTROY_MUTEX(name) (void) name +#define _glthread_LOCK_MUTEX(name) (void) name +#define _glthread_UNLOCK_MUTEX(name) (void) name + +#endif /* SOLARIS_THREADS */ + + + + +/* + * Windows threads. Should work with Windows NT and 95. + * IMPORTANT: Link with multithreaded runtime library when THREADS are + * used! + */ +#ifdef WIN32_THREADS +#include <windows.h> + +typedef struct { + DWORD key; + int initMagic; +} _glthread_TSD; + +typedef HANDLE _glthread_Thread; + +typedef CRITICAL_SECTION _glthread_Mutex; + +#define _glthread_DECLARE_STATIC_MUTEX(name) /*static*/ _glthread_Mutex name = {0,0,0,0,0,0} +#define _glthread_INIT_MUTEX(name) InitializeCriticalSection(&name) +#define _glthread_DESTROY_MUTEX(name) DeleteCriticalSection(&name) +#define _glthread_LOCK_MUTEX(name) EnterCriticalSection(&name) +#define _glthread_UNLOCK_MUTEX(name) LeaveCriticalSection(&name) + +#endif /* WIN32_THREADS */ + + -#include "p_compiler.h" /* - * XXX: We should come up with a system-independent thread definitions. - * XXX: Patching glthread defs for now. + * XFree86 has its own thread wrapper, Xthreads.h + * We wrap it again for GL. */ +#ifdef USE_XTHREADS +#include <X11/Xthreads.h> + +typedef struct { + xthread_key_t key; + int initMagic; +} _glthread_TSD; + +typedef xthread_t _glthread_Thread; + +typedef xmutex_rec _glthread_Mutex; + +#ifdef XMUTEX_INITIALIZER +#define _glthread_DECLARE_STATIC_MUTEX(name) \ + static _glthread_Mutex name = XMUTEX_INITIALIZER +#else +#define _glthread_DECLARE_STATIC_MUTEX(name) \ + static _glthread_Mutex name +#endif + +#define _glthread_INIT_MUTEX(name) \ + xmutex_init(&(name)) + +#define _glthread_DESTROY_MUTEX(name) \ + xmutex_clear(&(name)) + +#define _glthread_LOCK_MUTEX(name) \ + (void) xmutex_lock(&(name)) + +#define _glthread_UNLOCK_MUTEX(name) \ + (void) xmutex_unlock(&(name)) + +#endif /* USE_XTHREADS */ + + + +/* + * BeOS threads. R5.x required. + */ +#ifdef BEOS_THREADS + +#include <kernel/OS.h> +#include <support/TLS.h> + +typedef struct { + int32 key; + int initMagic; +} _glthread_TSD; + +typedef thread_id _glthread_Thread; + +/* Use Benaphore, aka speeder semaphore */ +typedef struct { + int32 lock; + sem_id sem; +} benaphore; +typedef benaphore _glthread_Mutex; + +#define _glthread_DECLARE_STATIC_MUTEX(name) static _glthread_Mutex name = { 0, 0 } +#define _glthread_INIT_MUTEX(name) name.sem = create_sem(0, #name"_benaphore"), name.lock = 0 +#define _glthread_DESTROY_MUTEX(name) delete_sem(name.sem), name.lock = 0 +#define _glthread_LOCK_MUTEX(name) if (name.sem == 0) _glthread_INIT_MUTEX(name); \ + if (atomic_add(&(name.lock), 1) >= 1) acquire_sem(name.sem) +#define _glthread_UNLOCK_MUTEX(name) if (atomic_add(&(name.lock), -1) > 1) release_sem(name.sem) + +#endif /* BEOS_THREADS */ + + + +#ifndef THREADS + +/* + * THREADS not defined + */ + +typedef unsigned _glthread_TSD; + +typedef unsigned _glthread_Thread; + +typedef unsigned _glthread_Mutex; + +#define _glthread_DECLARE_STATIC_MUTEX(name) static _glthread_Mutex name = 0 + +#define _glthread_INIT_MUTEX(name) (void) name + +#define _glthread_DESTROY_MUTEX(name) (void) name + +#define _glthread_LOCK_MUTEX(name) (void) name + +#define _glthread_UNLOCK_MUTEX(name) (void) name + +#endif /* THREADS */ + + + +/* + * Platform independent thread specific data API. + */ + +extern unsigned long +_glthread_GetID(void); + + +extern void +_glthread_InitTSD(_glthread_TSD *); + + +extern void * +_glthread_GetTSD(_glthread_TSD *); -#ifndef __MSC__ -#include "glapi/glthread.h" +extern void +_glthread_SetTSD(_glthread_TSD *, void *); -#else /* __MSC__ */ +#if defined(GLX_USE_TLS) -typedef int _glthread_Mutex; +extern __thread struct _glapi_table * _glapi_tls_Dispatch + __attribute__((tls_model("initial-exec"))); -#define _glthread_INIT_MUTEX( M ) ((void) (M)) -#define _glthread_LOCK_MUTEX( M ) ((void) (M)) -#define _glthread_UNLOCK_MUTEX( M ) ((void) (M)) +#define GET_DISPATCH() _glapi_tls_Dispatch -#define sched_yield() ((void) 0) +#elif !defined(GL_CALL) +# if defined(THREADS) +# define GET_DISPATCH() \ + ((__builtin_expect( _glapi_Dispatch != NULL, 1 )) \ + ? _glapi_Dispatch : _glapi_get_dispatch()) +# else +# define GET_DISPATCH() _glapi_Dispatch +# endif /* defined(THREADS) */ +#endif /* ndef GL_CALL */ -#endif /* __MSC__ */ -#endif /* P_THREAD_H */ +#endif /* _P_THREAD_H_ */ diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index d7da2801c9..3b32ba1d24 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -30,16 +30,17 @@ #include "p_compiler.h" #include "p_debug.h" +#include "p_pointer.h" #include <math.h> -#ifdef WIN32 - #ifdef __cplusplus -extern "C" -{ +extern "C" { #endif + +#ifdef WIN32 + void * __stdcall EngAllocMem( unsigned long Flags, @@ -50,10 +51,6 @@ void __stdcall EngFreeMem( void *Mem ); -#ifdef __cplusplus -} -#endif - static INLINE void * MALLOC( unsigned size ) { @@ -115,33 +112,6 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) /** - * Return a pointer aligned to next multiple of N bytes. - */ -static INLINE void * -align_pointer( void *unaligned, uint alignment ) -{ - if (sizeof(void *) == 64) { - union { - void *p; - uint64 u; - } pu; - pu.p = unaligned; - pu.u = (pu.u + alignment - 1) & ~(uint64) (alignment - 1); - return pu.p; - } - else { - /* 32-bit pointers */ - union { - void *p; - uint u; - } pu; - pu.p = unaligned; - pu.u = (pu.u + alignment - 1) & ~(alignment - 1); - return pu.p; - } -} - -/** * Return memory on given byte alignment */ static INLINE void * @@ -405,4 +375,8 @@ extern void pipe_copy_rect(ubyte * dst, unsigned cpp, unsigned dst_pitch, int src_pitch, unsigned src_x, int src_y); +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/gallium/include/pipe/p_winsys.h b/src/gallium/include/pipe/p_winsys.h index 1e81eebd78..e784c92491 100644 --- a/src/gallium/include/pipe/p_winsys.h +++ b/src/gallium/include/pipe/p_winsys.h @@ -25,12 +25,6 @@ * **************************************************************************/ -#ifndef P_WINSYS_H -#define P_WINSYS_H - - -#include "p_format.h" - /** * \file * This is the interface that Gallium3D requires any window system @@ -38,6 +32,17 @@ * which is public. */ +#ifndef P_WINSYS_H +#define P_WINSYS_H + + +#include "p_format.h" + + +#ifdef __cplusplus +extern "C" { +#endif + /** Opaque type */ struct pipe_fence_handle; @@ -156,5 +161,8 @@ struct pipe_winsys }; +#ifdef __cplusplus +} +#endif #endif /* P_WINSYS_H */ diff --git a/src/gallium/winsys/Makefile b/src/gallium/winsys/Makefile new file mode 100644 index 0000000000..3dc5ea5744 --- /dev/null +++ b/src/gallium/winsys/Makefile @@ -0,0 +1,20 @@ +TOP = ../../.. +include $(TOP)/configs/current + + +SUBDIRS = $(GALLIUM_WINSYS_DIRS) + + +default: subdirs + + +subdirs: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +clean: + rm -f `find . -name \*.[oa]` diff --git a/src/gallium/winsys/SConscript b/src/gallium/winsys/SConscript new file mode 100644 index 0000000000..635a68eea2 --- /dev/null +++ b/src/gallium/winsys/SConscript @@ -0,0 +1,11 @@ +Import('*') + +if dri: + SConscript([ + 'dri/SConscript', + ]) + +if 'xlib' in env['drivers'] and not dri: + SConscript([ + 'xlib/SConscript', + ]) diff --git a/src/gallium/winsys/dri/Makefile.template b/src/gallium/winsys/dri/Makefile.template index 2a261ed669..3bc1fdd4d4 100644 --- a/src/gallium/winsys/dri/Makefile.template +++ b/src/gallium/winsys/dri/Makefile.template @@ -1,7 +1,9 @@ # -*-makefile-*- -MESA_MODULES = $(TOP)/src/mesa/libmesa.a - +MESA_MODULES = \ + $(TOP)/src/mesa/libmesa.a \ + $(GALLIUM_AUXILIARIES) + COMMON_GALLIUM_SOURCES = \ $(TOP)/src/mesa/drivers/dri/common/utils.c \ $(TOP)/src/mesa/drivers/dri/common/vblank.c \ @@ -23,8 +25,9 @@ WINOBJ= WINLIB= INCLUDES = $(SHARED_INCLUDES) $(EXPAT_INCLUDES) -OBJECTS = $(C_SOURCES:.c=.o) \ - $(ASM_SOURCES:.S=.o) +OBJECTS = \ + $(C_SOURCES:.c=.o) \ + $(ASM_SOURCES:.S=.o) else # miniglx diff --git a/src/gallium/winsys/dri/SConscript b/src/gallium/winsys/dri/SConscript new file mode 100644 index 0000000000..8c56ce917c --- /dev/null +++ b/src/gallium/winsys/dri/SConscript @@ -0,0 +1,51 @@ +Import('*') + +drienv = env.Clone() + +drienv.Replace(CPPPATH = [ + '#src/mesa/drivers/dri/common', + '#include', + '#include/GL/internal', + '#src/gallium/include', + '#src/gallium/auxiliary', + '#src/gallium/drivers', + '#src/mesa', + '#src/mesa/main', + '#src/mesa/glapi', + '#src/mesa/math', + '#src/mesa/transform', + '#src/mesa/shader', + '#src/mesa/swrast', + '#src/mesa/swrast_setup', + '#src/egl/main', + '#src/egl/drivers/dri', +]) + +drienv.ParseConfig('pkg-config --cflags --libs libdrm') + +COMMON_GALLIUM_SOURCES = [ + '#src/mesa/drivers/dri/common/utils.c', + '#src/mesa/drivers/dri/common/vblank.c', + '#src/mesa/drivers/dri/common/dri_util.c', + '#src/mesa/drivers/dri/common/xmlconfig.c', +] + +COMMON_BM_SOURCES = [ + '#src/mesa/drivers/dri/common/dri_bufmgr.c', + '#src/mesa/drivers/dri/common/dri_drmpool.c', +] + +Export([ + 'drienv', + 'COMMON_GALLIUM_SOURCES', + 'COMMON_BM_SOURCES', +]) + +# TODO: Installation +#install: $(LIBNAME) +# $(INSTALL) -d $(DRI_DRIVER_INSTALL_DIR) +# $(INSTALL) -m 755 $(LIBNAME) $(DRI_DRIVER_INSTALL_DIR) + +SConscript([ + 'intel/SConscript', +]) diff --git a/src/gallium/winsys/dri/intel/SConscript b/src/gallium/winsys/dri/intel/SConscript index a7cc10450e..0ad19d42a8 100644 --- a/src/gallium/winsys/dri/intel/SConscript +++ b/src/gallium/winsys/dri/intel/SConscript @@ -9,11 +9,6 @@ env.Append(CPPPATH = [ #MINIGLX_SOURCES = server/intel_dri.c -pipe_drivers = [ - softpipe, - i915simple -] - DRIVER_SOURCES = [ 'intel_winsys_pipe.c', 'intel_winsys_softpipe.c', @@ -31,11 +26,14 @@ sources = \ COMMON_BM_SOURCES + \ DRIVER_SOURCES -# DRIVER_DEFINES = -I../intel $(shell pkg-config libdrm --atleast-version=2.3.1 \ -# && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP") +drivers = [ + softpipe, + i915simple +] +# TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions env.SharedLibrary( target ='i915tex_dri.so', source = sources, - LIBS = pipe_drivers + env['LIBS'], + LIBS = drivers + mesa + auxiliaries + env['LIBS'], )
\ No newline at end of file diff --git a/src/gallium/winsys/dri/intel/intel_batchbuffer.c b/src/gallium/winsys/dri/intel/intel_batchbuffer.c index 49e04d81ec..5830b88b37 100644 --- a/src/gallium/winsys/dri/intel/intel_batchbuffer.c +++ b/src/gallium/winsys/dri/intel/intel_batchbuffer.c @@ -26,6 +26,7 @@ **************************************************************************/ #include <errno.h> +#include <stdio.h> #include "intel_batchbuffer.h" #include "intel_context.h" #include "intel_screen.h" diff --git a/src/gallium/winsys/dri/intel/intel_batchbuffer.h b/src/gallium/winsys/dri/intel/intel_batchbuffer.h index 82feafa21f..caf6870a3c 100644 --- a/src/gallium/winsys/dri/intel/intel_batchbuffer.h +++ b/src/gallium/winsys/dri/intel/intel_batchbuffer.h @@ -28,6 +28,7 @@ #ifndef INTEL_BATCHBUFFER_H #define INTEL_BATCHBUFFER_H +#include "pipe/p_debug.h" #include "pipe/p_compiler.h" #include "dri_bufmgr.h" diff --git a/src/gallium/winsys/dri/intel/intel_batchpool.c b/src/gallium/winsys/dri/intel/intel_batchpool.c index 33b56817f6..ce154c7b88 100644 --- a/src/gallium/winsys/dri/intel/intel_batchpool.c +++ b/src/gallium/winsys/dri/intel/intel_batchpool.c @@ -36,9 +36,12 @@ #include <xf86drm.h> #include <stdlib.h> +#include <stdio.h> #include <errno.h> -#include "imports.h" -#include "glthread.h" + +#include "pipe/p_compiler.h" +#include "pipe/p_thread.h" + #include "dri_bufpool.h" #include "dri_bufmgr.h" #include "intel_batchpool.h" @@ -196,7 +199,7 @@ pool_create(struct _DriBufferPool *pool, _glthread_LOCK_MUTEX(p->mutex); if (p->numFree == 0) - pool_checkFree(p, GL_TRUE); + pool_checkFree(p, TRUE); if (p->numFree == 0) { fprintf(stderr, "Out of fixed size buffer objects\n"); @@ -278,7 +281,7 @@ pool_map(struct _DriBufferPool *pool, void *private, unsigned flags, return -EBUSY; } - buf->mapped = GL_TRUE; + buf->mapped = TRUE; *virtual = (unsigned char *) p->virtual + buf->start; _glthread_UNLOCK_MUTEX(p->mutex); return 0; @@ -361,7 +364,7 @@ pool_validate(struct _DriBufferPool *pool, void *private) BBuf *buf = (BBuf *) private; BPool *p = buf->parent; _glthread_LOCK_MUTEX(p->mutex); - buf->unfenced = GL_TRUE; + buf->unfenced = TRUE; _glthread_UNLOCK_MUTEX(p->mutex); return 0; } @@ -379,7 +382,7 @@ pool_takedown(struct _DriBufferPool *pool) while ((p->numFree < p->numTot) && p->numDelayed) { _glthread_UNLOCK_MUTEX(p->mutex); sched_yield(); - pool_checkFree(p, GL_TRUE); + pool_checkFree(p, TRUE); _glthread_LOCK_MUTEX(p->mutex); } diff --git a/src/gallium/winsys/dri/intel/intel_context.c b/src/gallium/winsys/dri/intel/intel_context.c index c033f2a592..79b320c6bf 100644 --- a/src/gallium/winsys/dri/intel/intel_context.c +++ b/src/gallium/winsys/dri/intel/intel_context.c @@ -188,7 +188,8 @@ intelCreateContext(const __GLcontextModes * visual, /* * Pipe-related setup */ - if (!getenv("INTEL_HW")) { + if (getenv("INTEL_SP")) { + /* use softpipe driver instead of hw */ pipe = intel_create_softpipe( intel, intelScreen->winsys ); } else { diff --git a/src/gallium/winsys/dri/intel/intel_context.h b/src/gallium/winsys/dri/intel/intel_context.h index b01370c049..45430984d8 100644 --- a/src/gallium/winsys/dri/intel/intel_context.h +++ b/src/gallium/winsys/dri/intel/intel_context.h @@ -30,6 +30,9 @@ #include "drm.h" + +#include "pipe/p_debug.h" + #include "intel_screen.h" #include "i915_drm.h" diff --git a/src/gallium/winsys/dri/intel/intel_winsys_i915.c b/src/gallium/winsys/dri/intel/intel_winsys_i915.c index 0ed3890e93..2def1afc31 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_i915.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_i915.c @@ -40,6 +40,7 @@ #include "pipe/p_util.h" #include "i915simple/i915_winsys.h" +#include "i915simple/i915_screen.h" struct intel_i915_winsys { @@ -135,6 +136,7 @@ intel_create_i915simple( struct intel_context *intel, struct pipe_winsys *winsys ) { struct intel_i915_winsys *iws = CALLOC_STRUCT( intel_i915_winsys ); + struct pipe_screen *screen; /* Fill in this struct with callbacks that i915simple will need to * communicate with the window system, buffer manager, etc. @@ -146,9 +148,11 @@ intel_create_i915simple( struct intel_context *intel, iws->winsys.batch_finish = intel_i915_batch_finish; iws->intel = intel; + screen = i915_create_screen(winsys, intel->intelScreen->deviceID); + /* Create the i915simple context: */ - return i915_create( winsys, - &iws->winsys, - intel->intelScreen->deviceID ); + return i915_create_context( screen, + winsys, + &iws->winsys ); } diff --git a/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c b/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c index 9e483bdc9f..0bc2dc4002 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c @@ -68,6 +68,7 @@ intel_create_softpipe( struct intel_context *intel, struct pipe_winsys *winsys ) { struct intel_softpipe_winsys *isws = CALLOC_STRUCT( intel_softpipe_winsys ); + struct pipe_screen *screen = softpipe_create_screen(winsys); /* Fill in this struct with callbacks that softpipe will need to * communicate with the window system, buffer manager, etc. @@ -77,5 +78,5 @@ intel_create_softpipe( struct intel_context *intel, /* Create the softpipe context: */ - return softpipe_create( winsys, &isws->sws ); + return softpipe_create( screen, winsys, &isws->sws ); } diff --git a/src/gallium/winsys/dri/nouveau/nouveau_context.c b/src/gallium/winsys/dri/nouveau/nouveau_context.c index 01fada5b89..dc852c9f49 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_context.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_context.c @@ -4,6 +4,7 @@ #include "utils.h" #include "state_tracker/st_public.h" +#include "state_tracker/st_context.h" #include "pipe/p_defines.h" #include "pipe/p_context.h" @@ -21,6 +22,75 @@ static const struct dri_debug_control debug_control[] = { int __nouveau_debug = 0; #endif +static void +nouveau_channel_context_destroy(struct nouveau_channel_context *nvc) +{ + nouveau_grobj_free(&nvc->NvNull); + nouveau_grobj_free(&nvc->NvCtxSurf2D); + nouveau_grobj_free(&nvc->NvImageBlit); + nouveau_grobj_free(&nvc->NvGdiRect); + nouveau_grobj_free(&nvc->NvM2MF); + nouveau_grobj_free(&nvc->Nv2D); + + nouveau_notifier_free(&nvc->sync_notifier); + + nouveau_channel_free(&nvc->channel); + + FREE(nvc); +} + +static struct nouveau_channel_context * +nouveau_channel_context_create(struct nouveau_device *nvdev, unsigned chipset) +{ + struct nouveau_channel_context *nvc; + int ret; + + nvc = CALLOC_STRUCT(nouveau_channel_context); + if (!nvc) + return NULL; + nvc->chipset = chipset; + + if ((ret = nouveau_channel_alloc(nvdev, 0x8003d001, 0x8003d002, + &nvc->channel))) { + NOUVEAU_ERR("Error creating GPU channel: %d\n", ret); + nouveau_channel_context_destroy(nvc); + return NULL; + } + + if ((ret = nouveau_grobj_alloc(nvc->channel, 0x00000000, 0x30, + &nvc->NvNull))) { + NOUVEAU_ERR("Error creating NULL object: %d\n", ret); + nouveau_channel_context_destroy(nvc); + return NULL; + } + nvc->next_handle = 0x80000000; + + if ((ret = nouveau_notifier_alloc(nvc->channel, nvc->next_handle++, 1, + &nvc->sync_notifier))) { + NOUVEAU_ERR("Error creating channel sync notifier: %d\n", ret); + nouveau_channel_context_destroy(nvc); + return NULL; + } + + switch (chipset) { + case 0x50: + case 0x80: + ret = nouveau_surface_channel_create_nv50(nvc); + break; + default: + ret = nouveau_surface_channel_create_nv04(nvc); + break; + } + + if (ret) { + NOUVEAU_ERR("Error initialising surface objects: %d\n", ret); + nouveau_channel_context_destroy(nvc); + return NULL; + } + + return nvc; +} + GLboolean nouveau_context_create(const __GLcontextModes *glVis, __DRIcontextPrivate *driContextPriv, @@ -32,7 +102,8 @@ nouveau_context_create(const __GLcontextModes *glVis, struct nouveau_device_priv *nvdev; struct pipe_context *pipe = NULL; struct st_context *st_share = NULL; - int ret; + struct nouveau_channel_context *nvc = NULL; + int i, ret; if (sharedContextPrivate) { st_share = ((struct nouveau_context *)sharedContextPrivate)->st; @@ -45,13 +116,6 @@ nouveau_context_create(const __GLcontextModes *glVis, return GL_FALSE; } - if ((ret = nouveau_channel_alloc(nv_screen->device, - 0x8003d001, 0x8003d002, - &nv->channel))) { - NOUVEAU_ERR("Error creating GPU channel: %d\n", ret); - return GL_FALSE; - } - driContextPriv->driverPrivate = (void *)nv; nv->nv_screen = nv_screen; nv->dri_screen = driScrnPriv; @@ -101,19 +165,56 @@ nouveau_context_create(const __GLcontextModes *glVis, nv->frontbuffer = fb_surf; } - if ((ret = nouveau_grobj_alloc(nv->channel, 0x00000000, 0x30, - &nv->NvNull))) { - NOUVEAU_ERR("Error creating NULL object: %d\n", ret); - return GL_FALSE; + /* Attempt to share a single channel between multiple contexts from + * a single process. + */ + nvc = nv_screen->nvc; + if (!nvc && st_share) { + struct nouveau_context *snv = st_share->pipe->priv; + if (snv) { + nvc = snv->nvc; + } } - nv->next_handle = 0x80000000; - if ((ret = nouveau_notifier_alloc(nv->channel, nv->next_handle++, 1, - &nv->sync_notifier))) { - NOUVEAU_ERR("Error creating channel sync notifier: %d\n", ret); - return GL_FALSE; + /*XXX: temporary - disable multi-context/single-channel on non-NV4x */ + switch (nv->chipset & 0xf0) { + case 0x40: + case 0x60: + break; + default: + nvc = NULL; + break; + } + + if (!nvc) { + nvc = nouveau_channel_context_create(&nvdev->base, nv->chipset); + if (!nvc) { + NOUVEAU_ERR("Failed initialising GPU context\n"); + return GL_FALSE; + } + nv_screen->nvc = nvc; + } + + nvc->refcount++; + nv->nvc = nvc; + + /* Find a free slot for a pipe context, allocate a new one if needed */ + nv->pctx_id = -1; + for (i = 0; i < nvc->nr_pctx; i++) { + if (nvc->pctx[i] == NULL) { + nv->pctx_id = i; + break; + } } + if (nv->pctx_id < 0) { + nv->pctx_id = nvc->nr_pctx++; + nvc->pctx = + realloc(nvc->pctx, + sizeof(struct pipe_context *) * nvc->nr_pctx); + } + + /* Create pipe */ if (nv->chipset < 0x50) ret = nouveau_surface_init_nv04(nv); else @@ -146,15 +247,18 @@ void nouveau_context_destroy(__DRIcontextPrivate *driContextPriv) { struct nouveau_context *nv = driContextPriv->driverPrivate; + struct nouveau_channel_context *nvc = nv->nvc; assert(nv); st_flush(nv->st, PIPE_FLUSH_WAIT); st_destroy_context(nv->st); - nouveau_grobj_free(&nv->NvCtxSurf2D); - nouveau_grobj_free(&nv->NvImageBlit); - nouveau_channel_free(&nv->channel); + if (nv->pctx_id >= 0) { + nvc->pctx[nv->pctx_id] = NULL; + if (--nvc->refcount <= 0) + nouveau_channel_context_destroy(nvc); + } free(nv); } diff --git a/src/gallium/winsys/dri/nouveau/nouveau_context.h b/src/gallium/winsys/dri/nouveau/nouveau_context.h index 5805f969ba..92f551855a 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_context.h +++ b/src/gallium/winsys/dri/nouveau/nouveau_context.h @@ -13,6 +13,32 @@ struct nouveau_framebuffer { struct st_framebuffer *stfb; }; +struct nouveau_channel_context { + struct pipe_screen *pscreen; + int refcount; + + unsigned cur_pctx; + unsigned nr_pctx; + struct pipe_context **pctx; + + unsigned chipset; + + struct nouveau_channel *channel; + + struct nouveau_notifier *sync_notifier; + + struct nouveau_grobj *NvNull; + struct nouveau_grobj *NvCtxSurf2D; + struct nouveau_grobj *NvImageBlit; + struct nouveau_grobj *NvGdiRect; + struct nouveau_grobj *NvM2MF; + struct nouveau_grobj *Nv2D; + + uint32_t next_handle; + uint32_t next_subchannel; + uint32_t next_sequence; +}; + struct nouveau_context { struct st_context *st; @@ -31,17 +57,8 @@ struct nouveau_context { struct pipe_surface *frontbuffer; /* Hardware context */ - struct nouveau_channel *channel; - struct nouveau_notifier *sync_notifier; - struct nouveau_grobj *NvNull; - struct nouveau_grobj *NvCtxSurf2D; - struct nouveau_grobj *NvImageBlit; - struct nouveau_grobj *NvGdiRect; - struct nouveau_grobj *NvM2MF; - struct nouveau_grobj *Nv2D; - uint32_t next_handle; - uint32_t next_subchannel; - uint32_t next_sequence; + struct nouveau_channel_context *nvc; + int pctx_id; /* pipe_surface accel */ struct pipe_surface *surf_src, *surf_dst; @@ -80,6 +97,10 @@ extern int __nouveau_debug; extern void LOCK_HARDWARE(struct nouveau_context *); extern void UNLOCK_HARDWARE(struct nouveau_context *); +extern int +nouveau_surface_channel_create_nv04(struct nouveau_channel_context *); +extern int +nouveau_surface_channel_create_nv50(struct nouveau_channel_context *); extern int nouveau_surface_init_nv04(struct nouveau_context *); extern int nouveau_surface_init_nv50(struct nouveau_context *); diff --git a/src/gallium/winsys/dri/nouveau/nouveau_local.h b/src/gallium/winsys/dri/nouveau/nouveau_local.h index 59febca292..f7d91fc748 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_local.h +++ b/src/gallium/winsys/dri/nouveau/nouveau_local.h @@ -1,8 +1,11 @@ #ifndef __NOUVEAU_LOCAL_H__ #define __NOUVEAU_LOCAL_H__ +#include "pipe/p_compiler.h" #include <stdio.h> +struct pipe_buffer; + /* Debug output */ #define NOUVEAU_MSG(fmt, args...) do { \ fprintf(stdout, "nouveau: "fmt, ##args); \ @@ -24,66 +27,90 @@ #define NOUVEAU_DMA_TIMEOUT 2000 /* Push buffer access macros */ -#define OUT_RING(data) do { \ - (*nv->channel->pushbuf->cur++) = (data); \ -} while(0) - -#define OUT_RINGp(src,size) do { \ - memcpy(nv->channel->pushbuf->cur, (src), (size)<<2); \ - nv->channel->pushbuf->cur += (size); \ -} while(0) - -#define OUT_RINGf(data) do { \ - union { float v; uint32_t u; } c; \ - c.v = (data); \ - OUT_RING(c.u); \ -} while(0) - -#define FIRE_RING() do { \ - nouveau_pushbuf_flush(nv->channel, 0); \ -} while(0) - -#define BEGIN_RING_GR(obj,mthd,size) do { \ - if (nv->channel->pushbuf->remaining < ((size) + 1)) \ - nouveau_pushbuf_flush(nv->channel, ((size) + 1)); \ - OUT_RING(((obj)->subc << 13) | ((size) << 18) | (mthd)); \ - nv->channel->pushbuf->remaining -= ((size) + 1); \ -} while(0) - -#define BEGIN_RING(obj,mthd,size) do { \ - BEGIN_RING_GR(nv->obj, (mthd), (size)); \ -} while(0) - -#define BIND_RING(o,s) do { \ - nv->o->subc = (s); \ - BEGIN_RING(o, 0x0000, 1); \ - OUT_RING (nv->o->handle); \ -} while(0) - -#define OUT_RELOC(buf,data,flags,vor,tor) do { \ - nouveau_pipe_emit_reloc(nv->channel, nv->channel->pushbuf->cur++, \ - buf, (data), (flags), (vor), (tor)); \ -} while(0) +static INLINE void +OUT_RING(struct nouveau_channel *chan, unsigned data) +{ + *(chan->pushbuf->cur++) = (data); +} + +static INLINE void +OUT_RINGp(struct nouveau_channel *chan, uint32_t *data, unsigned size) +{ + memcpy(chan->pushbuf->cur, data, size * 4); + chan->pushbuf->cur += size; +} + +static INLINE void +OUT_RINGf(struct nouveau_channel *chan, float f) +{ + union { uint32_t i; float f; } c; + c.f = f; + OUT_RING(chan, c.i); +} + +static INLINE void +BEGIN_RING(struct nouveau_channel *chan, struct nouveau_grobj *gr, + unsigned mthd, unsigned size) +{ + if (chan->pushbuf->remaining < (size + 1)) + nouveau_pushbuf_flush(chan, (size + 1)); + OUT_RING(chan, (gr->subc << 13) | (size << 18) | mthd); + chan->pushbuf->remaining -= (size + 1); +} + +static INLINE void +FIRE_RING(struct nouveau_channel *chan) +{ + nouveau_pushbuf_flush(chan, 0); +} + +static INLINE void +BIND_RING(struct nouveau_channel *chan, struct nouveau_grobj *gr, unsigned subc) +{ + gr->subc = subc; + BEGIN_RING(chan, gr, 0x0000, 1); + OUT_RING (chan, gr->handle); +} + +static INLINE void +OUT_RELOC(struct nouveau_channel *chan, struct pipe_buffer *buf, + unsigned data, unsigned flags, unsigned vor, unsigned tor) +{ + nouveau_pipe_emit_reloc(chan, chan->pushbuf->cur++, buf, + data, flags, vor, tor); +} /* Raw data + flags depending on FB/TT buffer */ -#define OUT_RELOCd(bo,data,flags,vor,tor) do { \ - OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \ -} while(0) +static INLINE void +OUT_RELOCd(struct nouveau_channel *chan, struct pipe_buffer *buf, + unsigned data, unsigned flags, unsigned vor, unsigned tor) +{ + OUT_RELOC(chan, buf, data, flags | NOUVEAU_BO_OR, vor, tor); +} /* FB/TT object handle */ -#define OUT_RELOCo(bo,flags) do { \ - OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \ - nv->channel->vram->handle, nv->channel->gart->handle); \ -} while(0) +static INLINE void +OUT_RELOCo(struct nouveau_channel *chan, struct pipe_buffer *buf, + unsigned flags) +{ + OUT_RELOC(chan, buf, 0, flags | NOUVEAU_BO_OR, + chan->vram->handle, chan->gart->handle); +} /* Low 32-bits of offset */ -#define OUT_RELOCl(bo,delta,flags) do { \ - OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \ -} while(0) +static INLINE void +OUT_RELOCl(struct nouveau_channel *chan, struct pipe_buffer *buf, + unsigned delta, unsigned flags) +{ + OUT_RELOC(chan, buf, delta, flags | NOUVEAU_BO_LOW, 0, 0); +} /* High 32-bits of offset */ -#define OUT_RELOCh(bo,delta,flags) do { \ - OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \ -} while(0) +static INLINE void +OUT_RELOCh(struct nouveau_channel *chan, struct pipe_buffer *buf, + unsigned delta, unsigned flags) +{ + OUT_RELOC(chan, buf, delta, flags | NOUVEAU_BO_HIGH, 0, 0); +} #endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_screen.h b/src/gallium/winsys/dri/nouveau/nouveau_screen.h index 019823bd44..e9da202690 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_screen.h +++ b/src/gallium/winsys/dri/nouveau/nouveau_screen.h @@ -14,6 +14,8 @@ struct nouveau_screen { uint32_t front_pitch; uint32_t front_cpp; uint32_t front_height; + + void *nvc; }; #endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_swapbuffers.c b/src/gallium/winsys/dri/nouveau/nouveau_swapbuffers.c index 91bf243f42..70e0104e83 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_swapbuffers.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_swapbuffers.c @@ -42,7 +42,7 @@ nouveau_copy_buffer(__DRIdrawablePrivate *dPriv, struct pipe_surface *surf, nv->surface_copy(nv, dx, dy, sx, sy, w, h); } - FIRE_RING(); + FIRE_RING(nv->nvc->channel); UNLOCK_HARDWARE(nv); if (nv->last_stamp != dPriv->lastStamp) { diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys.c b/src/gallium/winsys/dri/nouveau/nouveau_winsys.c index 2ca05d84c6..87619bdcfb 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_winsys.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_winsys.c @@ -11,7 +11,7 @@ nouveau_pipe_notifier_alloc(struct nouveau_winsys *nvws, int count, { struct nouveau_context *nv = nvws->nv; - return nouveau_notifier_alloc(nv->channel, nv->next_handle++, + return nouveau_notifier_alloc(nv->nvc->channel, nv->nvc->next_handle++, count, notify); } @@ -20,17 +20,16 @@ nouveau_pipe_grobj_alloc(struct nouveau_winsys *nvws, int grclass, struct nouveau_grobj **grobj) { struct nouveau_context *nv = nvws->nv; + struct nouveau_channel *chan = nv->nvc->channel; int ret; - ret = nouveau_grobj_alloc(nv->channel, nv->next_handle++, + ret = nouveau_grobj_alloc(chan, nv->nvc->next_handle++, grclass, grobj); if (ret) return ret; - (*grobj)->subc = nv->next_subchannel++; - assert((*grobj)->subc <= 7); - BEGIN_RING_GR(*grobj, 0x0000, 1); - OUT_RING ((*grobj)->handle); + assert(nv->nvc->next_subchannel < 7); + BIND_RING(chan, *grobj, nv->nvc->next_subchannel++); return 0; } @@ -71,24 +70,31 @@ nouveau_pipe_emit_reloc(struct nouveau_channel *chan, void *ptr, struct pipe_context * nouveau_pipe_create(struct nouveau_context *nv) { + struct nouveau_channel_context *nvc = nv->nvc; struct nouveau_winsys *nvws = CALLOC_STRUCT(nouveau_winsys); - struct pipe_context *(*hw_create)(struct pipe_winsys *, + struct pipe_screen *(*hws_create)(struct pipe_winsys *, struct nouveau_winsys *, - unsigned); + unsigned chipset); + struct pipe_context *(*hw_create)(struct pipe_screen *, unsigned); + struct pipe_winsys *ws; + struct pipe_screen *pscreen; if (!nvws) return NULL; switch (nv->chipset & 0xf0) { case 0x30: + hws_create = nv30_screen_create; hw_create = nv30_create; break; case 0x40: case 0x60: + hws_create = nv40_screen_create; hw_create = nv40_create; break; case 0x50: case 0x80: + hws_create = nv50_screen_create; hw_create = nv50_create; break; default: @@ -97,7 +103,7 @@ nouveau_pipe_create(struct nouveau_context *nv) } nvws->nv = nv; - nvws->channel = nv->channel; + nvws->channel = nv->nvc->channel; nvws->res_init = nouveau_resource_init; nvws->res_alloc = nouveau_resource_alloc; @@ -119,6 +125,11 @@ nouveau_pipe_create(struct nouveau_context *nv) nvws->surface_copy = nouveau_pipe_surface_copy; nvws->surface_fill = nouveau_pipe_surface_fill; - return hw_create(nouveau_create_pipe_winsys(nv), nvws, nv->chipset); + ws = nouveau_create_pipe_winsys(nv); + + if (!nvc->pscreen) + nvc->pscreen = hws_create(ws, nvws, nv->chipset); + nvc->pctx[nv->pctx_id] = hw_create(nvc->pscreen, nv->pctx_id); + return nvc->pctx[nv->pctx_id]; } diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys_softpipe.c b/src/gallium/winsys/dri/nouveau/nouveau_winsys_softpipe.c index 0e1b4273d1..704f6c7750 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_winsys_softpipe.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_winsys_softpipe.c @@ -61,23 +61,25 @@ nouveau_is_format_supported(struct softpipe_winsys *sws, uint format) return FALSE; } - - struct pipe_context * nouveau_create_softpipe(struct nouveau_context *nv) { - struct nouveau_softpipe_winsys *nvsws; - - nvsws = CALLOC_STRUCT(nouveau_softpipe_winsys); - - /* Fill in this struct with callbacks that softpipe will need to - * communicate with the window system, buffer manager, etc. - */ - nvsws->sws.is_format_supported = nouveau_is_format_supported; - nvsws->nv = nv; + struct nouveau_softpipe_winsys *nvsws; + struct pipe_screen *pscreen; + struct pipe_winsys *ws; + + ws = nouveau_create_pipe_winsys(nv); + if (!ws) + return NULL; + pscreen = softpipe_create_screen(ws); + + nvsws = CALLOC_STRUCT(nouveau_softpipe_winsys); + if (!nvsws) + return NULL; + + nvsws->sws.is_format_supported = nouveau_is_format_supported; + nvsws->nv = nv; - /* Create the softpipe context: - */ - return softpipe_create(nouveau_create_pipe_winsys(nv), &nvsws->sws); + return softpipe_create(pscreen, ws, &nvsws->sws); } diff --git a/src/gallium/winsys/dri/nouveau/nv04_surface.c b/src/gallium/winsys/dri/nouveau/nv04_surface.c index fe1ea4ed70..cdcd71eaad 100644 --- a/src/gallium/winsys/dri/nouveau/nv04_surface.c +++ b/src/gallium/winsys/dri/nouveau/nv04_surface.c @@ -30,6 +30,7 @@ static void nv04_surface_copy_m2mf(struct nouveau_context *nv, unsigned dx, unsigned dy, unsigned sx, unsigned sy, unsigned w, unsigned h) { + struct nouveau_channel *chan = nv->nvc->channel; struct pipe_surface *dst = nv->surf_dst; struct pipe_surface *src = nv->surf_src; unsigned dst_offset, src_offset; @@ -40,17 +41,18 @@ nv04_surface_copy_m2mf(struct nouveau_context *nv, unsigned dx, unsigned dy, while (h) { int count = (h > 2047) ? 2047 : h; - BEGIN_RING(NvM2MF, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8); - OUT_RELOCl(src->buffer, src_offset, NOUVEAU_BO_VRAM | + BEGIN_RING(chan, nv->nvc->NvM2MF, + NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8); + OUT_RELOCl(chan, src->buffer, src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCl(dst->buffer, dst_offset, NOUVEAU_BO_VRAM | + OUT_RELOCl(chan, dst->buffer, dst_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR); - OUT_RING (src->pitch * src->cpp); - OUT_RING (dst->pitch * dst->cpp); - OUT_RING (w * src->cpp); - OUT_RING (count); - OUT_RING (0x0101); - OUT_RING (0); + OUT_RING (chan, src->pitch * src->cpp); + OUT_RING (chan, dst->pitch * dst->cpp); + OUT_RING (chan, w * src->cpp); + OUT_RING (chan, count); + OUT_RING (chan, 0x0101); + OUT_RING (chan, 0); h -= count; src_offset += src->pitch * src->cpp * count; @@ -62,16 +64,19 @@ static void nv04_surface_copy_blit(struct nouveau_context *nv, unsigned dx, unsigned dy, unsigned sx, unsigned sy, unsigned w, unsigned h) { - BEGIN_RING(NvImageBlit, 0x0300, 3); - OUT_RING ((sy << 16) | sx); - OUT_RING ((dy << 16) | dx); - OUT_RING (( h << 16) | w); + struct nouveau_channel *chan = nv->nvc->channel; + + BEGIN_RING(chan, nv->nvc->NvImageBlit, 0x0300, 3); + OUT_RING (chan, (sy << 16) | sx); + OUT_RING (chan, (dy << 16) | dx); + OUT_RING (chan, ( h << 16) | w); } static int nv04_surface_copy_prep(struct nouveau_context *nv, struct pipe_surface *dst, struct pipe_surface *src) { + struct nouveau_channel *chan = nv->nvc->channel; int format; if (src->cpp != dst->cpp) @@ -81,12 +86,12 @@ nv04_surface_copy_prep(struct nouveau_context *nv, struct pipe_surface *dst, * to NV_MEMORY_TO_MEMORY_FORMAT in this case. */ if ((src->offset & 63) || (dst->offset & 63)) { - BEGIN_RING(NvM2MF, + BEGIN_RING(nv->nvc->channel, nv->nvc->NvM2MF, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2); - OUT_RELOCo(src->buffer, NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD); - OUT_RELOCo(dst->buffer, NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | - NOUVEAU_BO_WR); + OUT_RELOCo(chan, src->buffer, NOUVEAU_BO_GART | + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCo(chan, dst->buffer, NOUVEAU_BO_GART | + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); nv->surface_copy = nv04_surface_copy_m2mf; nv->surf_dst = dst; @@ -101,15 +106,20 @@ nv04_surface_copy_prep(struct nouveau_context *nv, struct pipe_surface *dst, } nv->surface_copy = nv04_surface_copy_blit; - BEGIN_RING(NvCtxSurf2D, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); - OUT_RELOCo(src->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCo(dst->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - BEGIN_RING(NvCtxSurf2D, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); - OUT_RING (format); - OUT_RING (((dst->pitch * dst->cpp) << 16) | (src->pitch * src->cpp)); - OUT_RELOCl(src->buffer, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCl(dst->buffer, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, nv->nvc->NvCtxSurf2D, + NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); + OUT_RELOCo(chan, src->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCo(chan, dst->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(chan, nv->nvc->NvCtxSurf2D, + NV04_CONTEXT_SURFACES_2D_FORMAT, 4); + OUT_RING (chan, format); + OUT_RING (chan, ((dst->pitch * dst->cpp) << 16) | + (src->pitch * src->cpp)); + OUT_RELOCl(chan, src->buffer, src->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, dst->buffer, dst->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); return 0; } @@ -117,7 +127,7 @@ nv04_surface_copy_prep(struct nouveau_context *nv, struct pipe_surface *dst, static void nv04_surface_copy_done(struct nouveau_context *nv) { - FIRE_RING(); + FIRE_RING(nv->nvc->channel); } static int @@ -125,6 +135,9 @@ nv04_surface_fill(struct nouveau_context *nv, struct pipe_surface *dst, unsigned dx, unsigned dy, unsigned w, unsigned h, unsigned value) { + struct nouveau_channel *chan = nv->nvc->channel; + struct nouveau_grobj *surf2d = nv->nvc->NvCtxSurf2D; + struct nouveau_grobj *rect = nv->nvc->NvGdiRect; int cs2d_format, gdirect_format; if ((cs2d_format = nv04_surface_format(dst->cpp)) < 0) { @@ -137,86 +150,99 @@ nv04_surface_fill(struct nouveau_context *nv, struct pipe_surface *dst, return 1; } - BEGIN_RING(NvCtxSurf2D, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); - OUT_RELOCo(dst->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCo(dst->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(NvCtxSurf2D, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); - OUT_RING (cs2d_format); - OUT_RING (((dst->pitch * dst->cpp) << 16) | (dst->pitch * dst->cpp)); - OUT_RELOCl(dst->buffer, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(dst->buffer, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - BEGIN_RING(NvGdiRect, NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT, 1); - OUT_RING (gdirect_format); - BEGIN_RING(NvGdiRect, NV04_GDI_RECTANGLE_TEXT_COLOR1_A, 1); - OUT_RING (value); - BEGIN_RING(NvGdiRect, + BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); + OUT_RELOCo(chan, dst->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCo(chan, dst->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); + OUT_RING (chan, cs2d_format); + OUT_RING (chan, ((dst->pitch * dst->cpp) << 16) | + (dst->pitch * dst->cpp)); + OUT_RELOCl(chan, dst->buffer, dst->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, dst->buffer, dst->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT, 1); + OUT_RING (chan, gdirect_format); + BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR1_A, 1); + OUT_RING (chan, value); + BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(0), 2); - OUT_RING ((dx << 16) | dy); - OUT_RING (( w << 16) | h); + OUT_RING (chan, (dx << 16) | dy); + OUT_RING (chan, ( w << 16) | h); - FIRE_RING(); + FIRE_RING(chan); return 0; } int -nouveau_surface_init_nv04(struct nouveau_context *nv) +nouveau_surface_channel_create_nv04(struct nouveau_channel_context *nvc) { + struct nouveau_channel *chan = nvc->channel; unsigned class; int ret; - if ((ret = nouveau_grobj_alloc(nv->channel, nv->next_handle++, 0x39, - &nv->NvM2MF))) { + if ((ret = nouveau_grobj_alloc(chan, nvc->next_handle++, 0x39, + &nvc->NvM2MF))) { NOUVEAU_ERR("Error creating m2mf object: %d\n", ret); return 1; } - BIND_RING (NvM2MF, nv->next_subchannel++); - BEGIN_RING(NvM2MF, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1); - OUT_RING (nv->sync_notifier->handle); - - class = nv->chipset < 0x10 ? NV04_CONTEXT_SURFACES_2D : - NV10_CONTEXT_SURFACES_2D; - if ((ret = nouveau_grobj_alloc(nv->channel, nv->next_handle++, class, - &nv->NvCtxSurf2D))) { + BIND_RING (chan, nvc->NvM2MF, nvc->next_subchannel++); + BEGIN_RING(chan, nvc->NvM2MF, + NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1); + OUT_RING (chan, nvc->sync_notifier->handle); + + class = nvc->chipset < 0x10 ? NV04_CONTEXT_SURFACES_2D : + NV10_CONTEXT_SURFACES_2D; + if ((ret = nouveau_grobj_alloc(chan, nvc->next_handle++, class, + &nvc->NvCtxSurf2D))) { NOUVEAU_ERR("Error creating 2D surface object: %d\n", ret); return 1; } - BIND_RING (NvCtxSurf2D, nv->next_subchannel++); - BEGIN_RING(NvCtxSurf2D, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); - OUT_RING (nv->channel->vram->handle); - OUT_RING (nv->channel->vram->handle); - - class = nv->chipset < 0x10 ? NV04_IMAGE_BLIT : - NV12_IMAGE_BLIT; - if ((ret = nouveau_grobj_alloc(nv->channel, nv->next_handle++, class, - &nv->NvImageBlit))) { + BIND_RING (chan, nvc->NvCtxSurf2D, nvc->next_subchannel++); + BEGIN_RING(chan, nvc->NvCtxSurf2D, + NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); + OUT_RING (chan, nvc->channel->vram->handle); + OUT_RING (chan, nvc->channel->vram->handle); + + class = nvc->chipset < 0x10 ? NV04_IMAGE_BLIT : + NV12_IMAGE_BLIT; + if ((ret = nouveau_grobj_alloc(chan, nvc->next_handle++, class, + &nvc->NvImageBlit))) { NOUVEAU_ERR("Error creating blit object: %d\n", ret); return 1; } - BIND_RING (NvImageBlit, nv->next_subchannel++); - BEGIN_RING(NvImageBlit, NV04_IMAGE_BLIT_DMA_NOTIFY, 1); - OUT_RING (nv->sync_notifier->handle); - BEGIN_RING(NvImageBlit, NV04_IMAGE_BLIT_SURFACE, 1); - OUT_RING (nv->NvCtxSurf2D->handle); - BEGIN_RING(NvImageBlit, NV04_IMAGE_BLIT_OPERATION, 1); - OUT_RING (NV04_IMAGE_BLIT_OPERATION_SRCCOPY); + BIND_RING (chan, nvc->NvImageBlit, nvc->next_subchannel++); + BEGIN_RING(chan, nvc->NvImageBlit, NV04_IMAGE_BLIT_DMA_NOTIFY, 1); + OUT_RING (chan, nvc->sync_notifier->handle); + BEGIN_RING(chan, nvc->NvImageBlit, NV04_IMAGE_BLIT_SURFACE, 1); + OUT_RING (chan, nvc->NvCtxSurf2D->handle); + BEGIN_RING(chan, nvc->NvImageBlit, NV04_IMAGE_BLIT_OPERATION, 1); + OUT_RING (chan, NV04_IMAGE_BLIT_OPERATION_SRCCOPY); class = NV04_GDI_RECTANGLE_TEXT; - if ((ret = nouveau_grobj_alloc(nv->channel, nv->next_handle++, class, - &nv->NvGdiRect))) { + if ((ret = nouveau_grobj_alloc(chan, nvc->next_handle++, class, + &nvc->NvGdiRect))) { NOUVEAU_ERR("Error creating rect object: %d\n", ret); return 1; } - BIND_RING (NvGdiRect, nv->next_subchannel++); - BEGIN_RING(NvGdiRect, NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY, 1); - OUT_RING (nv->sync_notifier->handle); - BEGIN_RING(NvGdiRect, NV04_GDI_RECTANGLE_TEXT_SURFACE, 1); - OUT_RING (nv->NvCtxSurf2D->handle); - BEGIN_RING(NvGdiRect, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1); - OUT_RING (NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY); - BEGIN_RING(NvGdiRect, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT, 1); - OUT_RING (NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE); + BIND_RING (chan, nvc->NvGdiRect, nvc->next_subchannel++); + BEGIN_RING(chan, nvc->NvGdiRect, NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY, 1); + OUT_RING (chan, nvc->sync_notifier->handle); + BEGIN_RING(chan, nvc->NvGdiRect, NV04_GDI_RECTANGLE_TEXT_SURFACE, 1); + OUT_RING (chan, nvc->NvCtxSurf2D->handle); + BEGIN_RING(chan, nvc->NvGdiRect, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1); + OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY); + BEGIN_RING(chan, nvc->NvGdiRect, + NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT, 1); + OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE); + return 0; +} + +int +nouveau_surface_init_nv04(struct nouveau_context *nv) +{ nv->surface_copy_prep = nv04_surface_copy_prep; nv->surface_copy = nv04_surface_copy_blit; nv->surface_copy_done = nv04_surface_copy_done; diff --git a/src/gallium/winsys/dri/nouveau/nv50_surface.c b/src/gallium/winsys/dri/nouveau/nv50_surface.c index 15a1002861..5d74fb8d2b 100644 --- a/src/gallium/winsys/dri/nouveau/nv50_surface.c +++ b/src/gallium/winsys/dri/nouveau/nv50_surface.c @@ -19,6 +19,8 @@ static int nv50_surface_copy_prep(struct nouveau_context *nv, struct pipe_surface *dst, struct pipe_surface *src) { + struct nouveau_channel *chan = nv->nvc->channel; + struct nouveau_grobj *eng2d = nv->nvc->Nv2D; int surf_format; assert(src->cpp == dst->cpp); @@ -26,34 +28,38 @@ nv50_surface_copy_prep(struct nouveau_context *nv, surf_format = nv50_format(dst->cpp); assert(surf_format >= 0); - BEGIN_RING(Nv2D, NV50_2D_DMA_IN_MEMORY0, 2); - OUT_RELOCo(src->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCo(dst->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - BEGIN_RING(Nv2D, NV50_2D_DST_FORMAT, 2); - OUT_RING (surf_format); - OUT_RING (1); - BEGIN_RING(Nv2D, NV50_2D_DST_PITCH, 5); - OUT_RING (dst->pitch * dst->cpp); - OUT_RING (dst->pitch); - OUT_RING (dst->height); - OUT_RELOCh(dst->buffer, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(dst->buffer, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(Nv2D, NV50_2D_CLIP_X, 4); - OUT_RING (0); - OUT_RING (0); - OUT_RING (dst->pitch); - OUT_RING (dst->height); - - BEGIN_RING(Nv2D, NV50_2D_SRC_FORMAT, 2); - OUT_RING (surf_format); - OUT_RING (1); - BEGIN_RING(Nv2D, NV50_2D_SRC_PITCH, 5); - OUT_RING (src->pitch * src->cpp); - OUT_RING (src->pitch); - OUT_RING (src->height); - OUT_RELOCh(src->buffer, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCl(src->buffer, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + BEGIN_RING(chan, eng2d, NV50_2D_DMA_IN_MEMORY0, 2); + OUT_RELOCo(chan, src->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCo(chan, dst->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 2); + OUT_RING (chan, surf_format); + OUT_RING (chan, 1); + BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 5); + OUT_RING (chan, dst->pitch * dst->cpp); + OUT_RING (chan, dst->pitch); + OUT_RING (chan, dst->height); + OUT_RELOCh(chan, dst->buffer, dst->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, dst->buffer, dst->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, eng2d, NV50_2D_CLIP_X, 4); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, dst->pitch); + OUT_RING (chan, dst->height); + + BEGIN_RING(chan, eng2d, NV50_2D_SRC_FORMAT, 2); + OUT_RING (chan, surf_format); + OUT_RING (chan, 1); + BEGIN_RING(chan, eng2d, NV50_2D_SRC_PITCH, 5); + OUT_RING (chan, src->pitch * src->cpp); + OUT_RING (chan, src->pitch); + OUT_RING (chan, src->height); + OUT_RELOCh(chan, src->buffer, src->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, src->buffer, src->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); return 0; } @@ -62,27 +68,30 @@ static void nv50_surface_copy(struct nouveau_context *nv, unsigned dx, unsigned dy, unsigned sx, unsigned sy, unsigned w, unsigned h) { - BEGIN_RING(Nv2D, 0x0110, 1); - OUT_RING (0); - BEGIN_RING(Nv2D, NV50_2D_BLIT_DST_X, 12); - OUT_RING (dx); - OUT_RING (dy); - OUT_RING (w); - OUT_RING (h); - OUT_RING (0); - OUT_RING (1); - OUT_RING (0); - OUT_RING (1); - OUT_RING (0); - OUT_RING (sx); - OUT_RING (0); - OUT_RING (sy); + struct nouveau_channel *chan = nv->nvc->channel; + struct nouveau_grobj *eng2d = nv->nvc->Nv2D; + + BEGIN_RING(chan, eng2d, 0x0110, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, eng2d, NV50_2D_BLIT_DST_X, 12); + OUT_RING (chan, dx); + OUT_RING (chan, dy); + OUT_RING (chan, w); + OUT_RING (chan, h); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, sx); + OUT_RING (chan, 0); + OUT_RING (chan, sy); } static void nv50_surface_copy_done(struct nouveau_context *nv) { - FIRE_RING(); + FIRE_RING(nv->nvc->channel); } static int @@ -90,6 +99,8 @@ nv50_surface_fill(struct nouveau_context *nv, struct pipe_surface *dst, unsigned dx, unsigned dy, unsigned w, unsigned h, unsigned value) { + struct nouveau_channel *chan = nv->nvc->channel; + struct nouveau_grobj *eng2d = nv->nvc->Nv2D; int surf_format, rect_format; surf_format = nv50_format(dst->cpp); @@ -100,57 +111,65 @@ nv50_surface_fill(struct nouveau_context *nv, struct pipe_surface *dst, if (rect_format < 0) return 1; - BEGIN_RING(Nv2D, NV50_2D_DMA_IN_MEMORY1, 1); - OUT_RELOCo(dst->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(Nv2D, NV50_2D_DST_FORMAT, 2); - OUT_RING (surf_format); - OUT_RING (1); - BEGIN_RING(Nv2D, NV50_2D_DST_PITCH, 5); - OUT_RING (dst->pitch * dst->cpp); - OUT_RING (dst->pitch); - OUT_RING (dst->height); - OUT_RELOCh(dst->buffer, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(dst->buffer, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(Nv2D, NV50_2D_CLIP_X, 4); - OUT_RING (0); - OUT_RING (0); - OUT_RING (dst->pitch); - OUT_RING (dst->height); - - BEGIN_RING(Nv2D, 0x0580, 3); - OUT_RING (4); - OUT_RING (rect_format); - OUT_RING (value); - - BEGIN_RING(Nv2D, NV50_2D_RECT_X1, 4); - OUT_RING (dx); - OUT_RING (dy); - OUT_RING (dx + w); - OUT_RING (dy + h); - - FIRE_RING(); + BEGIN_RING(chan, eng2d, NV50_2D_DMA_IN_MEMORY1, 1); + OUT_RELOCo(chan, dst->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 2); + OUT_RING (chan, surf_format); + OUT_RING (chan, 1); + BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 5); + OUT_RING (chan, dst->pitch * dst->cpp); + OUT_RING (chan, dst->pitch); + OUT_RING (chan, dst->height); + OUT_RELOCh(chan, dst->buffer, dst->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, dst->buffer, dst->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, eng2d, NV50_2D_CLIP_X, 4); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, dst->pitch); + OUT_RING (chan, dst->height); + + BEGIN_RING(chan, eng2d, 0x0580, 3); + OUT_RING (chan, 4); + OUT_RING (chan, rect_format); + OUT_RING (chan, value); + + BEGIN_RING(chan, eng2d, NV50_2D_RECT_X1, 4); + OUT_RING (chan, dx); + OUT_RING (chan, dy); + OUT_RING (chan, dx + w); + OUT_RING (chan, dy + h); + + FIRE_RING(chan); return 0; } int -nouveau_surface_init_nv50(struct nouveau_context *nv) +nouveau_surface_channel_create_nv50(struct nouveau_channel_context *nvc) { int ret; - ret = nouveau_grobj_alloc(nv->channel, nv->next_handle++, NV50_2D, - &nv->Nv2D); + ret = nouveau_grobj_alloc(nvc->channel, nvc->next_handle++, NV50_2D, + &nvc->Nv2D); if (ret) return ret; - BIND_RING (Nv2D, 0); - BEGIN_RING(Nv2D, NV50_2D_DMA_NOTIFY, 1); - OUT_RING (nv->sync_notifier->handle); - BEGIN_RING(Nv2D, NV50_2D_DMA_IN_MEMORY0, 2); - OUT_RING (nv->channel->vram->handle); - OUT_RING (nv->channel->vram->handle); - BEGIN_RING(Nv2D, NV50_2D_OPERATION, 1); - OUT_RING (NV50_2D_OPERATION_SRCCOPY); + BIND_RING (nvc->channel, nvc->Nv2D, 0); + BEGIN_RING(nvc->channel, nvc->Nv2D, NV50_2D_DMA_NOTIFY, 1); + OUT_RING (nvc->channel, nvc->sync_notifier->handle); + BEGIN_RING(nvc->channel, nvc->Nv2D, NV50_2D_DMA_IN_MEMORY0, 2); + OUT_RING (nvc->channel, nvc->channel->vram->handle); + OUT_RING (nvc->channel, nvc->channel->vram->handle); + BEGIN_RING(nvc->channel, nvc->Nv2D, NV50_2D_OPERATION, 1); + OUT_RING (nvc->channel, NV50_2D_OPERATION_SRCCOPY); + + return 0; +} +int +nouveau_surface_init_nv50(struct nouveau_context *nv) +{ nv->surface_copy_prep = nv50_surface_copy_prep; nv->surface_copy = nv50_surface_copy; nv->surface_copy_done = nv50_surface_copy_done; diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile new file mode 100644 index 0000000000..c443330942 --- /dev/null +++ b/src/gallium/winsys/xlib/Makefile @@ -0,0 +1,93 @@ +# src/gallium/winsys/xlib/Makefile + +TOP = ../../../.. +include $(TOP)/configs/current + + +GL_MAJOR = 1 +GL_MINOR = 5 +GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY) + + +INCLUDE_DIRS = \ + -I$(TOP)/include \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/mesa/main \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/gallium/auxiliary + +XLIB_WINSYS_SOURCES = \ + glxapi.c \ + fakeglx.c \ + xfonts.c \ + xm_api.c \ + xm_winsys.c \ + xm_winsys_aub.c \ + brw_aub.c + +XLIB_WINSYS_OBJECTS = $(XLIB_WINSYS_SOURCES:.c=.o) + + +ifeq ($(CONFIG_NAME), linux-cell) +# The SPU code is in a separate .a file, unfortunately +CELL_SPU_LIB = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a +endif + +LIBS = \ + $(GALLIUM_DRIVERS) \ + $(TOP)/src/mesa/libglapi.a \ + $(TOP)/src/mesa/libmesa.a \ + $(GALLIUM_AUXILIARIES) \ + $(CELL_SPU_LIB) \ + + +.SUFFIXES : .cpp + +.c.o: + $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + +.cpp.o: + $(CXX) -c $(INCLUDE_DIRS) $(CXXFLAGS) $< -o $@ + + + +default: $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) + + +# Make the libGL.so library +$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(XLIB_WINSYS_OBJECTS) $(LIBS) + $(TOP)/bin/mklib -o $(GL_LIB) \ + -linker "$(CC)" \ + -major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \ + -install $(TOP)/$(LIB_DIR) \ + $(MKLIB_OPTIONS) $(XLIB_WINSYS_OBJECTS) \ + --start-group $(LIBS) --end-group $(GL_LIB_DEPS) + + +depend: $(ALL_SOURCES) + @ echo "running $(MKDEP)" + @ rm -f depend # workaround oops on gutsy?!? + @ touch depend + @ $(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) $(ALL_SOURCES) \ + > /dev/null 2>/dev/null + + +install: default + $(INSTALL) -d $(INSTALL_DIR)/include/GL + $(INSTALL) -d $(INSTALL_DIR)/$(LIB_DIR) + $(INSTALL) -m 644 $(TOP)/include/GL/*.h $(INSTALL_DIR)/include/GL + @if [ -e $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) ]; then \ + $(INSTALL) $(TOP)/$(LIB_DIR)/libGL* $(INSTALL_DIR)/$(LIB_DIR); \ + fi + + +# Emacs tags +tags: + etags `find . -name \*.[ch]` $(TOP)/include/GL/*.h + +clean: + -rm -f *.o + + +include depend diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript new file mode 100644 index 0000000000..c38b5be52c --- /dev/null +++ b/src/gallium/winsys/xlib/SConscript @@ -0,0 +1,34 @@ +####################################################################### +# SConscript for xlib winsys + +Import('*') + +env = env.Clone() + +env.Append(CPPPATH = [ + '#/src/mesa', + '#/src/mesa/main', +]) + +sources = [ + 'glxapi.c', + 'fakeglx.c', + 'xfonts.c', + 'xm_api.c', + 'xm_winsys.c', + 'xm_winsys_aub.c', + 'brw_aub.c', +] + +drivers = [ + softpipe, + i915simple, + i965simple, +] + +# TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions +env.SharedLibrary( + target ='GL', + source = sources, + LIBS = glapi + mesa + drivers + auxiliaries + env['LIBS'], +) diff --git a/src/gallium/winsys/xlib/brw_aub.c b/src/gallium/winsys/xlib/brw_aub.c index 541d50c6e4..10eedd8402 100644 --- a/src/gallium/winsys/xlib/brw_aub.c +++ b/src/gallium/winsys/xlib/brw_aub.c @@ -29,11 +29,13 @@ * Keith Whitwell <keith@tungstengraphics.com> */ +#include <stdio.h> +#include <stdlib.h> #include "brw_aub.h" #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "imports.h" -//#include "intel_winsys.h" +#include "pipe/p_util.h" +#include "pipe/p_debug.h" struct brw_aubfile { @@ -350,9 +352,9 @@ struct brw_aubfile *brw_aubfile_create( void ) i++; - if (_mesa_getenv("INTEL_AUBFILE")) { - val = snprintf(filename, sizeof(filename), "%s%d.aub", _mesa_getenv("INTEL_AUBFILE"), i%4); - _mesa_printf("--> Aub file: %s\n", filename); + if (getenv("INTEL_AUBFILE")) { + val = snprintf(filename, sizeof(filename), "%s%d.aub", getenv("INTEL_AUBFILE"), i%4); + debug_printf("--> Aub file: %s\n", filename); aubfile->file = fopen(filename, "w"); } else { @@ -360,12 +362,12 @@ struct brw_aubfile *brw_aubfile_create( void ) if (val < 0 || val > sizeof(filename)) strcpy(filename, "default.aub"); - _mesa_printf("--> Aub file: %s\n", filename); + debug_printf("--> Aub file: %s\n", filename); aubfile->file = fopen(filename, "w"); } if (!aubfile->file) { - _mesa_printf("couldn't open aubfile\n"); + debug_printf("couldn't open aubfile\n"); exit(1); } diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index 8da596d419..018cf01cd6 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -36,6 +36,8 @@ #include "glxheader.h" #include "xmesaP.h" +#undef ASSERT + #include "pipe/p_winsys.h" #include "pipe/p_format.h" #include "pipe/p_context.h" @@ -45,6 +47,7 @@ #ifdef GALLIUM_CELL #include "cell/ppu/cell_context.h" +#include "cell/ppu/cell_screen.h" #include "cell/ppu/cell_winsys.h" #else #define TILE_SIZE 32 /* avoid compilation errors */ @@ -448,7 +451,8 @@ xmesa_create_pipe_context(XMesaContext xmesa, uint pixelformat) #ifdef GALLIUM_CELL if (!getenv("GALLIUM_NOCELL")) { struct cell_winsys *cws = cell_get_winsys(pixelformat); - pipe = cell_create_context(pws, cws); + struct pipe_screen *screen = cell_create_screen(pws); + pipe = cell_create_context(screen, cws); if (pipe) pipe->priv = xmesa; return pipe; @@ -457,7 +461,8 @@ xmesa_create_pipe_context(XMesaContext xmesa, uint pixelformat) #endif { struct softpipe_winsys *spws = xmesa_get_softpipe_winsys(pixelformat); - pipe = softpipe_create( pws, spws ); + struct pipe_screen *screen = softpipe_create_screen(pws); + pipe = softpipe_create( screen, pws, spws ); if (pipe) pipe->priv = xmesa; diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.c b/src/gallium/winsys/xlib/xm_winsys_aub.c index dbfd37bda2..d55d8c39eb 100644 --- a/src/gallium/winsys/xlib/xm_winsys_aub.c +++ b/src/gallium/winsys/xlib/xm_winsys_aub.c @@ -40,6 +40,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "i965simple/brw_winsys.h" +#include "i965simple/brw_screen.h" #include "brw_aub.h" #include "xm_winsys_aub.h" @@ -565,6 +566,7 @@ struct pipe_context * xmesa_create_i965simple( struct pipe_winsys *winsys ) { struct aub_brw_winsys *iws = CALLOC_STRUCT( aub_brw_winsys ); + struct pipe_screen *screen = brw_create_screen(winsys, 0/* XXX pci_id */); /* Fill in this struct with callbacks that i965simple will need to * communicate with the window system, buffer manager, etc. @@ -583,7 +585,7 @@ xmesa_create_i965simple( struct pipe_winsys *winsys ) /* Create the i965simple context: */ - return brw_create( winsys, + return brw_create( screen, &iws->winsys, 0 ); } |