summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.c171
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.h25
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aaline.c18
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_pstipple.c14
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache.c3
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c280
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt32
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.c166
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.h32
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c705
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.c8
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.h2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.c53
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.h3
-rw-r--r--src/gallium/auxiliary/util/u_blit.c26
-rw-r--r--src/gallium/auxiliary/util/u_format.csv10
-rw-r--r--src/gallium/auxiliary/util/u_gen_mipmap.c33
-rw-r--r--src/gallium/auxiliary/util/u_math.h48
-rw-r--r--src/gallium/auxiliary/util/u_surface.c6
-rw-r--r--src/gallium/auxiliary/vl/vl_compositor.c16
-rw-r--r--src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c50
23 files changed, 1147 insertions, 560 deletions
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 4f13b3e2ba..b6f7b88322 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -50,20 +50,35 @@ struct cso_context {
struct {
void *samplers[PIPE_MAX_SAMPLERS];
unsigned nr_samplers;
+
+ void *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS];
+ unsigned nr_vertex_samplers;
} hw;
void *samplers[PIPE_MAX_SAMPLERS];
unsigned nr_samplers;
+ void *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS];
+ unsigned nr_vertex_samplers;
+
unsigned nr_samplers_saved;
void *samplers_saved[PIPE_MAX_SAMPLERS];
+ unsigned nr_vertex_samplers_saved;
+ void *vertex_samplers_saved[PIPE_MAX_VERTEX_SAMPLERS];
+
struct pipe_texture *textures[PIPE_MAX_SAMPLERS];
uint nr_textures;
+ struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS];
+ uint nr_vertex_textures;
+
uint nr_textures_saved;
struct pipe_texture *textures_saved[PIPE_MAX_SAMPLERS];
+ uint nr_vertex_textures_saved;
+ struct pipe_texture *vertex_textures_saved[PIPE_MAX_SAMPLERS];
+
/** Current and saved state.
* The saved state is used as a 1-deep stack.
*/
@@ -244,7 +259,8 @@ void cso_release_all( struct cso_context *ctx )
if (ctx->pipe) {
ctx->pipe->bind_blend_state( ctx->pipe, NULL );
ctx->pipe->bind_rasterizer_state( ctx->pipe, NULL );
- ctx->pipe->bind_sampler_states( ctx->pipe, 0, NULL );
+ ctx->pipe->bind_fragment_sampler_states( ctx->pipe, 0, NULL );
+ ctx->pipe->bind_vertex_sampler_states(ctx->pipe, 0, NULL);
ctx->pipe->bind_depth_stencil_alpha_state( ctx->pipe, NULL );
ctx->pipe->bind_fs_state( ctx->pipe, NULL );
ctx->pipe->bind_vs_state( ctx->pipe, NULL );
@@ -255,6 +271,11 @@ void cso_release_all( struct cso_context *ctx )
pipe_texture_reference(&ctx->textures_saved[i], NULL);
}
+ for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+ pipe_texture_reference(&ctx->vertex_textures[i], NULL);
+ pipe_texture_reference(&ctx->vertex_textures_saved[i], NULL);
+ }
+
free_framebuffer_state(&ctx->fb);
free_framebuffer_state(&ctx->fb_saved);
@@ -378,6 +399,46 @@ enum pipe_error cso_single_sampler(struct cso_context *ctx,
return PIPE_OK;
}
+enum pipe_error
+cso_single_vertex_sampler(struct cso_context *ctx,
+ unsigned idx,
+ const struct pipe_sampler_state *templ)
+{
+ void *handle = NULL;
+
+ if (templ != NULL) {
+ unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_sampler_state));
+ struct cso_hash_iter iter = cso_find_state_template(ctx->cache,
+ hash_key, CSO_SAMPLER,
+ (void*)templ);
+
+ if (cso_hash_iter_is_null(iter)) {
+ struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler));
+ if (!cso)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ memcpy(&cso->state, templ, sizeof(*templ));
+ cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state);
+ cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state;
+ cso->context = ctx->pipe;
+
+ iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso);
+ if (cso_hash_iter_is_null(iter)) {
+ FREE(cso);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
+ handle = cso->data;
+ }
+ else {
+ handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data;
+ }
+ }
+
+ ctx->vertex_samplers[idx] = handle;
+ return PIPE_OK;
+}
+
void cso_single_sampler_done( struct cso_context *ctx )
{
unsigned i;
@@ -398,7 +459,36 @@ void cso_single_sampler_done( struct cso_context *ctx )
memcpy(ctx->hw.samplers, ctx->samplers, ctx->nr_samplers * sizeof(void *));
ctx->hw.nr_samplers = ctx->nr_samplers;
- ctx->pipe->bind_sampler_states(ctx->pipe, ctx->nr_samplers, ctx->samplers);
+ ctx->pipe->bind_fragment_sampler_states(ctx->pipe, ctx->nr_samplers, ctx->samplers);
+ }
+}
+
+void
+cso_single_vertex_sampler_done(struct cso_context *ctx)
+{
+ unsigned i;
+
+ /* find highest non-null sampler */
+ for (i = PIPE_MAX_VERTEX_SAMPLERS; i > 0; i--) {
+ if (ctx->vertex_samplers[i - 1] != NULL)
+ break;
+ }
+
+ ctx->nr_vertex_samplers = i;
+
+ if (ctx->hw.nr_vertex_samplers != ctx->nr_vertex_samplers ||
+ memcmp(ctx->hw.vertex_samplers,
+ ctx->vertex_samplers,
+ ctx->nr_vertex_samplers * sizeof(void *)) != 0)
+ {
+ memcpy(ctx->hw.vertex_samplers,
+ ctx->vertex_samplers,
+ ctx->nr_vertex_samplers * sizeof(void *));
+ ctx->hw.nr_vertex_samplers = ctx->nr_vertex_samplers;
+
+ ctx->pipe->bind_vertex_sampler_states(ctx->pipe,
+ ctx->nr_vertex_samplers,
+ ctx->vertex_samplers);
}
}
@@ -447,6 +537,21 @@ void cso_restore_samplers(struct cso_context *ctx)
cso_single_sampler_done( ctx );
}
+void
+cso_save_vertex_samplers(struct cso_context *ctx)
+{
+ ctx->nr_vertex_samplers_saved = ctx->nr_vertex_samplers;
+ memcpy(ctx->vertex_samplers_saved, ctx->vertex_samplers, sizeof(ctx->vertex_samplers));
+}
+
+void
+cso_restore_vertex_samplers(struct cso_context *ctx)
+{
+ ctx->nr_vertex_samplers = ctx->nr_vertex_samplers_saved;
+ memcpy(ctx->vertex_samplers, ctx->vertex_samplers_saved, sizeof(ctx->vertex_samplers));
+ cso_single_vertex_sampler_done(ctx);
+}
+
enum pipe_error cso_set_sampler_textures( struct cso_context *ctx,
uint count,
@@ -461,7 +566,7 @@ enum pipe_error cso_set_sampler_textures( struct cso_context *ctx,
for ( ; i < PIPE_MAX_SAMPLERS; i++)
pipe_texture_reference(&ctx->textures[i], NULL);
- ctx->pipe->set_sampler_textures(ctx->pipe, count, textures);
+ ctx->pipe->set_fragment_sampler_textures(ctx->pipe, count, textures);
return PIPE_OK;
}
@@ -491,13 +596,71 @@ void cso_restore_sampler_textures( struct cso_context *ctx )
for ( ; i < PIPE_MAX_SAMPLERS; i++)
pipe_texture_reference(&ctx->textures[i], NULL);
- ctx->pipe->set_sampler_textures(ctx->pipe, ctx->nr_textures, ctx->textures);
+ ctx->pipe->set_fragment_sampler_textures(ctx->pipe, ctx->nr_textures, ctx->textures);
ctx->nr_textures_saved = 0;
}
+enum pipe_error
+cso_set_vertex_sampler_textures(struct cso_context *ctx,
+ uint count,
+ struct pipe_texture **textures)
+{
+ uint i;
+
+ ctx->nr_vertex_textures = count;
+
+ for (i = 0; i < count; i++) {
+ pipe_texture_reference(&ctx->vertex_textures[i], textures[i]);
+ }
+ for ( ; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+ pipe_texture_reference(&ctx->vertex_textures[i], NULL);
+ }
+
+ ctx->pipe->set_vertex_sampler_textures(ctx->pipe, count, textures);
+
+ return PIPE_OK;
+}
+
+void
+cso_save_vertex_sampler_textures(struct cso_context *ctx)
+{
+ uint i;
+
+ ctx->nr_vertex_textures_saved = ctx->nr_vertex_textures;
+ for (i = 0; i < ctx->nr_vertex_textures; i++) {
+ assert(!ctx->vertex_textures_saved[i]);
+ pipe_texture_reference(&ctx->vertex_textures_saved[i], ctx->vertex_textures[i]);
+ }
+}
+
+void
+cso_restore_vertex_sampler_textures(struct cso_context *ctx)
+{
+ uint i;
+
+ ctx->nr_vertex_textures = ctx->nr_vertex_textures_saved;
+
+ for (i = 0; i < ctx->nr_vertex_textures; i++) {
+ pipe_texture_reference(&ctx->vertex_textures[i], NULL);
+ ctx->vertex_textures[i] = ctx->vertex_textures_saved[i];
+ ctx->vertex_textures_saved[i] = NULL;
+ }
+ for ( ; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+ pipe_texture_reference(&ctx->vertex_textures[i], NULL);
+ }
+
+ ctx->pipe->set_vertex_sampler_textures(ctx->pipe,
+ ctx->nr_vertex_textures,
+ ctx->vertex_textures);
+
+ ctx->nr_vertex_textures_saved = 0;
+}
+
+
+
enum pipe_error cso_set_depth_stencil_alpha(struct cso_context *ctx,
const struct pipe_depth_stencil_alpha_state *templ)
{
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h
index 69630e98ba..e5b92177cf 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -84,6 +84,20 @@ enum pipe_error cso_single_sampler( struct cso_context *cso,
void cso_single_sampler_done( struct cso_context *cso );
+void
+cso_save_vertex_samplers(struct cso_context *cso);
+
+void
+cso_restore_vertex_samplers(struct cso_context *cso);
+
+enum pipe_error
+cso_single_vertex_sampler(struct cso_context *cso,
+ unsigned nr,
+ const struct pipe_sampler_state *states);
+
+void
+cso_single_vertex_sampler_done(struct cso_context *cso);
+
enum pipe_error cso_set_sampler_textures( struct cso_context *cso,
@@ -94,6 +108,17 @@ void cso_restore_sampler_textures( struct cso_context *cso );
+enum pipe_error
+cso_set_vertex_sampler_textures(struct cso_context *cso,
+ uint count,
+ struct pipe_texture **textures);
+void
+cso_save_vertex_sampler_textures(struct cso_context *cso);
+void
+cso_restore_vertex_sampler_textures(struct cso_context *cso);
+
+
+
/* These aren't really sensible -- most of the time the api provides
* object semantics for shaders anyway, and the cases where it doesn't
* (eg mesa's internall-generated texenv programs), it will be up to
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index 9f956715a2..ca04223e3d 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -398,9 +398,9 @@ aaline_create_texture(struct aaline_stage *aaline)
texTemp.target = PIPE_TEXTURE_2D;
texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */
texTemp.last_level = MAX_TEXTURE_LEVEL;
- texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL;
- texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL;
- texTemp.depth[0] = 1;
+ texTemp.width0 = 1 << MAX_TEXTURE_LEVEL;
+ texTemp.height0 = 1 << MAX_TEXTURE_LEVEL;
+ texTemp.depth0 = 1;
pf_get_block(texTemp.format, &texTemp.block);
aaline->texture = screen->texture_create(screen, &texTemp);
@@ -413,11 +413,11 @@ aaline_create_texture(struct aaline_stage *aaline)
*/
for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) {
struct pipe_transfer *transfer;
- const uint size = aaline->texture->width[level];
+ const uint size = u_minify(aaline->texture->width0, level);
ubyte *data;
uint i, j;
- assert(aaline->texture->width[level] == aaline->texture->height[level]);
+ assert(aaline->texture->width0 == aaline->texture->height0);
/* This texture is new, no need to flush.
*/
@@ -896,16 +896,16 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe)
aaline->driver_bind_fs_state = pipe->bind_fs_state;
aaline->driver_delete_fs_state = pipe->delete_fs_state;
- aaline->driver_bind_sampler_states = pipe->bind_sampler_states;
- aaline->driver_set_sampler_textures = pipe->set_sampler_textures;
+ aaline->driver_bind_sampler_states = pipe->bind_fragment_sampler_states;
+ aaline->driver_set_sampler_textures = pipe->set_fragment_sampler_textures;
/* override the driver's functions */
pipe->create_fs_state = aaline_create_fs_state;
pipe->bind_fs_state = aaline_bind_fs_state;
pipe->delete_fs_state = aaline_delete_fs_state;
- pipe->bind_sampler_states = aaline_bind_sampler_states;
- pipe->set_sampler_textures = aaline_set_sampler_textures;
+ pipe->bind_fragment_sampler_states = aaline_bind_sampler_states;
+ pipe->set_fragment_sampler_textures = aaline_set_sampler_textures;
/* Install once everything is known to be OK:
*/
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index 283502cdf3..2fc0b4de55 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -427,9 +427,9 @@ pstip_create_texture(struct pstip_stage *pstip)
texTemp.target = PIPE_TEXTURE_2D;
texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */
texTemp.last_level = 0;
- texTemp.width[0] = 32;
- texTemp.height[0] = 32;
- texTemp.depth[0] = 1;
+ texTemp.width0 = 32;
+ texTemp.height0 = 32;
+ texTemp.depth0 = 1;
pf_get_block(texTemp.format, &texTemp.block);
pstip->texture = screen->texture_create(screen, &texTemp);
@@ -754,8 +754,8 @@ draw_install_pstipple_stage(struct draw_context *draw,
pstip->driver_bind_fs_state = pipe->bind_fs_state;
pstip->driver_delete_fs_state = pipe->delete_fs_state;
- pstip->driver_bind_sampler_states = pipe->bind_sampler_states;
- pstip->driver_set_sampler_textures = pipe->set_sampler_textures;
+ pstip->driver_bind_sampler_states = pipe->bind_fragment_sampler_states;
+ pstip->driver_set_sampler_textures = pipe->set_fragment_sampler_textures;
pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple;
/* override the driver's functions */
@@ -763,8 +763,8 @@ draw_install_pstipple_stage(struct draw_context *draw,
pipe->bind_fs_state = pstip_bind_fs_state;
pipe->delete_fs_state = pstip_delete_fs_state;
- pipe->bind_sampler_states = pstip_bind_sampler_states;
- pipe->set_sampler_textures = pstip_set_sampler_textures;
+ pipe->bind_fragment_sampler_states = pstip_bind_sampler_states;
+ pipe->set_fragment_sampler_textures = pstip_set_sampler_textures;
pipe->set_polygon_stipple = pstip_set_polygon_stipple;
return TRUE;
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
index d3f179ced1..757c487454 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c
@@ -346,7 +346,8 @@ vcache_check_run( struct draw_pt_front_end *frontend,
vcache->fetch_max,
draw_count);
- if (max_index == 0xffffffff ||
+ if (max_index >= DRAW_PIPE_MAX_VERTICES ||
+ fetch_count >= UNDEFINED_VERTEX_ID ||
fetch_count > draw_count) {
if (0) debug_printf("fail\n");
goto fail;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
index 2ef4293d4d..2f973684f6 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007-2009 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -18,7 +18,7 @@
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -80,11 +80,27 @@ struct fenced_buffer_list
*/
struct fenced_buffer
{
+ /*
+ * Immutable members.
+ */
+
struct pb_buffer base;
-
struct pb_buffer *buffer;
+ struct fenced_buffer_list *list;
+
+ /**
+ * Protected by fenced_buffer_list::mutex
+ */
+ struct list_head head;
- /* FIXME: protect access with mutex */
+ /**
+ * Following members are mutable and protected by this mutex.
+ *
+ * You may lock this mutex alone, or lock it with fenced_buffer_list::mutex
+ * held, but in order to prevent deadlocks you must never lock
+ * fenced_buffer_list::mutex with this mutex held.
+ */
+ pipe_mutex mutex;
/**
* A bitmask of PIPE_BUFFER_USAGE_CPU/GPU_READ/WRITE describing the current
@@ -96,9 +112,6 @@ struct fenced_buffer
struct pb_validate *vl;
unsigned validation_flags;
struct pipe_fence_handle *fence;
-
- struct list_head head;
- struct fenced_buffer_list *list;
};
@@ -110,15 +123,24 @@ fenced_buffer(struct pb_buffer *buf)
}
+/**
+ * Add the buffer to the fenced list.
+ *
+ * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this
+ * order before calling this function.
+ *
+ * Reference count should be incremented before calling this function.
+ */
static INLINE void
-_fenced_buffer_add(struct fenced_buffer *fenced_buf)
+fenced_buffer_add_locked(struct fenced_buffer_list *fenced_list,
+ struct fenced_buffer *fenced_buf)
{
- struct fenced_buffer_list *fenced_list = fenced_buf->list;
-
assert(pipe_is_referenced(&fenced_buf->base.base.reference));
assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE);
assert(fenced_buf->fence);
+ /* TODO: Move the reference count increment here */
+
#ifdef DEBUG
LIST_DEL(&fenced_buf->head);
assert(fenced_list->numUnfenced);
@@ -130,32 +152,16 @@ _fenced_buffer_add(struct fenced_buffer *fenced_buf)
/**
- * Actually destroy the buffer.
+ * Remove the buffer from the fenced list.
+ *
+ * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this
+ * order before calling this function.
+ *
+ * Reference count should be decremented after calling this function.
*/
static INLINE void
-_fenced_buffer_destroy(struct fenced_buffer *fenced_buf)
-{
- struct fenced_buffer_list *fenced_list = fenced_buf->list;
-
- assert(!pipe_is_referenced(&fenced_buf->base.base.reference));
- assert(!fenced_buf->fence);
-#ifdef DEBUG
- assert(fenced_buf->head.prev);
- assert(fenced_buf->head.next);
- LIST_DEL(&fenced_buf->head);
- assert(fenced_list->numUnfenced);
- --fenced_list->numUnfenced;
-#else
- (void)fenced_list;
-#endif
- pb_reference(&fenced_buf->buffer, NULL);
- FREE(fenced_buf);
-}
-
-
-static INLINE void
-_fenced_buffer_remove(struct fenced_buffer_list *fenced_list,
- struct fenced_buffer *fenced_buf)
+fenced_buffer_remove_locked(struct fenced_buffer_list *fenced_list,
+ struct fenced_buffer *fenced_buf)
{
struct pb_fence_ops *ops = fenced_list->ops;
@@ -177,37 +183,53 @@ _fenced_buffer_remove(struct fenced_buffer_list *fenced_list,
++fenced_list->numUnfenced;
#endif
- /**
- * FIXME!!!
- */
-
- if(!pipe_is_referenced(&fenced_buf->base.base.reference))
- _fenced_buffer_destroy(fenced_buf);
+ /* TODO: Move the reference count decrement and destruction here */
}
+/**
+ * Wait for the fence to expire, and remove it from the fenced list.
+ *
+ * fenced_buffer::mutex must be held. fenced_buffer_list::mutex must not be
+ * held -- it will
+ */
static INLINE enum pipe_error
-_fenced_buffer_finish(struct fenced_buffer *fenced_buf)
+fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list,
+ struct fenced_buffer *fenced_buf)
{
- struct fenced_buffer_list *fenced_list = fenced_buf->list;
struct pb_fence_ops *ops = fenced_list->ops;
+ enum pipe_error ret = PIPE_ERROR;
#if 0
debug_warning("waiting for GPU");
#endif
+ assert(pipe_is_referenced(&fenced_buf->base.base.reference));
assert(fenced_buf->fence);
+
+ /* Acquire the global lock */
+ pipe_mutex_unlock(fenced_buf->mutex);
+ pipe_mutex_lock(fenced_list->mutex);
+ pipe_mutex_lock(fenced_buf->mutex);
+
if(fenced_buf->fence) {
- if(ops->fence_finish(ops, fenced_buf->fence, 0) != 0) {
- return PIPE_ERROR;
+ if(ops->fence_finish(ops, fenced_buf->fence, 0) == 0) {
+ /* Remove from the fenced list */
+ /* TODO: remove consequents */
+ fenced_buffer_remove_locked(fenced_list, fenced_buf);
+
+ p_atomic_dec(&fenced_buf->base.base.reference.count);
+ assert(pipe_is_referenced(&fenced_buf->base.base.reference));
+
+ fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE;
+
+ ret = PIPE_OK;
}
- /* Remove from the fenced list */
- /* TODO: remove consequents */
- _fenced_buffer_remove(fenced_list, fenced_buf);
}
- fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE;
- return PIPE_OK;
+ pipe_mutex_unlock(fenced_list->mutex);
+
+ return ret;
}
@@ -215,7 +237,7 @@ _fenced_buffer_finish(struct fenced_buffer *fenced_buf)
* Free as many fenced buffers from the list head as possible.
*/
static void
-_fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
+fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list,
int wait)
{
struct pb_fence_ops *ops = fenced_list->ops;
@@ -228,21 +250,28 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
while(curr != &fenced_list->delayed) {
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
+ pipe_mutex_lock(fenced_buf->mutex);
+
if(fenced_buf->fence != prev_fence) {
int signaled;
if (wait)
signaled = ops->fence_finish(ops, fenced_buf->fence, 0);
else
signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
- if (signaled != 0)
+ if (signaled != 0) {
+ pipe_mutex_unlock(fenced_buf->mutex);
break;
+ }
prev_fence = fenced_buf->fence;
}
else {
assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0);
}
- _fenced_buffer_remove(fenced_list, fenced_buf);
+ fenced_buffer_remove_locked(fenced_list, fenced_buf);
+ pipe_mutex_unlock(fenced_buf->mutex);
+
+ pb_reference((struct pb_buffer **)&fenced_buf, NULL);
curr = next;
next = curr->next;
@@ -256,30 +285,25 @@ fenced_buffer_destroy(struct pb_buffer *buf)
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
struct fenced_buffer_list *fenced_list = fenced_buf->list;
- pipe_mutex_lock(fenced_list->mutex);
assert(!pipe_is_referenced(&fenced_buf->base.base.reference));
- if (fenced_buf->fence) {
- struct pb_fence_ops *ops = fenced_list->ops;
- if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) {
- struct list_head *curr, *prev;
- curr = &fenced_buf->head;
- prev = curr->prev;
- do {
- fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
- assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0);
- _fenced_buffer_remove(fenced_list, fenced_buf);
- curr = prev;
- prev = curr->prev;
- } while (curr != &fenced_list->delayed);
- }
- else {
- /* delay destruction */
- }
- }
- else {
- _fenced_buffer_destroy(fenced_buf);
- }
+ assert(!fenced_buf->fence);
+
+#ifdef DEBUG
+ pipe_mutex_lock(fenced_list->mutex);
+ assert(fenced_buf->head.prev);
+ assert(fenced_buf->head.next);
+ LIST_DEL(&fenced_buf->head);
+ assert(fenced_list->numUnfenced);
+ --fenced_list->numUnfenced;
pipe_mutex_unlock(fenced_list->mutex);
+#else
+ (void)fenced_list;
+#endif
+
+ pb_reference(&fenced_buf->buffer, NULL);
+
+ pipe_mutex_destroy(fenced_buf->mutex);
+ FREE(fenced_buf);
}
@@ -290,24 +314,23 @@ fenced_buffer_map(struct pb_buffer *buf,
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
struct fenced_buffer_list *fenced_list = fenced_buf->list;
struct pb_fence_ops *ops = fenced_list->ops;
- void *map;
+ void *map = NULL;
+
+ pipe_mutex_lock(fenced_buf->mutex);
assert(!(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE));
/* Serialize writes */
if((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_WRITE) ||
((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ) && (flags & PIPE_BUFFER_USAGE_CPU_WRITE))) {
- if(flags & PIPE_BUFFER_USAGE_DONTBLOCK) {
+ if((flags & PIPE_BUFFER_USAGE_DONTBLOCK) &&
+ ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) {
/* Don't wait for the GPU to finish writing */
- if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0)
- _fenced_buffer_remove(fenced_list, fenced_buf);
- else
- return NULL;
- }
- else {
- /* Wait for the GPU to finish writing */
- _fenced_buffer_finish(fenced_buf);
+ goto finish;
}
+
+ /* Wait for the GPU to finish writing */
+ fenced_buffer_finish_locked(fenced_list, fenced_buf);
}
#if 0
@@ -324,6 +347,9 @@ fenced_buffer_map(struct pb_buffer *buf,
fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE;
}
+finish:
+ pipe_mutex_unlock(fenced_buf->mutex);
+
return map;
}
@@ -332,6 +358,9 @@ static void
fenced_buffer_unmap(struct pb_buffer *buf)
{
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
+
+ pipe_mutex_lock(fenced_buf->mutex);
+
assert(fenced_buf->mapcount);
if(fenced_buf->mapcount) {
pb_unmap(fenced_buf->buffer);
@@ -339,6 +368,8 @@ fenced_buffer_unmap(struct pb_buffer *buf)
if(!fenced_buf->mapcount)
fenced_buf->flags &= ~PIPE_BUFFER_USAGE_CPU_READ_WRITE;
}
+
+ pipe_mutex_unlock(fenced_buf->mutex);
}
@@ -350,11 +381,14 @@ fenced_buffer_validate(struct pb_buffer *buf,
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
enum pipe_error ret;
+ pipe_mutex_lock(fenced_buf->mutex);
+
if(!vl) {
/* invalidate */
fenced_buf->vl = NULL;
fenced_buf->validation_flags = 0;
- return PIPE_OK;
+ ret = PIPE_OK;
+ goto finish;
}
assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE);
@@ -362,14 +396,17 @@ fenced_buffer_validate(struct pb_buffer *buf,
flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE;
/* Buffer cannot be validated in two different lists */
- if(fenced_buf->vl && fenced_buf->vl != vl)
- return PIPE_ERROR_RETRY;
+ if(fenced_buf->vl && fenced_buf->vl != vl) {
+ ret = PIPE_ERROR_RETRY;
+ goto finish;
+ }
#if 0
/* Do not validate if buffer is still mapped */
if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) {
/* TODO: wait for the thread that mapped the buffer to unmap it */
- return PIPE_ERROR_RETRY;
+ ret = PIPE_ERROR_RETRY;
+ goto finish;
}
/* Final sanity checking */
assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE));
@@ -379,17 +416,21 @@ fenced_buffer_validate(struct pb_buffer *buf,
if(fenced_buf->vl == vl &&
(fenced_buf->validation_flags & flags) == flags) {
/* Nothing to do -- buffer already validated */
- return PIPE_OK;
+ ret = PIPE_OK;
+ goto finish;
}
ret = pb_validate(fenced_buf->buffer, vl, flags);
if (ret != PIPE_OK)
- return ret;
+ goto finish;
fenced_buf->vl = vl;
fenced_buf->validation_flags |= flags;
- return PIPE_OK;
+finish:
+ pipe_mutex_unlock(fenced_buf->mutex);
+
+ return ret;
}
@@ -404,29 +445,36 @@ fenced_buffer_fence(struct pb_buffer *buf,
fenced_buf = fenced_buffer(buf);
fenced_list = fenced_buf->list;
ops = fenced_list->ops;
-
- if(fence == fenced_buf->fence) {
- /* Nothing to do */
- return;
- }
- assert(fenced_buf->vl);
- assert(fenced_buf->validation_flags);
-
pipe_mutex_lock(fenced_list->mutex);
- if (fenced_buf->fence)
- _fenced_buffer_remove(fenced_list, fenced_buf);
- if (fence) {
- ops->fence_reference(ops, &fenced_buf->fence, fence);
- fenced_buf->flags |= fenced_buf->validation_flags;
- _fenced_buffer_add(fenced_buf);
- }
- pipe_mutex_unlock(fenced_list->mutex);
+ pipe_mutex_lock(fenced_buf->mutex);
+
+ assert(pipe_is_referenced(&fenced_buf->base.base.reference));
+
+ if(fence != fenced_buf->fence) {
+ assert(fenced_buf->vl);
+ assert(fenced_buf->validation_flags);
+
+ if (fenced_buf->fence) {
+ fenced_buffer_remove_locked(fenced_list, fenced_buf);
+ p_atomic_dec(&fenced_buf->base.base.reference.count);
+ assert(pipe_is_referenced(&fenced_buf->base.base.reference));
+ }
+ if (fence) {
+ ops->fence_reference(ops, &fenced_buf->fence, fence);
+ fenced_buf->flags |= fenced_buf->validation_flags;
+ p_atomic_inc(&fenced_buf->base.base.reference.count);
+ fenced_buffer_add_locked(fenced_list, fenced_buf);
+ }
+
+ pb_fence(fenced_buf->buffer, fence);
- pb_fence(fenced_buf->buffer, fence);
+ fenced_buf->vl = NULL;
+ fenced_buf->validation_flags = 0;
+ }
- fenced_buf->vl = NULL;
- fenced_buf->validation_flags = 0;
+ pipe_mutex_unlock(fenced_buf->mutex);
+ pipe_mutex_unlock(fenced_list->mutex);
}
@@ -436,6 +484,7 @@ fenced_buffer_get_base_buffer(struct pb_buffer *buf,
pb_size *offset)
{
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
+ /* NOTE: accesses immutable members only -- mutex not necessary */
pb_get_base_buffer(fenced_buf->buffer, base_buf, offset);
}
@@ -475,6 +524,8 @@ fenced_buffer_create(struct fenced_buffer_list *fenced_list,
buf->buffer = buffer;
buf->list = fenced_list;
+ pipe_mutex_init(buf->mutex);
+
#ifdef DEBUG
pipe_mutex_lock(fenced_list->mutex);
LIST_ADDTAIL(&buf->head, &fenced_list->unfenced);
@@ -516,7 +567,7 @@ fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
int wait)
{
pipe_mutex_lock(fenced_list->mutex);
- _fenced_buffer_list_check_free(fenced_list, wait);
+ fenced_buffer_list_check_free_locked(fenced_list, wait);
pipe_mutex_unlock(fenced_list->mutex);
}
@@ -538,11 +589,13 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list)
next = curr->next;
while(curr != &fenced_list->unfenced) {
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
+ pipe_mutex_lock(fenced_buf->mutex);
assert(!fenced_buf->fence);
debug_printf("%10p %7u %7u\n",
(void *) fenced_buf,
fenced_buf->base.base.size,
p_atomic_read(&fenced_buf->base.base.reference.count));
+ pipe_mutex_unlock(fenced_buf->mutex);
curr = next;
next = curr->next;
}
@@ -552,6 +605,7 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list)
while(curr != &fenced_list->delayed) {
int signaled;
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
+ pipe_mutex_lock(fenced_buf->mutex);
signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
debug_printf("%10p %7u %7u %10p %s\n",
(void *) fenced_buf,
@@ -559,6 +613,7 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list)
p_atomic_read(&fenced_buf->base.base.reference.count),
(void *) fenced_buf->fence,
signaled == 0 ? "y" : "n");
+ pipe_mutex_unlock(fenced_buf->mutex);
curr = next;
next = curr->next;
}
@@ -579,8 +634,8 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list)
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
sched_yield();
#endif
- _fenced_buffer_list_check_free(fenced_list, 1);
pipe_mutex_lock(fenced_list->mutex);
+ fenced_buffer_list_check_free_locked(fenced_list, 1);
}
#ifdef DEBUG
@@ -588,6 +643,7 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list)
#endif
pipe_mutex_unlock(fenced_list->mutex);
+ pipe_mutex_destroy(fenced_list->mutex);
fenced_list->ops->destroy(fenced_list->ops);
diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
index eb492076b7..080fd4c731 100644
--- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
+++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
@@ -1129,3 +1129,35 @@ TGSI Instruction Specification
target Label of target instruction.
+
+3 Other tokens
+===============
+
+
+3.1 Declaration Semantic
+-------------------------
+
+
+ Follows Declaration token if Semantic bit is set.
+
+ Since its purpose is to link a shader with other stages of the pipeline,
+ it is valid to follow only those Declaration tokens that declare a register
+ either in INPUT or OUTPUT file.
+
+ SemanticName field contains the semantic name of the register being declared.
+ There is no default value.
+
+ SemanticIndex is an optional subscript that can be used to distinguish
+ different register declarations with the same semantic name. The default value
+ is 0.
+
+ The meanings of the individual semantic names are explained in the following
+ sections.
+
+
+3.1.1 FACE
+
+ Valid only in a fragment shader INPUT declaration.
+
+ FACE.x is negative when the primitive is back facing. FACE.x is positive
+ when the primitive is front facing.
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 4fa10e2f7e..9791e58db3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -419,6 +419,7 @@ tgsi_default_instruction( void )
instruction.NrTokens = 1;
instruction.Opcode = TGSI_OPCODE_MOV;
instruction.Saturate = TGSI_SAT_NONE;
+ instruction.Predicate = 0;
instruction.NumDstRegs = 1;
instruction.NumSrcRegs = 1;
instruction.Padding = 0;
@@ -428,12 +429,12 @@ tgsi_default_instruction( void )
}
struct tgsi_instruction
-tgsi_build_instruction(
- unsigned opcode,
- unsigned saturate,
- unsigned num_dst_regs,
- unsigned num_src_regs,
- struct tgsi_header *header )
+tgsi_build_instruction(unsigned opcode,
+ unsigned saturate,
+ unsigned predicate,
+ unsigned num_dst_regs,
+ unsigned num_src_regs,
+ struct tgsi_header *header)
{
struct tgsi_instruction instruction;
@@ -445,6 +446,7 @@ tgsi_build_instruction(
instruction = tgsi_default_instruction();
instruction.Opcode = opcode;
instruction.Saturate = saturate;
+ instruction.Predicate = predicate;
instruction.NumDstRegs = num_dst_regs;
instruction.NumSrcRegs = num_src_regs;
@@ -472,9 +474,9 @@ tgsi_default_full_instruction( void )
unsigned i;
full_instruction.Instruction = tgsi_default_instruction();
+ full_instruction.InstructionPredicate = tgsi_default_instruction_predicate();
full_instruction.InstructionExtLabel = tgsi_default_instruction_ext_label();
full_instruction.InstructionExtTexture = tgsi_default_instruction_ext_texture();
- full_instruction.InstructionExtPredicate = tgsi_default_instruction_ext_predicate();
for( i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) {
full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register();
}
@@ -504,14 +506,34 @@ tgsi_build_full_instruction(
instruction = (struct tgsi_instruction *) &tokens[size];
size++;
- *instruction = tgsi_build_instruction(
- full_inst->Instruction.Opcode,
- full_inst->Instruction.Saturate,
- full_inst->Instruction.NumDstRegs,
- full_inst->Instruction.NumSrcRegs,
- header );
+ *instruction = tgsi_build_instruction(full_inst->Instruction.Opcode,
+ full_inst->Instruction.Saturate,
+ full_inst->Instruction.Predicate,
+ full_inst->Instruction.NumDstRegs,
+ full_inst->Instruction.NumSrcRegs,
+ header);
prev_token = (struct tgsi_token *) instruction;
+ if (full_inst->Instruction.Predicate) {
+ struct tgsi_instruction_predicate *instruction_predicate;
+
+ if (maxsize <= size) {
+ return 0;
+ }
+ instruction_predicate = (struct tgsi_instruction_predicate *)&tokens[size];
+ size++;
+
+ *instruction_predicate =
+ tgsi_build_instruction_predicate(full_inst->InstructionPredicate.Index,
+ full_inst->InstructionPredicate.Negate,
+ full_inst->InstructionPredicate.SwizzleX,
+ full_inst->InstructionPredicate.SwizzleY,
+ full_inst->InstructionPredicate.SwizzleZ,
+ full_inst->InstructionPredicate.SwizzleW,
+ instruction,
+ header);
+ }
+
if( tgsi_compare_instruction_ext_label(
full_inst->InstructionExtLabel,
tgsi_default_instruction_ext_label() ) ) {
@@ -550,29 +572,6 @@ tgsi_build_full_instruction(
prev_token = (struct tgsi_token *) instruction_ext_texture;
}
- if (tgsi_compare_instruction_ext_predicate(full_inst->InstructionExtPredicate,
- tgsi_default_instruction_ext_predicate())) {
- struct tgsi_instruction_ext_predicate *instruction_ext_predicate;
-
- if (maxsize <= size) {
- return 0;
- }
- instruction_ext_predicate = (struct tgsi_instruction_ext_predicate *)&tokens[size];
- size++;
-
- *instruction_ext_predicate =
- tgsi_build_instruction_ext_predicate(full_inst->InstructionExtPredicate.SrcIndex,
- full_inst->InstructionExtPredicate.Negate,
- full_inst->InstructionExtPredicate.SwizzleX,
- full_inst->InstructionExtPredicate.SwizzleY,
- full_inst->InstructionExtPredicate.SwizzleZ,
- full_inst->InstructionExtPredicate.SwizzleW,
- prev_token,
- instruction,
- header);
- prev_token = (struct tgsi_token *)instruction_ext_predicate;
- }
-
for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) {
const struct tgsi_full_dst_register *reg = &full_inst->FullDstRegisters[i];
struct tgsi_dst_register *dst_register;
@@ -746,6 +745,47 @@ tgsi_build_full_instruction(
return size;
}
+struct tgsi_instruction_predicate
+tgsi_default_instruction_predicate(void)
+{
+ struct tgsi_instruction_predicate instruction_predicate;
+
+ instruction_predicate.SwizzleX = TGSI_SWIZZLE_X;
+ instruction_predicate.SwizzleY = TGSI_SWIZZLE_Y;
+ instruction_predicate.SwizzleZ = TGSI_SWIZZLE_Z;
+ instruction_predicate.SwizzleW = TGSI_SWIZZLE_W;
+ instruction_predicate.Negate = 0;
+ instruction_predicate.Index = 0;
+ instruction_predicate.Padding = 0;
+
+ return instruction_predicate;
+}
+
+struct tgsi_instruction_predicate
+tgsi_build_instruction_predicate(int index,
+ unsigned negate,
+ unsigned swizzleX,
+ unsigned swizzleY,
+ unsigned swizzleZ,
+ unsigned swizzleW,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header)
+{
+ struct tgsi_instruction_predicate instruction_predicate;
+
+ instruction_predicate = tgsi_default_instruction_predicate();
+ instruction_predicate.SwizzleX = swizzleX;
+ instruction_predicate.SwizzleY = swizzleY;
+ instruction_predicate.SwizzleZ = swizzleZ;
+ instruction_predicate.SwizzleW = swizzleW;
+ instruction_predicate.Negate = negate;
+ instruction_predicate.Index = index;
+
+ instruction_grow(instruction, header);
+
+ return instruction_predicate;
+}
+
/** test for inequality of 32-bit values pointed to by a and b */
static INLINE boolean
compare32(const void *a, const void *b)
@@ -835,60 +875,6 @@ tgsi_build_instruction_ext_texture(
return instruction_ext_texture;
}
-struct tgsi_instruction_ext_predicate
-tgsi_default_instruction_ext_predicate(void)
-{
- struct tgsi_instruction_ext_predicate instruction_ext_predicate;
-
- instruction_ext_predicate.Type = TGSI_INSTRUCTION_EXT_TYPE_PREDICATE;
- instruction_ext_predicate.SwizzleX = TGSI_SWIZZLE_X;
- instruction_ext_predicate.SwizzleY = TGSI_SWIZZLE_Y;
- instruction_ext_predicate.SwizzleZ = TGSI_SWIZZLE_Z;
- instruction_ext_predicate.SwizzleW = TGSI_SWIZZLE_W;
- instruction_ext_predicate.Negate = 0;
- instruction_ext_predicate.SrcIndex = 0;
- instruction_ext_predicate.Padding = 0;
- instruction_ext_predicate.Extended = 0;
-
- return instruction_ext_predicate;
-}
-
-unsigned
-tgsi_compare_instruction_ext_predicate(struct tgsi_instruction_ext_predicate a,
- struct tgsi_instruction_ext_predicate b)
-{
- a.Padding = b.Padding = 0;
- a.Extended = b.Extended = 0;
- return compare32(&a, &b);
-}
-
-struct tgsi_instruction_ext_predicate
-tgsi_build_instruction_ext_predicate(unsigned index,
- unsigned negate,
- unsigned swizzleX,
- unsigned swizzleY,
- unsigned swizzleZ,
- unsigned swizzleW,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header)
-{
- struct tgsi_instruction_ext_predicate instruction_ext_predicate;
-
- instruction_ext_predicate = tgsi_default_instruction_ext_predicate();
- instruction_ext_predicate.SwizzleX = swizzleX;
- instruction_ext_predicate.SwizzleY = swizzleY;
- instruction_ext_predicate.SwizzleZ = swizzleZ;
- instruction_ext_predicate.SwizzleW = swizzleW;
- instruction_ext_predicate.Negate = negate;
- instruction_ext_predicate.SrcIndex = index;
-
- prev_token->Extended = 1;
- instruction_grow(instruction, header);
-
- return instruction_ext_predicate;
-}
-
struct tgsi_src_register
tgsi_default_src_register( void )
{
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h
index 669712eb8f..0fe5f229d3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.h
@@ -143,6 +143,7 @@ struct tgsi_instruction
tgsi_build_instruction(
unsigned opcode,
unsigned saturate,
+ unsigned predicate,
unsigned num_dst_regs,
unsigned num_src_regs,
struct tgsi_header *header );
@@ -157,6 +158,19 @@ tgsi_build_full_instruction(
struct tgsi_header *header,
unsigned maxsize );
+struct tgsi_instruction_predicate
+tgsi_default_instruction_predicate(void);
+
+struct tgsi_instruction_predicate
+tgsi_build_instruction_predicate(int index,
+ unsigned negate,
+ unsigned swizzleX,
+ unsigned swizzleY,
+ unsigned swizzleZ,
+ unsigned swizzleW,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header);
+
struct tgsi_instruction_ext_label
tgsi_default_instruction_ext_label( void );
@@ -187,24 +201,6 @@ tgsi_build_instruction_ext_texture(
struct tgsi_instruction *instruction,
struct tgsi_header *header );
-struct tgsi_instruction_ext_predicate
-tgsi_default_instruction_ext_predicate(void);
-
-unsigned
-tgsi_compare_instruction_ext_predicate(struct tgsi_instruction_ext_predicate a,
- struct tgsi_instruction_ext_predicate b);
-
-struct tgsi_instruction_ext_predicate
-tgsi_build_instruction_ext_predicate(unsigned index,
- unsigned negate,
- unsigned swizzleX,
- unsigned swizzleY,
- unsigned swizzleZ,
- unsigned swizzleW,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header);
-
struct tgsi_src_register
tgsi_default_src_register( void );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index b7569e74d4..e22a1643c8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -62,9 +62,6 @@
#define FAST_MATH 1
-/** for tgsi_full_instruction::Flags */
-#define SOA_DEPENDENCY_FLAG 0x1
-
#define TILE_TOP_LEFT 0
#define TILE_TOP_RIGHT 1
#define TILE_BOTTOM_LEFT 2
@@ -332,20 +329,6 @@ tgsi_exec_machine_bind_shader(
maxInstructions += 10;
}
- if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) {
- uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
- parse.FullToken.FullInstruction.Flags = SOA_DEPENDENCY_FLAG;
- /* XXX we only handle SOA dependencies properly for MOV/SWZ
- * at this time!
- */
- if (opcode != TGSI_OPCODE_MOV) {
- debug_printf("Warning: SOA dependency in instruction"
- " is not handled:\n");
- tgsi_dump_instruction(&parse.FullToken.FullInstruction,
- numInstructions);
- }
- }
-
memcpy(instructions + numInstructions,
&parse.FullToken.FullInstruction,
sizeof(instructions[0]));
@@ -386,6 +369,7 @@ tgsi_exec_machine_create( void )
memset(mach, 0, sizeof(*mach));
mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
+ mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
/* Setup constants. */
for( i = 0; i < 4; i++ ) {
@@ -517,7 +501,7 @@ micro_ddy(
dst->f[0] =
dst->f[1] =
dst->f[2] =
- dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
+ dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
}
static void
@@ -595,6 +579,24 @@ micro_exp2(
dst->f[2] = util_fast_exp2( src->f[2] );
dst->f[3] = util_fast_exp2( src->f[3] );
#else
+
+#if DEBUG
+ /* Inf is okay for this instruction, so clamp it to silence assertions. */
+ uint i;
+ union tgsi_exec_channel clamped;
+
+ for (i = 0; i < 4; i++) {
+ if (src->f[i] > 127.99999f) {
+ clamped.f[i] = 127.99999f;
+ } else if (src->f[i] < -126.99999f) {
+ clamped.f[i] = -126.99999f;
+ } else {
+ clamped.f[i] = src->f[i];
+ }
+ }
+ src = &clamped;
+#endif
+
dst->f[0] = powf( 2.0f, src->f[0] );
dst->f[1] = powf( 2.0f, src->f[1] );
dst->f[2] = powf( 2.0f, src->f[2] );
@@ -1193,10 +1195,10 @@ fetch_src_file_channel(
assert(index->i[1] < TGSI_EXEC_NUM_PREDS);
assert(index->i[2] < TGSI_EXEC_NUM_PREDS);
assert(index->i[3] < TGSI_EXEC_NUM_PREDS);
- chan->u[0] = mach->Addrs[0].xyzw[swizzle].u[0];
- chan->u[1] = mach->Addrs[0].xyzw[swizzle].u[1];
- chan->u[2] = mach->Addrs[0].xyzw[swizzle].u[2];
- chan->u[3] = mach->Addrs[0].xyzw[swizzle].u[3];
+ chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0];
+ chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1];
+ chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2];
+ chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3];
break;
case TGSI_FILE_OUTPUT:
@@ -1478,10 +1480,17 @@ store_dest(
dst = &mach->Addrs[index].xyzw[chan_index];
break;
+ case TGSI_FILE_LOOP:
+ assert(reg->DstRegister.Index == 0);
+ assert(mach->LoopCounterStackTop > 0);
+ assert(chan_index == CHAN_X);
+ dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index];
+ break;
+
case TGSI_FILE_PREDICATE:
index = reg->DstRegister.Index;
assert(index < TGSI_EXEC_NUM_PREDS);
- dst = &mach->Addrs[index].xyzw[chan_index];
+ dst = &mach->Predicates[index].xyzw[chan_index];
break;
default:
@@ -1489,6 +1498,47 @@ store_dest(
return;
}
+ if (inst->Instruction.Predicate) {
+ uint swizzle;
+ union tgsi_exec_channel *pred;
+
+ switch (chan_index) {
+ case CHAN_X:
+ swizzle = inst->InstructionPredicate.SwizzleX;
+ break;
+ case CHAN_Y:
+ swizzle = inst->InstructionPredicate.SwizzleY;
+ break;
+ case CHAN_Z:
+ swizzle = inst->InstructionPredicate.SwizzleZ;
+ break;
+ case CHAN_W:
+ swizzle = inst->InstructionPredicate.SwizzleW;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ assert(inst->InstructionPredicate.Index == 0);
+
+ pred = &mach->Predicates[inst->InstructionPredicate.Index].xyzw[swizzle];
+
+ if (inst->InstructionPredicate.Negate) {
+ for (i = 0; i < QUAD_SIZE; i++) {
+ if (pred->u[i]) {
+ execmask &= ~(1 << i);
+ }
+ }
+ } else {
+ for (i = 0; i < QUAD_SIZE; i++) {
+ if (!pred->u[i]) {
+ execmask &= ~(1 << i);
+ }
+ }
+ }
+ }
+
switch (inst->Instruction.Saturate) {
case TGSI_SAT_NONE:
for (i = 0; i < QUAD_SIZE; i++)
@@ -1715,6 +1765,64 @@ exec_tex(struct tgsi_exec_machine *mach,
}
}
+static void
+exec_txd(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ const uint unit = inst->FullSrcRegisters[3].SrcRegister.Index;
+ union tgsi_exec_channel r[4];
+ uint chan_index;
+
+ /*
+ * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
+ */
+
+ switch (inst->InstructionExtTexture.Texture) {
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_SHADOW1D:
+
+ FETCH(&r[0], 0, CHAN_X);
+
+ fetch_texel(mach->Samplers[unit],
+ &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */
+ &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
+ break;
+
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 0, CHAN_Y);
+ FETCH(&r[2], 0, CHAN_Z);
+
+ fetch_texel(mach->Samplers[unit],
+ &r[0], &r[1], &r[2], 0.0f, /* inputs */
+ &r[0], &r[1], &r[2], &r[3]); /* outputs */
+ break;
+
+ case TGSI_TEXTURE_3D:
+ case TGSI_TEXTURE_CUBE:
+
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 0, CHAN_Y);
+ FETCH(&r[2], 0, CHAN_Z);
+
+ fetch_texel(mach->Samplers[unit],
+ &r[0], &r[1], &r[2], 0.0f,
+ &r[0], &r[1], &r[2], &r[3]);
+ break;
+
+ default:
+ assert(0);
+ }
+
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&r[chan_index], 0, chan_index);
+ }
+}
+
/**
* Evaluate a constant-valued coefficient at the position of the
@@ -1784,53 +1892,58 @@ typedef void (* eval_coef_func)(
unsigned chan );
static void
-exec_declaration(
- struct tgsi_exec_machine *mach,
- const struct tgsi_full_declaration *decl )
+exec_declaration(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_declaration *decl)
{
- if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
- if( decl->Declaration.File == TGSI_FILE_INPUT ) {
- unsigned first, last, mask;
- eval_coef_func eval;
+ if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
+ if (decl->Declaration.File == TGSI_FILE_INPUT) {
+ uint first, last, mask;
first = decl->DeclarationRange.First;
last = decl->DeclarationRange.Last;
mask = decl->Declaration.UsageMask;
- switch( decl->Declaration.Interpolate ) {
- case TGSI_INTERPOLATE_CONSTANT:
- eval = eval_constant_coef;
- break;
-
- case TGSI_INTERPOLATE_LINEAR:
- eval = eval_linear_coef;
- break;
-
- case TGSI_INTERPOLATE_PERSPECTIVE:
- eval = eval_perspective_coef;
- break;
+ if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
+ assert(decl->Semantic.SemanticIndex == 0);
+ assert(first == last);
+ assert(mask = TGSI_WRITEMASK_XYZW);
- default:
- assert( 0 );
- return;
- }
+ mach->Inputs[first] = mach->QuadPos;
+ } else if (decl->Semantic.SemanticName == TGSI_SEMANTIC_FACE) {
+ uint i;
- if( mask == TGSI_WRITEMASK_XYZW ) {
- unsigned i, j;
+ assert(decl->Semantic.SemanticIndex == 0);
+ assert(first == last);
- for( i = first; i <= last; i++ ) {
- for( j = 0; j < NUM_CHANNELS; j++ ) {
- eval( mach, i, j );
- }
+ for (i = 0; i < QUAD_SIZE; i++) {
+ mach->Inputs[first].xyzw[0].f[i] = mach->Face;
+ }
+ } else {
+ eval_coef_func eval;
+ uint i, j;
+
+ switch (decl->Declaration.Interpolate) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ eval = eval_constant_coef;
+ break;
+
+ case TGSI_INTERPOLATE_LINEAR:
+ eval = eval_linear_coef;
+ break;
+
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ eval = eval_perspective_coef;
+ break;
+
+ default:
+ assert(0);
+ return;
}
- }
- else {
- unsigned i, j;
- for( j = 0; j < NUM_CHANNELS; j++ ) {
- if( mask & (1 << j) ) {
- for( i = first; i <= last; i++ ) {
- eval( mach, i, j );
+ for (j = 0; j < NUM_CHANNELS; j++) {
+ if (mask & (1 << j)) {
+ for (i = first; i <= last; i++) {
+ eval(mach, i, j);
}
}
}
@@ -1847,6 +1960,7 @@ exec_instruction(
{
uint chan_index;
union tgsi_exec_channel r[10];
+ union tgsi_exec_channel d[8];
(*pc)++;
@@ -1855,42 +1969,27 @@ exec_instruction(
case TGSI_OPCODE_FLR:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_flr( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_flr(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
case TGSI_OPCODE_MOV:
- if (inst->Flags & SOA_DEPENDENCY_FLAG) {
- /* Do all fetches into temp regs, then do all stores to avoid
- * intermediate/accidental clobbering. This could be done all the
- * time for MOV but for other instructions we'll need more temps...
- */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[chan_index], 0, chan_index );
- }
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[chan_index], 0, chan_index );
- }
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH(&d[chan_index], 0, chan_index);
}
- else {
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- STORE( &r[0], 0, chan_index );
- }
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
case TGSI_OPCODE_LIT:
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
- }
-
if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
FETCH( &r[0], 0, CHAN_X );
if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, CHAN_Y );
+ micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
}
if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
@@ -1901,11 +2000,19 @@ exec_instruction(
micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
micro_pow( &r[1], &r[1], &r[2] );
- micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, CHAN_Z );
+ micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
}
- }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ STORE(&d[CHAN_Y], 0, CHAN_Y);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ STORE(&d[CHAN_Z], 0, CHAN_Z);
+ }
+ }
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
+ }
if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
}
@@ -1973,14 +2080,13 @@ exec_instruction(
break;
case TGSI_OPCODE_MUL:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
- {
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
-
- micro_mul( &r[0], &r[0], &r[1] );
-
- STORE(&r[0], 0, chan_index);
+ micro_mul(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -1988,8 +2094,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_add( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
+ micro_add(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2045,25 +2153,29 @@ exec_instruction(
break;
case TGSI_OPCODE_DST:
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
- }
-
if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
FETCH( &r[0], 0, CHAN_Y );
FETCH( &r[1], 1, CHAN_Y);
- micro_mul( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, CHAN_Y );
+ micro_mul(&d[CHAN_Y], &r[0], &r[1]);
}
-
if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( &r[0], 0, CHAN_Z );
- STORE( &r[0], 0, CHAN_Z );
+ FETCH(&d[CHAN_Z], 0, CHAN_Z);
}
-
if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- FETCH( &r[0], 1, CHAN_W );
- STORE( &r[0], 0, CHAN_W );
+ FETCH(&d[CHAN_W], 1, CHAN_W);
+ }
+
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ STORE(&d[CHAN_Y], 0, CHAN_Y);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ STORE(&d[CHAN_Z], 0, CHAN_Z);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
+ STORE(&d[CHAN_W], 0, CHAN_W);
}
break;
@@ -2073,9 +2185,10 @@ exec_instruction(
FETCH(&r[1], 1, chan_index);
/* XXX use micro_min()?? */
- micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
-
- STORE(&r[0], 0, chan_index);
+ micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2085,9 +2198,10 @@ exec_instruction(
FETCH(&r[1], 1, chan_index);
/* XXX use micro_max()?? */
- micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
-
- STORE(&r[0], 0, chan_index );
+ micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] );
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2096,8 +2210,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, chan_index );
+ micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2106,8 +2222,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, chan_index );
+ micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2118,8 +2236,10 @@ exec_instruction(
FETCH( &r[1], 1, chan_index );
micro_mul( &r[0], &r[0], &r[1] );
FETCH( &r[1], 2, chan_index );
- micro_add( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
+ micro_add(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2127,10 +2247,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
-
- micro_sub( &r[0], &r[0], &r[1] );
-
- STORE(&r[0], 0, chan_index);
+ micro_sub(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2139,12 +2259,12 @@ exec_instruction(
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
FETCH(&r[2], 2, chan_index);
-
micro_sub( &r[1], &r[1], &r[2] );
micro_mul( &r[0], &r[0], &r[1] );
- micro_add( &r[0], &r[0], &r[2] );
-
- STORE(&r[0], 0, chan_index);
+ micro_add(&d[chan_index], &r[0], &r[2]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2153,8 +2273,10 @@ exec_instruction(
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
FETCH(&r[2], 2, chan_index);
- micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
- STORE(&r[0], 0, chan_index);
+ micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2179,8 +2301,10 @@ exec_instruction(
case TGSI_OPCODE_FRC:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_frc( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_frc(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2190,8 +2314,10 @@ exec_instruction(
FETCH(&r[1], 1, chan_index);
micro_max(&r[0], &r[0], &r[1]);
FETCH(&r[1], 2, chan_index);
- micro_min(&r[0], &r[0], &r[1]);
- STORE(&r[0], 0, chan_index);
+ micro_min(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2199,19 +2325,17 @@ exec_instruction(
case TGSI_OPCODE_ARR:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_rnd( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_rnd(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
case TGSI_OPCODE_EX2:
FETCH(&r[0], 0, CHAN_X);
-#if FAST_MATH
micro_exp2( &r[0], &r[0] );
-#else
- micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
-#endif
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( &r[0], 0, chan_index );
@@ -2247,11 +2371,7 @@ exec_instruction(
FETCH(&r[4], 1, CHAN_Y);
micro_mul( &r[5], &r[3], &r[4] );
- micro_sub( &r[2], &r[2], &r[5] );
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &r[2], 0, CHAN_X );
- }
+ micro_sub(&d[CHAN_X], &r[2], &r[5]);
FETCH(&r[2], 1, CHAN_X);
@@ -2260,20 +2380,21 @@ exec_instruction(
FETCH(&r[5], 0, CHAN_X);
micro_mul( &r[1], &r[1], &r[5] );
- micro_sub( &r[3], &r[3], &r[1] );
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- STORE( &r[3], 0, CHAN_Y );
- }
+ micro_sub(&d[CHAN_Y], &r[3], &r[1]);
micro_mul( &r[5], &r[5], &r[4] );
micro_mul( &r[0], &r[0], &r[2] );
- micro_sub( &r[5], &r[5], &r[0] );
+ micro_sub(&d[CHAN_Z], &r[5], &r[0]);
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- STORE( &r[5], 0, CHAN_Z );
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ STORE(&d[CHAN_X], 0, CHAN_X);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ STORE(&d[CHAN_Y], 0, CHAN_Y);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ STORE(&d[CHAN_Z], 0, CHAN_Z);
}
-
if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
}
@@ -2282,11 +2403,11 @@ exec_instruction(
case TGSI_OPCODE_ABS:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
-
- micro_abs( &r[0], &r[0] );
-
- STORE(&r[0], 0, chan_index);
+ micro_abs(&d[chan_index], &r[0]);
}
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
+ }
break;
case TGSI_OPCODE_RCC:
@@ -2338,16 +2459,20 @@ exec_instruction(
case TGSI_OPCODE_DDX:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_ddx( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_ddx(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
case TGSI_OPCODE_DDY:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_ddy( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_ddy(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2428,10 +2553,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_eq( &r[0], &r[0], &r[1],
- &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
- &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, chan_index );
+ micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2445,8 +2570,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
- STORE( &r[0], 0, chan_index );
+ micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2462,8 +2589,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, chan_index );
+ micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2471,8 +2600,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
- STORE( &r[0], 0, chan_index );
+ micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2502,7 +2633,7 @@ exec_instruction(
/* src[1] = d[strq]/dx */
/* src[2] = d[strq]/dy */
/* src[3] = sampler unit */
- assert (0);
+ exec_txd(mach, inst);
break;
case TGSI_OPCODE_TXL:
@@ -2546,13 +2677,8 @@ exec_instruction(
micro_mul(&r[3], &r[3], &r[1]);
micro_add(&r[2], &r[2], &r[3]);
FETCH(&r[3], 0, CHAN_X);
- micro_add(&r[2], &r[2], &r[3]);
- if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
- STORE(&r[2], 0, CHAN_X);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
- STORE(&r[2], 0, CHAN_Z);
- }
+ micro_add(&d[CHAN_X], &r[2], &r[3]);
+
}
if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
@@ -2562,13 +2688,20 @@ exec_instruction(
micro_mul(&r[3], &r[3], &r[1]);
micro_add(&r[2], &r[2], &r[3]);
FETCH(&r[3], 0, CHAN_Y);
- micro_add(&r[2], &r[2], &r[3]);
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
- STORE(&r[2], 0, CHAN_Y);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
- STORE(&r[2], 0, CHAN_W);
- }
+ micro_add(&d[CHAN_Y], &r[2], &r[3]);
+
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ STORE(&d[CHAN_X], 0, CHAN_X);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ STORE(&d[CHAN_Y], 0, CHAN_Y);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ STORE(&d[CHAN_X], 0, CHAN_Z);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
+ STORE(&d[CHAN_Y], 0, CHAN_W);
}
break;
@@ -2653,8 +2786,10 @@ exec_instruction(
/* TGSI_OPCODE_SGN */
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_sgn( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_sgn(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2663,10 +2798,10 @@ exec_instruction(
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
FETCH(&r[2], 2, chan_index);
-
- micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
-
- STORE(&r[0], 0, chan_index);
+ micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2841,32 +2976,40 @@ exec_instruction(
case TGSI_OPCODE_CEIL:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_ceil( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_ceil(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
case TGSI_OPCODE_I2F:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_i2f( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_i2f(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
case TGSI_OPCODE_NOT:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_not( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_not(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
case TGSI_OPCODE_TRUNC:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_trunc( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_trunc(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2874,8 +3017,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_shl( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
+ micro_shl(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2883,8 +3028,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_ishr( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
+ micro_ishr(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2892,8 +3039,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_and( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
+ micro_and(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2901,8 +3050,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_or( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
+ micro_or(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2914,8 +3065,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_xor( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
+ micro_xor(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2946,8 +3099,23 @@ exec_instruction(
for (chan_index = 0; chan_index < 3; chan_index++) {
FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
}
- STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X );
++mach->LoopCounterStackTop;
+ STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X);
+ /* update LoopMask */
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
+ mach->LoopMask &= ~0x1;
+ }
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
+ mach->LoopMask &= ~0x2;
+ }
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
+ mach->LoopMask &= ~0x4;
+ }
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
+ mach->LoopMask &= ~0x8;
+ }
+ /* TODO: if mach->LoopMask == 0, jump to end of loop */
+ UPDATE_EXEC_MASK(mach);
/* fall-through (for now) */
case TGSI_OPCODE_BGNLOOP:
/* push LoopMask and ContMasks */
@@ -2961,28 +3129,28 @@ exec_instruction(
case TGSI_OPCODE_ENDFOR:
assert(mach->LoopCounterStackTop > 0);
- micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
- &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
- &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
+ micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
+ &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
+ &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
/* update LoopMask */
- if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) {
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
mach->LoopMask &= ~0x1;
}
- if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) {
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
mach->LoopMask &= ~0x2;
}
- if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) {
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
mach->LoopMask &= ~0x4;
}
- if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) {
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
mach->LoopMask &= ~0x8;
}
- micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
- &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
- &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
+ micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
+ &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
+ &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
assert(mach->LoopLabelStackTop > 0);
inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
- STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X );
+ STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X);
/* Restore ContMask, but don't pop */
assert(mach->ContStackTop > 0);
mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
@@ -3049,7 +3217,28 @@ exec_instruction(
break;
case TGSI_OPCODE_ENDSUB:
- /* no-op */
+ /*
+ * XXX: This really should be a no-op. We should never reach this opcode.
+ */
+
+ assert(mach->CallStackTop > 0);
+ mach->CallStackTop--;
+
+ mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
+ mach->CondMask = mach->CondStack[mach->CondStackTop];
+
+ mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
+ mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
+
+ mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
+ mach->ContMask = mach->ContStack[mach->ContStackTop];
+
+ assert(mach->FuncStackTop > 0);
+ mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
+
+ *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
+
+ UPDATE_EXEC_MASK(mach);
break;
case TGSI_OPCODE_NOP:
@@ -3060,6 +3249,8 @@ exec_instruction(
}
}
+#define DEBUG_EXECUTION 0
+
/**
* Run TGSI interpreter.
@@ -3103,10 +3294,67 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
exec_declaration( mach, mach->Declarations+i );
}
- /* execute instructions, until pc is set to -1 */
- while (pc != -1) {
- assert(pc < (int) mach->NumInstructions);
- exec_instruction( mach, mach->Instructions + pc, &pc );
+ {
+#if DEBUG_EXECUTION
+ struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
+ struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
+ uint inst = 1;
+
+ memcpy(temps, mach->Temps, sizeof(temps));
+ memcpy(outputs, mach->Outputs, sizeof(outputs));
+#endif
+
+ /* execute instructions, until pc is set to -1 */
+ while (pc != -1) {
+
+#if DEBUG_EXECUTION
+ uint i;
+
+ tgsi_dump_instruction(&mach->Instructions[pc], inst++);
+#endif
+
+ assert(pc < (int) mach->NumInstructions);
+ exec_instruction(mach, mach->Instructions + pc, &pc);
+
+#if DEBUG_EXECUTION
+ for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
+ if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
+ uint j;
+
+ memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
+ debug_printf("TEMP[%2u] = ", i);
+ for (j = 0; j < 4; j++) {
+ if (j > 0) {
+ debug_printf(" ");
+ }
+ debug_printf("(%6f, %6f, %6f, %6f)\n",
+ temps[i].xyzw[0].f[j],
+ temps[i].xyzw[1].f[j],
+ temps[i].xyzw[2].f[j],
+ temps[i].xyzw[3].f[j]);
+ }
+ }
+ }
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
+ uint j;
+
+ memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
+ debug_printf("OUT[%2u] = ", i);
+ for (j = 0; j < 4; j++) {
+ if (j > 0) {
+ debug_printf(" ");
+ }
+ debug_printf("{%6f, %6f, %6f, %6f}\n",
+ outputs[i].xyzw[0].f[j],
+ outputs[i].xyzw[1].f[j],
+ outputs[i].xyzw[2].f[j],
+ outputs[i].xyzw[3].f[j]);
+ }
+ }
+ }
+#endif
+ }
}
#if 0
@@ -3120,5 +3368,10 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
}
#endif
+ assert(mach->CondStackTop == 0);
+ assert(mach->LoopStackTop == 0);
+ assert(mach->ContStackTop == 0);
+ assert(mach->CallStackTop == 0);
+
return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 471f591dd6..fd94c1bc44 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -218,6 +218,7 @@ struct tgsi_exec_machine
struct tgsi_exec_vector Outputs[PIPE_MAX_ATTRIBS];
struct tgsi_exec_vector *Addrs;
+ struct tgsi_exec_vector *Predicates;
struct tgsi_sampler **Samplers;
@@ -232,6 +233,7 @@ struct tgsi_exec_machine
/* FRAGMENT processor only. */
const struct tgsi_interp_coef *InterpCoefs;
struct tgsi_exec_vector QuadPos;
+ float Face; /**< +1 if front facing, -1 if back facing */
/* Conditional execution masks */
uint CondMask; /**< For IF/ELSE/ENDIF */
@@ -252,7 +254,7 @@ struct tgsi_exec_machine
uint LoopLabelStack[TGSI_EXEC_MAX_LOOP_NESTING];
int LoopLabelStackTop;
- /** Loop counter stack (x = count, y = current, z = step) */
+ /** Loop counter stack (x = index, y = counter, z = step) */
struct tgsi_exec_vector LoopCounterStack[TGSI_EXEC_MAX_LOOP_NESTING];
int LoopCounterStackTop;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index 83f9df1183..9ca2993452 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -175,6 +175,10 @@ tgsi_parse_token(
copy_token(&inst->Instruction, &token);
extended = inst->Instruction.Extended;
+ if (inst->Instruction.Predicate) {
+ next_token(ctx, &inst->InstructionPredicate);
+ }
+
while( extended ) {
struct tgsi_src_register_ext token;
@@ -189,10 +193,6 @@ tgsi_parse_token(
copy_token(&inst->InstructionExtTexture, &token);
break;
- case TGSI_INSTRUCTION_EXT_TYPE_PREDICATE:
- copy_token(&inst->InstructionExtPredicate, &token);
- break;
-
default:
assert( 0 );
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index 76f1676d85..cb4772ade8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -80,9 +80,9 @@ struct tgsi_full_immediate
struct tgsi_full_instruction
{
struct tgsi_instruction Instruction;
+ struct tgsi_instruction_predicate InstructionPredicate;
struct tgsi_instruction_ext_label InstructionExtLabel;
struct tgsi_instruction_ext_texture InstructionExtTexture;
- struct tgsi_instruction_ext_predicate InstructionExtPredicate;
struct tgsi_full_dst_register FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS];
struct tgsi_full_src_register FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS];
uint Flags; /**< user-defined usage */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 36e27ea52f..4d8145032b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -34,7 +34,7 @@ typedef uint reg_flag;
#define BITS_IN_REG_FLAG (sizeof( reg_flag ) * 8)
-#define MAX_REGISTERS 256
+#define MAX_REGISTERS 1024
#define MAX_REG_FLAGS ((MAX_REGISTERS + BITS_IN_REG_FLAG - 1) / BITS_IN_REG_FLAG)
struct sanity_check_ctx
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 3f752e9352..bd963267cc 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -47,9 +47,9 @@ union tgsi_any_token {
struct tgsi_immediate imm;
union tgsi_immediate_data imm_data;
struct tgsi_instruction insn;
+ struct tgsi_instruction_predicate insn_predicate;
struct tgsi_instruction_ext_label insn_ext_label;
struct tgsi_instruction_ext_texture insn_ext_texture;
- struct tgsi_instruction_ext_predicate insn_ext_predicate;
struct tgsi_src_register src;
struct tgsi_src_register_ext_mod src_ext_mod;
struct tgsi_dimension dim;
@@ -72,6 +72,7 @@ struct ureg_tokens {
#define UREG_MAX_IMMEDIATE 32
#define UREG_MAX_TEMP 256
#define UREG_MAX_ADDR 2
+#define UREG_MAX_LOOP 1
#define UREG_MAX_PRED 1
#define DOMAIN_DECL 0
@@ -117,6 +118,7 @@ struct ureg_program
unsigned nr_addrs;
unsigned nr_preds;
+ unsigned nr_loops;
unsigned nr_instructions;
struct ureg_tokens domain[2];
@@ -350,6 +352,7 @@ struct ureg_src ureg_DECL_constant(struct ureg_program *ureg,
i = ureg->nr_constant_ranges++;
ureg->constant_range[i].first = index;
ureg->constant_range[i].last = index;
+ goto out;
}
/* Collapse all ranges down to one:
@@ -417,6 +420,19 @@ struct ureg_dst ureg_DECL_address( struct ureg_program *ureg )
return ureg_dst_register( TGSI_FILE_ADDRESS, 0 );
}
+/* Allocate a new loop register.
+ */
+struct ureg_dst
+ureg_DECL_loop(struct ureg_program *ureg)
+{
+ if (ureg->nr_loops < UREG_MAX_LOOP) {
+ return ureg_dst_register(TGSI_FILE_LOOP, ureg->nr_loops++);
+ }
+
+ assert(0);
+ return ureg_dst_register(TGSI_FILE_LOOP, 0);
+}
+
/* Allocate a new predicate register.
*/
struct ureg_dst
@@ -661,35 +677,27 @@ ureg_emit_insn(struct ureg_program *ureg,
validate( opcode, num_dst, num_src );
out = get_tokens( ureg, DOMAIN_INSN, count );
- out[0].value = 0;
- out[0].insn.Type = TGSI_TOKEN_TYPE_INSTRUCTION;
- out[0].insn.NrTokens = 0;
+ out[0].insn = tgsi_default_instruction();
out[0].insn.Opcode = opcode;
out[0].insn.Saturate = saturate;
out[0].insn.NumDstRegs = num_dst;
out[0].insn.NumSrcRegs = num_src;
- out[0].insn.Padding = 0;
result.insn_token = ureg->domain[DOMAIN_INSN].count - count;
+ result.extended_token = result.insn_token;
if (predicate) {
- out[0].insn.Extended = 1;
- out[1].insn_ext_predicate = tgsi_default_instruction_ext_predicate();
- out[1].insn_ext_predicate.Negate = pred_negate;
- out[1].insn_ext_predicate.SwizzleX = pred_swizzle_x;
- out[1].insn_ext_predicate.SwizzleY = pred_swizzle_y;
- out[1].insn_ext_predicate.SwizzleZ = pred_swizzle_z;
- out[1].insn_ext_predicate.SwizzleW = pred_swizzle_w;
-
- result.extended_token = result.insn_token + 1;
- } else {
- out[0].insn.Extended = 0;
-
- result.extended_token = result.insn_token;
+ out[0].insn.Predicate = 1;
+ out[1].insn_predicate = tgsi_default_instruction_predicate();
+ out[1].insn_predicate.Negate = pred_negate;
+ out[1].insn_predicate.SwizzleX = pred_swizzle_x;
+ out[1].insn_predicate.SwizzleY = pred_swizzle_y;
+ out[1].insn_predicate.SwizzleZ = pred_swizzle_z;
+ out[1].insn_predicate.SwizzleW = pred_swizzle_w;
}
ureg->nr_instructions++;
-
+
return result;
}
@@ -1023,6 +1031,13 @@ static void emit_decls( struct ureg_program *ureg )
0, ureg->nr_addrs );
}
+ if (ureg->nr_loops) {
+ emit_decl_range(ureg,
+ TGSI_FILE_LOOP,
+ 0,
+ ureg->nr_loops);
+ }
+
if (ureg->nr_preds) {
emit_decl_range(ureg,
TGSI_FILE_PREDICATE,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index dae4291194..94cc70a208 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -158,6 +158,9 @@ struct ureg_dst
ureg_DECL_address( struct ureg_program * );
struct ureg_dst
+ureg_DECL_loop( struct ureg_program * );
+
+struct ureg_dst
ureg_DECL_predicate(struct ureg_program *);
/* Supply an index to the sampler declaration as this is the hook to
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 5038642599..5372df5735 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -354,9 +354,9 @@ util_blit_pixels_writemask(struct blit_state *ctx,
texTemp.target = PIPE_TEXTURE_2D;
texTemp.format = src->format;
texTemp.last_level = 0;
- texTemp.width[0] = srcW;
- texTemp.height[0] = srcH;
- texTemp.depth[0] = 1;
+ texTemp.width0 = srcW;
+ texTemp.height0 = srcH;
+ texTemp.depth0 = 1;
pf_get_block(src->format, &texTemp.block);
tex = screen->texture_create(screen, &texTemp);
@@ -389,10 +389,10 @@ util_blit_pixels_writemask(struct blit_state *ctx,
}
else {
pipe_texture_reference(&tex, src->texture);
- s0 = srcX0 / (float)tex->width[0];
- s1 = srcX1 / (float)tex->width[0];
- t0 = srcY0 / (float)tex->height[0];
- t1 = srcY1 / (float)tex->height[0];
+ s0 = srcX0 / (float)tex->width0;
+ s1 = srcX1 / (float)tex->width0;
+ t0 = srcY0 / (float)tex->height0;
+ t1 = srcY1 / (float)tex->height0;
}
@@ -518,13 +518,13 @@ util_blit_pixels_tex(struct blit_state *ctx,
assert(filter == PIPE_TEX_MIPFILTER_NEAREST ||
filter == PIPE_TEX_MIPFILTER_LINEAR);
- assert(tex->width[0] != 0);
- assert(tex->height[0] != 0);
+ assert(tex->width0 != 0);
+ assert(tex->height0 != 0);
- s0 = srcX0 / (float)tex->width[0];
- s1 = srcX1 / (float)tex->width[0];
- t0 = srcY0 / (float)tex->height[0];
- t1 = srcY1 / (float)tex->height[0];
+ s0 = srcX0 / (float)tex->width0;
+ s1 = srcX1 / (float)tex->width0;
+ t0 = srcY0 / (float)tex->height0;
+ t1 = srcY1 / (float)tex->height0;
assert(ctx->pipe->screen->is_format_supported(ctx->pipe->screen, dst->format,
PIPE_TEXTURE_2D,
diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv
index f1bf94f17d..b9cc2aa716 100644
--- a/src/gallium/auxiliary/util/u_format.csv
+++ b/src/gallium/auxiliary/util/u_format.csv
@@ -97,3 +97,13 @@ PIPE_FORMAT_B8G8R8A8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , zyxw,
PIPE_FORMAT_B8G8R8X8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , zyx1, srgb
PIPE_FORMAT_X8UB8UG8SR8S_NORM , arith , 1, 1, sn8 , sn8 , un8 , x8 , 1zyx, rgb
PIPE_FORMAT_B6UG5SR5S_NORM , arith , 1, 1, sn5 , sn5 , un6 , , xyz1, rgb
+PIPE_FORMAT_YCBCR , yuv , 2, 1, x32 , , , , xyz1, yuv
+PIPE_FORMAT_YCBCR_REV , yuv , 2, 1, x32 , , , , xyz1, yuv
+PIPE_FORMAT_DXT1_RGBA , dxt , 4, 4, x64 , , , , xyzw, rgb
+PIPE_FORMAT_DXT1_RGB , dxt , 4, 4, x64 , , , , xyz1, rgb
+PIPE_FORMAT_DXT3_RGBA , dxt , 4, 4, x128, , , , xyzw, rgb
+PIPE_FORMAT_DXT5_RGBA , dxt , 4, 4, x128, , , , xyzw, rgb
+PIPE_FORMAT_DXT1_SRGBA , dxt , 4, 4, x64 , , , , xyzw, srgb
+PIPE_FORMAT_DXT1_SRGB , dxt , 4, 4, x64 , , , , xyz1, srgb
+PIPE_FORMAT_DXT3_SRGBA , dxt , 4, 4, x128, , , , xyzw, srgb
+PIPE_FORMAT_DXT5_SRGBA , dxt , 4, 4, x128, , , , xyzw, srgb
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index aa823aa218..f67f1e458d 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -45,6 +45,7 @@
#include "util/u_draw_quad.h"
#include "util/u_gen_mipmap.h"
#include "util/u_simple_shaders.h"
+#include "util/u_math.h"
#include "cso_cache/cso_context.h"
@@ -1125,12 +1126,12 @@ make_1d_mipmap(struct gen_mipmap_state *ctx,
srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
PIPE_TRANSFER_READ, 0, 0,
- pt->width[srcLevel],
- pt->height[srcLevel]);
+ u_minify(pt->width0, srcLevel),
+ u_minify(pt->height0, srcLevel));
dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
PIPE_TRANSFER_WRITE, 0, 0,
- pt->width[dstLevel],
- pt->height[dstLevel]);
+ u_minify(pt->width0, dstLevel),
+ u_minify(pt->height0, dstLevel));
srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
@@ -1168,12 +1169,12 @@ make_2d_mipmap(struct gen_mipmap_state *ctx,
srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
PIPE_TRANSFER_READ, 0, 0,
- pt->width[srcLevel],
- pt->height[srcLevel]);
+ u_minify(pt->width0, srcLevel),
+ u_minify(pt->height0, srcLevel));
dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
PIPE_TRANSFER_WRITE, 0, 0,
- pt->width[dstLevel],
- pt->height[dstLevel]);
+ u_minify(pt->width0, dstLevel),
+ u_minify(pt->height0, dstLevel));
srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
@@ -1213,12 +1214,12 @@ make_3d_mipmap(struct gen_mipmap_state *ctx,
srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
PIPE_TRANSFER_READ, 0, 0,
- pt->width[srcLevel],
- pt->height[srcLevel]);
+ u_minify(pt->width0, srcLevel),
+ u_minify(pt->height0, srcLevel));
dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
PIPE_TRANSFER_WRITE, 0, 0,
- pt->width[dstLevel],
- pt->height[dstLevel]);
+ u_minify(pt->width0, dstLevel),
+ u_minify(pt->height0, dstLevel));
srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
@@ -1575,8 +1576,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
* Setup framebuffer / dest surface
*/
fb.cbufs[0] = surf;
- fb.width = pt->width[dstLevel];
- fb.height = pt->height[dstLevel];
+ fb.width = u_minify(pt->width0, dstLevel);
+ fb.height = u_minify(pt->height0, dstLevel);
cso_set_framebuffer(ctx->cso, &fb);
/*
@@ -1597,8 +1598,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
offset = set_vertex_data(ctx,
pt->target,
face,
- (float) pt->width[dstLevel],
- (float) pt->height[dstLevel]);
+ (float) u_minify(pt->width0, dstLevel),
+ (float) u_minify(pt->height0, dstLevel));
util_draw_vertex_buffer(ctx->pipe,
ctx->vbuf,
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 75b075f160..a5cd6574c0 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -491,6 +491,47 @@ util_next_power_of_two(unsigned x)
/**
+ * Return number of bits set in n.
+ */
+static INLINE unsigned
+util_bitcount(unsigned n)
+{
+#if defined(PIPE_CC_GCC)
+ return __builtin_popcount(n);
+#else
+ /* K&R classic bitcount.
+ *
+ * For each iteration, clear the LSB from the bitfield.
+ * Requires only one iteration per set bit, instead of
+ * one iteration per bit less than highest set bit.
+ */
+ unsigned bits = 0;
+ for (bits; n; bits++) {
+ n &= n - 1;
+ }
+ return bits;
+#endif
+}
+
+
+/**
+ * Reverse byte order of a 32 bit word.
+ */
+static INLINE uint32_t
+util_bswap32(uint32_t n)
+{
+#if defined(PIPE_CC_GCC)
+ return __builtin_bswap32(n);
+#else
+ return (n >> 24) |
+ ((n >> 8) & 0x0000ff00) |
+ ((n << 8) & 0x00ff0000) |
+ (n << 24);
+#endif
+}
+
+
+/**
* Clamp X to [MIN, MAX].
* This is a macro to allow float, int, uint, etc. types.
*/
@@ -499,6 +540,9 @@ util_next_power_of_two(unsigned x)
#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) )
#define MAX2( A, B ) ( (A)>(B) ? (A) : (B) )
+#define MIN3( A, B, C ) MIN2( MIN2( A, B ), C )
+#define MAX3( A, B, C ) MAX2( MAX2( A, B ), C )
+
static INLINE int
align(int value, int alignment)
@@ -507,9 +551,9 @@ align(int value, int alignment)
}
static INLINE unsigned
-minify(unsigned value)
+u_minify(unsigned value, unsigned levels)
{
- return MAX2(1, value >> 1);
+ return MAX2(1, value >> levels);
}
#ifndef COPY_4V
diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c
index 85e443204e..de8c266db8 100644
--- a/src/gallium/auxiliary/util/u_surface.c
+++ b/src/gallium/auxiliary/util/u_surface.c
@@ -79,9 +79,9 @@ util_create_rgba_surface(struct pipe_screen *screen,
templ.target = target;
templ.format = format;
templ.last_level = 0;
- templ.width[0] = width;
- templ.height[0] = height;
- templ.depth[0] = 1;
+ templ.width0 = width;
+ templ.height0 = height;
+ templ.depth0 = 1;
pf_get_block(format, &templ.block);
templ.tex_usage = usage;
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index cda6dc134a..8f52170f90 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -455,8 +455,8 @@ void vl_compositor_render(struct vl_compositor *compositor,
assert(dst_area);
assert(picture_type == PIPE_MPEG12_PICTURE_TYPE_FRAME);
- compositor->fb_state.width = dst_surface->width[0];
- compositor->fb_state.height = dst_surface->height[0];
+ compositor->fb_state.width = dst_surface->width0;
+ compositor->fb_state.height = dst_surface->height0;
compositor->fb_state.cbufs[0] = compositor->pipe->screen->get_tex_surface
(
compositor->pipe->screen,
@@ -479,8 +479,8 @@ void vl_compositor_render(struct vl_compositor *compositor,
compositor->pipe->set_framebuffer_state(compositor->pipe, &compositor->fb_state);
compositor->pipe->set_viewport_state(compositor->pipe, &compositor->viewport);
compositor->pipe->set_scissor_state(compositor->pipe, &compositor->scissor);
- compositor->pipe->bind_sampler_states(compositor->pipe, 1, &compositor->sampler);
- compositor->pipe->set_sampler_textures(compositor->pipe, 1, &src_surface);
+ compositor->pipe->bind_fragment_sampler_states(compositor->pipe, 1, &compositor->sampler);
+ compositor->pipe->set_fragment_sampler_textures(compositor->pipe, 1, &src_surface);
compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader);
compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader);
compositor->pipe->set_vertex_buffers(compositor->pipe, 2, compositor->vertex_bufs);
@@ -504,12 +504,12 @@ void vl_compositor_render(struct vl_compositor *compositor,
vs_consts->dst_trans.z = 0;
vs_consts->dst_trans.w = 0;
- vs_consts->src_scale.x = src_area->w / (float)src_surface->width[0];
- vs_consts->src_scale.y = src_area->h / (float)src_surface->height[0];
+ vs_consts->src_scale.x = src_area->w / (float)src_surface->width0;
+ vs_consts->src_scale.y = src_area->h / (float)src_surface->height0;
vs_consts->src_scale.z = 1;
vs_consts->src_scale.w = 1;
- vs_consts->src_trans.x = src_area->x / (float)src_surface->width[0];
- vs_consts->src_trans.y = src_area->y / (float)src_surface->height[0];
+ vs_consts->src_trans.x = src_area->x / (float)src_surface->width0;
+ vs_consts->src_trans.y = src_area->y / (float)src_surface->height0;
vs_consts->src_trans.z = 0;
vs_consts->src_trans.w = 0;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index c4ba69817f..93da67b984 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -681,7 +681,7 @@ xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
(
r->pipe->screen, r->textures.all[i],
0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0,
- r->textures.all[i]->width[0], r->textures.all[i]->height[0]
+ r->textures.all[i]->width0, r->textures.all[i]->height0
);
r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]);
@@ -835,26 +835,26 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
/* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
template.format = PIPE_FORMAT_R16_SNORM;
template.last_level = 0;
- template.width[0] = r->pot_buffers ?
+ template.width0 = r->pot_buffers ?
util_next_power_of_two(r->picture_width) : r->picture_width;
- template.height[0] = r->pot_buffers ?
+ template.height0 = r->pot_buffers ?
util_next_power_of_two(r->picture_height) : r->picture_height;
- template.depth[0] = 1;
+ template.depth0 = 1;
pf_get_block(template.format, &template.block);
template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC;
r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template);
if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
- template.width[0] = r->pot_buffers ?
+ template.width0 = r->pot_buffers ?
util_next_power_of_two(r->picture_width / 2) :
r->picture_width / 2;
- template.height[0] = r->pot_buffers ?
+ template.height0 = r->pot_buffers ?
util_next_power_of_two(r->picture_height / 2) :
r->picture_height / 2;
}
else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
- template.height[0] = r->pot_buffers ?
+ template.height0 = r->pot_buffers ?
util_next_power_of_two(r->picture_height / 2) :
r->picture_height / 2;
@@ -1283,8 +1283,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
);
- vs_consts->denorm.x = r->surface->width[0];
- vs_consts->denorm.y = r->surface->height[0];
+ vs_consts->denorm.x = r->surface->width0;
+ vs_consts->denorm.y = r->surface->height0;
pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer);
@@ -1296,8 +1296,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems);
- r->pipe->set_sampler_textures(r->pipe, 3, r->textures.all);
- r->pipe->bind_sampler_states(r->pipe, 3, r->samplers.all);
+ r->pipe->set_fragment_sampler_textures(r->pipe, 3, r->textures.all);
+ r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
r->pipe->bind_vs_state(r->pipe, r->i_vs);
r->pipe->bind_fs_state(r->pipe, r->i_fs);
@@ -1310,8 +1310,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
r->textures.individual.ref[0] = r->past;
- r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all);
- r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all);
+ r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
+ r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
@@ -1324,8 +1324,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
r->textures.individual.ref[0] = r->past;
- r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all);
- r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all);
+ r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
+ r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
@@ -1338,8 +1338,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
r->textures.individual.ref[0] = r->future;
- r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all);
- r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all);
+ r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
+ r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
@@ -1352,8 +1352,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
r->textures.individual.ref[0] = r->future;
- r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all);
- r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all);
+ r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
+ r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
@@ -1367,8 +1367,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
r->textures.individual.ref[0] = r->past;
r->textures.individual.ref[1] = r->future;
- r->pipe->set_sampler_textures(r->pipe, 5, r->textures.all);
- r->pipe->bind_sampler_states(r->pipe, 5, r->samplers.all);
+ r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
+ r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
@@ -1382,8 +1382,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
r->textures.individual.ref[0] = r->past;
r->textures.individual.ref[1] = r->future;
- r->pipe->set_sampler_textures(r->pipe, 5, r->textures.all);
- r->pipe->bind_sampler_states(r->pipe, 5, r->samplers.all);
+ r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
+ r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
@@ -1633,8 +1633,8 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
renderer->past = past;
renderer->future = future;
renderer->fence = fence;
- renderer->surface_tex_inv_size.x = 1.0f / surface->width[0];
- renderer->surface_tex_inv_size.y = 1.0f / surface->height[0];
+ renderer->surface_tex_inv_size.x = 1.0f / surface->width0;
+ renderer->surface_tex_inv_size.y = 1.0f / surface->height0;
}
while (num_macroblocks) {