From aa5db684382bd8662a83ca09ed000e4a5a1013f9 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 16 Jul 2009 14:14:32 +0100 Subject: softpipe: remove backwards dependency from tilecache to softpipe The tile cache is a utility, it shouldn't know anything about the entity which is making use of it (ie softpipe). Remove softpipe parameter to all the tilecache function calls, and also remove the need to keep a softpipe pointer in the sampler structs. --- src/gallium/drivers/softpipe/sp_context.c | 8 ++---- src/gallium/drivers/softpipe/sp_flush.c | 6 ++--- src/gallium/drivers/softpipe/sp_quad_blend.c | 6 ++--- src/gallium/drivers/softpipe/sp_quad_colormask.c | 3 +-- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 2 +- src/gallium/drivers/softpipe/sp_quad_output.c | 3 +-- src/gallium/drivers/softpipe/sp_quad_stencil.c | 2 +- src/gallium/drivers/softpipe/sp_state_derived.c | 22 ++++++++++++++++ src/gallium/drivers/softpipe/sp_state_sampler.c | 2 +- src/gallium/drivers/softpipe/sp_state_surface.c | 4 +-- src/gallium/drivers/softpipe/sp_tex_sample.c | 32 ++++++++--------------- src/gallium/drivers/softpipe/sp_tex_sample.h | 5 ++-- src/gallium/drivers/softpipe/sp_tile_cache.c | 20 ++++++-------- src/gallium/drivers/softpipe/sp_tile_cache.h | 12 +++------ 14 files changed, 62 insertions(+), 65 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 86df320ea8..f085889d3a 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -73,8 +73,8 @@ softpipe_unmap_transfers(struct softpipe_context *sp) uint i; for (i = 0; i < sp->framebuffer.nr_cbufs; i++) - sp_flush_tile_cache(sp, sp->cbuf_cache[i]); - sp_flush_tile_cache(sp, sp->zsbuf_cache); + sp_flush_tile_cache(sp->cbuf_cache[i]); + sp_flush_tile_cache(sp->zsbuf_cache); for (i = 0; i < sp->framebuffer.nr_cbufs; i++) { sp_tile_cache_unmap_transfers(sp->cbuf_cache[i]); @@ -254,8 +254,6 @@ softpipe_create( struct pipe_screen *screen ) /* vertex shader samplers */ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { softpipe->tgsi.vert_samplers[i].base.get_samples = sp_get_samples_vertex; - softpipe->tgsi.vert_samplers[i].unit = i; - softpipe->tgsi.vert_samplers[i].sp = softpipe; softpipe->tgsi.vert_samplers[i].cache = softpipe->tex_cache[i]; softpipe->tgsi.vert_samplers_list[i] = &softpipe->tgsi.vert_samplers[i]; } @@ -263,8 +261,6 @@ softpipe_create( struct pipe_screen *screen ) /* fragment shader samplers */ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { softpipe->tgsi.frag_samplers[i].base.get_samples = sp_get_samples_fragment; - softpipe->tgsi.frag_samplers[i].unit = i; - softpipe->tgsi.frag_samplers[i].sp = softpipe; softpipe->tgsi.frag_samplers[i].cache = softpipe->tex_cache[i]; softpipe->tgsi.frag_samplers_list[i] = &softpipe->tgsi.frag_samplers[i]; } diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index 4a14d49686..732277a2c5 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -52,17 +52,17 @@ softpipe_flush( struct pipe_context *pipe, if (flags & PIPE_FLUSH_TEXTURE_CACHE) { for (i = 0; i < softpipe->num_textures; i++) { - sp_flush_tile_cache(softpipe, softpipe->tex_cache[i]); + sp_flush_tile_cache(softpipe->tex_cache[i]); } } if (flags & PIPE_FLUSH_RENDER_CACHE) { for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) if (softpipe->cbuf_cache[i]) - sp_flush_tile_cache(softpipe, softpipe->cbuf_cache[i]); + sp_flush_tile_cache(softpipe->cbuf_cache[i]); if (softpipe->zsbuf_cache) - sp_flush_tile_cache(softpipe, softpipe->zsbuf_cache); + sp_flush_tile_cache(softpipe->zsbuf_cache); /* Need this call for hardware buffers before swapbuffers. * diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index b1e18805c7..04b5daf3a4 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -130,8 +130,7 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad) uint *dst4 = (uint *) dst; uint *res4 = (uint *) res; struct softpipe_cached_tile * - tile = sp_get_cached_tile(softpipe, - softpipe->cbuf_cache[cbuf], + tile = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], quad->input.x0, quad->input.y0); float (*quadColor)[4] = quad->output.color[cbuf]; uint i, j; @@ -260,8 +259,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { float source[4][QUAD_SIZE], dest[4][QUAD_SIZE]; struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe, - softpipe->cbuf_cache[cbuf], + = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], quad->input.x0, quad->input.y0); float (*quadColor)[4] = quad->output.color[cbuf]; uint i, j; diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c index dc90e5d5e9..89efbe3b02 100644 --- a/src/gallium/drivers/softpipe/sp_quad_colormask.c +++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c @@ -54,8 +54,7 @@ colormask_quad(struct quad_stage *qs, struct quad_header *quad) for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { float dest[4][QUAD_SIZE]; struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe, - softpipe->cbuf_cache[cbuf], + = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], quad->input.x0, quad->input.y0); float (*quadColor)[4] = quad->output.color[cbuf]; uint i, j; diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index d463930bae..768b9275b3 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -60,7 +60,7 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) unsigned zmask = 0; unsigned j; struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->input.x0, quad->input.y0); + = sp_get_cached_tile(softpipe->zsbuf_cache, quad->input.x0, quad->input.y0); assert(ps); /* shouldn't get here if there's no zbuffer */ diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c index 92d5f9f3c1..dd8f5377e9 100644 --- a/src/gallium/drivers/softpipe/sp_quad_output.c +++ b/src/gallium/drivers/softpipe/sp_quad_output.c @@ -50,8 +50,7 @@ output_quad(struct quad_stage *qs, struct quad_header *quad) /* loop over colorbuffer outputs */ for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe, - softpipe->cbuf_cache[cbuf], + = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], quad->input.x0, quad->input.y0); float (*quadColor)[4] = quad->output.color[cbuf]; int i, j; diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c index 5e9d447737..34a8d9e9f6 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stencil.c +++ b/src/gallium/drivers/softpipe/sp_quad_stencil.c @@ -206,7 +206,7 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) ubyte ref, wrtMask, valMask; ubyte stencilVals[QUAD_SIZE]; struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->input.x0, quad->input.y0); + = sp_get_cached_tile(softpipe->zsbuf_cache, quad->input.x0, quad->input.y0); uint j; uint face = quad->input.facing; diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 75551000c9..75be99768c 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -184,11 +184,33 @@ compute_cliprect(struct softpipe_context *sp) } +static void +update_tgsi_samplers( struct softpipe_context *softpipe ) +{ + unsigned i; + + /* vertex shader samplers */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + softpipe->tgsi.vert_samplers[i].sampler = softpipe->sampler[i]; + softpipe->tgsi.vert_samplers[i].texture = softpipe->texture[i]; + } + + /* fragment shader samplers */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + softpipe->tgsi.frag_samplers[i].sampler = softpipe->sampler[i]; + softpipe->tgsi.frag_samplers[i].texture = softpipe->texture[i]; + } +} + /* Hopefully this will remain quite simple, otherwise need to pull in * something like the state tracker mechanism. */ void softpipe_update_derived( struct softpipe_context *softpipe ) { + if (softpipe->dirty & (SP_NEW_SAMPLER | + SP_NEW_TEXTURE)) + update_tgsi_samplers( softpipe ); + if (softpipe->dirty & (SP_NEW_RASTERIZER | SP_NEW_FS | SP_NEW_VS)) diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index cb517b02e4..aa2f3f2ccd 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -97,7 +97,7 @@ softpipe_set_sampler_textures(struct pipe_context *pipe, struct pipe_texture *tex = i < num ? texture[i] : NULL; pipe_texture_reference(&softpipe->texture[i], tex); - sp_tile_cache_set_texture(pipe, softpipe->tex_cache[i], tex); + sp_tile_cache_set_texture(softpipe->tex_cache[i], tex); } softpipe->num_textures = num; diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index 7c06d864a7..1621a27614 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -53,7 +53,7 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, /* check if changing cbuf */ if (sp->framebuffer.cbufs[i] != fb->cbufs[i]) { /* flush old */ - sp_flush_tile_cache(sp, sp->cbuf_cache[i]); + sp_flush_tile_cache(sp->cbuf_cache[i]); /* assign new */ sp->framebuffer.cbufs[i] = fb->cbufs[i]; @@ -68,7 +68,7 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, /* zbuf changing? */ if (sp->framebuffer.zsbuf != fb->zsbuf) { /* flush old */ - sp_flush_tile_cache(sp, sp->zsbuf_cache); + sp_flush_tile_cache(sp->zsbuf_cache); /* assign new */ sp->framebuffer.zsbuf = fb->zsbuf; diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index a1d3bade27..3daa88eedd 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -668,10 +668,8 @@ get_texel(const struct tgsi_sampler *tgsi_sampler, float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) { const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); - struct softpipe_context *sp = samp->sp; - const uint unit = samp->unit; - const struct pipe_texture *texture = sp->texture[unit]; - const struct pipe_sampler_state *sampler = sp->sampler[unit]; + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; if (x < 0 || x >= (int) texture->width[level] || y < 0 || y >= (int) texture->height[level] || @@ -685,7 +683,7 @@ get_texel(const struct tgsi_sampler *tgsi_sampler, const int tx = x % TILE_SIZE; const int ty = y % TILE_SIZE; const struct softpipe_cached_tile *tile - = sp_get_cached_tile_tex(sp, samp->cache, + = sp_get_cached_tile_tex(samp->cache, x, y, z, face, level); rgba[0][j] = tile->data.color[ty][tx][0]; rgba[1][j] = tile->data.color[ty][tx][1]; @@ -840,10 +838,8 @@ sp_get_samples_2d_common(const struct tgsi_sampler *tgsi_sampler, const unsigned faces[4]) { const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); - const struct softpipe_context *sp = samp->sp; - const uint unit = samp->unit; - const struct pipe_texture *texture = sp->texture[unit]; - const struct pipe_sampler_state *sampler = sp->sampler[unit]; + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; unsigned level0, level1, j, imgFilter; int width, height; float levelBlend; @@ -992,10 +988,8 @@ sp_get_samples_3d(const struct tgsi_sampler *tgsi_sampler, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); - const struct softpipe_context *sp = samp->sp; - const uint unit = samp->unit; - const struct pipe_texture *texture = sp->texture[unit]; - const struct pipe_sampler_state *sampler = sp->sampler[unit]; + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; /* get/map pipe_surfaces corresponding to 3D tex slices */ unsigned level0, level1, j, imgFilter; int width, height, depth; @@ -1139,10 +1133,8 @@ sp_get_samples_rect(const struct tgsi_sampler *tgsi_sampler, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); - const struct softpipe_context *sp = samp->sp; - const uint unit = samp->unit; - const struct pipe_texture *texture = sp->texture[unit]; - const struct pipe_sampler_state *sampler = sp->sampler[unit]; + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; const uint face = 0; unsigned level0, level1, j, imgFilter; int width, height; @@ -1216,10 +1208,8 @@ sp_get_samples(struct tgsi_sampler *tgsi_sampler, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); - const struct softpipe_context *sp = samp->sp; - const uint unit = samp->unit; - const struct pipe_texture *texture = sp->texture[unit]; - const struct pipe_sampler_state *sampler = sp->sampler[unit]; + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; if (!texture) return; diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index 40d8eb2c2a..3c5beb560f 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -39,8 +39,9 @@ struct sp_shader_sampler { struct tgsi_sampler base; /**< base class */ - uint unit; - struct softpipe_context *sp; + const struct pipe_texture *texture; + const struct pipe_sampler_state *sampler; + struct softpipe_tile_cache *cache; }; diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 1f9b8f1f4f..306284808c 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -236,8 +236,7 @@ sp_tile_cache_unmap_transfers(struct softpipe_tile_cache *tc) * Specify the texture to cache. */ void -sp_tile_cache_set_texture(struct pipe_context *pipe, - struct softpipe_tile_cache *tc, +sp_tile_cache_set_texture(struct softpipe_tile_cache *tc, struct pipe_texture *texture) { uint i; @@ -344,8 +343,7 @@ clear_tile(struct softpipe_cached_tile *tile, * Actually clear the tiles which were flagged as being in a clear state. */ static void -sp_tile_cache_flush_clear(struct pipe_context *pipe, - struct softpipe_tile_cache *tc) +sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc) { struct pipe_transfer *pt = tc->transfer; const uint w = tc->transfer->width; @@ -382,8 +380,7 @@ sp_tile_cache_flush_clear(struct pipe_context *pipe, * any tiles "flagged" as cleared will be "really" cleared. */ void -sp_flush_tile_cache(struct softpipe_context *softpipe, - struct softpipe_tile_cache *tc) +sp_flush_tile_cache(struct softpipe_tile_cache *tc) { struct pipe_transfer *pt = tc->transfer; int inuse = 0, pos; @@ -409,7 +406,7 @@ sp_flush_tile_cache(struct softpipe_context *softpipe, } #if TILE_CLEAR_OPTIMIZATION - sp_tile_cache_flush_clear(&softpipe->pipe, tc); + sp_tile_cache_flush_clear(tc); #endif } else if (tc->texture) { @@ -431,8 +428,7 @@ sp_flush_tile_cache(struct softpipe_context *softpipe, * \param x, y position of tile, in pixels */ struct softpipe_cached_tile * -sp_get_cached_tile(struct softpipe_context *softpipe, - struct softpipe_tile_cache *tc, int x, int y) +sp_get_cached_tile(struct softpipe_tile_cache *tc, int x, int y) { struct pipe_transfer *pt = tc->transfer; @@ -513,11 +509,11 @@ tex_cache_pos(int x, int y, int z, int face, int level) * Tiles are read-only and indexed with more params. */ const struct softpipe_cached_tile * -sp_get_cached_tile_tex(struct softpipe_context *sp, - struct softpipe_tile_cache *tc, int x, int y, int z, +sp_get_cached_tile_tex(struct softpipe_tile_cache *tc, + int x, int y, int z, int face, int level) { - struct pipe_screen *screen = sp->pipe.screen; + struct pipe_screen *screen = tc->screen; /* tile pos in framebuffer: */ const int tile_x = x & ~(TILE_SIZE - 1); const int tile_y = y & ~(TILE_SIZE - 1); diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index 8f247d0e58..639cde6705 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -80,25 +80,21 @@ extern void sp_tile_cache_unmap_transfers(struct softpipe_tile_cache *tc); extern void -sp_tile_cache_set_texture(struct pipe_context *pipe, - struct softpipe_tile_cache *tc, +sp_tile_cache_set_texture(struct softpipe_tile_cache *tc, struct pipe_texture *texture); extern void -sp_flush_tile_cache(struct softpipe_context *softpipe, - struct softpipe_tile_cache *tc); +sp_flush_tile_cache(struct softpipe_tile_cache *tc); extern void sp_tile_cache_clear(struct softpipe_tile_cache *tc, const float *rgba, uint clearValue); extern struct softpipe_cached_tile * -sp_get_cached_tile(struct softpipe_context *softpipe, - struct softpipe_tile_cache *tc, int x, int y); +sp_get_cached_tile(struct softpipe_tile_cache *tc, int x, int y); extern const struct softpipe_cached_tile * -sp_get_cached_tile_tex(struct softpipe_context *softpipe, - struct softpipe_tile_cache *tc, int x, int y, int z, +sp_get_cached_tile_tex(struct softpipe_tile_cache *tc, int x, int y, int z, int face, int level); -- cgit v1.2.3 From b5d583efeff5f195bff48c95125a225c273189e2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 17 Jul 2009 10:44:22 +0100 Subject: softpipe: make some small steps to flush texture cache less frequently No performance gain yet, but the code is a bit cleaner. --- src/gallium/drivers/softpipe/sp_context.h | 3 +- src/gallium/drivers/softpipe/sp_state_derived.c | 14 +++++ src/gallium/drivers/softpipe/sp_texture.c | 3 +- src/gallium/drivers/softpipe/sp_texture.h | 2 +- src/gallium/drivers/softpipe/sp_tile_cache.c | 80 +++++++++++++++---------- src/gallium/drivers/softpipe/sp_tile_cache.h | 3 + 6 files changed, 70 insertions(+), 35 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 7888c2f644..f66f0b7849 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -149,7 +149,8 @@ struct softpipe_context { struct softpipe_tile_cache *cbuf_cache[PIPE_MAX_COLOR_BUFS]; struct softpipe_tile_cache *zsbuf_cache; - + + unsigned tex_timestamp; struct softpipe_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; unsigned use_sse : 1; diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 75be99768c..629a1f8e29 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -32,6 +32,7 @@ #include "draw/draw_vertex.h" #include "draw/draw_private.h" #include "sp_context.h" +#include "sp_screen.h" #include "sp_state.h" @@ -200,6 +201,10 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) softpipe->tgsi.frag_samplers[i].sampler = softpipe->sampler[i]; softpipe->tgsi.frag_samplers[i].texture = softpipe->texture[i]; } + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + sp_tile_cache_validate_texture( softpipe->tex_cache[i] ); + } } /* Hopefully this will remain quite simple, otherwise need to pull in @@ -207,6 +212,15 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) */ void softpipe_update_derived( struct softpipe_context *softpipe ) { + struct softpipe_screen *sp_screen = softpipe_screen(softpipe->pipe.screen); + + /* Check for updated textures. + */ + if (softpipe->tex_timestamp != sp_screen->timestamp) { + softpipe->tex_timestamp = sp_screen->timestamp; + softpipe->dirty |= SP_NEW_TEXTURE; + } + if (softpipe->dirty & (SP_NEW_SAMPLER | SP_NEW_TEXTURE)) update_tgsi_samplers( softpipe ); diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 7a533dad9f..0c84375bf1 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -227,7 +227,8 @@ softpipe_get_tex_surface(struct pipe_screen *screen, if (ps->usage & (PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_GPU_WRITE)) { /* Mark the surface as dirty. The tile cache will look for this. */ - spt->modified = TRUE; + spt->timestamp++; + softpipe_screen(screen)->timestamp++; } ps->face = face; diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index 893aa7d11d..42df722a2d 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -48,7 +48,7 @@ struct softpipe_texture */ struct pipe_buffer *buffer; - boolean modified; + unsigned timestamp; }; struct softpipe_transfer diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 306284808c..79b1e036be 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -54,7 +54,10 @@ struct softpipe_tile_cache struct pipe_surface *surface; /**< the surface we're caching */ struct pipe_transfer *transfer; void *transfer_map; + struct pipe_texture *texture; /**< if caching a texture */ + unsigned timestamp; + struct softpipe_cached_tile entries[NUM_ENTRIES]; uint clear_flags[(MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32]; float clear_color[4]; /**< for color bufs */ @@ -231,6 +234,23 @@ sp_tile_cache_unmap_transfers(struct softpipe_tile_cache *tc) } } +void +sp_tile_cache_validate_texture(struct softpipe_tile_cache *tc) +{ + if (tc->texture) { + struct softpipe_texture *spt = softpipe_texture(tc->texture); + if (spt->timestamp != tc->timestamp) { + /* texture was modified, invalidate all cached tiles */ + uint i; + _debug_printf("INV %d %d\n", tc->timestamp, spt->timestamp); + for (i = 0; i < NUM_ENTRIES; i++) { + tc->entries[i].x = -3; + } + + tc->timestamp = spt->timestamp; + } + } +} /** * Specify the texture to cache. @@ -243,27 +263,29 @@ sp_tile_cache_set_texture(struct softpipe_tile_cache *tc, assert(!tc->transfer); - pipe_texture_reference(&tc->texture, texture); + if (tc->texture != texture) { + pipe_texture_reference(&tc->texture, texture); - if (tc->tex_trans) { - struct pipe_screen *screen = tc->tex_trans->texture->screen; + if (tc->tex_trans) { + struct pipe_screen *screen = tc->tex_trans->texture->screen; + + if (tc->tex_trans_map) { + screen->transfer_unmap(screen, tc->tex_trans); + tc->tex_trans_map = NULL; + } - if (tc->tex_trans_map) { - screen->transfer_unmap(screen, tc->tex_trans); - tc->tex_trans_map = NULL; + screen->tex_transfer_destroy(tc->tex_trans); + tc->tex_trans = NULL; } - screen->tex_transfer_destroy(tc->tex_trans); - tc->tex_trans = NULL; - } + /* mark as entries as invalid/empty */ + /* XXX we should try to avoid this when the teximage hasn't changed */ + for (i = 0; i < NUM_ENTRIES; i++) { + tc->entries[i].x = -1; + } - /* mark as entries as invalid/empty */ - /* XXX we should try to avoid this when the teximage hasn't changed */ - for (i = 0; i < NUM_ENTRIES; i++) { - tc->entries[i].x = -1; + tc->tex_face = -1; /* any invalid value here */ } - - tc->tex_face = -1; /* any invalid value here */ } @@ -443,7 +465,7 @@ sp_get_cached_tile(struct softpipe_tile_cache *tc, int x, int y) if (tile_x != tile->x || tile_y != tile->y) { - if (tile->x != -1) { + if (tile->x >= 0) { /* put dirty tile back in framebuffer */ if (tc->depth_stencil) { pipe_put_tile_raw(pt, @@ -522,30 +544,24 @@ sp_get_cached_tile_tex(struct softpipe_tile_cache *tc, face, level); struct softpipe_cached_tile *tile = tc->entries + pos; - if (tc->texture) { - struct softpipe_texture *spt = softpipe_texture(tc->texture); - if (spt->modified) { - /* texture was modified, invalidate all cached tiles */ - uint p; - for (p = 0; p < NUM_ENTRIES; p++) { - tile = tc->entries + p; - tile->x = -1; - } - spt->modified = FALSE; - } - } - if (tile_x != tile->x || tile_y != tile->y || z != tile->z || face != tile->face || level != tile->level) { - /* cache miss */ + /* cache miss. Most misses are because we've invaldiated the + * texture cache previously -- most commonly on binding a new + * texture. Currently we effectively flush the cache on texture + * bind. + */ #if 0 - printf("miss at %u x=%d y=%d z=%d face=%d level=%d\n", pos, - x/TILE_SIZE, y/TILE_SIZE, z, face, level); + _debug_printf("miss at %u: x=%d y=%d z=%d face=%d level=%d\n" + " tile %u: x=%d y=%d z=%d face=%d level=%d\n", + pos, x/TILE_SIZE, y/TILE_SIZE, z, face, level, + pos, tile->x, tile->y, tile->z, tile->face, tile->level); #endif + /* check if we need to get a new transfer */ if (!tc->tex_trans || tc->tex_face != face || diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index 639cde6705..0d165b4ad7 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -83,6 +83,9 @@ extern void sp_tile_cache_set_texture(struct softpipe_tile_cache *tc, struct pipe_texture *texture); +void +sp_tile_cache_validate_texture(struct softpipe_tile_cache *tc); + extern void sp_flush_tile_cache(struct softpipe_tile_cache *tc); -- cgit v1.2.3 From 73e7356385a703c214b35fbb29aaf3108764f033 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 17 Jul 2009 10:47:32 +0100 Subject: softpipe: simplify flush_spans No loss of performance, but simpler code. --- src/gallium/drivers/softpipe/sp_setup.c | 72 +++++++++------------------------ 1 file changed, 19 insertions(+), 53 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index de3ae3c369..f29fcbd5a2 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -428,65 +428,31 @@ static void flush_spans( struct setup_context *setup ) int minleft, maxright; int x; - switch (setup->span.y_flags) { - case 0x3: - /* both odd and even lines written (both quad rows) */ - minleft = block(MIN2(xleft0, xleft1)); - maxright = block(MAX2(xright0, xright1)); - for (x = minleft; x <= maxright; x += 2) { - /* determine which of the four pixels is inside the span bounds */ - uint mask = 0x0; - if (x >= xleft0 && x < xright0) - mask |= MASK_TOP_LEFT; - if (x >= xleft1 && x < xright1) - mask |= MASK_BOTTOM_LEFT; - if (x+1 >= xleft0 && x+1 < xright0) - mask |= MASK_TOP_RIGHT; - if (x+1 >= xleft1 && x+1 < xright1) - mask |= MASK_BOTTOM_RIGHT; - if (mask) - EMIT_QUAD( setup, x, setup->span.y, mask ); - } - break; - - case 0x1: - /* only even line written (quad top row) */ - minleft = block(xleft0); - maxright = block(xright0); - for (x = minleft; x <= maxright; x += 2) { - uint mask = 0x0; - if (x >= xleft0 && x < xright0) - mask |= MASK_TOP_LEFT; - if (x+1 >= xleft0 && x+1 < xright0) - mask |= MASK_TOP_RIGHT; - if (mask) - EMIT_QUAD( setup, x, setup->span.y, mask ); - } - break; - - case 0x2: - /* only odd line written (quad bottom row) */ - minleft = block(xleft1); - maxright = block(xright1); - for (x = minleft; x <= maxright; x += 2) { - uint mask = 0x0; - if (x >= xleft1 && x < xright1) - mask |= MASK_BOTTOM_LEFT; - if (x+1 >= xleft1 && x+1 < xright1) - mask |= MASK_BOTTOM_RIGHT; - if (mask) - EMIT_QUAD( setup, x, setup->span.y, mask ); - } - break; - - default: - return; + minleft = block(MIN2(xleft0, xleft1)); + maxright = block(MAX2(xright0, xright1)); + + for (x = minleft; x <= maxright; x += 2) { + /* determine which of the four pixels is inside the span bounds */ + uint mask = 0x0; + if (x >= xleft0 && x < xright0) + mask |= MASK_TOP_LEFT; + if (x >= xleft1 && x < xright1) + mask |= MASK_BOTTOM_LEFT; + if (x+1 >= xleft0 && x+1 < xright0) + mask |= MASK_TOP_RIGHT; + if (x+1 >= xleft1 && x+1 < xright1) + mask |= MASK_BOTTOM_RIGHT; + if (mask) + EMIT_QUAD( setup, x, setup->span.y, mask ); } + setup->span.y = 0; setup->span.y_flags = 0; setup->span.right[0] = 0; setup->span.right[1] = 0; + setup->span.left[0] = 1; /* greater than right[0] */ + setup->span.left[1] = 1; /* greater than right[1] */ } -- cgit v1.2.3 From 0ed99f45529178c77e47838f226231ea1bc9b918 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 17 Jul 2009 12:03:51 +0100 Subject: softpipe: use bitwise logic to setup quad masks in sp_setup --- src/gallium/drivers/softpipe/sp_setup.c | 65 ++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 25 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index f29fcbd5a2..8ac22bd302 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -388,19 +388,19 @@ emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job ) emit_quad( setup, &quad, thread ); } -#define EMIT_QUAD(setup,x,y,mask) do {\ +#define EMIT_QUAD(setup,x,y,qmask) do {\ setup->quad.input.x0 = x;\ setup->quad.input.y0 = y;\ - setup->quad.inout.mask = mask;\ + setup->quad.inout.mask = qmask;\ add_quad_job( &setup->que, &setup->quad, emit_quad_job );\ } while (0) #else -#define EMIT_QUAD(setup,x,y,mask) do {\ +#define EMIT_QUAD(setup,x,y,qmask) do {\ setup->quad.input.x0 = x;\ setup->quad.input.y0 = y;\ - setup->quad.inout.mask = mask;\ + setup->quad.inout.mask = qmask;\ emit_quad( setup, &setup->quad, 0 );\ } while (0) @@ -421,29 +421,42 @@ static INLINE int block( int x ) */ static void flush_spans( struct setup_context *setup ) { + const int step = 30; const int xleft0 = setup->span.left[0]; const int xleft1 = setup->span.left[1]; const int xright0 = setup->span.right[0]; const int xright1 = setup->span.right[1]; - int minleft, maxright; + + int minleft = block(MIN2(xleft0, xleft1)); + int maxright = MAX2(xright0, xright1); int x; - minleft = block(MIN2(xleft0, xleft1)); - maxright = block(MAX2(xright0, xright1)); - - for (x = minleft; x <= maxright; x += 2) { - /* determine which of the four pixels is inside the span bounds */ - uint mask = 0x0; - if (x >= xleft0 && x < xright0) - mask |= MASK_TOP_LEFT; - if (x >= xleft1 && x < xright1) - mask |= MASK_BOTTOM_LEFT; - if (x+1 >= xleft0 && x+1 < xright0) - mask |= MASK_TOP_RIGHT; - if (x+1 >= xleft1 && x+1 < xright1) - mask |= MASK_BOTTOM_RIGHT; - if (mask) - EMIT_QUAD( setup, x, setup->span.y, mask ); + for (x = minleft; x < maxright; x += step) { + unsigned skip_left0 = CLAMP(xleft0 - x, 0, step); + unsigned skip_left1 = CLAMP(xleft1 - x, 0, step); + unsigned skip_right0 = CLAMP(x + step - xright0, 0, step); + unsigned skip_right1 = CLAMP(x + step - xright1, 0, step); + unsigned lx = x; + + unsigned skipmask_left0 = (1U << skip_left0) - 1U; + unsigned skipmask_left1 = (1U << skip_left1) - 1U; + + /* These calculations fail when step == 32 and skip_right == 0. + */ + unsigned skipmask_right0 = ~0U << (unsigned)(step - skip_right0); + unsigned skipmask_right1 = ~0U << (unsigned)(step - skip_right1); + + unsigned mask0 = ~skipmask_left0 & ~skipmask_right0; + unsigned mask1 = ~skipmask_left1 & ~skipmask_right1; + + while (mask0 | mask1) { + unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2); + if (quadmask) + EMIT_QUAD( setup, lx, setup->span.y, quadmask ); + mask0 >>= 2; + mask1 >>= 2; + lx += 2; + } } @@ -451,8 +464,8 @@ static void flush_spans( struct setup_context *setup ) setup->span.y_flags = 0; setup->span.right[0] = 0; setup->span.right[1] = 0; - setup->span.left[0] = 1; /* greater than right[0] */ - setup->span.left[1] = 1; /* greater than right[1] */ + setup->span.left[0] = 1000000; /* greater than right[0] */ + setup->span.left[1] = 1000000; /* greater than right[1] */ } @@ -810,11 +823,10 @@ static void subtriangle( struct setup_context *setup, /* clip top/bottom */ start_y = sy; - finish_y = sy + lines; - if (start_y < miny) start_y = miny; + finish_y = sy + lines; if (finish_y > maxy) finish_y = maxy; @@ -1495,6 +1507,9 @@ struct setup_context *setup_create_context( struct softpipe_context *softpipe ) setup->quad.coef = setup->coef; setup->quad.posCoef = &setup->posCoef; + setup->span.left[0] = 1000000; /* greater than right[0] */ + setup->span.left[1] = 1000000; /* greater than right[1] */ + #if SP_NUM_QUAD_THREADS > 1 setup->que.first = 0; setup->que.last = 0; -- cgit v1.2.3 From 13e2d35764e0c8de3356ee663885568fc00424f0 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 17 Jul 2009 12:12:04 +0100 Subject: softpipe: remove unused vars in sp_setup.c --- src/gallium/drivers/softpipe/sp_setup.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 8ac22bd302..d05c9ad57c 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -178,8 +178,6 @@ struct setup_context { int left[2]; /**< [0] = row0, [1] = row1 */ int right[2]; int y; - unsigned y_flags; - unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ } span; #if DEBUG_FRAGS @@ -461,7 +459,6 @@ static void flush_spans( struct setup_context *setup ) setup->span.y = 0; - setup->span.y_flags = 0; setup->span.right[0] = 0; setup->span.right[1] = 0; setup->span.left[0] = 1000000; /* greater than right[0] */ @@ -863,7 +860,6 @@ static void subtriangle( struct setup_context *setup, setup->span.left[_y&1] = left; setup->span.right[_y&1] = right; - setup->span.y_flags |= 1<<(_y&1); } } @@ -939,7 +935,6 @@ void setup_tri( struct setup_context *setup, setup->quad.input.prim = QUAD_PRIM_TRI; setup->span.y = 0; - setup->span.y_flags = 0; setup->span.right[0] = 0; setup->span.right[1] = 0; /* setup->span.z_mode = tri_z_mode( setup->ctx ); */ -- cgit v1.2.3 From f911c3b9897b90132c8621a72bfeb824eb3b01e5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 22 Jul 2009 15:08:42 +0100 Subject: softpipe: shortcircuit repeated lookups of the same tile The sp_tile_cache is often called repeatedly to look up the same tile. Add a cache (to the cache) of the single tile most recently retreived and make a quick inline check to see if this matches the subsequent request. Add a tile_address bitfield struct to make this check easier. --- src/gallium/auxiliary/util/u_math.h | 12 +++ src/gallium/drivers/softpipe/sp_tex_sample.c | 12 ++- src/gallium/drivers/softpipe/sp_tile_cache.c | 153 ++++++++++++--------------- src/gallium/drivers/softpipe/sp_tile_cache.h | 88 ++++++++++++++- 4 files changed, 168 insertions(+), 97 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index e5003af01d..d30fa3c2d5 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -366,6 +366,18 @@ unsigned ffs( unsigned u ) #endif +/* Could also binary search for the highest bit. + */ +static INLINE unsigned +util_unsigned_logbase2(unsigned n) +{ + unsigned log2 = 0; + while (n >>= 1) + ++log2; + return log2; +} + + /** * Return float bits. */ diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 3daa88eedd..46c56b0c83 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -680,11 +680,13 @@ get_texel(const struct tgsi_sampler *tgsi_sampler, rgba[3][j] = sampler->border_color[3]; } else { - const int tx = x % TILE_SIZE; - const int ty = y % TILE_SIZE; - const struct softpipe_cached_tile *tile - = sp_get_cached_tile_tex(samp->cache, - x, y, z, face, level); + const unsigned tx = x % TILE_SIZE; + const unsigned ty = y % TILE_SIZE; + const struct softpipe_cached_tile *tile; + + tile = sp_get_cached_tile_tex(samp->cache, + tile_address(x, y, z, face, level)); + rgba[0][j] = tile->data.color[ty][tx][0]; rgba[1][j] = tile->data.color[ty][tx][1]; rgba[2][j] = tile->data.color[ty][tx][2]; diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 79b1e036be..06d9c6a80a 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -40,36 +40,6 @@ #include "sp_texture.h" #include "sp_tile_cache.h" -#define NUM_ENTRIES 50 - - -/** XXX move these */ -#define MAX_WIDTH 2048 -#define MAX_HEIGHT 2048 - - -struct softpipe_tile_cache -{ - struct pipe_screen *screen; - struct pipe_surface *surface; /**< the surface we're caching */ - struct pipe_transfer *transfer; - void *transfer_map; - - struct pipe_texture *texture; /**< if caching a texture */ - unsigned timestamp; - - struct softpipe_cached_tile entries[NUM_ENTRIES]; - uint clear_flags[(MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32]; - float clear_color[4]; /**< for color bufs */ - uint clear_val; /**< for z+stencil, or packed color clear value */ - boolean depth_stencil; /**< Is the surface a depth/stencil format? */ - - struct pipe_transfer *tex_trans; - void *tex_trans_map; - int tex_face, tex_level, tex_z; - - struct softpipe_cached_tile tile; /**< scratch tile for clears */ -}; /** @@ -124,9 +94,9 @@ sp_create_tile_cache( struct pipe_screen *screen ) if (tc) { tc->screen = screen; for (pos = 0; pos < NUM_ENTRIES; pos++) { - tc->entries[pos].x = - tc->entries[pos].y = -1; + tc->entries[pos].addr.bits.invalid = 1; } + tc->last_tile = &tc->entries[0]; /* any tile */ } return tc; } @@ -244,7 +214,7 @@ sp_tile_cache_validate_texture(struct softpipe_tile_cache *tc) uint i; _debug_printf("INV %d %d\n", tc->timestamp, spt->timestamp); for (i = 0; i < NUM_ENTRIES; i++) { - tc->entries[i].x = -3; + tc->entries[i].addr.bits.invalid = 1; } tc->timestamp = spt->timestamp; @@ -281,7 +251,7 @@ sp_tile_cache_set_texture(struct softpipe_tile_cache *tc, /* mark as entries as invalid/empty */ /* XXX we should try to avoid this when the teximage hasn't changed */ for (i = 0; i < NUM_ENTRIES; i++) { - tc->entries[i].x = -1; + tc->entries[i].addr.bits.invalid = 1; } tc->tex_face = -1; /* any invalid value here */ @@ -411,18 +381,22 @@ sp_flush_tile_cache(struct softpipe_tile_cache *tc) /* caching a drawing transfer */ for (pos = 0; pos < NUM_ENTRIES; pos++) { struct softpipe_cached_tile *tile = tc->entries + pos; - if (tile->x >= 0) { + if (!tile->addr.bits.invalid) { if (tc->depth_stencil) { pipe_put_tile_raw(pt, - tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->addr.bits.x * TILE_SIZE, + tile->addr.bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { pipe_put_tile_rgba(pt, - tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->addr.bits.x * TILE_SIZE, + tile->addr.bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, (float *) tile->data.color); } - tile->x = tile->y = -1; /* mark as empty */ + tile->addr.bits.invalid = 1; /* mark as empty */ inuse++; } } @@ -434,7 +408,7 @@ sp_flush_tile_cache(struct softpipe_tile_cache *tc) else if (tc->texture) { /* caching a texture, mark all entries as empty */ for (pos = 0; pos < NUM_ENTRIES; pos++) { - tc->entries[pos].x = -1; + tc->entries[pos].addr.bits.invalid = 1; } tc->tex_face = -1; } @@ -453,34 +427,34 @@ struct softpipe_cached_tile * sp_get_cached_tile(struct softpipe_tile_cache *tc, int x, int y) { struct pipe_transfer *pt = tc->transfer; - + /* tile pos in framebuffer: */ - const int tile_x = x & ~(TILE_SIZE - 1); - const int tile_y = y & ~(TILE_SIZE - 1); - + union tile_address addr = tile_address( x, y, 0, 0, 0 ); /* cache pos/entry: */ const int pos = CACHE_POS(x, y); struct softpipe_cached_tile *tile = tc->entries + pos; - if (tile_x != tile->x || - tile_y != tile->y) { + if (addr.value != tile->addr.value) { - if (tile->x >= 0) { + if (tile->addr.bits.invalid == 0) { /* put dirty tile back in framebuffer */ if (tc->depth_stencil) { pipe_put_tile_raw(pt, - tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->addr.bits.x * TILE_SIZE, + tile->addr.bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { pipe_put_tile_rgba(pt, - tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->addr.bits.x * TILE_SIZE, + tile->addr.bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, (float *) tile->data.color); } } - tile->x = tile_x; - tile->y = tile_y; + tile->addr = addr; if (is_clear_flag_set(tc->clear_flags, x, y)) { /* don't get tile from framebuffer, just clear it */ @@ -496,12 +470,16 @@ sp_get_cached_tile(struct softpipe_tile_cache *tc, int x, int y) /* get new tile data from transfer */ if (tc->depth_stencil) { pipe_get_tile_raw(pt, - tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->addr.bits.x * TILE_SIZE, + tile->addr.bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { pipe_get_tile_rgba(pt, - tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->addr.bits.x * TILE_SIZE, + tile->addr.bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, (float *) tile->data.color); } } @@ -519,36 +497,31 @@ sp_get_cached_tile(struct softpipe_tile_cache *tc, int x, int y) * XXX There's probably lots of ways in which we can improve this. */ static INLINE uint -tex_cache_pos(int x, int y, int z, int face, int level) +tex_cache_pos( union tile_address addr ) { - uint entry = x + y * 9 + z * 3 + face + level * 7; + uint entry = (addr.bits.x + + addr.bits.y * 9 + + addr.bits.z * 3 + + addr.bits.face + + addr.bits.level * 7); + return entry % NUM_ENTRIES; } - /** * Similar to sp_get_cached_tile() but for textures. * Tiles are read-only and indexed with more params. */ const struct softpipe_cached_tile * -sp_get_cached_tile_tex(struct softpipe_tile_cache *tc, - int x, int y, int z, - int face, int level) +sp_find_cached_tile_tex(struct softpipe_tile_cache *tc, + union tile_address addr ) { struct pipe_screen *screen = tc->screen; - /* tile pos in framebuffer: */ - const int tile_x = x & ~(TILE_SIZE - 1); - const int tile_y = y & ~(TILE_SIZE - 1); - /* cache pos/entry: */ - const uint pos = tex_cache_pos(x / TILE_SIZE, y / TILE_SIZE, z, - face, level); - struct softpipe_cached_tile *tile = tc->entries + pos; + struct softpipe_cached_tile *tile; + + tile = tc->entries + tex_cache_pos( addr ); - if (tile_x != tile->x || - tile_y != tile->y || - z != tile->z || - face != tile->face || - level != tile->level) { + if (addr.value != tile->addr.value) { /* cache miss. Most misses are because we've invaldiated the * texture cache previously -- most commonly on binding a new @@ -559,14 +532,14 @@ sp_get_cached_tile_tex(struct softpipe_tile_cache *tc, _debug_printf("miss at %u: x=%d y=%d z=%d face=%d level=%d\n" " tile %u: x=%d y=%d z=%d face=%d level=%d\n", pos, x/TILE_SIZE, y/TILE_SIZE, z, face, level, - pos, tile->x, tile->y, tile->z, tile->face, tile->level); + pos, tile->addr.bits.x, tile->addr.bits.y, tile->z, tile->face, tile->level); #endif /* check if we need to get a new transfer */ if (!tc->tex_trans || - tc->tex_face != face || - tc->tex_level != level || - tc->tex_z != z) { + tc->tex_face != addr.bits.face || + tc->tex_level != addr.bits.level || + tc->tex_z != addr.bits.z) { /* get new transfer (view into texture) */ if (tc->tex_trans) { @@ -579,28 +552,32 @@ sp_get_cached_tile_tex(struct softpipe_tile_cache *tc, tc->tex_trans = NULL; } - tc->tex_trans = screen->get_tex_transfer(screen, tc->texture, face, level, z, - PIPE_TRANSFER_READ, 0, 0, - tc->texture->width[level], - tc->texture->height[level]); + tc->tex_trans = + screen->get_tex_transfer(screen, tc->texture, + addr.bits.face, + addr.bits.level, + addr.bits.z, + PIPE_TRANSFER_READ, 0, 0, + tc->texture->width[addr.bits.level], + tc->texture->height[addr.bits.level]); + tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans); - tc->tex_face = face; - tc->tex_level = level; - tc->tex_z = z; + tc->tex_face = addr.bits.face; + tc->tex_level = addr.bits.level; + tc->tex_z = addr.bits.z; } /* get tile from the transfer (view into texture) */ pipe_get_tile_rgba(tc->tex_trans, - tile_x, tile_y, TILE_SIZE, TILE_SIZE, + addr.bits.x * TILE_SIZE, + addr.bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, (float *) tile->data.color); - tile->x = tile_x; - tile->y = tile_y; - tile->z = z; - tile->face = face; - tile->level = level; + tile->addr = addr; } + tc->last_tile = tile; return tile; } @@ -633,6 +610,6 @@ sp_tile_cache_clear(struct softpipe_tile_cache *tc, const float *rgba, for (pos = 0; pos < NUM_ENTRIES; pos++) { struct softpipe_cached_tile *tile = tc->entries + pos; - tile->x = tile->y = -1; + tile->addr.bits.invalid = 1; } } diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index 0d165b4ad7..3017fcbebc 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -44,11 +44,25 @@ struct softpipe_tile_cache; #define TILE_SIZE 64 +/* If we need to support > 4096, just expand this to be a 64 bit + * union, or consider tiling in Z as well. + */ +union tile_address { + struct { + unsigned x:6; /* 4096 / TILE_SIZE */ + unsigned y:6; /* 4096 / TILE_SIZE */ + unsigned z:12; /* 4096 -- z not tiled */ + unsigned face:3; + unsigned level:4; + unsigned invalid:1; + } bits; + unsigned value; +}; + struct softpipe_cached_tile { - int x, y; /**< pos of tile in window coords */ - int z, face, level; /**< Extra texture indexes */ + union tile_address addr; union { float color[TILE_SIZE][TILE_SIZE][4]; uint color32[TILE_SIZE][TILE_SIZE]; @@ -59,6 +73,39 @@ struct softpipe_cached_tile } data; }; +#define NUM_ENTRIES 50 + + +/** XXX move these */ +#define MAX_WIDTH 2048 +#define MAX_HEIGHT 2048 + + +struct softpipe_tile_cache +{ + struct pipe_screen *screen; + struct pipe_surface *surface; /**< the surface we're caching */ + struct pipe_transfer *transfer; + void *transfer_map; + + struct pipe_texture *texture; /**< if caching a texture */ + unsigned timestamp; + + struct softpipe_cached_tile entries[NUM_ENTRIES]; + uint clear_flags[(MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32]; + float clear_color[4]; /**< for color bufs */ + uint clear_val; /**< for z+stencil, or packed color clear value */ + boolean depth_stencil; /**< Is the surface a depth/stencil format? */ + + struct pipe_transfer *tex_trans; + void *tex_trans_map; + int tex_face, tex_level, tex_z; + + struct softpipe_cached_tile tile; /**< scratch tile for clears */ + + struct softpipe_cached_tile *last_tile; /**< most recently retrieved tile */ +}; + extern struct softpipe_tile_cache * sp_create_tile_cache( struct pipe_screen *screen ); @@ -97,8 +144,41 @@ extern struct softpipe_cached_tile * sp_get_cached_tile(struct softpipe_tile_cache *tc, int x, int y); extern const struct softpipe_cached_tile * -sp_get_cached_tile_tex(struct softpipe_tile_cache *tc, int x, int y, int z, - int face, int level); +sp_find_cached_tile_tex(struct softpipe_tile_cache *tc, + union tile_address addr ); + +static INLINE const union tile_address +tile_address( unsigned x, + unsigned y, + unsigned z, + unsigned face, + unsigned level ) +{ + union tile_address addr; + + addr.value = 0; + addr.bits.x = x / TILE_SIZE; + addr.bits.y = y / TILE_SIZE; + addr.bits.z = z; + addr.bits.face = face; + addr.bits.level = level; + + return addr; +} + +/* Quickly retrieve tile if it matches last lookup. + */ +static INLINE const struct softpipe_cached_tile * +sp_get_cached_tile_tex(struct softpipe_tile_cache *tc, + union tile_address addr ) +{ + if (tc->last_tile->addr.value == addr.value) + return tc->last_tile; + + return sp_find_cached_tile_tex( tc, addr ); +} + + #endif /* SP_TILE_CACHE_H */ -- cgit v1.2.3 From 19097907ef042b97bbbda39b34bf3212f4cf154a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 22 Jul 2009 15:36:25 +0100 Subject: softpipe: also shortcircuit non-texture tile lookups --- src/gallium/drivers/softpipe/sp_tile_cache.c | 33 ++++++++++++++-------------- src/gallium/drivers/softpipe/sp_tile_cache.h | 16 +++++++++++++- 2 files changed, 31 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 06d9c6a80a..77d02fa3e7 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -49,7 +49,7 @@ * a LRU replacement policy. */ #define CACHE_POS(x, y) \ - (((x) / TILE_SIZE + ((y) / TILE_SIZE) * 5) % NUM_ENTRIES) + (((x) + (y) * 5) % NUM_ENTRIES) @@ -57,12 +57,10 @@ * Is the tile at (x,y) in cleared state? */ static INLINE uint -is_clear_flag_set(const uint *bitvec, int x, int y) +is_clear_flag_set(const uint *bitvec, union tile_address addr) { int pos, bit; - x /= TILE_SIZE; - y /= TILE_SIZE; - pos = y * (MAX_WIDTH / TILE_SIZE) + x; + pos = addr.bits.y * (MAX_WIDTH / TILE_SIZE) + addr.bits.x; assert(pos / 32 < (MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32); bit = bitvec[pos / 32] & (1 << (pos & 31)); return bit; @@ -73,12 +71,10 @@ is_clear_flag_set(const uint *bitvec, int x, int y) * Mark the tile at (x,y) as not cleared. */ static INLINE void -clear_clear_flag(uint *bitvec, int x, int y) +clear_clear_flag(uint *bitvec, union tile_address addr) { int pos; - x /= TILE_SIZE; - y /= TILE_SIZE; - pos = y * (MAX_WIDTH / TILE_SIZE) + x; + pos = addr.bits.y * (MAX_WIDTH / TILE_SIZE) + addr.bits.x; assert(pos / 32 < (MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32); bitvec[pos / 32] &= ~(1 << (pos & 31)); } @@ -349,13 +345,15 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc) /* push the tile to all positions marked as clear */ for (y = 0; y < h; y += TILE_SIZE) { for (x = 0; x < w; x += TILE_SIZE) { - if (is_clear_flag_set(tc->clear_flags, x, y)) { + union tile_address addr = tile_address(x, y, 0, 0, 0); + + if (is_clear_flag_set(tc->clear_flags, addr)) { pipe_put_tile_raw(pt, x, y, TILE_SIZE, TILE_SIZE, tc->tile.data.color32, 0/*STRIDE*/); /* do this? */ - clear_clear_flag(tc->clear_flags, x, y); + clear_clear_flag(tc->clear_flags, addr); numCleared++; } @@ -424,14 +422,14 @@ sp_flush_tile_cache(struct softpipe_tile_cache *tc) * \param x, y position of tile, in pixels */ struct softpipe_cached_tile * -sp_get_cached_tile(struct softpipe_tile_cache *tc, int x, int y) +sp_find_cached_tile(struct softpipe_tile_cache *tc, + union tile_address addr ) { struct pipe_transfer *pt = tc->transfer; - /* tile pos in framebuffer: */ - union tile_address addr = tile_address( x, y, 0, 0, 0 ); /* cache pos/entry: */ - const int pos = CACHE_POS(x, y); + const int pos = CACHE_POS(addr.bits.x, + addr.bits.y); struct softpipe_cached_tile *tile = tc->entries + pos; if (addr.value != tile->addr.value) { @@ -456,7 +454,7 @@ sp_get_cached_tile(struct softpipe_tile_cache *tc, int x, int y) tile->addr = addr; - if (is_clear_flag_set(tc->clear_flags, x, y)) { + if (is_clear_flag_set(tc->clear_flags, addr)) { /* don't get tile from framebuffer, just clear it */ if (tc->depth_stencil) { clear_tile(tile, pt->format, tc->clear_val); @@ -464,7 +462,7 @@ sp_get_cached_tile(struct softpipe_tile_cache *tc, int x, int y) else { clear_tile_rgba(tile, pt->format, tc->clear_color); } - clear_clear_flag(tc->clear_flags, x, y); + clear_clear_flag(tc->clear_flags, addr); } else { /* get new tile data from transfer */ @@ -485,6 +483,7 @@ sp_get_cached_tile(struct softpipe_tile_cache *tc, int x, int y) } } + tc->last_tile = tile; return tile; } diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index 3017fcbebc..ac2aae5875 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -141,7 +141,8 @@ sp_tile_cache_clear(struct softpipe_tile_cache *tc, const float *rgba, uint clearValue); extern struct softpipe_cached_tile * -sp_get_cached_tile(struct softpipe_tile_cache *tc, int x, int y); +sp_find_cached_tile(struct softpipe_tile_cache *tc, + union tile_address addr ); extern const struct softpipe_cached_tile * sp_find_cached_tile_tex(struct softpipe_tile_cache *tc, @@ -179,6 +180,19 @@ sp_get_cached_tile_tex(struct softpipe_tile_cache *tc, } +static INLINE struct softpipe_cached_tile * +sp_get_cached_tile(struct softpipe_tile_cache *tc, + int x, int y ) +{ + union tile_address addr = tile_address( x, y, 0, 0, 0 ); + + if (tc->last_tile->addr.value == addr.value) + return tc->last_tile; + + return sp_find_cached_tile( tc, addr ); +} + + #endif /* SP_TILE_CACHE_H */ -- cgit v1.2.3 From 93a026d4baf90266f4c9cc48d039b4d65ce1ab6d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 23 Jul 2009 11:14:39 +0100 Subject: softpipe: avoid flushing depth buffer cache on swapbuffers There's no need to push out depth buffer contents on swapbuffers. Note that this change doesn't throw away depth buffer changes, it simply holds them in the cache over calls to swapbuffers. The hope is that swapbuffers will be followed by a clear() which means in that case we won't have to write the changes out. --- src/gallium/drivers/softpipe/sp_context.c | 5 +---- src/gallium/drivers/softpipe/sp_flush.c | 20 +++++++++++++++----- 2 files changed, 16 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index f085889d3a..1b2c6aded0 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -72,13 +72,10 @@ softpipe_unmap_transfers(struct softpipe_context *sp) { uint i; - for (i = 0; i < sp->framebuffer.nr_cbufs; i++) - sp_flush_tile_cache(sp->cbuf_cache[i]); - sp_flush_tile_cache(sp->zsbuf_cache); - for (i = 0; i < sp->framebuffer.nr_cbufs; i++) { sp_tile_cache_unmap_transfers(sp->cbuf_cache[i]); } + sp_tile_cache_unmap_transfers(sp->zsbuf_cache); } diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index 732277a2c5..679ad0cd3d 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -56,14 +56,16 @@ softpipe_flush( struct pipe_context *pipe, } } - if (flags & PIPE_FLUSH_RENDER_CACHE) { + if (flags & PIPE_FLUSH_SWAPBUFFERS) { + /* If this is a swapbuffers, just flush color buffers. + * + * The zbuffer changes are not discarded, but held in the cache + * in the hope that a later clear will wipe them out. + */ for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) if (softpipe->cbuf_cache[i]) sp_flush_tile_cache(softpipe->cbuf_cache[i]); - if (softpipe->zsbuf_cache) - sp_flush_tile_cache(softpipe->zsbuf_cache); - /* Need this call for hardware buffers before swapbuffers. * * there should probably be another/different flush-type function @@ -71,7 +73,15 @@ softpipe_flush( struct pipe_context *pipe, * to unmap surfaces when flushing. */ softpipe_unmap_transfers(softpipe); - + } + else if (flags & PIPE_FLUSH_RENDER_CACHE) { + for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) + if (softpipe->cbuf_cache[i]) + sp_flush_tile_cache(softpipe->cbuf_cache[i]); + + if (softpipe->zsbuf_cache) + sp_flush_tile_cache(softpipe->zsbuf_cache); + softpipe->dirty_render_cache = FALSE; } -- cgit v1.2.3 From 6153a1c28f118be1a74ffee0e19c16fb83b5cab7 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 24 Jul 2009 16:12:48 +0100 Subject: softpipe: rip out old mulithread support --- src/gallium/drivers/softpipe/sp_context.c | 48 +++--- src/gallium/drivers/softpipe/sp_context.h | 13 +- src/gallium/drivers/softpipe/sp_quad_pipe.c | 48 ++---- src/gallium/drivers/softpipe/sp_setup.c | 253 ++-------------------------- 4 files changed, 57 insertions(+), 305 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 1b2c6aded0..4418ef0ff4 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -88,19 +88,17 @@ static void softpipe_destroy( struct pipe_context *pipe ) if (softpipe->draw) draw_destroy( softpipe->draw ); - for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { - softpipe->quad[i].polygon_stipple->destroy( softpipe->quad[i].polygon_stipple ); - softpipe->quad[i].earlyz->destroy( softpipe->quad[i].earlyz ); - softpipe->quad[i].shade->destroy( softpipe->quad[i].shade ); - softpipe->quad[i].alpha_test->destroy( softpipe->quad[i].alpha_test ); - softpipe->quad[i].depth_test->destroy( softpipe->quad[i].depth_test ); - softpipe->quad[i].stencil_test->destroy( softpipe->quad[i].stencil_test ); - softpipe->quad[i].occlusion->destroy( softpipe->quad[i].occlusion ); - softpipe->quad[i].coverage->destroy( softpipe->quad[i].coverage ); - softpipe->quad[i].blend->destroy( softpipe->quad[i].blend ); - softpipe->quad[i].colormask->destroy( softpipe->quad[i].colormask ); - softpipe->quad[i].output->destroy( softpipe->quad[i].output ); - } + softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple ); + softpipe->quad.earlyz->destroy( softpipe->quad.earlyz ); + softpipe->quad.shade->destroy( softpipe->quad.shade ); + softpipe->quad.alpha_test->destroy( softpipe->quad.alpha_test ); + softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); + softpipe->quad.stencil_test->destroy( softpipe->quad.stencil_test ); + softpipe->quad.occlusion->destroy( softpipe->quad.occlusion ); + softpipe->quad.coverage->destroy( softpipe->quad.coverage ); + softpipe->quad.blend->destroy( softpipe->quad.blend ); + softpipe->quad.colormask->destroy( softpipe->quad.colormask ); + softpipe->quad.output->destroy( softpipe->quad.output ); for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) sp_destroy_tile_cache(softpipe->cbuf_cache[i]); @@ -234,19 +232,17 @@ softpipe_create( struct pipe_screen *screen ) /* setup quad rendering stages */ - for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { - softpipe->quad[i].polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); - softpipe->quad[i].earlyz = sp_quad_earlyz_stage(softpipe); - softpipe->quad[i].shade = sp_quad_shade_stage(softpipe); - softpipe->quad[i].alpha_test = sp_quad_alpha_test_stage(softpipe); - softpipe->quad[i].depth_test = sp_quad_depth_test_stage(softpipe); - softpipe->quad[i].stencil_test = sp_quad_stencil_test_stage(softpipe); - softpipe->quad[i].occlusion = sp_quad_occlusion_stage(softpipe); - softpipe->quad[i].coverage = sp_quad_coverage_stage(softpipe); - softpipe->quad[i].blend = sp_quad_blend_stage(softpipe); - softpipe->quad[i].colormask = sp_quad_colormask_stage(softpipe); - softpipe->quad[i].output = sp_quad_output_stage(softpipe); - } + softpipe->quad.polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); + softpipe->quad.earlyz = sp_quad_earlyz_stage(softpipe); + softpipe->quad.shade = sp_quad_shade_stage(softpipe); + softpipe->quad.alpha_test = sp_quad_alpha_test_stage(softpipe); + softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe); + softpipe->quad.stencil_test = sp_quad_stencil_test_stage(softpipe); + softpipe->quad.occlusion = sp_quad_occlusion_stage(softpipe); + softpipe->quad.coverage = sp_quad_coverage_stage(softpipe); + softpipe->quad.blend = sp_quad_blend_stage(softpipe); + softpipe->quad.colormask = sp_quad_colormask_stage(softpipe); + softpipe->quad.output = sp_quad_output_stage(softpipe); /* vertex shader samplers */ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index f66f0b7849..414d903a37 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -39,17 +39,6 @@ #include "sp_tex_sample.h" -/** - * This is a temporary variable for testing draw-stage polygon stipple. - * If zero, do stipple in sp_quad_stipple.c - */ -#define USE_DRAW_STAGE_PSTIPPLE 1 - -/* Number of threads working on individual quads. - * Setting to 1 disables this feature. - */ -#define SP_NUM_QUAD_THREADS 1 - struct softpipe_vbuf_render; struct draw_context; struct draw_stage; @@ -129,7 +118,7 @@ struct softpipe_context { struct quad_stage *output; struct quad_stage *first; /**< points to one of the above stages */ - } quad[SP_NUM_QUAD_THREADS]; + } quad; /** TGSI exec things */ struct { diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c index b5f69b7426..3a3359d303 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.c +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c @@ -31,35 +31,29 @@ #include "pipe/p_shader_tokens.h" static void -sp_push_quad_first( - struct softpipe_context *sp, - struct quad_stage *quad, - uint i ) +sp_push_quad_first( struct softpipe_context *sp, + struct quad_stage *quad ) { - quad->next = sp->quad[i].first; - sp->quad[i].first = quad; + quad->next = sp->quad.first; + sp->quad.first = quad; } static void -sp_build_depth_stencil( - struct softpipe_context *sp, - uint i ) +sp_build_depth_stencil( struct softpipe_context *sp ) { if (sp->depth_stencil->stencil[0].enabled || sp->depth_stencil->stencil[1].enabled) { - sp_push_quad_first( sp, sp->quad[i].stencil_test, i ); + sp_push_quad_first( sp, sp->quad.stencil_test ); } else if (sp->depth_stencil->depth.enabled && sp->framebuffer.zsbuf) { - sp_push_quad_first( sp, sp->quad[i].depth_test, i ); + sp_push_quad_first( sp, sp->quad.depth_test ); } } void sp_build_quad_pipeline(struct softpipe_context *sp) { - uint i; - boolean early_depth_test = sp->depth_stencil->depth.enabled && sp->framebuffer.zsbuf && @@ -68,51 +62,43 @@ sp_build_quad_pipeline(struct softpipe_context *sp) !sp->fs->info.writes_z; /* build up the pipeline in reverse order... */ - for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { - sp->quad[i].first = sp->quad[i].output; + sp->quad.first = sp->quad.output; if (sp->blend->colormask != 0xf) { - sp_push_quad_first( sp, sp->quad[i].colormask, i ); + sp_push_quad_first( sp, sp->quad.colormask ); } if (sp->blend->blend_enable || sp->blend->logicop_enable) { - sp_push_quad_first( sp, sp->quad[i].blend, i ); + sp_push_quad_first( sp, sp->quad.blend ); } if (sp->active_query_count) { - sp_push_quad_first( sp, sp->quad[i].occlusion, i ); + sp_push_quad_first( sp, sp->quad.occlusion ); } if (sp->rasterizer->poly_smooth || sp->rasterizer->line_smooth || sp->rasterizer->point_smooth) { - sp_push_quad_first( sp, sp->quad[i].coverage, i ); + sp_push_quad_first( sp, sp->quad.coverage ); } if (!early_depth_test) { - sp_build_depth_stencil( sp, i ); + sp_build_depth_stencil( sp ); } if (sp->depth_stencil->alpha.enabled) { - sp_push_quad_first( sp, sp->quad[i].alpha_test, i ); + sp_push_quad_first( sp, sp->quad.alpha_test ); } /* XXX always enable shader? */ if (1) { - sp_push_quad_first( sp, sp->quad[i].shade, i ); + sp_push_quad_first( sp, sp->quad.shade ); } if (early_depth_test) { - sp_build_depth_stencil( sp, i ); - sp_push_quad_first( sp, sp->quad[i].earlyz, i ); - } - -#if !USE_DRAW_STAGE_PSTIPPLE - if (sp->rasterizer->poly_stipple_enable) { - sp_push_quad_first( sp, sp->quad[i].polygon_stipple, i ); + sp_build_depth_stencil( sp ); + sp_push_quad_first( sp, sp->quad.earlyz ); } -#endif - } } diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index d05c9ad57c..eaf84ed9de 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -61,87 +61,7 @@ struct edge { int lines; /**< number of lines on this edge */ }; -#if SP_NUM_QUAD_THREADS > 1 -/* Set to 1 if you want other threads to be instantly - * notified of pending jobs. - */ -#define INSTANT_NOTEMPTY_NOTIFY 0 - -struct thread_info -{ - struct setup_context *setup; - uint id; - pipe_thread handle; -}; - -struct quad_job; - -typedef void (* quad_job_routine)( struct setup_context *setup, uint thread, struct quad_job *job ); - -struct quad_job -{ - struct quad_header_input input; - struct quad_header_inout inout; - quad_job_routine routine; -}; - -#define NUM_QUAD_JOBS 64 - -struct quad_job_que -{ - struct quad_job jobs[NUM_QUAD_JOBS]; - uint first; - uint last; - pipe_mutex que_mutex; - pipe_condvar que_notfull_condvar; - pipe_condvar que_notempty_condvar; - uint jobs_added; - uint jobs_done; - pipe_condvar que_done_condvar; -}; - -static void -add_quad_job( struct quad_job_que *que, struct quad_header *quad, quad_job_routine routine ) -{ -#if INSTANT_NOTEMPTY_NOTIFY - boolean empty; -#endif - - /* Wait for empty slot, see if the que is empty. - */ - pipe_mutex_lock( que->que_mutex ); - while ((que->last + 1) % NUM_QUAD_JOBS == que->first) { -#if !INSTANT_NOTEMPTY_NOTIFY - pipe_condvar_broadcast( que->que_notempty_condvar ); -#endif - pipe_condvar_wait( que->que_notfull_condvar, que->que_mutex ); - } -#if INSTANT_NOTEMPTY_NOTIFY - empty = que->last == que->first; -#endif - que->jobs_added++; - pipe_mutex_unlock( que->que_mutex ); - - /* Submit new job. - */ - que->jobs[que->last].input = quad->input; - que->jobs[que->last].inout = quad->inout; - que->jobs[que->last].routine = routine; - que->last = (que->last + 1) % NUM_QUAD_JOBS; - -#if INSTANT_NOTEMPTY_NOTIFY - /* If the que was empty, notify consumers there's a job to be done. - */ - if (empty) { - pipe_mutex_lock( que->que_mutex ); - pipe_condvar_broadcast( que->que_notempty_condvar ); - pipe_mutex_unlock( que->que_mutex ); - } -#endif -} - -#endif /** * Triangle setup info (derived from draw_stage). @@ -169,11 +89,6 @@ struct setup_context { struct tgsi_interp_coef posCoef; /* For Z, W */ struct quad_header quad; -#if SP_NUM_QUAD_THREADS > 1 - struct quad_job_que que; - struct thread_info threads[SP_NUM_QUAD_THREADS]; -#endif - struct { int left[2]; /**< [0] = row0, [1] = row1 */ int right[2]; @@ -188,67 +103,6 @@ struct setup_context { unsigned winding; /* which winding to cull */ }; -#if SP_NUM_QUAD_THREADS > 1 - -static PIPE_THREAD_ROUTINE( quad_thread, param ) -{ - struct thread_info *info = (struct thread_info *) param; - struct quad_job_que *que = &info->setup->que; - - for (;;) { - struct quad_job job; - boolean full; - - /* Wait for an available job. - */ - pipe_mutex_lock( que->que_mutex ); - while (que->last == que->first) - pipe_condvar_wait( que->que_notempty_condvar, que->que_mutex ); - - /* See if the que is full. - */ - full = (que->last + 1) % NUM_QUAD_JOBS == que->first; - - /* Take a job and remove it from que. - */ - job = que->jobs[que->first]; - que->first = (que->first + 1) % NUM_QUAD_JOBS; - - /* Notify the producer if the que is not full. - */ - if (full) - pipe_condvar_signal( que->que_notfull_condvar ); - pipe_mutex_unlock( que->que_mutex ); - - job.routine( info->setup, info->id, &job ); - - /* Notify the producer if that's the last finished job. - */ - pipe_mutex_lock( que->que_mutex ); - que->jobs_done++; - if (que->jobs_added == que->jobs_done) - pipe_condvar_signal( que->que_done_condvar ); - pipe_mutex_unlock( que->que_mutex ); - } - - return NULL; -} - -#define WAIT_FOR_COMPLETION(setup) \ - do {\ - pipe_mutex_lock( setup->que.que_mutex );\ - if (!INSTANT_NOTEMPTY_NOTIFY)\ - pipe_condvar_broadcast( setup->que.que_notempty_condvar );\ - while (setup->que.jobs_added != setup->que.jobs_done)\ - pipe_condvar_wait( setup->que.que_done_condvar, setup->que.que_mutex );\ - pipe_mutex_unlock( setup->que.que_mutex );\ - } while (0) - -#else - -#define WAIT_FOR_COMPLETION(setup) ((void) 0) - -#endif @@ -311,39 +165,17 @@ quad_clip( struct setup_context *setup, struct quad_header *quad ) * Emit a quad (pass to next stage) with clipping. */ static INLINE void -clip_emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread ) +clip_emit_quad( struct setup_context *setup, struct quad_header *quad ) { quad_clip( setup, quad ); + if (quad->inout.mask) { struct softpipe_context *sp = setup->softpipe; - sp->quad[thread].first->run( sp->quad[thread].first, quad ); + sp->quad.first->run( sp->quad.first, quad ); } } -#if SP_NUM_QUAD_THREADS > 1 - -static void -clip_emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job ) -{ - struct quad_header quad; - - quad.input = job->input; - quad.inout = job->inout; - quad.coef = setup->quad.coef; - quad.posCoef = setup->quad.posCoef; - quad.nr_attrs = setup->quad.nr_attrs; - clip_emit_quad( setup, &quad, thread ); -} - -#define CLIP_EMIT_QUAD(setup) add_quad_job( &setup->que, &setup->quad, clip_emit_quad_job ) - -#else - -#define CLIP_EMIT_QUAD(setup) clip_emit_quad( setup, &setup->quad, 0 ) - -#endif - /** * Emit a quad (pass to next stage). No clipping is done. */ @@ -361,7 +193,7 @@ emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread ) if (mask & 4) setup->numFragsEmitted++; if (mask & 8) setup->numFragsEmitted++; #endif - sp->quad[thread].first->run( sp->quad[thread].first, quad ); + sp->quad.first->run( sp->quad.first, quad ); #if DEBUG_FRAGS mask = quad->inout.mask; if (mask & 1) setup->numFragsWritten++; @@ -371,38 +203,15 @@ emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread ) #endif } -#if SP_NUM_QUAD_THREADS > 1 - -static void -emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job ) -{ - struct quad_header quad; - - quad.input = job->input; - quad.inout = job->inout; - quad.coef = setup->quad.coef; - quad.posCoef = setup->quad.posCoef; - quad.nr_attrs = setup->quad.nr_attrs; - emit_quad( setup, &quad, thread ); -} - -#define EMIT_QUAD(setup,x,y,qmask) do {\ - setup->quad.input.x0 = x;\ - setup->quad.input.y0 = y;\ - setup->quad.inout.mask = qmask;\ - add_quad_job( &setup->que, &setup->quad, emit_quad_job );\ - } while (0) -#else +#define EMIT_QUAD(setup,x,y,qmask) \ +do { \ + setup->quad.input.x0 = x; \ + setup->quad.input.y0 = y; \ + setup->quad.inout.mask = qmask; \ + emit_quad( setup, &setup->quad, 0 ); \ +} while (0) -#define EMIT_QUAD(setup,x,y,qmask) do {\ - setup->quad.input.x0 = x;\ - setup->quad.input.y0 = y;\ - setup->quad.inout.mask = qmask;\ - emit_quad( setup, &setup->quad, 0 );\ - } while (0) - -#endif /** * Given an X or Y coordinate, return the block/quad coordinate that it @@ -956,8 +765,6 @@ void setup_tri( struct setup_context *setup, flush_spans( setup ); - WAIT_FOR_COMPLETION(setup); - #if DEBUG_FRAGS printf("Tri: %u frags emitted, %u written\n", setup->numFragsEmitted, @@ -1101,7 +908,7 @@ plot(struct setup_context *setup, int x, int y) /* flush prev quad, start new quad */ if (setup->quad.input.x0 != -1) - CLIP_EMIT_QUAD(setup); + clip_emit_quad( setup, &setup->quad ); setup->quad.input.x0 = quadX; setup->quad.input.y0 = quadY; @@ -1223,10 +1030,8 @@ setup_line(struct setup_context *setup, /* draw final quad */ if (setup->quad.inout.mask) { - CLIP_EMIT_QUAD(setup); + clip_emit_quad( setup, &setup->quad ); } - - WAIT_FOR_COMPLETION(setup); } @@ -1334,7 +1139,7 @@ setup_point( struct setup_context *setup, setup->quad.input.x0 = (int) x - ix; setup->quad.input.y0 = (int) y - iy; setup->quad.inout.mask = (1 << ix) << (2 * iy); - CLIP_EMIT_QUAD(setup); + clip_emit_quad( setup, &setup->quad ); } else { if (round) { @@ -1395,7 +1200,7 @@ setup_point( struct setup_context *setup, if (setup->quad.inout.mask) { setup->quad.input.x0 = ix; setup->quad.input.y0 = iy; - CLIP_EMIT_QUAD(setup); + clip_emit_quad( setup, &setup->quad ); } } } @@ -1442,19 +1247,16 @@ setup_point( struct setup_context *setup, setup->quad.inout.mask = mask; setup->quad.input.x0 = ix; setup->quad.input.y0 = iy; - CLIP_EMIT_QUAD(setup); + clip_emit_quad( setup, &setup->quad ); } } } } - - WAIT_FOR_COMPLETION(setup); } void setup_prepare( struct setup_context *setup ) { struct softpipe_context *sp = setup->softpipe; - unsigned i; if (sp->dirty) { softpipe_update_derived(sp); @@ -1463,9 +1265,7 @@ void setup_prepare( struct setup_context *setup ) /* Note: nr_attrs is only used for debugging (vertex printing) */ setup->quad.nr_attrs = draw_num_vs_outputs(sp->draw); - for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { - sp->quad[i].first->begin( sp->quad[i].first ); - } + sp->quad.first->begin( sp->quad.first ); if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES && sp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && @@ -1493,9 +1293,6 @@ void setup_destroy_context( struct setup_context *setup ) struct setup_context *setup_create_context( struct softpipe_context *softpipe ) { struct setup_context *setup = CALLOC_STRUCT(setup_context); -#if SP_NUM_QUAD_THREADS > 1 - uint i; -#endif setup->softpipe = softpipe; @@ -1505,22 +1302,6 @@ struct setup_context *setup_create_context( struct softpipe_context *softpipe ) setup->span.left[0] = 1000000; /* greater than right[0] */ setup->span.left[1] = 1000000; /* greater than right[1] */ -#if SP_NUM_QUAD_THREADS > 1 - setup->que.first = 0; - setup->que.last = 0; - pipe_mutex_init( setup->que.que_mutex ); - pipe_condvar_init( setup->que.que_notfull_condvar ); - pipe_condvar_init( setup->que.que_notempty_condvar ); - setup->que.jobs_added = 0; - setup->que.jobs_done = 0; - pipe_condvar_init( setup->que.que_done_condvar ); - for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { - setup->threads[i].setup = setup; - setup->threads[i].id = i; - setup->threads[i].handle = pipe_thread_create( quad_thread, &setup->threads[i] ); - } -#endif - return setup; } -- cgit v1.2.3 From ab9fb5167023a26566b53e98f206dd73a18000f3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 24 Jul 2009 16:49:35 +0100 Subject: softpipe: expand quad pipeline to process >1 quad at a time This is part one -- we still only pass a single quad down, but the code can now cope with more. The quads must all be from the same tile. --- src/gallium/drivers/softpipe/sp_quad_alpha_test.c | 106 ++-- src/gallium/drivers/softpipe/sp_quad_blend.c | 730 +++++++++++----------- src/gallium/drivers/softpipe/sp_quad_colormask.c | 15 +- src/gallium/drivers/softpipe/sp_quad_coverage.c | 48 +- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 23 +- src/gallium/drivers/softpipe/sp_quad_earlyz.c | 28 +- src/gallium/drivers/softpipe/sp_quad_fs.c | 40 +- src/gallium/drivers/softpipe/sp_quad_occlusion.c | 10 +- src/gallium/drivers/softpipe/sp_quad_output.c | 49 +- src/gallium/drivers/softpipe/sp_quad_pipe.c | 88 +-- src/gallium/drivers/softpipe/sp_quad_pipe.h | 4 +- src/gallium/drivers/softpipe/sp_quad_stencil.c | 185 +++--- src/gallium/drivers/softpipe/sp_quad_stipple.c | 48 +- src/gallium/drivers/softpipe/sp_setup.c | 4 +- 14 files changed, 745 insertions(+), 633 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c index 0845bae0e6..3a282208b6 100644 --- a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c @@ -9,76 +9,80 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" +#define ALPHATEST( FUNC, COMP ) \ + static void \ + alpha_test_quads_##FUNC( struct quad_stage *qs, \ + struct quad_header *quads[], \ + unsigned nr ) \ + { \ + const float ref = qs->softpipe->depth_stencil->alpha.ref_value; \ + const uint cbuf = 0; /* only output[0].alpha is tested */ \ + unsigned pass_nr = 0; \ + unsigned i; \ + \ + for (i = 0; i < nr; i++) { \ + const float *aaaa = quads[i]->output.color[cbuf][3]; \ + unsigned passMask = 0; \ + \ + if (aaaa[0] COMP ref) passMask |= (1 << 0); \ + if (aaaa[1] COMP ref) passMask |= (1 << 1); \ + if (aaaa[2] COMP ref) passMask |= (1 << 2); \ + if (aaaa[3] COMP ref) passMask |= (1 << 3); \ + \ + quads[i]->inout.mask &= passMask; \ + \ + if (quads[i]->inout.mask) \ + quads[pass_nr++] = quads[i]; \ + } \ + \ + if (pass_nr) \ + qs->next->run(qs->next, quads, pass_nr); \ + } + + +ALPHATEST( LESS, < ) +ALPHATEST( EQUAL, == ) +ALPHATEST( LEQUAL, <= ) +ALPHATEST( GREATER, > ) +ALPHATEST( NOTEQUAL, != ) +ALPHATEST( GEQUAL, >= ) + +/* XXX: Incorporate into shader using KILP. + */ static void -alpha_test_quad(struct quad_stage *qs, struct quad_header *quad) +alpha_test_quad(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) { - struct softpipe_context *softpipe = qs->softpipe; - const float ref = softpipe->depth_stencil->alpha.ref_value; - unsigned passMask = 0x0, j; - const uint cbuf = 0; /* only output[0].alpha is tested */ - const float *aaaa = quad->output.color[cbuf][3]; - - switch (softpipe->depth_stencil->alpha.func) { - case PIPE_FUNC_NEVER: - break; + switch (qs->softpipe->depth_stencil->alpha.func) { case PIPE_FUNC_LESS: - /* - * If mask were an array [4] we could do this SIMD-style: - * passMask = (quad->outputs.color[0][3] <= vec4(ref)); - */ - for (j = 0; j < QUAD_SIZE; j++) { - if (aaaa[j] < ref) { - passMask |= (1 << j); - } - } + alpha_test_quads_LESS( qs, quads, nr ); break; case PIPE_FUNC_EQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (aaaa[j] == ref) { - passMask |= (1 << j); - } - } + alpha_test_quads_EQUAL( qs, quads, nr ); break; case PIPE_FUNC_LEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (aaaa[j] <= ref) { - passMask |= (1 << j); - } - } + alpha_test_quads_LEQUAL( qs, quads, nr ); break; case PIPE_FUNC_GREATER: - for (j = 0; j < QUAD_SIZE; j++) { - if (aaaa[j] > ref) { - passMask |= (1 << j); - } - } + alpha_test_quads_GREATER( qs, quads, nr ); break; case PIPE_FUNC_NOTEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (aaaa[j] != ref) { - passMask |= (1 << j); - } - } + alpha_test_quads_NOTEQUAL( qs, quads, nr ); break; case PIPE_FUNC_GEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (aaaa[j] >= ref) { - passMask |= (1 << j); - } - } + alpha_test_quads_GEQUAL( qs, quads, nr ); break; case PIPE_FUNC_ALWAYS: - passMask = MASK_ALL; + assert(0); /* should be caught earlier */ + qs->next->run(qs->next, quads, nr); break; + case PIPE_FUNC_NEVER: default: - assert(0); + assert(0); /* should be caught earlier */ + return; } - - quad->inout.mask &= passMask; - - if (quad->inout.mask) - qs->next->run(qs->next, quad); } diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 04b5daf3a4..fdf1bb4552 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -117,10 +117,16 @@ do { \ static void -logicop_quad(struct quad_stage *qs, struct quad_header *quad) +logicop_quad(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) { struct softpipe_context *softpipe = qs->softpipe; uint cbuf; + struct softpipe_cached_tile * + tile = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], + quads[0]->input.x0, + quads[0]->input.y0); /* loop over colorbuffer outputs */ for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { @@ -129,165 +135,161 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad) uint *src4 = (uint *) src; uint *dst4 = (uint *) dst; uint *res4 = (uint *) res; - struct softpipe_cached_tile * - tile = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], - quad->input.x0, quad->input.y0); - float (*quadColor)[4] = quad->output.color[cbuf]; uint i, j; - /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); - for (i = 0; i < 4; i++) { - dest[i][j] = tile->data.color[y][x][i]; + for (i = 0; i < nr; i++) { + struct quad_header *quad = quads[i]; + float (*quadColor)[4] = quad->output.color[cbuf]; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } } - } - /* convert to ubyte */ - for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */ - dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */ - dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */ - dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */ - dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */ - - src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */ - src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */ - src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */ - src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */ - } + /* convert to ubyte */ + for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */ + dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */ + dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */ + dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */ + dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */ + + src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */ + src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */ + src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */ + src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */ + } - switch (softpipe->blend->logicop_func) { - case PIPE_LOGICOP_CLEAR: - for (j = 0; j < 4; j++) - res4[j] = 0; - break; - case PIPE_LOGICOP_NOR: - for (j = 0; j < 4; j++) - res4[j] = ~(src4[j] | dst4[j]); - break; - case PIPE_LOGICOP_AND_INVERTED: - for (j = 0; j < 4; j++) - res4[j] = ~src4[j] & dst4[j]; - break; - case PIPE_LOGICOP_COPY_INVERTED: - for (j = 0; j < 4; j++) - res4[j] = ~src4[j]; - break; - case PIPE_LOGICOP_AND_REVERSE: - for (j = 0; j < 4; j++) - res4[j] = src4[j] & ~dst4[j]; - break; - case PIPE_LOGICOP_INVERT: - for (j = 0; j < 4; j++) - res4[j] = ~dst4[j]; - break; - case PIPE_LOGICOP_XOR: - for (j = 0; j < 4; j++) - res4[j] = dst4[j] ^ src4[j]; - break; - case PIPE_LOGICOP_NAND: - for (j = 0; j < 4; j++) - res4[j] = ~(src4[j] & dst4[j]); - break; - case PIPE_LOGICOP_AND: - for (j = 0; j < 4; j++) - res4[j] = src4[j] & dst4[j]; - break; - case PIPE_LOGICOP_EQUIV: - for (j = 0; j < 4; j++) - res4[j] = ~(src4[j] ^ dst4[j]); - break; - case PIPE_LOGICOP_NOOP: - for (j = 0; j < 4; j++) - res4[j] = dst4[j]; - break; - case PIPE_LOGICOP_OR_INVERTED: - for (j = 0; j < 4; j++) - res4[j] = ~src4[j] | dst4[j]; - break; - case PIPE_LOGICOP_COPY: - for (j = 0; j < 4; j++) - res4[j] = src4[j]; - break; - case PIPE_LOGICOP_OR_REVERSE: - for (j = 0; j < 4; j++) - res4[j] = src4[j] | ~dst4[j]; - break; - case PIPE_LOGICOP_OR: - for (j = 0; j < 4; j++) - res4[j] = src4[j] | dst4[j]; - break; - case PIPE_LOGICOP_SET: - for (j = 0; j < 4; j++) - res4[j] = ~0; - break; - default: - assert(0); - } + switch (softpipe->blend->logicop_func) { + case PIPE_LOGICOP_CLEAR: + for (j = 0; j < 4; j++) + res4[j] = 0; + break; + case PIPE_LOGICOP_NOR: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] | dst4[j]); + break; + case PIPE_LOGICOP_AND_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j] & dst4[j]; + break; + case PIPE_LOGICOP_COPY_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j]; + break; + case PIPE_LOGICOP_AND_REVERSE: + for (j = 0; j < 4; j++) + res4[j] = src4[j] & ~dst4[j]; + break; + case PIPE_LOGICOP_INVERT: + for (j = 0; j < 4; j++) + res4[j] = ~dst4[j]; + break; + case PIPE_LOGICOP_XOR: + for (j = 0; j < 4; j++) + res4[j] = dst4[j] ^ src4[j]; + break; + case PIPE_LOGICOP_NAND: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] & dst4[j]); + break; + case PIPE_LOGICOP_AND: + for (j = 0; j < 4; j++) + res4[j] = src4[j] & dst4[j]; + break; + case PIPE_LOGICOP_EQUIV: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] ^ dst4[j]); + break; + case PIPE_LOGICOP_NOOP: + for (j = 0; j < 4; j++) + res4[j] = dst4[j]; + break; + case PIPE_LOGICOP_OR_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j] | dst4[j]; + break; + case PIPE_LOGICOP_COPY: + for (j = 0; j < 4; j++) + res4[j] = src4[j]; + break; + case PIPE_LOGICOP_OR_REVERSE: + for (j = 0; j < 4; j++) + res4[j] = src4[j] | ~dst4[j]; + break; + case PIPE_LOGICOP_OR: + for (j = 0; j < 4; j++) + res4[j] = src4[j] | dst4[j]; + break; + case PIPE_LOGICOP_SET: + for (j = 0; j < 4; j++) + res4[j] = ~0; + break; + default: + assert(0); + } - for (j = 0; j < 4; j++) { - quadColor[j][0] = ubyte_to_float(res[j][0]); - quadColor[j][1] = ubyte_to_float(res[j][1]); - quadColor[j][2] = ubyte_to_float(res[j][2]); - quadColor[j][3] = ubyte_to_float(res[j][3]); + for (j = 0; j < 4; j++) { + quadColor[j][0] = ubyte_to_float(res[j][0]); + quadColor[j][1] = ubyte_to_float(res[j][1]); + quadColor[j][2] = ubyte_to_float(res[j][2]); + quadColor[j][3] = ubyte_to_float(res[j][3]); + } } } - - /* pass quad to next stage */ - qs->next->run(qs->next, quad); } - - static void -blend_quad(struct quad_stage *qs, struct quad_header *quad) +blend_quads(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) { static const float zero[4] = { 0, 0, 0, 0 }; static const float one[4] = { 1, 1, 1, 1 }; - struct softpipe_context *softpipe = qs->softpipe; uint cbuf; - if (softpipe->blend->logicop_enable) { - logicop_quad(qs, quad); - return; - } - /* loop over colorbuffer outputs */ for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { float source[4][QUAD_SIZE], dest[4][QUAD_SIZE]; struct softpipe_cached_tile *tile = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], - quad->input.x0, quad->input.y0); - float (*quadColor)[4] = quad->output.color[cbuf]; - uint i, j; - - /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); - for (i = 0; i < 4; i++) { - dest[i][j] = tile->data.color[y][x][i]; + quads[0]->input.x0, + quads[0]->input.y0); + uint q, i, j; + + for (q = 0; q < nr; q++) { + struct quad_header *quad = quads[q]; + float (*quadColor)[4] = quad->output.color[cbuf]; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } } - } - /* - * Compute src/first term RGB - */ - switch (softpipe->blend->rgb_src_factor) { - case PIPE_BLENDFACTOR_ONE: - VEC4_COPY(source[0], quadColor[0]); /* R */ - VEC4_COPY(source[1], quadColor[1]); /* G */ - VEC4_COPY(source[2], quadColor[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */ - VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */ - VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: + /* + * Compute src/first term RGB + */ + switch (softpipe->blend->rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + VEC4_COPY(source[0], quadColor[0]); /* R */ + VEC4_COPY(source[1], quadColor[1]); /* G */ + VEC4_COPY(source[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */ + VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */ + VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: { const float *alpha = quadColor[3]; VEC4_MUL(source[0], quadColor[0], alpha); /* R */ @@ -295,12 +297,12 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], alpha); /* B */ } break; - case PIPE_BLENDFACTOR_DST_COLOR: - VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */ - VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */ - VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */ - break; - case PIPE_BLENDFACTOR_DST_ALPHA: + case PIPE_BLENDFACTOR_DST_COLOR: + VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */ + VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */ + VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */ + break; + case PIPE_BLENDFACTOR_DST_ALPHA: { const float *alpha = dest[3]; VEC4_MUL(source[0], quadColor[0], alpha); /* R */ @@ -308,7 +310,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], alpha); /* B */ } break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: { const float *alpha = quadColor[3]; float diff[4], temp[4]; @@ -319,7 +321,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], temp); /* B */ } break; - case PIPE_BLENDFACTOR_CONST_COLOR: + case PIPE_BLENDFACTOR_CONST_COLOR: { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ @@ -330,7 +332,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], comp); /* B */ } break; - case PIPE_BLENDFACTOR_CONST_ALPHA: + case PIPE_BLENDFACTOR_CONST_ALPHA: { float alpha[4]; VEC4_SCALAR(alpha, softpipe->blend_color.color[3]); @@ -339,18 +341,18 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], alpha); /* B */ } break; - case PIPE_BLENDFACTOR_SRC1_COLOR: - assert(0); /* to do */ - break; - case PIPE_BLENDFACTOR_SRC1_ALPHA: - assert(0); /* to do */ - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(source[0], zero); /* R */ - VEC4_COPY(source[1], zero); /* G */ - VEC4_COPY(source[2], zero); /* B */ - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: + case PIPE_BLENDFACTOR_SRC1_COLOR: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(source[0], zero); /* R */ + VEC4_COPY(source[1], zero); /* G */ + VEC4_COPY(source[2], zero); /* B */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: { float inv_comp[4]; VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ @@ -361,7 +363,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ } break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: { float inv_alpha[4]; VEC4_SUB(inv_alpha, one, quadColor[3]); @@ -370,7 +372,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ } break; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: + case PIPE_BLENDFACTOR_INV_DST_ALPHA: { float inv_alpha[4]; VEC4_SUB(inv_alpha, one, dest[3]); @@ -379,7 +381,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ } break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: + case PIPE_BLENDFACTOR_INV_DST_COLOR: { float inv_comp[4]; VEC4_SUB(inv_comp, one, dest[0]); /* R */ @@ -390,7 +392,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ } break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: + case PIPE_BLENDFACTOR_INV_CONST_COLOR: { float inv_comp[4]; /* R */ @@ -404,7 +406,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], inv_comp); } break; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: { float inv_alpha[4]; VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]); @@ -413,73 +415,73 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ } break; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - assert(0); /* to do */ - break; - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - assert(0); /* to do */ - break; - default: - assert(0); - } + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + assert(0); /* to do */ + break; + default: + assert(0); + } - /* - * Compute src/first term A - */ - switch (softpipe->blend->alpha_src_factor) { - case PIPE_BLENDFACTOR_ONE: - VEC4_COPY(source[3], quadColor[3]); /* A */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_SRC_ALPHA: + /* + * Compute src/first term A + */ + switch (softpipe->blend->alpha_src_factor) { + case PIPE_BLENDFACTOR_ONE: + VEC4_COPY(source[3], quadColor[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_SRC_ALPHA: { const float *alpha = quadColor[3]; VEC4_MUL(source[3], quadColor[3], alpha); /* A */ } break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - /* multiply alpha by 1.0 */ - VEC4_COPY(source[3], quadColor[3]); /* A */ - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_CONST_ALPHA: + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + /* multiply alpha by 1.0 */ + VEC4_COPY(source[3], quadColor[3]); /* A */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ VEC4_MUL(source[3], quadColor[3], comp); /* A */ } break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(source[3], zero); /* A */ - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(source[3], zero); /* A */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: { float inv_alpha[4]; VEC4_SUB(inv_alpha, one, quadColor[3]); VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ } break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_DST_ALPHA: + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_DST_ALPHA: { float inv_alpha[4]; VEC4_SUB(inv_alpha, one, dest[3]); VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ } break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: { float inv_comp[4]; /* A */ @@ -487,42 +489,42 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[3], quadColor[3], inv_comp); } break; - default: - assert(0); - } + default: + assert(0); + } - /* - * Compute dest/second term RGB - */ - switch (softpipe->blend->rgb_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave dest as-is */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ - VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ - VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ - break; - case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ - break; - case PIPE_BLENDFACTOR_DST_COLOR: - VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ - VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ - VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - assert(0); /* illegal */ - break; - case PIPE_BLENDFACTOR_CONST_COLOR: + /* + * Compute dest/second term RGB + */ + switch (softpipe->blend->rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* dest = dest * 1 NO-OP, leave dest as-is */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ + VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ + VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ + VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ + VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ + VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ + VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ + VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ + VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + assert(0); /* illegal */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ @@ -533,7 +535,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(dest[2], dest[2], comp); /* B */ } break; - case PIPE_BLENDFACTOR_CONST_ALPHA: + case PIPE_BLENDFACTOR_CONST_ALPHA: { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ @@ -542,17 +544,17 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(dest[2], dest[2], comp); /* B */ } break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[0], zero); /* R */ - VEC4_COPY(dest[1], zero); /* G */ - VEC4_COPY(dest[2], zero); /* B */ - break; - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - /* XXX what are these? */ - assert(0); - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(dest[0], zero); /* R */ + VEC4_COPY(dest[1], zero); /* G */ + VEC4_COPY(dest[2], zero); /* B */ + break; + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + /* XXX what are these? */ + assert(0); + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: { float inv_comp[4]; VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ @@ -563,7 +565,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ } break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: { float one_minus_alpha[QUAD_SIZE]; VEC4_SUB(one_minus_alpha, one, quadColor[3]); @@ -572,7 +574,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ } break; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: + case PIPE_BLENDFACTOR_INV_DST_ALPHA: { float inv_comp[4]; VEC4_SUB(inv_comp, one, dest[3]); /* A */ @@ -581,7 +583,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ } break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: + case PIPE_BLENDFACTOR_INV_DST_COLOR: { float inv_comp[4]; VEC4_SUB(inv_comp, one, dest[0]); /* R */ @@ -592,7 +594,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(dest[2], dest[2], inv_comp); /* B */ } break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: + case PIPE_BLENDFACTOR_INV_CONST_COLOR: { float inv_comp[4]; /* R */ @@ -606,7 +608,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(dest[2], dest[2], inv_comp); } break; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: { float inv_comp[4]; VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); @@ -615,138 +617,154 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(dest[2], dest[2], inv_comp); } break; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - /* XXX what are these? */ - assert(0); - break; - default: - assert(0); - } + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + /* XXX what are these? */ + assert(0); + break; + default: + assert(0); + } - /* - * Compute dest/second term A - */ - switch (softpipe->blend->alpha_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave dest as-is */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ - break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - assert(0); /* illegal */ - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_CONST_ALPHA: + /* + * Compute dest/second term A + */ + switch (softpipe->blend->alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* dest = dest * 1 NO-OP, leave dest as-is */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_SRC_ALPHA: + VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + assert(0); /* illegal */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: { float comp[4]; VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ VEC4_MUL(dest[3], dest[3], comp); /* A */ } break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[3], zero); /* A */ - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(dest[3], zero); /* A */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: { float one_minus_alpha[QUAD_SIZE]; VEC4_SUB(one_minus_alpha, one, quadColor[3]); VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ } break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_DST_ALPHA: + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_DST_ALPHA: { float inv_comp[4]; VEC4_SUB(inv_comp, one, dest[3]); /* A */ VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ } break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: { float inv_comp[4]; VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); VEC4_MUL(dest[3], dest[3], inv_comp); } break; - default: - assert(0); - } + default: + assert(0); + } - /* - * Combine RGB terms - */ - switch (softpipe->blend->rgb_func) { - case PIPE_BLEND_ADD: - VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */ - VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ - VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */ - break; - case PIPE_BLEND_SUBTRACT: - VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */ - VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */ - VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */ - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */ - VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */ - VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */ - break; - case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ - break; - case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ - break; - default: - assert(0); - } + /* + * Combine RGB terms + */ + switch (softpipe->blend->rgb_func) { + case PIPE_BLEND_ADD: + VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */ + VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ + VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_SUBTRACT: + VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */ + VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */ + VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */ + VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */ + VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */ + break; + case PIPE_BLEND_MIN: + VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ + VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ + VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_MAX: + VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ + VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ + VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ + break; + default: + assert(0); + } - /* - * Combine A terms - */ - switch (softpipe->blend->alpha_func) { - case PIPE_BLEND_ADD: - VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ - break; - case PIPE_BLEND_SUBTRACT: - VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */ - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */ - break; - case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ - break; - case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ - break; - default: - assert(0); + /* + * Combine A terms + */ + switch (softpipe->blend->alpha_func) { + case PIPE_BLEND_ADD: + VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_SUBTRACT: + VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */ + break; + case PIPE_BLEND_MIN: + VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_MAX: + VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ + break; + default: + assert(0); + } } - } /* cbuf loop */ +} + + +static void +blend_quad(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + struct softpipe_context *softpipe = qs->softpipe; + + if (softpipe->blend->logicop_enable) { + logicop_quad(qs, quads, nr); + } + else if (softpipe->blend->blend_enable) { + blend_quads(qs, quads, nr ); + } /* pass blended quad to next stage */ - qs->next->run(qs->next, quad); + qs->next->run(qs->next, quads, nr); } diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c index 89efbe3b02..ac74287473 100644 --- a/src/gallium/drivers/softpipe/sp_quad_colormask.c +++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c @@ -84,12 +84,23 @@ colormask_quad(struct quad_stage *qs, struct quad_header *quad) if (!(softpipe->blend->colormask & PIPE_MASK_A)) COPY_4V(quadColor[3], dest[3]); } +} + +static void +colormask_quads(struct quad_stage *qs, struct quad_header *quads[], + unsigned nr) +{ + unsigned i; + + for (i = 0; i < nr; i++) + colormask_quad(qs, quads[i]); /* pass quad to next stage */ - qs->next->run(qs->next, quad); + qs->next->run(qs->next, quads, nr); } + static void colormask_begin(struct quad_stage *qs) { qs->next->begin(qs->next); @@ -108,7 +119,7 @@ struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe ) stage->softpipe = softpipe; stage->begin = colormask_begin; - stage->run = colormask_quad; + stage->run = colormask_quads; stage->destroy = colormask_destroy; return stage; diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c index 4aeee85870..eda5ce8e63 100644 --- a/src/gallium/drivers/softpipe/sp_quad_coverage.c +++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c @@ -42,33 +42,47 @@ /** * Multiply quad's alpha values by the fragment coverage. */ -static void +static INLINE void coverage_quad(struct quad_stage *qs, struct quad_header *quad) { struct softpipe_context *softpipe = qs->softpipe; - const uint prim = quad->input.prim; + uint cbuf; + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { + float (*quadColor)[4] = quad->output.color[cbuf]; + unsigned j; + for (j = 0; j < QUAD_SIZE; j++) { + assert(quad->input.coverage[j] >= 0.0); + assert(quad->input.coverage[j] <= 1.0); + quadColor[3][j] *= quad->input.coverage[j]; + } + } +} + + +/* XXX: Incorporate into shader after alpha_test. + */ +static void +coverage_run(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + struct softpipe_context *softpipe = qs->softpipe; + const uint prim = quads[0]->input.prim; + unsigned i; if ((softpipe->rasterizer->poly_smooth && prim == QUAD_PRIM_TRI) || (softpipe->rasterizer->line_smooth && prim == QUAD_PRIM_LINE) || (softpipe->rasterizer->point_smooth && prim == QUAD_PRIM_POINT)) { - uint cbuf; - - /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { - float (*quadColor)[4] = quad->output.color[cbuf]; - unsigned j; - for (j = 0; j < QUAD_SIZE; j++) { - assert(quad->input.coverage[j] >= 0.0); - assert(quad->input.coverage[j] <= 1.0); - quadColor[3][j] *= quad->input.coverage[j]; - } - } + + for (i = 0; i < nr; i++) + coverage_quad( qs, quads[i] ); } - qs->next->run(qs->next, quad); + qs->next->run(qs->next, quads, nr); } - static void coverage_begin(struct quad_stage *qs) { qs->next->begin(qs->next); @@ -87,7 +101,7 @@ struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe ) stage->softpipe = softpipe; stage->begin = coverage_begin; - stage->run = coverage_quad; + stage->run = coverage_run; stage->destroy = coverage_destroy; return stage; diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 768b9275b3..8f223a7eae 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -49,7 +49,7 @@ * Try to effectively do that with codegen... */ -void +boolean sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) { struct softpipe_context *softpipe = qs->softpipe; @@ -193,6 +193,8 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) } quad->inout.mask &= zmask; + if (quad->inout.mask == 0) + return FALSE; if (softpipe->depth_stencil->depth.writemask) { @@ -252,16 +254,25 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) assert(0); } } + + return TRUE; } static void -depth_test_quad(struct quad_stage *qs, struct quad_header *quad) +depth_test_quads(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) { - sp_depth_test_quad(qs, quad); + unsigned i, pass = 0; - if (quad->inout.mask) - qs->next->run(qs->next, quad); + for (i = 0; i < nr; i++) { + if (sp_depth_test_quad(qs, quads[i])) + quads[pass++] = quads[i]; + } + + if (pass) + qs->next->run(qs->next, quads, pass); } @@ -283,7 +294,7 @@ struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe ) stage->softpipe = softpipe; stage->begin = depth_test_begin; - stage->run = depth_test_quad; + stage->run = depth_test_quads; stage->destroy = depth_test_destroy; return stage; diff --git a/src/gallium/drivers/softpipe/sp_quad_earlyz.c b/src/gallium/drivers/softpipe/sp_quad_earlyz.c index 496fd39ed1..1048d44984 100644 --- a/src/gallium/drivers/softpipe/sp_quad_earlyz.c +++ b/src/gallium/drivers/softpipe/sp_quad_earlyz.c @@ -43,20 +43,26 @@ static void earlyz_quad( struct quad_stage *qs, - struct quad_header *quad ) + struct quad_header *quads[], + unsigned nr ) { - const float fx = (float) quad->input.x0; - const float fy = (float) quad->input.y0; - const float dzdx = quad->posCoef->dadx[2]; - const float dzdy = quad->posCoef->dady[2]; - const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; + const float a0z = quads[0]->posCoef->a0[2]; + const float dzdx = quads[0]->posCoef->dadx[2]; + const float dzdy = quads[0]->posCoef->dady[2]; + unsigned i; - quad->output.depth[0] = z0; - quad->output.depth[1] = z0 + dzdx; - quad->output.depth[2] = z0 + dzdy; - quad->output.depth[3] = z0 + dzdx + dzdy; + for (i = 0; i < nr; i++) { + const float fx = (float) quads[i]->input.x0; + const float fy = (float) quads[i]->input.y0; + const float z0 = a0z + dzdx * fx + dzdy * fy; - qs->next->run( qs->next, quad ); + quads[i]->output.depth[0] = z0; + quads[i]->output.depth[1] = z0 + dzdx; + quads[i]->output.depth[2] = z0 + dzdy; + quads[i]->output.depth[3] = z0 + dzdx + dzdy; + } + + qs->next->run( qs->next, quads, nr ); } static void diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 28f8d1a60e..ea5ed3bbd0 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -68,21 +68,18 @@ quad_shade_stage(struct quad_stage *qs) /** * Execute fragment shader for the four fragments in the quad. */ -static void +static boolean shade_quad(struct quad_stage *qs, struct quad_header *quad) { struct quad_shade_stage *qss = quad_shade_stage( qs ); struct softpipe_context *softpipe = qs->softpipe; struct tgsi_exec_machine *machine = qss->machine; boolean z_written; - - /* Consts do not require 16 byte alignment. */ - machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; - - machine->InterpCoefs = quad->coef; /* run shader */ quad->inout.mask &= softpipe->fs->run( softpipe->fs, machine, quad ); + if (quad->inout.mask == 0) + return FALSE; /* store outputs */ z_written = FALSE; @@ -129,11 +126,34 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad) quad->output.depth[3] = z0 + dzdx + dzdy; } - /* shader may cull fragments */ - if (quad->inout.mask) { - qs->next->run( qs->next, quad ); + return TRUE; +} + +static void +shade_quads(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + struct quad_shade_stage *qss = quad_shade_stage( qs ); + struct softpipe_context *softpipe = qs->softpipe; + struct tgsi_exec_machine *machine = qss->machine; + + unsigned i, pass = 0; + + machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; + machine->InterpCoefs = quads[0]->coef; + + for (i = 0; i < nr; i++) { + if (shade_quad(qs, quads[i])) + quads[pass++] = quads[i]; } + + if (pass) + qs->next->run(qs->next, quads, pass); } + + + /** @@ -174,7 +194,7 @@ sp_quad_shade_stage( struct softpipe_context *softpipe ) qss->stage.softpipe = softpipe; qss->stage.begin = shade_begin; - qss->stage.run = shade_quad; + qss->stage.run = shade_quads; qss->stage.destroy = shade_destroy; qss->machine = tgsi_exec_machine_create(); diff --git a/src/gallium/drivers/softpipe/sp_quad_occlusion.c b/src/gallium/drivers/softpipe/sp_quad_occlusion.c index dfa7ff3b1d..4adeb16546 100644 --- a/src/gallium/drivers/softpipe/sp_quad_occlusion.c +++ b/src/gallium/drivers/softpipe/sp_quad_occlusion.c @@ -50,13 +50,15 @@ static unsigned count_bits( unsigned val ) } static void -occlusion_count_quad(struct quad_stage *qs, struct quad_header *quad) +occlusion_count_quads(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) { struct softpipe_context *softpipe = qs->softpipe; + unsigned i; - softpipe->occlusion_count += count_bits(quad->inout.mask); + for (i = 0; i < nr; i++) + softpipe->occlusion_count += count_bits(quads[i]->inout.mask); - qs->next->run(qs->next, quad); + qs->next->run(qs->next, quads, nr); } @@ -78,7 +80,7 @@ struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe ) stage->softpipe = softpipe; stage->begin = occlusion_begin; - stage->run = occlusion_count_quad; + stage->run = occlusion_count_quads; stage->destroy = occlusion_destroy; return stage; diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c index dd8f5377e9..79a222ff58 100644 --- a/src/gallium/drivers/softpipe/sp_quad_output.c +++ b/src/gallium/drivers/softpipe/sp_quad_output.c @@ -38,11 +38,8 @@ * taking mask into account. */ static void -output_quad(struct quad_stage *qs, struct quad_header *quad) +output_quad(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) { - /* in-tile pos: */ - const int itx = quad->input.x0 % TILE_SIZE; - const int ity = quad->input.y0 % TILE_SIZE; struct softpipe_context *softpipe = qs->softpipe; uint cbuf; @@ -51,25 +48,35 @@ output_quad(struct quad_stage *qs, struct quad_header *quad) for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { struct softpipe_cached_tile *tile = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], - quad->input.x0, quad->input.y0); - float (*quadColor)[4] = quad->output.color[cbuf]; - int i, j; + quads[0]->input.x0, + quads[0]->input.y0); + int i, j, q; /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - if (quad->inout.mask & (1 << j)) { - int x = itx + (j & 1); - int y = ity + (j >> 1); - for (i = 0; i < 4; i++) { /* loop over color chans */ - tile->data.color[y][x][i] = quadColor[i][j]; - } - if (0) { - debug_printf("sp write pixel %d,%d: %g, %g, %g\n", - quad->input.x0 + x, - quad->input.y0 + y, - quadColor[0][j], - quadColor[1][j], - quadColor[2][j]); + for (q = 0; q < nr; q++) { + struct quad_header *quad = quads[q]; + float (*quadColor)[4] = quad->output.color[cbuf]; + + /* in-tile pos: */ + const int itx = quad->input.x0 % TILE_SIZE; + const int ity = quad->input.y0 % TILE_SIZE; + + + for (j = 0; j < QUAD_SIZE; j++) { + if (quad->inout.mask & (1 << j)) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + for (i = 0; i < 4; i++) { /* loop over color chans */ + tile->data.color[y][x][i] = quadColor[i][j]; + } + if (0) { + debug_printf("sp write pixel %d,%d: %g, %g, %g\n", + quad->input.x0 + x, + quad->input.y0 + y, + quadColor[0][j], + quadColor[1][j], + quadColor[2][j]); + } } } } diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c index 3a3359d303..594fade455 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.c +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c @@ -55,50 +55,52 @@ void sp_build_quad_pipeline(struct softpipe_context *sp) { boolean early_depth_test = - sp->depth_stencil->depth.enabled && - sp->framebuffer.zsbuf && - !sp->depth_stencil->alpha.enabled && - !sp->fs->info.uses_kill && - !sp->fs->info.writes_z; + sp->depth_stencil->depth.enabled && + sp->framebuffer.zsbuf && + !sp->depth_stencil->alpha.enabled && + !sp->fs->info.uses_kill && + !sp->fs->info.writes_z; /* build up the pipeline in reverse order... */ - sp->quad.first = sp->quad.output; - - if (sp->blend->colormask != 0xf) { - sp_push_quad_first( sp, sp->quad.colormask ); - } - - if (sp->blend->blend_enable || - sp->blend->logicop_enable) { - sp_push_quad_first( sp, sp->quad.blend ); - } - - if (sp->active_query_count) { - sp_push_quad_first( sp, sp->quad.occlusion ); - } - - if (sp->rasterizer->poly_smooth || - sp->rasterizer->line_smooth || - sp->rasterizer->point_smooth) { - sp_push_quad_first( sp, sp->quad.coverage ); - } - - if (!early_depth_test) { - sp_build_depth_stencil( sp ); - } - - if (sp->depth_stencil->alpha.enabled) { - sp_push_quad_first( sp, sp->quad.alpha_test ); - } - - /* XXX always enable shader? */ - if (1) { - sp_push_quad_first( sp, sp->quad.shade ); - } - - if (early_depth_test) { - sp_build_depth_stencil( sp ); - sp_push_quad_first( sp, sp->quad.earlyz ); - } + + /* Color combine + */ + sp->quad.first = sp->quad.output; + + if (sp->blend->colormask != 0xf) { + sp_push_quad_first( sp, sp->quad.colormask ); + } + + if (sp->blend->blend_enable || + sp->blend->logicop_enable) { + sp_push_quad_first( sp, sp->quad.blend ); + } + + if (sp->rasterizer->poly_smooth || + sp->rasterizer->line_smooth || + sp->rasterizer->point_smooth) { + sp_push_quad_first( sp, sp->quad.coverage ); + } + + /* Shade/Depth/Stencil/Alpha + */ + if (sp->active_query_count) { + sp_push_quad_first( sp, sp->quad.occlusion ); + } + + if (!early_depth_test) { + sp_build_depth_stencil( sp ); + } + + if (sp->depth_stencil->alpha.enabled) { + sp_push_quad_first( sp, sp->quad.alpha_test ); + } + + sp_push_quad_first( sp, sp->quad.shade ); + + if (early_depth_test) { + sp_build_depth_stencil( sp ); + sp_push_quad_first( sp, sp->quad.earlyz ); + } } diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.h b/src/gallium/drivers/softpipe/sp_quad_pipe.h index 0e40586ffc..add31ba705 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.h +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.h @@ -49,7 +49,7 @@ struct quad_stage { void (*begin)(struct quad_stage *qs); /** the stage action */ - void (*run)(struct quad_stage *qs, struct quad_header *quad); + void (*run)(struct quad_stage *qs, struct quad_header *quad[], unsigned nr); void (*destroy)(struct quad_stage *qs); }; @@ -69,6 +69,6 @@ struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe ); void sp_build_quad_pipeline(struct softpipe_context *sp); -void sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad); +boolean sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad); #endif /* SP_QUAD_PIPE_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c index 34a8d9e9f6..706dd2f756 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stencil.c +++ b/src/gallium/drivers/softpipe/sp_quad_stencil.c @@ -198,7 +198,8 @@ apply_stencil_op(ubyte stencilVals[QUAD_SIZE], * depth testing. */ static void -stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) +stencil_test_quad(struct quad_stage *qs, struct quad_header *quads[], + unsigned nr) { struct softpipe_context *softpipe = qs->softpipe; struct pipe_surface *ps = softpipe->framebuffer.zsbuf; @@ -206,9 +207,12 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) ubyte ref, wrtMask, valMask; ubyte stencilVals[QUAD_SIZE]; struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe->zsbuf_cache, quad->input.x0, quad->input.y0); - uint j; - uint face = quad->input.facing; + = sp_get_cached_tile(softpipe->zsbuf_cache, + quads[0]->input.x0, + quads[0]->input.y0); + uint face = quads[0]->input.facing; + uint pass = 0; + uint j, q; if (!softpipe->depth_stencil->stencil[1].enabled) { /* single-sided stencil test, use front (face=0) state */ @@ -227,103 +231,110 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) assert(ps); /* shouldn't get here if there's no stencil buffer */ - /* get stencil values from cached tile */ - switch (ps->format) { - case PIPE_FORMAT_S8Z24_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - stencilVals[j] = tile->data.depth32[y][x] >> 24; - } - break; - case PIPE_FORMAT_Z24S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - stencilVals[j] = tile->data.depth32[y][x] & 0xff; - } - break; - case PIPE_FORMAT_S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - stencilVals[j] = tile->data.stencil8[y][x]; + for (q = 0; q < nr; q++) { + struct quad_header *quad = quads[q]; + + /* get stencil values from cached tile */ + switch (ps->format) { + case PIPE_FORMAT_S8Z24_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + stencilVals[j] = tile->data.depth32[y][x] >> 24; + } + break; + case PIPE_FORMAT_Z24S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + stencilVals[j] = tile->data.depth32[y][x] & 0xff; + } + break; + case PIPE_FORMAT_S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + stencilVals[j] = tile->data.stencil8[y][x]; + } + break; + default: + assert(0); } - break; - default: - assert(0); - } - /* do the stencil test first */ - { - unsigned passMask, failMask; - passMask = do_stencil_test(stencilVals, func, ref, valMask); - failMask = quad->inout.mask & ~passMask; - quad->inout.mask &= passMask; + /* do the stencil test first */ + { + unsigned passMask, failMask; + passMask = do_stencil_test(stencilVals, func, ref, valMask); + failMask = quad->inout.mask & ~passMask; + quad->inout.mask &= passMask; - if (failOp != PIPE_STENCIL_OP_KEEP) { - apply_stencil_op(stencilVals, failMask, failOp, ref, wrtMask); + if (failOp != PIPE_STENCIL_OP_KEEP) { + apply_stencil_op(stencilVals, failMask, failOp, ref, wrtMask); + } } - } - if (quad->inout.mask) { - /* now the pixels that passed the stencil test are depth tested */ - if (softpipe->depth_stencil->depth.enabled) { - const unsigned origMask = quad->inout.mask; + if (quad->inout.mask) { + /* now the pixels that passed the stencil test are depth tested */ + if (softpipe->depth_stencil->depth.enabled) { + const unsigned origMask = quad->inout.mask; - sp_depth_test_quad(qs, quad); /* quad->mask is updated */ + sp_depth_test_quad(qs, quad); /* quad->mask is updated */ - /* update stencil buffer values according to z pass/fail result */ - if (zFailOp != PIPE_STENCIL_OP_KEEP) { - const unsigned failMask = origMask & ~quad->inout.mask; - apply_stencil_op(stencilVals, failMask, zFailOp, ref, wrtMask); - } + /* update stencil buffer values according to z pass/fail result */ + if (zFailOp != PIPE_STENCIL_OP_KEEP) { + const unsigned failMask = origMask & ~quad->inout.mask; + apply_stencil_op(stencilVals, failMask, zFailOp, ref, wrtMask); + } - if (zPassOp != PIPE_STENCIL_OP_KEEP) { - const unsigned passMask = origMask & quad->inout.mask; - apply_stencil_op(stencilVals, passMask, zPassOp, ref, wrtMask); + if (zPassOp != PIPE_STENCIL_OP_KEEP) { + const unsigned passMask = origMask & quad->inout.mask; + apply_stencil_op(stencilVals, passMask, zPassOp, ref, wrtMask); + } + } + else { + /* no depth test, apply Zpass operator to stencil buffer values */ + apply_stencil_op(stencilVals, quad->inout.mask, zPassOp, ref, wrtMask); } - } - else { - /* no depth test, apply Zpass operator to stencil buffer values */ - apply_stencil_op(stencilVals, quad->inout.mask, zPassOp, ref, wrtMask); - } - - } - /* put new stencil values into cached tile */ - switch (ps->format) { - case PIPE_FORMAT_S8Z24_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - uint s8z24 = tile->data.depth32[y][x]; - s8z24 = (stencilVals[j] << 24) | (s8z24 & 0xffffff); - tile->data.depth32[y][x] = s8z24; } - break; - case PIPE_FORMAT_Z24S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - uint z24s8 = tile->data.depth32[y][x]; - z24s8 = (z24s8 & 0xffffff00) | stencilVals[j]; - tile->data.depth32[y][x] = z24s8; - } - break; - case PIPE_FORMAT_S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - tile->data.stencil8[y][x] = stencilVals[j]; + + /* put new stencil values into cached tile */ + switch (ps->format) { + case PIPE_FORMAT_S8Z24_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + uint s8z24 = tile->data.depth32[y][x]; + s8z24 = (stencilVals[j] << 24) | (s8z24 & 0xffffff); + tile->data.depth32[y][x] = s8z24; + } + break; + case PIPE_FORMAT_Z24S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + uint z24s8 = tile->data.depth32[y][x]; + z24s8 = (z24s8 & 0xffffff00) | stencilVals[j]; + tile->data.depth32[y][x] = z24s8; + } + break; + case PIPE_FORMAT_S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.stencil8[y][x] = stencilVals[j]; + } + break; + default: + assert(0); } - break; - default: - assert(0); + + if (quad->inout.mask) + quads[pass++] = q; } - if (quad->inout.mask) - qs->next->run(qs->next, quad); + if (pass) + qs->next->run(qs->next, quads, pass); } diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c index 07162db7b6..05665d8e26 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stipple.c +++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c @@ -14,40 +14,46 @@ * Apply polygon stipple to quads produced by triangle rasterization */ static void -stipple_quad(struct quad_stage *qs, struct quad_header *quad) +stipple_quad(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) { static const uint bit31 = 1 << 31; static const uint bit30 = 1 << 30; + unsigned pass = nr; - if (quad->input.prim == QUAD_PRIM_TRI) { + if (quads[0]->input.prim == QUAD_PRIM_TRI) { struct softpipe_context *softpipe = qs->softpipe; - /* need to invert Y to index into OpenGL's stipple pattern */ - const int col0 = quad->input.x0 % 32; - const int y0 = quad->input.y0; - const int y1 = y0 + 1; - const uint stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; - const uint stipple1 = softpipe->poly_stipple.stipple[y1 % 32]; + unsigned q; - /* turn off quad mask bits that fail the stipple test */ - if ((stipple0 & (bit31 >> col0)) == 0) - quad->inout.mask &= ~MASK_TOP_LEFT; + pass = 0; - if ((stipple0 & (bit30 >> col0)) == 0) - quad->inout.mask &= ~MASK_TOP_RIGHT; + for (q = 0; q < nr; q++) { + struct quad_header *quad = quads[q]; - if ((stipple1 & (bit31 >> col0)) == 0) - quad->inout.mask &= ~MASK_BOTTOM_LEFT; + const int col0 = quad->input.x0 % 32; + const int y0 = quad->input.y0; + const int y1 = y0 + 1; + const uint stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; + const uint stipple1 = softpipe->poly_stipple.stipple[y1 % 32]; - if ((stipple1 & (bit30 >> col0)) == 0) - quad->inout.mask &= ~MASK_BOTTOM_RIGHT; + /* turn off quad mask bits that fail the stipple test */ + if ((stipple0 & (bit31 >> col0)) == 0) + quad->inout.mask &= ~MASK_TOP_LEFT; - if (!quad->inout.mask) { - /* all fragments failed stipple test, end of quad pipeline */ - return; + if ((stipple0 & (bit30 >> col0)) == 0) + quad->inout.mask &= ~MASK_TOP_RIGHT; + + if ((stipple1 & (bit31 >> col0)) == 0) + quad->inout.mask &= ~MASK_BOTTOM_LEFT; + + if ((stipple1 & (bit30 >> col0)) == 0) + quad->inout.mask &= ~MASK_BOTTOM_RIGHT; + + if (quad->inout.mask) + quads[pass++] = quad; } } - qs->next->run(qs->next, quad); + qs->next->run(qs->next, quads, pass); } diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index eaf84ed9de..23dcae89c6 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -172,7 +172,7 @@ clip_emit_quad( struct setup_context *setup, struct quad_header *quad ) if (quad->inout.mask) { struct softpipe_context *sp = setup->softpipe; - sp->quad.first->run( sp->quad.first, quad ); + sp->quad.first->run( sp->quad.first, &quad, 1 ); } } @@ -193,7 +193,7 @@ emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread ) if (mask & 4) setup->numFragsEmitted++; if (mask & 8) setup->numFragsEmitted++; #endif - sp->quad.first->run( sp->quad.first, quad ); + sp->quad.first->run( sp->quad.first, &quad, 1 ); #if DEBUG_FRAGS mask = quad->inout.mask; if (mask & 1) setup->numFragsWritten++; -- cgit v1.2.3 From a1dbd7aa159e266592a1e52504680992327ca9e0 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 24 Jul 2009 18:17:05 +0100 Subject: softpipe: actually pass >1 quad from triangle routine First attempt --- src/gallium/drivers/softpipe/sp_context.h | 11 +- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 2 + src/gallium/drivers/softpipe/sp_quad.h | 6 +- src/gallium/drivers/softpipe/sp_quad_blend.c | 9 +- src/gallium/drivers/softpipe/sp_quad_coverage.c | 10 +- src/gallium/drivers/softpipe/sp_quad_pipe.c | 6 +- src/gallium/drivers/softpipe/sp_quad_stipple.c | 44 +++--- src/gallium/drivers/softpipe/sp_setup.c | 186 ++++++++++++------------ 8 files changed, 135 insertions(+), 139 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 414d903a37..153a648b0e 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -96,7 +96,16 @@ struct softpipe_context { /** Which vertex shader output slot contains point size */ int psize_slot; - unsigned reduced_api_prim; /**< PIPE_PRIM_POINTS, _LINES or _TRIANGLES */ + /* The reduced version of the primitive supplied by the state + * tracker. + */ + unsigned reduced_api_prim; + + /* The reduced primitive after unfilled triangles, wide-line + * decomposition, etc, are taken into account. This is the + * primitive actually rasterized. + */ + unsigned reduced_prim; /** Derived from scissor and surface bounds: */ struct pipe_scissor_state cliprect; diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 42021789ea..1dd63d99ff 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -44,6 +44,7 @@ #include "draw/draw_context.h" #include "draw/draw_vbuf.h" #include "util/u_memory.h" +#include "util/u_prim.h" #define SP_MAX_VBUF_INDEXES 1024 @@ -167,6 +168,7 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) setup_prepare( setup_ctx ); + cvbr->softpipe->reduced_prim = u_reduced_prim(prim); cvbr->prim = prim; return TRUE; diff --git a/src/gallium/drivers/softpipe/sp_quad.h b/src/gallium/drivers/softpipe/sp_quad.h index bd6c6cb912..a3236bd116 100644 --- a/src/gallium/drivers/softpipe/sp_quad.h +++ b/src/gallium/drivers/softpipe/sp_quad.h @@ -97,10 +97,10 @@ struct quad_header { struct quad_header_inout inout; struct quad_header_output output; - const struct tgsi_interp_coef *coef; + /* Redundant/duplicated: + */ const struct tgsi_interp_coef *posCoef; - - unsigned nr_attrs; + const struct tgsi_interp_coef *coef; }; #endif /* SP_QUAD_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index fdf1bb4552..8ef8666c0e 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -123,10 +123,6 @@ logicop_quad(struct quad_stage *qs, { struct softpipe_context *softpipe = qs->softpipe; uint cbuf; - struct softpipe_cached_tile * - tile = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], - quads[0]->input.x0, - quads[0]->input.y0); /* loop over colorbuffer outputs */ for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { @@ -137,6 +133,11 @@ logicop_quad(struct quad_stage *qs, uint *res4 = (uint *) res; uint i, j; + struct softpipe_cached_tile * + tile = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], + quads[0]->input.x0, + quads[0]->input.y0); + for (i = 0; i < nr; i++) { struct quad_header *quad = quads[i]; float (*quadColor)[4] = quad->output.color[cbuf]; diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c index eda5ce8e63..f06a385b3c 100644 --- a/src/gallium/drivers/softpipe/sp_quad_coverage.c +++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c @@ -69,16 +69,10 @@ coverage_run(struct quad_stage *qs, unsigned nr) { struct softpipe_context *softpipe = qs->softpipe; - const uint prim = quads[0]->input.prim; unsigned i; - if ((softpipe->rasterizer->poly_smooth && prim == QUAD_PRIM_TRI) || - (softpipe->rasterizer->line_smooth && prim == QUAD_PRIM_LINE) || - (softpipe->rasterizer->point_smooth && prim == QUAD_PRIM_POINT)) { - - for (i = 0; i < nr; i++) - coverage_quad( qs, quads[i] ); - } + for (i = 0; i < nr; i++) + coverage_quad( qs, quads[i] ); qs->next->run(qs->next, quads, nr); } diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c index 594fade455..6fae7d552f 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.c +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c @@ -76,9 +76,9 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp_push_quad_first( sp, sp->quad.blend ); } - if (sp->rasterizer->poly_smooth || - sp->rasterizer->line_smooth || - sp->rasterizer->point_smooth) { + if ((sp->rasterizer->poly_smooth && sp->reduced_prim == PIPE_PRIM_TRIANGLES) || + (sp->rasterizer->line_smooth && sp->reduced_prim == PIPE_PRIM_LINES) || + (sp->rasterizer->point_smooth && sp->reduced_prim == PIPE_PRIM_POINTS)) { sp_push_quad_first( sp, sp->quad.coverage ); } diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c index 05665d8e26..a0527a596a 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stipple.c +++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c @@ -20,37 +20,35 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) static const uint bit30 = 1 << 30; unsigned pass = nr; - if (quads[0]->input.prim == QUAD_PRIM_TRI) { - struct softpipe_context *softpipe = qs->softpipe; - unsigned q; + struct softpipe_context *softpipe = qs->softpipe; + unsigned q; - pass = 0; + pass = 0; - for (q = 0; q < nr; q++) { - struct quad_header *quad = quads[q]; + for (q = 0; q < nr; q++) { + struct quad_header *quad = quads[q]; - const int col0 = quad->input.x0 % 32; - const int y0 = quad->input.y0; - const int y1 = y0 + 1; - const uint stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; - const uint stipple1 = softpipe->poly_stipple.stipple[y1 % 32]; + const int col0 = quad->input.x0 % 32; + const int y0 = quad->input.y0; + const int y1 = y0 + 1; + const uint stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; + const uint stipple1 = softpipe->poly_stipple.stipple[y1 % 32]; - /* turn off quad mask bits that fail the stipple test */ - if ((stipple0 & (bit31 >> col0)) == 0) - quad->inout.mask &= ~MASK_TOP_LEFT; + /* turn off quad mask bits that fail the stipple test */ + if ((stipple0 & (bit31 >> col0)) == 0) + quad->inout.mask &= ~MASK_TOP_LEFT; - if ((stipple0 & (bit30 >> col0)) == 0) - quad->inout.mask &= ~MASK_TOP_RIGHT; + if ((stipple0 & (bit30 >> col0)) == 0) + quad->inout.mask &= ~MASK_TOP_RIGHT; - if ((stipple1 & (bit31 >> col0)) == 0) - quad->inout.mask &= ~MASK_BOTTOM_LEFT; + if ((stipple1 & (bit31 >> col0)) == 0) + quad->inout.mask &= ~MASK_BOTTOM_LEFT; - if ((stipple1 & (bit30 >> col0)) == 0) - quad->inout.mask &= ~MASK_BOTTOM_RIGHT; + if ((stipple1 & (bit30 >> col0)) == 0) + quad->inout.mask &= ~MASK_BOTTOM_RIGHT; - if (quad->inout.mask) - quads[pass++] = quad; - } + if (quad->inout.mask) + quads[pass++] = quad; } qs->next->run(qs->next, quads, pass); diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 23dcae89c6..a132911c99 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -62,6 +62,8 @@ struct edge { }; +#define MAX_QUADS 16 + /** * Triangle setup info (derived from draw_stage). @@ -84,10 +86,14 @@ struct setup_context { struct edge emaj; float oneoverarea; + int facing; + + struct quad_header quad[MAX_QUADS]; + struct quad_header *quad_ptrs[MAX_QUADS]; + unsigned count; struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; struct tgsi_interp_coef posCoef; /* For Z, W */ - struct quad_header quad; struct { int left[2]; /**< [0] = row0, [1] = row1 */ @@ -176,41 +182,6 @@ clip_emit_quad( struct setup_context *setup, struct quad_header *quad ) } } -/** - * Emit a quad (pass to next stage). No clipping is done. - */ -static INLINE void -emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread ) -{ - struct softpipe_context *sp = setup->softpipe; -#if DEBUG_FRAGS - uint mask = quad->inout.mask; -#endif - -#if DEBUG_FRAGS - if (mask & 1) setup->numFragsEmitted++; - if (mask & 2) setup->numFragsEmitted++; - if (mask & 4) setup->numFragsEmitted++; - if (mask & 8) setup->numFragsEmitted++; -#endif - sp->quad.first->run( sp->quad.first, &quad, 1 ); -#if DEBUG_FRAGS - mask = quad->inout.mask; - if (mask & 1) setup->numFragsWritten++; - if (mask & 2) setup->numFragsWritten++; - if (mask & 4) setup->numFragsWritten++; - if (mask & 8) setup->numFragsWritten++; -#endif -} - - -#define EMIT_QUAD(setup,x,y,qmask) \ -do { \ - setup->quad.input.x0 = x; \ - setup->quad.input.y0 = y; \ - setup->quad.inout.mask = qmask; \ - emit_quad( setup, &setup->quad, 0 ); \ -} while (0) /** @@ -219,7 +190,12 @@ do { \ */ static INLINE int block( int x ) { - return x & ~1; + return x & ~(2-1); +} + +static INLINE int block_x( int x ) +{ + return x & ~(16-1); } @@ -228,13 +204,15 @@ static INLINE int block( int x ) */ static void flush_spans( struct setup_context *setup ) { - const int step = 30; + const int step = 16; const int xleft0 = setup->span.left[0]; const int xleft1 = setup->span.left[1]; const int xright0 = setup->span.right[0]; const int xright1 = setup->span.right[1]; + struct quad_stage *pipe = setup->softpipe->quad.first; + - int minleft = block(MIN2(xleft0, xleft1)); + int minleft = block_x(MIN2(xleft0, xleft1)); int maxright = MAX2(xright0, xright1); int x; @@ -244,7 +222,8 @@ static void flush_spans( struct setup_context *setup ) unsigned skip_right0 = CLAMP(x + step - xright0, 0, step); unsigned skip_right1 = CLAMP(x + step - xright1, 0, step); unsigned lx = x; - + unsigned q = 0; + unsigned skipmask_left0 = (1U << skip_left0) - 1U; unsigned skipmask_left1 = (1U << skip_left1) - 1U; @@ -256,13 +235,22 @@ static void flush_spans( struct setup_context *setup ) unsigned mask0 = ~skipmask_left0 & ~skipmask_right0; unsigned mask1 = ~skipmask_left1 & ~skipmask_right1; - while (mask0 | mask1) { - unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2); - if (quadmask) - EMIT_QUAD( setup, lx, setup->span.y, quadmask ); - mask0 >>= 2; - mask1 >>= 2; - lx += 2; + if (mask0 | mask1) { + do { + unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2); + if (quadmask) { + setup->quad[q].input.x0 = lx; + setup->quad[q].input.y0 = setup->span.y; + setup->quad[q].inout.mask = quadmask; + setup->quad_ptrs[q] = &setup->quad[q]; + q++; + } + mask0 >>= 2; + mask1 >>= 2; + lx += 2; + } while (mask0 | mask1); + + pipe->run( pipe, setup->quad_ptrs, q ); } } @@ -281,7 +269,7 @@ static void print_vertex(const struct setup_context *setup, { int i; debug_printf(" Vertex: (%p)\n", v); - for (i = 0; i < setup->quad.nr_attrs; i++) { + for (i = 0; i < setup->quad[0].nr_attrs; i++) { debug_printf(" %d: %f %f %f %f\n", i, v[i][0], v[i][1], v[i][2], v[i][3]); if (util_is_inf_or_nan(v[i][0])) { @@ -386,7 +374,9 @@ static boolean setup_sort_vertices( struct setup_context *setup, * - the GLSL gl_FrontFacing fragment attribute (bool) * - two-sided stencil test */ - setup->quad.input.facing = (det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW); + setup->facing = + ((det > 0.0) ^ + (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW)); return TRUE; } @@ -573,7 +563,7 @@ static void setup_tri_coefficients( struct setup_context *setup ) } if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { - setup->coef[fragSlot].a0[0] = 1.0f - setup->quad.input.facing; + setup->coef[fragSlot].a0[0] = 1.0f - setup->facing; setup->coef[fragSlot].dadx[0] = 0.0; setup->coef[fragSlot].dady[0] = 0.0; } @@ -741,7 +731,7 @@ void setup_tri( struct setup_context *setup, setup_tri_coefficients( setup ); setup_tri_edges( setup ); - setup->quad.input.prim = QUAD_PRIM_TRI; + assert(setup->softpipe->reduced_prim == PIPE_PRIM_TRIANGLES); setup->span.y = 0; setup->span.right[0] = 0; @@ -881,7 +871,7 @@ setup_line_coefficients(struct setup_context *setup, } if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { - setup->coef[fragSlot].a0[0] = 1.0f - setup->quad.input.facing; + setup->coef[fragSlot].a0[0] = 1.0f - setup->facing; setup->coef[fragSlot].dadx[0] = 0.0; setup->coef[fragSlot].dady[0] = 0.0; } @@ -902,20 +892,20 @@ plot(struct setup_context *setup, int x, int y) const int quadY = y - iy; const int mask = (1 << ix) << (2 * iy); - if (quadX != setup->quad.input.x0 || - quadY != setup->quad.input.y0) + if (quadX != setup->quad[0].input.x0 || + quadY != setup->quad[0].input.y0) { /* flush prev quad, start new quad */ - if (setup->quad.input.x0 != -1) - clip_emit_quad( setup, &setup->quad ); + if (setup->quad[0].input.x0 != -1) + clip_emit_quad( setup, &setup->quad[0] ); - setup->quad.input.x0 = quadX; - setup->quad.input.y0 = quadY; - setup->quad.inout.mask = 0x0; + setup->quad[0].input.x0 = quadX; + setup->quad[0].input.y0 = quadY; + setup->quad[0].inout.mask = 0x0; } - setup->quad.inout.mask |= mask; + setup->quad[0].inout.mask |= mask; } @@ -975,17 +965,18 @@ setup_line(struct setup_context *setup, assert(dx >= 0); assert(dy >= 0); + assert(setup->softpipe->reduced_prim == PIPE_PRIM_LINES); + + setup->quad[0].input.x0 = setup->quad[0].input.y0 = -1; + setup->quad[0].inout.mask = 0x0; - setup->quad.input.x0 = setup->quad.input.y0 = -1; - setup->quad.inout.mask = 0x0; - setup->quad.input.prim = QUAD_PRIM_LINE; /* XXX temporary: set coverage to 1.0 so the line appears * if AA mode happens to be enabled. */ - setup->quad.input.coverage[0] = - setup->quad.input.coverage[1] = - setup->quad.input.coverage[2] = - setup->quad.input.coverage[3] = 1.0; + setup->quad[0].input.coverage[0] = + setup->quad[0].input.coverage[1] = + setup->quad[0].input.coverage[2] = + setup->quad[0].input.coverage[3] = 1.0; if (dx > dy) { /*** X-major line ***/ @@ -1029,8 +1020,8 @@ setup_line(struct setup_context *setup, } /* draw final quad */ - if (setup->quad.inout.mask) { - clip_emit_quad( setup, &setup->quad ); + if (setup->quad[0].inout.mask) { + clip_emit_quad( setup, &setup->quad[0] ); } } @@ -1078,6 +1069,8 @@ setup_point( struct setup_context *setup, if (softpipe->no_rast) return; + assert(setup->softpipe->reduced_prim == PIPE_PRIM_POINTS); + /* For points, all interpolants are constant-valued. * However, for point sprites, we'll need to setup texcoords appropriately. * XXX: which coefficients are the texcoords??? @@ -1124,22 +1117,21 @@ setup_point( struct setup_context *setup, } if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { - setup->coef[fragSlot].a0[0] = 1.0f - setup->quad.input.facing; + setup->coef[fragSlot].a0[0] = 1.0f - setup->facing; setup->coef[fragSlot].dadx[0] = 0.0; setup->coef[fragSlot].dady[0] = 0.0; } } - setup->quad.input.prim = QUAD_PRIM_POINT; if (halfSize <= 0.5 && !round) { /* special case for 1-pixel points */ const int ix = ((int) x) & 1; const int iy = ((int) y) & 1; - setup->quad.input.x0 = (int) x - ix; - setup->quad.input.y0 = (int) y - iy; - setup->quad.inout.mask = (1 << ix) << (2 * iy); - clip_emit_quad( setup, &setup->quad ); + setup->quad[0].input.x0 = (int) x - ix; + setup->quad[0].input.y0 = (int) y - iy; + setup->quad[0].inout.mask = (1 << ix) << (2 * iy); + clip_emit_quad( setup, &setup->quad[0] ); } else { if (round) { @@ -1159,15 +1151,15 @@ setup_point( struct setup_context *setup, for (ix = ixmin; ix <= ixmax; ix += 2) { float dx, dy, dist2, cover; - setup->quad.inout.mask = 0x0; + setup->quad[0].inout.mask = 0x0; dx = (ix + 0.5f) - x; dy = (iy + 0.5f) - y; dist2 = dx * dx + dy * dy; if (dist2 <= rmax2) { cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); - setup->quad.inout.mask |= MASK_TOP_LEFT; + setup->quad[0].input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); + setup->quad[0].inout.mask |= MASK_TOP_LEFT; } dx = (ix + 1.5f) - x; @@ -1175,8 +1167,8 @@ setup_point( struct setup_context *setup, dist2 = dx * dx + dy * dy; if (dist2 <= rmax2) { cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); - setup->quad.inout.mask |= MASK_TOP_RIGHT; + setup->quad[0].input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); + setup->quad[0].inout.mask |= MASK_TOP_RIGHT; } dx = (ix + 0.5f) - x; @@ -1184,8 +1176,8 @@ setup_point( struct setup_context *setup, dist2 = dx * dx + dy * dy; if (dist2 <= rmax2) { cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); - setup->quad.inout.mask |= MASK_BOTTOM_LEFT; + setup->quad[0].input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); + setup->quad[0].inout.mask |= MASK_BOTTOM_LEFT; } dx = (ix + 1.5f) - x; @@ -1193,14 +1185,14 @@ setup_point( struct setup_context *setup, dist2 = dx * dx + dy * dy; if (dist2 <= rmax2) { cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); - setup->quad.inout.mask |= MASK_BOTTOM_RIGHT; + setup->quad[0].input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); + setup->quad[0].inout.mask |= MASK_BOTTOM_RIGHT; } - if (setup->quad.inout.mask) { - setup->quad.input.x0 = ix; - setup->quad.input.y0 = iy; - clip_emit_quad( setup, &setup->quad ); + if (setup->quad[0].inout.mask) { + setup->quad[0].input.x0 = ix; + setup->quad[0].input.y0 = iy; + clip_emit_quad( setup, &setup->quad[0] ); } } } @@ -1244,10 +1236,10 @@ setup_point( struct setup_context *setup, mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); } - setup->quad.inout.mask = mask; - setup->quad.input.x0 = ix; - setup->quad.input.y0 = iy; - clip_emit_quad( setup, &setup->quad ); + setup->quad[0].inout.mask = mask; + setup->quad[0].input.x0 = ix; + setup->quad[0].input.y0 = iy; + clip_emit_quad( setup, &setup->quad[0] ); } } } @@ -1262,9 +1254,6 @@ void setup_prepare( struct setup_context *setup ) softpipe_update_derived(sp); } - /* Note: nr_attrs is only used for debugging (vertex printing) */ - setup->quad.nr_attrs = draw_num_vs_outputs(sp->draw); - sp->quad.first->begin( sp->quad.first ); if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES && @@ -1293,11 +1282,14 @@ void setup_destroy_context( struct setup_context *setup ) struct setup_context *setup_create_context( struct softpipe_context *softpipe ) { struct setup_context *setup = CALLOC_STRUCT(setup_context); + unsigned i; setup->softpipe = softpipe; - setup->quad.coef = setup->coef; - setup->quad.posCoef = &setup->posCoef; + for (i = 0; i < MAX_QUADS; i++) { + setup->quad[i].coef = setup->coef; + setup->quad[i].posCoef = &setup->posCoef; + } setup->span.left[0] = 1000000; /* greater than right[0] */ setup->span.left[1] = 1000000; /* greater than right[1] */ -- cgit v1.2.3 From 333ec94380af502b1c492f61dcc1897bcf43a96c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 24 Jul 2009 18:46:17 +0100 Subject: softpipe: example fastpaths in blending --- src/gallium/drivers/softpipe/sp_quad_blend.c | 132 ++++++++++++++++++++++++--- 1 file changed, 121 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 8ef8666c0e..1ef7529cff 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -117,9 +117,9 @@ do { \ static void -logicop_quad(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) +logicop_quads(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) { struct softpipe_context *softpipe = qs->softpipe; uint cbuf; @@ -241,13 +241,102 @@ logicop_quad(struct quad_stage *qs, } } } + + /* pass blended quad to next stage */ + qs->next->run(qs->next, quads, nr); +} + +static void +blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + static const float one[4] = { 1, 1, 1, 1 }; + float one_minus_alpha[QUAD_SIZE]; + float dest[4][QUAD_SIZE]; + float source[4][QUAD_SIZE]; + uint i, j, q; + + struct softpipe_cached_tile *tile + = sp_get_cached_tile(qs->softpipe->cbuf_cache[0], + quads[0]->input.x0, + quads[0]->input.y0); + + for (q = 0; q < nr; q++) { + struct quad_header *quad = quads[q]; + float (*quadColor)[4] = quad->output.color[0]; + const float *alpha = quadColor[3]; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } + } + + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + VEC4_MUL(source[3], quadColor[3], alpha); /* A */ + + VEC4_SUB(one_minus_alpha, one, alpha); + VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ + VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ + VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ + VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* B */ + + VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */ + VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ + VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */ + VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ + } + + /* pass blended quad to next stage */ + qs->next->run(qs->next, quads, nr); } +static void +blend_single_add_one_one(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + float dest[4][QUAD_SIZE]; + uint i, j, q; + + struct softpipe_cached_tile *tile + = sp_get_cached_tile(qs->softpipe->cbuf_cache[0], + quads[0]->input.x0, + quads[0]->input.y0); + + for (q = 0; q < nr; q++) { + struct quad_header *quad = quads[q]; + float (*quadColor)[4] = quad->output.color[0]; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } + } + + VEC4_ADD_SAT(quadColor[0], quadColor[0], dest[0]); /* R */ + VEC4_ADD_SAT(quadColor[1], quadColor[1], dest[1]); /* G */ + VEC4_ADD_SAT(quadColor[2], quadColor[2], dest[2]); /* B */ + VEC4_ADD_SAT(quadColor[3], quadColor[3], dest[3]); /* A */ + } + + /* pass blended quad to next stage */ + qs->next->run(qs->next, quads, nr); +} static void -blend_quads(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) +blend_quads_fallback(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) { static const float zero[4] = { 0, 0, 0, 0 }; static const float one[4] = { 1, 1, 1, 1 }; @@ -747,6 +836,9 @@ blend_quads(struct quad_stage *qs, } } } /* cbuf loop */ + + /* pass blended quad to next stage */ + qs->next->run(qs->next, quads, nr); } @@ -756,21 +848,39 @@ blend_quad(struct quad_stage *qs, unsigned nr) { struct softpipe_context *softpipe = qs->softpipe; + const struct pipe_blend_state *blend = softpipe->blend; if (softpipe->blend->logicop_enable) { - logicop_quad(qs, quads, nr); + qs->run = logicop_quads; } - else if (softpipe->blend->blend_enable) { - blend_quads(qs, quads, nr ); + else { + qs->run = blend_quads_fallback; + + if (blend->rgb_src_factor == blend->alpha_src_factor && + blend->rgb_dst_factor == blend->alpha_dst_factor && + blend->rgb_func == blend->alpha_func && + softpipe->framebuffer.nr_cbufs == 1) + { + if (blend->alpha_func == PIPE_BLEND_ADD) { + if (blend->rgb_src_factor == PIPE_BLENDFACTOR_ONE && + blend->rgb_dst_factor == PIPE_BLENDFACTOR_ONE) { + qs->run = blend_single_add_one_one; + } + else if (blend->rgb_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA && + blend->rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA) + qs->run = blend_single_add_src_alpha_inv_src_alpha; + + } + } } - /* pass blended quad to next stage */ - qs->next->run(qs->next, quads, nr); + qs->run(qs, quads, nr); } static void blend_begin(struct quad_stage *qs) { + qs->run = blend_quad; qs->next->begin(qs->next); } -- cgit v1.2.3 From 42f1757189ba965e6d917d1124d0d6cf78b19a70 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 24 Jul 2009 20:18:52 +0100 Subject: softpipe: fix typo --- src/gallium/drivers/softpipe/sp_quad_stencil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c index 706dd2f756..d9ee80e59a 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stencil.c +++ b/src/gallium/drivers/softpipe/sp_quad_stencil.c @@ -330,7 +330,7 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quads[], } if (quad->inout.mask) - quads[pass++] = q; + quads[pass++] = quad; } if (pass) -- cgit v1.2.3 From a2f7ab1d155da52c689f7c6390c233e4eae44643 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 24 Jul 2009 20:19:18 +0100 Subject: softpipe: move all color-combine code into sp_quad_blend.c Consolidate the read-modify-write color combining code from the blend, colormask and output stages. --- src/gallium/drivers/softpipe/Makefile | 14 +- src/gallium/drivers/softpipe/SConscript | 2 - src/gallium/drivers/softpipe/sp_context.c | 4 - src/gallium/drivers/softpipe/sp_context.h | 2 - src/gallium/drivers/softpipe/sp_quad_blend.c | 1352 ++++++++++++---------- src/gallium/drivers/softpipe/sp_quad_bufloop.c | 74 -- src/gallium/drivers/softpipe/sp_quad_colormask.c | 126 -- src/gallium/drivers/softpipe/sp_quad_coverage.c | 1 - src/gallium/drivers/softpipe/sp_quad_output.c | 109 -- src/gallium/drivers/softpipe/sp_quad_pipe.c | 15 +- 10 files changed, 727 insertions(+), 972 deletions(-) delete mode 100644 src/gallium/drivers/softpipe/sp_quad_bufloop.c delete mode 100644 src/gallium/drivers/softpipe/sp_quad_colormask.c delete mode 100644 src/gallium/drivers/softpipe/sp_quad_output.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 516e3992fd..bdc1a5819f 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -15,17 +15,15 @@ C_SOURCES = \ sp_prim_setup.c \ sp_prim_vbuf.c \ sp_quad_pipe.c \ - sp_quad_alpha_test.c \ - sp_quad_blend.c \ - sp_quad_colormask.c \ - sp_quad_coverage.c \ - sp_quad_depth_test.c \ + sp_quad_stipple.c \ sp_quad_earlyz.c \ + sp_quad_depth_test.c \ + sp_quad_stencil.c \ sp_quad_fs.c \ + sp_quad_alpha_test.c \ sp_quad_occlusion.c \ - sp_quad_output.c \ - sp_quad_stencil.c \ - sp_quad_stipple.c \ + sp_quad_coverage.c \ + sp_quad_blend.c \ sp_screen.c \ sp_setup.c \ sp_state_blend.c \ diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index f8720638a7..dcc25732ba 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -18,13 +18,11 @@ softpipe = env.ConvenienceLibrary( 'sp_quad_alpha_test.c', 'sp_quad_blend.c', 'sp_quad_pipe.c', - 'sp_quad_colormask.c', 'sp_quad_coverage.c', 'sp_quad_depth_test.c', 'sp_quad_earlyz.c', 'sp_quad_fs.c', 'sp_quad_occlusion.c', - 'sp_quad_output.c', 'sp_quad_stencil.c', 'sp_quad_stipple.c', 'sp_query.c', diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 4418ef0ff4..28a0dd62ac 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -97,8 +97,6 @@ static void softpipe_destroy( struct pipe_context *pipe ) softpipe->quad.occlusion->destroy( softpipe->quad.occlusion ); softpipe->quad.coverage->destroy( softpipe->quad.coverage ); softpipe->quad.blend->destroy( softpipe->quad.blend ); - softpipe->quad.colormask->destroy( softpipe->quad.colormask ); - softpipe->quad.output->destroy( softpipe->quad.output ); for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) sp_destroy_tile_cache(softpipe->cbuf_cache[i]); @@ -241,8 +239,6 @@ softpipe_create( struct pipe_screen *screen ) softpipe->quad.occlusion = sp_quad_occlusion_stage(softpipe); softpipe->quad.coverage = sp_quad_coverage_stage(softpipe); softpipe->quad.blend = sp_quad_blend_stage(softpipe); - softpipe->quad.colormask = sp_quad_colormask_stage(softpipe); - softpipe->quad.output = sp_quad_output_stage(softpipe); /* vertex shader samplers */ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 153a648b0e..b76ff610a3 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -123,8 +123,6 @@ struct softpipe_context { struct quad_stage *occlusion; struct quad_stage *coverage; struct quad_stage *blend; - struct quad_stage *colormask; - struct quad_stage *output; struct quad_stage *first; /**< points to one of the above stages */ } quad; diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 1ef7529cff..e1f0e77255 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -117,135 +117,677 @@ do { \ static void -logicop_quads(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) +logicop_quad(struct quad_stage *qs, + float (*quadColor)[4], + float (*dest)[4]) { struct softpipe_context *softpipe = qs->softpipe; - uint cbuf; + ubyte src[4][4], dst[4][4], res[4][4]; + uint *src4 = (uint *) src; + uint *dst4 = (uint *) dst; + uint *res4 = (uint *) res; + uint j; + + + /* convert to ubyte */ + for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */ + dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */ + dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */ + dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */ + dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */ + + src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */ + src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */ + src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */ + src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */ + } + + switch (softpipe->blend->logicop_func) { + case PIPE_LOGICOP_CLEAR: + for (j = 0; j < 4; j++) + res4[j] = 0; + break; + case PIPE_LOGICOP_NOR: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] | dst4[j]); + break; + case PIPE_LOGICOP_AND_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j] & dst4[j]; + break; + case PIPE_LOGICOP_COPY_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j]; + break; + case PIPE_LOGICOP_AND_REVERSE: + for (j = 0; j < 4; j++) + res4[j] = src4[j] & ~dst4[j]; + break; + case PIPE_LOGICOP_INVERT: + for (j = 0; j < 4; j++) + res4[j] = ~dst4[j]; + break; + case PIPE_LOGICOP_XOR: + for (j = 0; j < 4; j++) + res4[j] = dst4[j] ^ src4[j]; + break; + case PIPE_LOGICOP_NAND: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] & dst4[j]); + break; + case PIPE_LOGICOP_AND: + for (j = 0; j < 4; j++) + res4[j] = src4[j] & dst4[j]; + break; + case PIPE_LOGICOP_EQUIV: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] ^ dst4[j]); + break; + case PIPE_LOGICOP_NOOP: + for (j = 0; j < 4; j++) + res4[j] = dst4[j]; + break; + case PIPE_LOGICOP_OR_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j] | dst4[j]; + break; + case PIPE_LOGICOP_COPY: + for (j = 0; j < 4; j++) + res4[j] = src4[j]; + break; + case PIPE_LOGICOP_OR_REVERSE: + for (j = 0; j < 4; j++) + res4[j] = src4[j] | ~dst4[j]; + break; + case PIPE_LOGICOP_OR: + for (j = 0; j < 4; j++) + res4[j] = src4[j] | dst4[j]; + break; + case PIPE_LOGICOP_SET: + for (j = 0; j < 4; j++) + res4[j] = ~0; + break; + default: + assert(0); + } + + for (j = 0; j < 4; j++) { + quadColor[j][0] = ubyte_to_float(res[j][0]); + quadColor[j][1] = ubyte_to_float(res[j][1]); + quadColor[j][2] = ubyte_to_float(res[j][2]); + quadColor[j][3] = ubyte_to_float(res[j][3]); + } +} + + + +static void +blend_quad(struct quad_stage *qs, + float (*quadColor)[4], + float (*dest)[4]) +{ + static const float zero[4] = { 0, 0, 0, 0 }; + static const float one[4] = { 1, 1, 1, 1 }; + struct softpipe_context *softpipe = qs->softpipe; + float source[4][QUAD_SIZE]; + + /* + * Compute src/first term RGB + */ + switch (softpipe->blend->rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + VEC4_COPY(source[0], quadColor[0]); /* R */ + VEC4_COPY(source[1], quadColor[1]); /* G */ + VEC4_COPY(source[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */ + VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */ + VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + { + const float *alpha = quadColor[3]; + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_DST_COLOR: + VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */ + VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */ + VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */ + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + { + const float *alpha = dest[3]; + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + { + const float *alpha = quadColor[3]; + float diff[4], temp[4]; + VEC4_SUB(diff, one, dest[3]); + VEC4_MIN(temp, alpha, diff); + VEC4_MUL(source[0], quadColor[0], temp); /* R */ + VEC4_MUL(source[1], quadColor[1], temp); /* G */ + VEC4_MUL(source[2], quadColor[2], temp); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], comp); /* R */ + VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], comp); /* G */ + VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float alpha[4]; + VEC4_SCALAR(alpha, softpipe->blend_color.color[3]); + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_SRC1_COLOR: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(source[0], zero); /* R */ + VEC4_COPY(source[1], zero); /* G */ + VEC4_COPY(source[2], zero); /* B */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, quadColor[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, dest[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, dest[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, dest[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + { + float inv_comp[4]; + /* R */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); + VEC4_MUL(source[0], quadColor[0], inv_comp); + /* G */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); + VEC4_MUL(source[1], quadColor[1], inv_comp); + /* B */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); + VEC4_MUL(source[2], quadColor[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_alpha[4]; + VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + assert(0); /* to do */ + break; + default: + assert(0); + } + + /* + * Compute src/first term A + */ + switch (softpipe->blend->alpha_src_factor) { + case PIPE_BLENDFACTOR_ONE: + VEC4_COPY(source[3], quadColor[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_SRC_ALPHA: + { + const float *alpha = quadColor[3]; + VEC4_MUL(source[3], quadColor[3], alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + /* multiply alpha by 1.0 */ + VEC4_COPY(source[3], quadColor[3]); /* A */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(source[3], quadColor[3], comp); /* A */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(source[3], zero); /* A */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, quadColor[3]); + VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, dest[3]); + VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + /* A */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(source[3], quadColor[3], inv_comp); + } + break; + default: + assert(0); + } + - /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { + /* + * Compute dest/second term RGB + */ + switch (softpipe->blend->rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* dest = dest * 1 NO-OP, leave dest as-is */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ + VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ + VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ + VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ + VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ + VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ + VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ + VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ + VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + assert(0); /* illegal */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ + VEC4_MUL(dest[0], dest[0], comp); /* R */ + VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ + VEC4_MUL(dest[1], dest[1], comp); /* G */ + VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ + VEC4_MUL(dest[2], dest[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(dest[0], dest[0], comp); /* R */ + VEC4_MUL(dest[1], dest[1], comp); /* G */ + VEC4_MUL(dest[2], dest[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(dest[0], zero); /* R */ + VEC4_COPY(dest[1], zero); /* G */ + VEC4_COPY(dest[2], zero); /* B */ + break; + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + /* XXX what are these? */ + assert(0); + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ + VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ + VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ + VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ + VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ + VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float one_minus_alpha[QUAD_SIZE]; + VEC4_SUB(one_minus_alpha, one, quadColor[3]); + VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ + VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ + VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[3]); /* A */ + VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ + VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ + VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[0]); /* R */ + VEC4_MUL(dest[0], dest[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, dest[1]); /* G */ + VEC4_MUL(dest[1], dest[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, dest[2]); /* B */ + VEC4_MUL(dest[2], dest[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + { + float inv_comp[4]; + /* R */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); + VEC4_MUL(dest[0], dest[0], inv_comp); + /* G */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); + VEC4_MUL(dest[1], dest[1], inv_comp); + /* B */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); + VEC4_MUL(dest[2], dest[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(dest[0], dest[0], inv_comp); + VEC4_MUL(dest[1], dest[1], inv_comp); + VEC4_MUL(dest[2], dest[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + /* XXX what are these? */ + assert(0); + break; + default: + assert(0); + } + + /* + * Compute dest/second term A + */ + switch (softpipe->blend->alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* dest = dest * 1 NO-OP, leave dest as-is */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_SRC_ALPHA: + VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + assert(0); /* illegal */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(dest[3], dest[3], comp); /* A */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(dest[3], zero); /* A */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float one_minus_alpha[QUAD_SIZE]; + VEC4_SUB(one_minus_alpha, one, quadColor[3]); + VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[3]); /* A */ + VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(dest[3], dest[3], inv_comp); + } + break; + default: + assert(0); + } + + /* + * Combine RGB terms + */ + switch (softpipe->blend->rgb_func) { + case PIPE_BLEND_ADD: + VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */ + VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ + VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_SUBTRACT: + VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */ + VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */ + VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */ + VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */ + VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */ + break; + case PIPE_BLEND_MIN: + VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ + VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ + VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_MAX: + VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ + VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ + VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ + break; + default: + assert(0); + } + + /* + * Combine A terms + */ + switch (softpipe->blend->alpha_func) { + case PIPE_BLEND_ADD: + VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_SUBTRACT: + VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */ + break; + case PIPE_BLEND_MIN: + VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_MAX: + VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ + break; + default: + assert(0); + } +} + +static void +colormask_quad(struct quad_stage *qs, + float (*quadColor)[4], + float (*dest)[4]) +{ + struct softpipe_context *softpipe = qs->softpipe; + + /* R */ + if (!(softpipe->blend->colormask & PIPE_MASK_R)) + COPY_4V(quadColor[0], dest[0]); + + /* G */ + if (!(softpipe->blend->colormask & PIPE_MASK_G)) + COPY_4V(quadColor[1], dest[1]); + + /* B */ + if (!(softpipe->blend->colormask & PIPE_MASK_B)) + COPY_4V(quadColor[2], dest[2]); + + /* A */ + if (!(softpipe->blend->colormask & PIPE_MASK_A)) + COPY_4V(quadColor[3], dest[3]); +} + + +static void +blend_fallback(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + struct softpipe_context *softpipe = qs->softpipe; + const struct pipe_blend_state *blend = softpipe->blend; + unsigned cbuf; + + for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) + { float dest[4][QUAD_SIZE]; - ubyte src[4][4], dst[4][4], res[4][4]; - uint *src4 = (uint *) src; - uint *dst4 = (uint *) dst; - uint *res4 = (uint *) res; - uint i, j; - - struct softpipe_cached_tile * - tile = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], - quads[0]->input.x0, - quads[0]->input.y0); - - for (i = 0; i < nr; i++) { - struct quad_header *quad = quads[i]; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], + quads[0]->input.x0, + quads[0]->input.y0); + uint q, i, j; + + for (q = 0; q < nr; q++) { + struct quad_header *quad = quads[q]; float (*quadColor)[4] = quad->output.color[cbuf]; + const int itx = (quad->input.x0 & (TILE_SIZE-1)); + const int ity = (quad->input.y0 & (TILE_SIZE-1)); - /* get/swizzle dest colors */ + /* get/swizzle dest colors + */ for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); + int x = itx + (j & 1); + int y = ity + (j >> 1); for (i = 0; i < 4; i++) { dest[i][j] = tile->data.color[y][x][i]; } } - /* convert to ubyte */ - for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */ - dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */ - dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */ - dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */ - dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */ - - src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */ - src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */ - src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */ - src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */ - } - switch (softpipe->blend->logicop_func) { - case PIPE_LOGICOP_CLEAR: - for (j = 0; j < 4; j++) - res4[j] = 0; - break; - case PIPE_LOGICOP_NOR: - for (j = 0; j < 4; j++) - res4[j] = ~(src4[j] | dst4[j]); - break; - case PIPE_LOGICOP_AND_INVERTED: - for (j = 0; j < 4; j++) - res4[j] = ~src4[j] & dst4[j]; - break; - case PIPE_LOGICOP_COPY_INVERTED: - for (j = 0; j < 4; j++) - res4[j] = ~src4[j]; - break; - case PIPE_LOGICOP_AND_REVERSE: - for (j = 0; j < 4; j++) - res4[j] = src4[j] & ~dst4[j]; - break; - case PIPE_LOGICOP_INVERT: - for (j = 0; j < 4; j++) - res4[j] = ~dst4[j]; - break; - case PIPE_LOGICOP_XOR: - for (j = 0; j < 4; j++) - res4[j] = dst4[j] ^ src4[j]; - break; - case PIPE_LOGICOP_NAND: - for (j = 0; j < 4; j++) - res4[j] = ~(src4[j] & dst4[j]); - break; - case PIPE_LOGICOP_AND: - for (j = 0; j < 4; j++) - res4[j] = src4[j] & dst4[j]; - break; - case PIPE_LOGICOP_EQUIV: - for (j = 0; j < 4; j++) - res4[j] = ~(src4[j] ^ dst4[j]); - break; - case PIPE_LOGICOP_NOOP: - for (j = 0; j < 4; j++) - res4[j] = dst4[j]; - break; - case PIPE_LOGICOP_OR_INVERTED: - for (j = 0; j < 4; j++) - res4[j] = ~src4[j] | dst4[j]; - break; - case PIPE_LOGICOP_COPY: - for (j = 0; j < 4; j++) - res4[j] = src4[j]; - break; - case PIPE_LOGICOP_OR_REVERSE: - for (j = 0; j < 4; j++) - res4[j] = src4[j] | ~dst4[j]; - break; - case PIPE_LOGICOP_OR: - for (j = 0; j < 4; j++) - res4[j] = src4[j] | dst4[j]; - break; - case PIPE_LOGICOP_SET: - for (j = 0; j < 4; j++) - res4[j] = ~0; - break; - default: - assert(0); + if (blend->logicop_enable) { + logicop_quad( qs, quadColor, dest ); + } + else if (blend->blend_enable) { + blend_quad( qs, quadColor, dest ); } - for (j = 0; j < 4; j++) { - quadColor[j][0] = ubyte_to_float(res[j][0]); - quadColor[j][1] = ubyte_to_float(res[j][1]); - quadColor[j][2] = ubyte_to_float(res[j][2]); - quadColor[j][3] = ubyte_to_float(res[j][3]); + if (blend->colormask != 0xf) + colormask_quad( qs, quadColor, dest ); + + /* Output color values + */ + for (j = 0; j < QUAD_SIZE; j++) { + if (quad->inout.mask & (1 << j)) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + for (i = 0; i < 4; i++) { /* loop over color chans */ + tile->data.color[y][x][i] = quadColor[i][j]; + } + } } } } - - /* pass blended quad to next stage */ - qs->next->run(qs->next, quads, nr); } + static void blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs, struct quad_header *quads[], @@ -266,11 +808,13 @@ blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs, struct quad_header *quad = quads[q]; float (*quadColor)[4] = quad->output.color[0]; const float *alpha = quadColor[3]; + const int itx = (quad->input.x0 & (TILE_SIZE-1)); + const int ity = (quad->input.y0 & (TILE_SIZE-1)); /* get/swizzle dest colors */ for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); + int x = itx + (j & 1); + int y = ity + (j >> 1); for (i = 0; i < 4; i++) { dest[i][j] = tile->data.color[y][x][i]; } @@ -291,10 +835,17 @@ blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs, VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */ VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ - } - /* pass blended quad to next stage */ - qs->next->run(qs->next, quads, nr); + for (j = 0; j < QUAD_SIZE; j++) { + if (quad->inout.mask & (1 << j)) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + for (i = 0; i < 4; i++) { /* loop over color chans */ + tile->data.color[y][x][i] = quadColor[i][j]; + } + } + } + } } static void @@ -313,11 +864,13 @@ blend_single_add_one_one(struct quad_stage *qs, for (q = 0; q < nr; q++) { struct quad_header *quad = quads[q]; float (*quadColor)[4] = quad->output.color[0]; + const int itx = (quad->input.x0 & (TILE_SIZE-1)); + const int ity = (quad->input.y0 & (TILE_SIZE-1)); /* get/swizzle dest colors */ for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); + int x = itx + (j & 1); + int y = ity + (j >> 1); for (i = 0; i < 4; i++) { dest[i][j] = tile->data.color[y][x][i]; } @@ -327,539 +880,71 @@ blend_single_add_one_one(struct quad_stage *qs, VEC4_ADD_SAT(quadColor[1], quadColor[1], dest[1]); /* G */ VEC4_ADD_SAT(quadColor[2], quadColor[2], dest[2]); /* B */ VEC4_ADD_SAT(quadColor[3], quadColor[3], dest[3]); /* A */ - } - /* pass blended quad to next stage */ - qs->next->run(qs->next, quads, nr); + for (j = 0; j < QUAD_SIZE; j++) { + if (quad->inout.mask & (1 << j)) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + for (i = 0; i < 4; i++) { /* loop over color chans */ + tile->data.color[y][x][i] = quadColor[i][j]; + } + } + } + } } + static void -blend_quads_fallback(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) +single_output_color(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) { - static const float zero[4] = { 0, 0, 0, 0 }; - static const float one[4] = { 1, 1, 1, 1 }; - struct softpipe_context *softpipe = qs->softpipe; - uint cbuf; - - /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { - float source[4][QUAD_SIZE], dest[4][QUAD_SIZE]; - struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], - quads[0]->input.x0, - quads[0]->input.y0); - uint q, i, j; + uint i, j, q; - for (q = 0; q < nr; q++) { - struct quad_header *quad = quads[q]; - float (*quadColor)[4] = quad->output.color[cbuf]; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(qs->softpipe->cbuf_cache[0], + quads[0]->input.x0, + quads[0]->input.y0); - /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); - for (i = 0; i < 4; i++) { - dest[i][j] = tile->data.color[y][x][i]; + for (q = 0; q < nr; q++) { + struct quad_header *quad = quads[q]; + float (*quadColor)[4] = quad->output.color[0]; + const int itx = (quad->input.x0 & (TILE_SIZE-1)); + const int ity = (quad->input.y0 & (TILE_SIZE-1)); + + for (j = 0; j < QUAD_SIZE; j++) { + if (quad->inout.mask & (1 << j)) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + for (i = 0; i < 4; i++) { /* loop over color chans */ + tile->data.color[y][x][i] = quadColor[i][j]; } } - - /* - * Compute src/first term RGB - */ - switch (softpipe->blend->rgb_src_factor) { - case PIPE_BLENDFACTOR_ONE: - VEC4_COPY(source[0], quadColor[0]); /* R */ - VEC4_COPY(source[1], quadColor[1]); /* G */ - VEC4_COPY(source[2], quadColor[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */ - VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */ - VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - { - const float *alpha = quadColor[3]; - VEC4_MUL(source[0], quadColor[0], alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_DST_COLOR: - VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */ - VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */ - VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */ - break; - case PIPE_BLENDFACTOR_DST_ALPHA: - { - const float *alpha = dest[3]; - VEC4_MUL(source[0], quadColor[0], alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - { - const float *alpha = quadColor[3]; - float diff[4], temp[4]; - VEC4_SUB(diff, one, dest[3]); - VEC4_MIN(temp, alpha, diff); - VEC4_MUL(source[0], quadColor[0], temp); /* R */ - VEC4_MUL(source[1], quadColor[1], temp); /* G */ - VEC4_MUL(source[2], quadColor[2], temp); /* B */ - } - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - { - float comp[4]; - VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ - VEC4_MUL(source[0], quadColor[0], comp); /* R */ - VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ - VEC4_MUL(source[1], quadColor[1], comp); /* G */ - VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ - VEC4_MUL(source[2], quadColor[2], comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_CONST_ALPHA: - { - float alpha[4]; - VEC4_SCALAR(alpha, softpipe->blend_color.color[3]); - VEC4_MUL(source[0], quadColor[0], alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_SRC1_COLOR: - assert(0); /* to do */ - break; - case PIPE_BLENDFACTOR_SRC1_ALPHA: - assert(0); /* to do */ - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(source[0], zero); /* R */ - VEC4_COPY(source[1], zero); /* G */ - VEC4_COPY(source[2], zero); /* B */ - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ - VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ - VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ - VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ - VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ - VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, quadColor[3]); - VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, dest[3]); - VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[0]); /* R */ - VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ - VEC4_SUB(inv_comp, one, dest[1]); /* G */ - VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ - VEC4_SUB(inv_comp, one, dest[2]); /* B */ - VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - { - float inv_comp[4]; - /* R */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); - VEC4_MUL(source[0], quadColor[0], inv_comp); - /* G */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); - VEC4_MUL(source[1], quadColor[1], inv_comp); - /* B */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); - VEC4_MUL(source[2], quadColor[2], inv_comp); - } - break; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - { - float inv_alpha[4]; - VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - assert(0); /* to do */ - break; - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - assert(0); /* to do */ - break; - default: - assert(0); - } - - /* - * Compute src/first term A - */ - switch (softpipe->blend->alpha_src_factor) { - case PIPE_BLENDFACTOR_ONE: - VEC4_COPY(source[3], quadColor[3]); /* A */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_SRC_ALPHA: - { - const float *alpha = quadColor[3]; - VEC4_MUL(source[3], quadColor[3], alpha); /* A */ - } - break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - /* multiply alpha by 1.0 */ - VEC4_COPY(source[3], quadColor[3]); /* A */ - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_CONST_ALPHA: - { - float comp[4]; - VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ - VEC4_MUL(source[3], quadColor[3], comp); /* A */ - } - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(source[3], zero); /* A */ - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, quadColor[3]); - VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, dest[3]); - VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ - } - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - { - float inv_comp[4]; - /* A */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(source[3], quadColor[3], inv_comp); - } - break; - default: - assert(0); - } - - - /* - * Compute dest/second term RGB - */ - switch (softpipe->blend->rgb_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave dest as-is */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ - VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ - VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ - break; - case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ - break; - case PIPE_BLENDFACTOR_DST_COLOR: - VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ - VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ - VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - assert(0); /* illegal */ - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - { - float comp[4]; - VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ - VEC4_MUL(dest[0], dest[0], comp); /* R */ - VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ - VEC4_MUL(dest[1], dest[1], comp); /* G */ - VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ - VEC4_MUL(dest[2], dest[2], comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_CONST_ALPHA: - { - float comp[4]; - VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ - VEC4_MUL(dest[0], dest[0], comp); /* R */ - VEC4_MUL(dest[1], dest[1], comp); /* G */ - VEC4_MUL(dest[2], dest[2], comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[0], zero); /* R */ - VEC4_COPY(dest[1], zero); /* G */ - VEC4_COPY(dest[2], zero); /* B */ - break; - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - /* XXX what are these? */ - assert(0); - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ - VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ - VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ - VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ - VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ - VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - { - float one_minus_alpha[QUAD_SIZE]; - VEC4_SUB(one_minus_alpha, one, quadColor[3]); - VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ - VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ - VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[3]); /* A */ - VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ - VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ - VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[0]); /* R */ - VEC4_MUL(dest[0], dest[0], inv_comp); /* R */ - VEC4_SUB(inv_comp, one, dest[1]); /* G */ - VEC4_MUL(dest[1], dest[1], inv_comp); /* G */ - VEC4_SUB(inv_comp, one, dest[2]); /* B */ - VEC4_MUL(dest[2], dest[2], inv_comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - { - float inv_comp[4]; - /* R */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); - VEC4_MUL(dest[0], dest[0], inv_comp); - /* G */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); - VEC4_MUL(dest[1], dest[1], inv_comp); - /* B */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); - VEC4_MUL(dest[2], dest[2], inv_comp); - } - break; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - { - float inv_comp[4]; - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(dest[0], dest[0], inv_comp); - VEC4_MUL(dest[1], dest[1], inv_comp); - VEC4_MUL(dest[2], dest[2], inv_comp); - } - break; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - /* XXX what are these? */ - assert(0); - break; - default: - assert(0); - } - - /* - * Compute dest/second term A - */ - switch (softpipe->blend->alpha_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave dest as-is */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ - break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - assert(0); /* illegal */ - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_CONST_ALPHA: - { - float comp[4]; - VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ - VEC4_MUL(dest[3], dest[3], comp); /* A */ - } - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[3], zero); /* A */ - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - { - float one_minus_alpha[QUAD_SIZE]; - VEC4_SUB(one_minus_alpha, one, quadColor[3]); - VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[3]); /* A */ - VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ - } - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - { - float inv_comp[4]; - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(dest[3], dest[3], inv_comp); - } - break; - default: - assert(0); - } - - /* - * Combine RGB terms - */ - switch (softpipe->blend->rgb_func) { - case PIPE_BLEND_ADD: - VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */ - VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ - VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */ - break; - case PIPE_BLEND_SUBTRACT: - VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */ - VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */ - VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */ - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */ - VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */ - VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */ - break; - case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ - break; - case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ - break; - default: - assert(0); - } - - /* - * Combine A terms - */ - switch (softpipe->blend->alpha_func) { - case PIPE_BLEND_ADD: - VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ - break; - case PIPE_BLEND_SUBTRACT: - VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */ - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */ - break; - case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ - break; - case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ - break; - default: - assert(0); - } } - } /* cbuf loop */ - - /* pass blended quad to next stage */ - qs->next->run(qs->next, quads, nr); + } } static void -blend_quad(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) +choose_blend_quad(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) { struct softpipe_context *softpipe = qs->softpipe; const struct pipe_blend_state *blend = softpipe->blend; - if (softpipe->blend->logicop_enable) { - qs->run = logicop_quads; - } - else { - qs->run = blend_quads_fallback; + qs->run = blend_fallback; - if (blend->rgb_src_factor == blend->alpha_src_factor && - blend->rgb_dst_factor == blend->alpha_dst_factor && - blend->rgb_func == blend->alpha_func && - softpipe->framebuffer.nr_cbufs == 1) + if (!softpipe->blend->logicop_enable && + softpipe->blend->colormask == 0xf) + { + if (!blend->blend_enable) { + qs->run = single_output_color; + } + else if (blend->rgb_src_factor == blend->alpha_src_factor && + blend->rgb_dst_factor == blend->alpha_dst_factor && + blend->rgb_func == blend->alpha_func && + softpipe->framebuffer.nr_cbufs == 1) { if (blend->alpha_func == PIPE_BLEND_ADD) { if (blend->rgb_src_factor == PIPE_BLENDFACTOR_ONE && @@ -871,7 +956,7 @@ blend_quad(struct quad_stage *qs, qs->run = blend_single_add_src_alpha_inv_src_alpha; } - } + } } qs->run(qs, quads, nr); @@ -880,8 +965,7 @@ blend_quad(struct quad_stage *qs, static void blend_begin(struct quad_stage *qs) { - qs->run = blend_quad; - qs->next->begin(qs->next); + qs->run = choose_blend_quad; } @@ -897,7 +981,7 @@ struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe ) stage->softpipe = softpipe; stage->begin = blend_begin; - stage->run = blend_quad; + stage->run = choose_blend_quad; stage->destroy = blend_destroy; return stage; diff --git a/src/gallium/drivers/softpipe/sp_quad_bufloop.c b/src/gallium/drivers/softpipe/sp_quad_bufloop.c deleted file mode 100644 index 953d8516b9..0000000000 --- a/src/gallium/drivers/softpipe/sp_quad_bufloop.c +++ /dev/null @@ -1,74 +0,0 @@ - -#include "util/u_memory.h" -#include "sp_context.h" -#include "sp_quad.h" -#include "sp_surface.h" -#include "sp_quad_pipe.h" - - -/** - * Loop over colorbuffers, passing quad to next stage each time. - */ -static void -cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad) -{ - struct softpipe_context *softpipe = qs->softpipe; - float tmp[PIPE_MAX_COLOR_BUFS][4][QUAD_SIZE]; - unsigned i; - - assert(sizeof(quad->outputs.color) == sizeof(tmp)); - assert(softpipe->framebuffer.nr_cbufs <= PIPE_MAX_COLOR_BUFS); - - /* make copy of original colors since they can get modified - * by blending and masking. - * XXX we won't have to do this if the fragment program actually emits - * N separate colors and we're drawing to N color buffers (MRT). - * But if we emitted one color and glDrawBuffer(GL_FRONT_AND_BACK) is - * in effect, we need to save/restore colors like this. - */ - memcpy(tmp, quad->outputs.color, sizeof(tmp)); - - for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) { - /* set current cbuffer */ -#if 0 /* obsolete & going away */ - softpipe->current_cbuf = i; -#endif - - /* pass blended quad to next stage */ - qs->next->run(qs->next, quad); - - /* restore quad's colors for next buffer */ - memcpy(quad->outputs.color, tmp, sizeof(tmp)); - } -} - - -static void cbuf_loop_begin(struct quad_stage *qs) -{ - qs->next->begin(qs->next); -} - - -static void cbuf_loop_destroy(struct quad_stage *qs) -{ - FREE( qs ); -} - - -/** - * Create the colorbuffer loop stage. - * This is used to implement multiple render targets and GL_FRONT_AND_BACK - * rendering. - */ -struct quad_stage *sp_quad_bufloop_stage( struct softpipe_context *softpipe ) -{ - struct quad_stage *stage = CALLOC_STRUCT(quad_stage); - - stage->softpipe = softpipe; - stage->begin = cbuf_loop_begin; - stage->run = cbuf_loop_quad; - stage->destroy = cbuf_loop_destroy; - - return stage; -} - diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c deleted file mode 100644 index ac74287473..0000000000 --- a/src/gallium/drivers/softpipe/sp_quad_colormask.c +++ /dev/null @@ -1,126 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \brief quad colormask stage - * \author Brian Paul - */ - -#include "pipe/p_defines.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "sp_context.h" -#include "sp_quad.h" -#include "sp_surface.h" -#include "sp_quad_pipe.h" -#include "sp_tile_cache.h" - - - -/** - * XXX colormask could be rolled into blending... - */ -static void -colormask_quad(struct quad_stage *qs, struct quad_header *quad) -{ - struct softpipe_context *softpipe = qs->softpipe; - uint cbuf; - - /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { - float dest[4][QUAD_SIZE]; - struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], - quad->input.x0, quad->input.y0); - float (*quadColor)[4] = quad->output.color[cbuf]; - uint i, j; - - /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); - for (i = 0; i < 4; i++) { - dest[i][j] = tile->data.color[y][x][i]; - } - } - - /* R */ - if (!(softpipe->blend->colormask & PIPE_MASK_R)) - COPY_4V(quadColor[0], dest[0]); - - /* G */ - if (!(softpipe->blend->colormask & PIPE_MASK_G)) - COPY_4V(quadColor[1], dest[1]); - - /* B */ - if (!(softpipe->blend->colormask & PIPE_MASK_B)) - COPY_4V(quadColor[2], dest[2]); - - /* A */ - if (!(softpipe->blend->colormask & PIPE_MASK_A)) - COPY_4V(quadColor[3], dest[3]); - } -} - -static void -colormask_quads(struct quad_stage *qs, struct quad_header *quads[], - unsigned nr) -{ - unsigned i; - - for (i = 0; i < nr; i++) - colormask_quad(qs, quads[i]); - - /* pass quad to next stage */ - qs->next->run(qs->next, quads, nr); -} - - - -static void colormask_begin(struct quad_stage *qs) -{ - qs->next->begin(qs->next); -} - - -static void colormask_destroy(struct quad_stage *qs) -{ - FREE( qs ); -} - - -struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe ) -{ - struct quad_stage *stage = CALLOC_STRUCT(quad_stage); - - stage->softpipe = softpipe; - stage->begin = colormask_begin; - stage->run = colormask_quads; - stage->destroy = colormask_destroy; - - return stage; -} diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c index f06a385b3c..989e997f81 100644 --- a/src/gallium/drivers/softpipe/sp_quad_coverage.c +++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c @@ -68,7 +68,6 @@ coverage_run(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) { - struct softpipe_context *softpipe = qs->softpipe; unsigned i; for (i = 0; i < nr; i++) diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c deleted file mode 100644 index 79a222ff58..0000000000 --- a/src/gallium/drivers/softpipe/sp_quad_output.c +++ /dev/null @@ -1,109 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "util/u_memory.h" -#include "sp_context.h" -#include "sp_quad.h" -#include "sp_surface.h" -#include "sp_quad_pipe.h" -#include "sp_tile_cache.h" - - -/** - * Last step of quad processing: write quad colors to the framebuffer, - * taking mask into account. - */ -static void -output_quad(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) -{ - - struct softpipe_context *softpipe = qs->softpipe; - uint cbuf; - - /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { - struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], - quads[0]->input.x0, - quads[0]->input.y0); - int i, j, q; - - /* get/swizzle dest colors */ - for (q = 0; q < nr; q++) { - struct quad_header *quad = quads[q]; - float (*quadColor)[4] = quad->output.color[cbuf]; - - /* in-tile pos: */ - const int itx = quad->input.x0 % TILE_SIZE; - const int ity = quad->input.y0 % TILE_SIZE; - - - for (j = 0; j < QUAD_SIZE; j++) { - if (quad->inout.mask & (1 << j)) { - int x = itx + (j & 1); - int y = ity + (j >> 1); - for (i = 0; i < 4; i++) { /* loop over color chans */ - tile->data.color[y][x][i] = quadColor[i][j]; - } - if (0) { - debug_printf("sp write pixel %d,%d: %g, %g, %g\n", - quad->input.x0 + x, - quad->input.y0 + y, - quadColor[0][j], - quadColor[1][j], - quadColor[2][j]); - } - } - } - } - } -} - - -static void output_begin(struct quad_stage *qs) -{ - assert(qs->next == NULL); -} - - -static void output_destroy(struct quad_stage *qs) -{ - FREE( qs ); -} - - -struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe ) -{ - struct quad_stage *stage = CALLOC_STRUCT(quad_stage); - - stage->softpipe = softpipe; - stage->begin = output_begin; - stage->run = output_quad; - stage->destroy = output_destroy; - - return stage; -} diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c index 6fae7d552f..d138d417ac 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.c +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c @@ -65,25 +65,16 @@ sp_build_quad_pipeline(struct softpipe_context *sp) /* Color combine */ - sp->quad.first = sp->quad.output; - - if (sp->blend->colormask != 0xf) { - sp_push_quad_first( sp, sp->quad.colormask ); - } - - if (sp->blend->blend_enable || - sp->blend->logicop_enable) { - sp_push_quad_first( sp, sp->quad.blend ); - } + sp->quad.first = sp->quad.blend; + /* Shade/Depth/Stencil/Alpha + */ if ((sp->rasterizer->poly_smooth && sp->reduced_prim == PIPE_PRIM_TRIANGLES) || (sp->rasterizer->line_smooth && sp->reduced_prim == PIPE_PRIM_LINES) || (sp->rasterizer->point_smooth && sp->reduced_prim == PIPE_PRIM_POINTS)) { sp_push_quad_first( sp, sp->quad.coverage ); } - /* Shade/Depth/Stencil/Alpha - */ if (sp->active_query_count) { sp_push_quad_first( sp, sp->quad.occlusion ); } -- cgit v1.2.3 From ade8984f5023b05412f2467add4a59d14af53185 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 25 Jul 2009 10:01:06 +0100 Subject: softpipe: cleanup framebuffer state routine slightly --- src/gallium/drivers/softpipe/sp_state_surface.c | 50 +++++-------------------- 1 file changed, 10 insertions(+), 40 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index 1621a27614..c8f55c3cec 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -75,51 +75,21 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, /* update cache */ sp_tile_cache_set_surface(sp->zsbuf_cache, fb->zsbuf); - } - -#if 0 - /* XXX combined depth/stencil here */ - - /* sbuf changing? */ - if (sp->framebuffer.sbuf != fb->sbuf) { - /* flush old */ - sp_flush_tile_cache(sp, sp->sbuf_cache_sep); - - /* assign new */ - sp->framebuffer.sbuf = fb->sbuf; - - /* update cache */ - if (fb->sbuf != fb->zbuf) { - /* separate stencil buf */ - sp->sbuf_cache = sp->sbuf_cache_sep; - sp_tile_cache_set_surface(sp->sbuf_cache, fb->sbuf); - } - else { - /* combined depth/stencil */ - sp->sbuf_cache = sp->zbuf_cache; - sp_tile_cache_set_surface(sp->sbuf_cache, fb->sbuf); - } - } -#endif - /* Tell draw module how deep the Z/depth buffer is */ - { - int depth_bits; - double mrd; + /* Tell draw module how deep the Z/depth buffer is */ if (sp->framebuffer.zsbuf) { + int depth_bits; + double mrd; depth_bits = pf_get_component_bits(sp->framebuffer.zsbuf->format, PIPE_FORMAT_COMP_Z); + if (depth_bits > 16) { + mrd = 0.0000001; + } + else { + mrd = 0.00002; + } + draw_set_mrd(sp->draw, mrd); } - else { - depth_bits = 0; - } - if (depth_bits > 16) { - mrd = 0.0000001; - } - else { - mrd = 0.00002; - } - draw_set_mrd(sp->draw, mrd); } sp->framebuffer.width = fb->width; -- cgit v1.2.3 From 85613cc4f14de968ddd503610c5b8fcc77234c81 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 25 Jul 2009 11:01:48 +0100 Subject: softpipe: fix error in scissor state dependencies --- src/gallium/drivers/softpipe/sp_state_derived.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 629a1f8e29..0226501267 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -165,11 +165,19 @@ softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe) static void compute_cliprect(struct softpipe_context *sp) { + /* SP_NEW_FRAMEBUFFER + */ uint surfWidth = sp->framebuffer.width; uint surfHeight = sp->framebuffer.height; + /* SP_NEW_RASTERIZER + */ if (sp->rasterizer->scissor) { - /* clip to scissor rect */ + + /* SP_NEW_SCISSOR + * + * clip to scissor rect: + */ sp->cliprect.minx = MAX2(sp->scissor.minx, 0); sp->cliprect.miny = MAX2(sp->scissor.miny, 0); sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth); @@ -231,7 +239,7 @@ void softpipe_update_derived( struct softpipe_context *softpipe ) invalidate_vertex_layout( softpipe ); if (softpipe->dirty & (SP_NEW_SCISSOR | - SP_NEW_DEPTH_STENCIL_ALPHA | + SP_NEW_RASTERIZER | SP_NEW_FRAMEBUFFER)) compute_cliprect(softpipe); -- cgit v1.2.3 From bac8e34c9e4077d370923773d67fe565ce154849 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 27 Jul 2009 08:17:45 +0100 Subject: softpipe: move all depth/stencil/alpha pixel processing into one stage --- src/gallium/drivers/softpipe/Makefile | 5 - src/gallium/drivers/softpipe/sp_context.c | 12 - src/gallium/drivers/softpipe/sp_context.h | 6 - src/gallium/drivers/softpipe/sp_quad_alpha_test.c | 112 ---- src/gallium/drivers/softpipe/sp_quad_coverage.c | 101 ---- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 659 ++++++++++++++++++---- src/gallium/drivers/softpipe/sp_quad_earlyz.c | 94 --- src/gallium/drivers/softpipe/sp_quad_fs.c | 46 +- src/gallium/drivers/softpipe/sp_quad_occlusion.c | 87 --- src/gallium/drivers/softpipe/sp_quad_pipe.c | 46 +- src/gallium/drivers/softpipe/sp_quad_pipe.h | 2 - src/gallium/drivers/softpipe/sp_quad_stencil.c | 363 ------------ src/gallium/drivers/softpipe/sp_state_derived.c | 4 +- 13 files changed, 580 insertions(+), 957 deletions(-) delete mode 100644 src/gallium/drivers/softpipe/sp_quad_alpha_test.c delete mode 100644 src/gallium/drivers/softpipe/sp_quad_coverage.c delete mode 100644 src/gallium/drivers/softpipe/sp_quad_earlyz.c delete mode 100644 src/gallium/drivers/softpipe/sp_quad_occlusion.c delete mode 100644 src/gallium/drivers/softpipe/sp_quad_stencil.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index bdc1a5819f..48522abe98 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -16,13 +16,8 @@ C_SOURCES = \ sp_prim_vbuf.c \ sp_quad_pipe.c \ sp_quad_stipple.c \ - sp_quad_earlyz.c \ sp_quad_depth_test.c \ - sp_quad_stencil.c \ sp_quad_fs.c \ - sp_quad_alpha_test.c \ - sp_quad_occlusion.c \ - sp_quad_coverage.c \ sp_quad_blend.c \ sp_screen.c \ sp_setup.c \ diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 28a0dd62ac..e35c6b3aec 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -88,14 +88,8 @@ static void softpipe_destroy( struct pipe_context *pipe ) if (softpipe->draw) draw_destroy( softpipe->draw ); - softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple ); - softpipe->quad.earlyz->destroy( softpipe->quad.earlyz ); softpipe->quad.shade->destroy( softpipe->quad.shade ); - softpipe->quad.alpha_test->destroy( softpipe->quad.alpha_test ); softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); - softpipe->quad.stencil_test->destroy( softpipe->quad.stencil_test ); - softpipe->quad.occlusion->destroy( softpipe->quad.occlusion ); - softpipe->quad.coverage->destroy( softpipe->quad.coverage ); softpipe->quad.blend->destroy( softpipe->quad.blend ); for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) @@ -230,14 +224,8 @@ softpipe_create( struct pipe_screen *screen ) /* setup quad rendering stages */ - softpipe->quad.polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); - softpipe->quad.earlyz = sp_quad_earlyz_stage(softpipe); softpipe->quad.shade = sp_quad_shade_stage(softpipe); - softpipe->quad.alpha_test = sp_quad_alpha_test_stage(softpipe); softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe); - softpipe->quad.stencil_test = sp_quad_stencil_test_stage(softpipe); - softpipe->quad.occlusion = sp_quad_occlusion_stage(softpipe); - softpipe->quad.coverage = sp_quad_coverage_stage(softpipe); softpipe->quad.blend = sp_quad_blend_stage(softpipe); /* vertex shader samplers */ diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index b76ff610a3..fa3306c020 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -114,14 +114,8 @@ struct softpipe_context { /** Software quad rendering pipeline */ struct { - struct quad_stage *polygon_stipple; - struct quad_stage *earlyz; struct quad_stage *shade; - struct quad_stage *alpha_test; - struct quad_stage *stencil_test; struct quad_stage *depth_test; - struct quad_stage *occlusion; - struct quad_stage *coverage; struct quad_stage *blend; struct quad_stage *first; /**< points to one of the above stages */ diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c deleted file mode 100644 index 3a282208b6..0000000000 --- a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c +++ /dev/null @@ -1,112 +0,0 @@ - -/** - * quad alpha test - */ - -#include "sp_context.h" -#include "sp_quad.h" -#include "sp_quad_pipe.h" -#include "pipe/p_defines.h" -#include "util/u_memory.h" - -#define ALPHATEST( FUNC, COMP ) \ - static void \ - alpha_test_quads_##FUNC( struct quad_stage *qs, \ - struct quad_header *quads[], \ - unsigned nr ) \ - { \ - const float ref = qs->softpipe->depth_stencil->alpha.ref_value; \ - const uint cbuf = 0; /* only output[0].alpha is tested */ \ - unsigned pass_nr = 0; \ - unsigned i; \ - \ - for (i = 0; i < nr; i++) { \ - const float *aaaa = quads[i]->output.color[cbuf][3]; \ - unsigned passMask = 0; \ - \ - if (aaaa[0] COMP ref) passMask |= (1 << 0); \ - if (aaaa[1] COMP ref) passMask |= (1 << 1); \ - if (aaaa[2] COMP ref) passMask |= (1 << 2); \ - if (aaaa[3] COMP ref) passMask |= (1 << 3); \ - \ - quads[i]->inout.mask &= passMask; \ - \ - if (quads[i]->inout.mask) \ - quads[pass_nr++] = quads[i]; \ - } \ - \ - if (pass_nr) \ - qs->next->run(qs->next, quads, pass_nr); \ - } - - -ALPHATEST( LESS, < ) -ALPHATEST( EQUAL, == ) -ALPHATEST( LEQUAL, <= ) -ALPHATEST( GREATER, > ) -ALPHATEST( NOTEQUAL, != ) -ALPHATEST( GEQUAL, >= ) - - -/* XXX: Incorporate into shader using KILP. - */ -static void -alpha_test_quad(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) -{ - switch (qs->softpipe->depth_stencil->alpha.func) { - case PIPE_FUNC_LESS: - alpha_test_quads_LESS( qs, quads, nr ); - break; - case PIPE_FUNC_EQUAL: - alpha_test_quads_EQUAL( qs, quads, nr ); - break; - case PIPE_FUNC_LEQUAL: - alpha_test_quads_LEQUAL( qs, quads, nr ); - break; - case PIPE_FUNC_GREATER: - alpha_test_quads_GREATER( qs, quads, nr ); - break; - case PIPE_FUNC_NOTEQUAL: - alpha_test_quads_NOTEQUAL( qs, quads, nr ); - break; - case PIPE_FUNC_GEQUAL: - alpha_test_quads_GEQUAL( qs, quads, nr ); - break; - case PIPE_FUNC_ALWAYS: - assert(0); /* should be caught earlier */ - qs->next->run(qs->next, quads, nr); - break; - case PIPE_FUNC_NEVER: - default: - assert(0); /* should be caught earlier */ - return; - } -} - - -static void alpha_test_begin(struct quad_stage *qs) -{ - qs->next->begin(qs->next); -} - - -static void alpha_test_destroy(struct quad_stage *qs) -{ - FREE( qs ); -} - - -struct quad_stage * -sp_quad_alpha_test_stage( struct softpipe_context *softpipe ) -{ - struct quad_stage *stage = CALLOC_STRUCT(quad_stage); - - stage->softpipe = softpipe; - stage->begin = alpha_test_begin; - stage->run = alpha_test_quad; - stage->destroy = alpha_test_destroy; - - return stage; -} diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c deleted file mode 100644 index 989e997f81..0000000000 --- a/src/gallium/drivers/softpipe/sp_quad_coverage.c +++ /dev/null @@ -1,101 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * \brief Apply AA coverage to quad alpha valus - * \author Brian Paul - */ - - -#include "pipe/p_defines.h" -#include "util/u_memory.h" -#include "sp_context.h" -#include "sp_quad.h" -#include "sp_quad_pipe.h" - - -/** - * Multiply quad's alpha values by the fragment coverage. - */ -static INLINE void -coverage_quad(struct quad_stage *qs, struct quad_header *quad) -{ - struct softpipe_context *softpipe = qs->softpipe; - uint cbuf; - - /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { - float (*quadColor)[4] = quad->output.color[cbuf]; - unsigned j; - for (j = 0; j < QUAD_SIZE; j++) { - assert(quad->input.coverage[j] >= 0.0); - assert(quad->input.coverage[j] <= 1.0); - quadColor[3][j] *= quad->input.coverage[j]; - } - } -} - - -/* XXX: Incorporate into shader after alpha_test. - */ -static void -coverage_run(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) -{ - unsigned i; - - for (i = 0; i < nr; i++) - coverage_quad( qs, quads[i] ); - - qs->next->run(qs->next, quads, nr); -} - -static void coverage_begin(struct quad_stage *qs) -{ - qs->next->begin(qs->next); -} - - -static void coverage_destroy(struct quad_stage *qs) -{ - FREE( qs ); -} - - -struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe ) -{ - struct quad_stage *stage = CALLOC_STRUCT(quad_stage); - - stage->softpipe = softpipe; - stage->begin = coverage_begin; - stage->run = coverage_run; - stage->destroy = coverage_destroy; - - return stage; -} diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 8f223a7eae..bf65799a81 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -31,61 +31,109 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" +#include "tgsi/tgsi_scan.h" #include "sp_context.h" #include "sp_quad.h" #include "sp_surface.h" #include "sp_quad_pipe.h" #include "sp_tile_cache.h" +#include "sp_state.h" /* for sp_fragment_shader */ -/** - * Do depth testing for a quad. - * Not static since it's used by the stencil code. - */ +struct depth_data { + struct pipe_surface *ps; + enum pipe_format format; + unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */ + unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */ + ubyte stencilVals[QUAD_SIZE]; + struct softpipe_cached_tile *tile; +}; -/* - * To increase efficiency, we should probably have multiple versions - * of this function that are specifically for Z16, Z32 and FP Z buffers. - * Try to effectively do that with codegen... - */ -boolean -sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) + +static void +get_depth_stencil_values( struct depth_data *data, + const struct quad_header *quad ) { - struct softpipe_context *softpipe = qs->softpipe; - struct pipe_surface *ps = softpipe->framebuffer.zsbuf; - const enum pipe_format format = ps->format; - unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */ - unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */ - unsigned zmask = 0; unsigned j; - struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe->zsbuf_cache, quad->input.x0, quad->input.y0); + const struct softpipe_cached_tile *tile = data->tile; + + switch (data->format) { + case PIPE_FORMAT_Z16_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + data->bzzzz[j] = tile->data.depth16[y][x]; + } + break; + case PIPE_FORMAT_Z32_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + data->bzzzz[j] = tile->data.depth32[y][x]; + } + break; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff; + data->stencilVals[j] = tile->data.depth32[y][x] >> 24; + } + break; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + data->bzzzz[j] = tile->data.depth32[y][x] >> 8; + data->stencilVals[j] = tile->data.depth32[y][x] & 0xff; + } + break; + default: + assert(0); + } +} - assert(ps); /* shouldn't get here if there's no zbuffer */ +/* If the shader has not been run, interpolate the depth values + * ourselves. + */ +static void +interpolate_quad_depth( struct quad_header *quad ) +{ + const float fx = (float) quad->input.x0; + const float fy = (float) quad->input.y0; + const float dzdx = quad->posCoef->dadx[2]; + const float dzdy = quad->posCoef->dady[2]; + const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; + + quad->output.depth[0] = z0; + quad->output.depth[1] = z0 + dzdx; + quad->output.depth[2] = z0 + dzdy; + quad->output.depth[3] = z0 + dzdx + dzdy; +} - /* - * Convert quad's float depth values to int depth values (qzzzz). + +static void +convert_quad_depth( struct depth_data *data, + const struct quad_header *quad ) +{ + unsigned j; + + /* Convert quad's float depth values to int depth values (qzzzz). * If the Z buffer stores integer values, we _have_ to do the depth * compares with integers (not floats). Otherwise, the float->int->float * conversion of Z values (which isn't an identity function) will cause * Z-fighting errors. - * - * Also, get the zbuffer values (bzzzz) from the cached tile. */ - switch (format) { + switch (data->format) { case PIPE_FORMAT_Z16_UNORM: { float scale = 65535.0; for (j = 0; j < QUAD_SIZE; j++) { - qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); - } - - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - bzzzz[j] = tile->data.depth16[y][x]; + data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); } } break; @@ -94,47 +142,247 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) double scale = (double) (uint) ~0UL; for (j = 0; j < QUAD_SIZE; j++) { - qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); - } - - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - bzzzz[j] = tile->data.depth32[y][x]; + data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); } } break; case PIPE_FORMAT_X8Z24_UNORM: - /* fall-through */ case PIPE_FORMAT_S8Z24_UNORM: { float scale = (float) ((1 << 24) - 1); for (j = 0; j < QUAD_SIZE; j++) { - qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); - } - - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - bzzzz[j] = tile->data.depth32[y][x] & 0xffffff; + data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); } } break; case PIPE_FORMAT_Z24X8_UNORM: - /* fall-through */ case PIPE_FORMAT_Z24S8_UNORM: { float scale = (float) ((1 << 24) - 1); for (j = 0; j < QUAD_SIZE; j++) { - qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); + data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); } + } + break; + default: + assert(0); + } +} - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - bzzzz[j] = tile->data.depth32[y][x] >> 8; + + +static void +write_depth_stencil_values( struct depth_data *data, + struct quad_header *quad ) +{ + struct softpipe_cached_tile *tile = data->tile; + unsigned j; + + /* put updated Z values back into cached tile */ + switch (data->format) { + case PIPE_FORMAT_Z16_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.depth16[y][x] = (ushort) data->bzzzz[j]; + } + break; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_Z32_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.depth32[y][x] = data->bzzzz[j]; + } + break; + case PIPE_FORMAT_S8Z24_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j]; + } + break; + case PIPE_FORMAT_Z24S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j]; + } + break; + case PIPE_FORMAT_Z24X8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.depth32[y][x] = data->bzzzz[j] << 8; + } + break; + default: + assert(0); + } +} + + + + +/** Only 8-bit stencil supported */ +#define STENCIL_MAX 0xff + + +/** + * Do the basic stencil test (compare stencil buffer values against the + * reference value. + * + * \param data->stencilVals the stencil values from the stencil buffer + * \param func the stencil func (PIPE_FUNC_x) + * \param ref the stencil reference value + * \param valMask the stencil value mask indicating which bits of the stencil + * values and ref value are to be used. + * \return mask indicating which pixels passed the stencil test + */ +static unsigned +do_stencil_test(struct depth_data *data, + unsigned func, + unsigned ref, unsigned valMask) +{ + unsigned passMask = 0x0; + unsigned j; + + ref &= valMask; + + switch (func) { + case PIPE_FUNC_NEVER: + /* passMask = 0x0 */ + break; + case PIPE_FUNC_LESS: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref < (data->stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_EQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref == (data->stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_LEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref <= (data->stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GREATER: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref > (data->stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_NOTEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref != (data->stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref >= (data->stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_ALWAYS: + passMask = MASK_ALL; + break; + default: + assert(0); + } + + return passMask; +} + + +/** + * Apply the stencil operator to stencil values. + * + * \param data->stencilVals the stencil buffer values (read and written) + * \param mask indicates which pixels to update + * \param op the stencil operator (PIPE_STENCIL_OP_x) + * \param ref the stencil reference value + * \param wrtMask writemask controlling which bits are changed in the + * stencil values + */ +static void +apply_stencil_op(struct depth_data *data, + unsigned mask, unsigned op, ubyte ref, ubyte wrtMask) +{ + unsigned j; + ubyte newstencil[QUAD_SIZE]; + + for (j = 0; j < QUAD_SIZE; j++) { + newstencil[j] = data->stencilVals[j]; + } + + switch (op) { + case PIPE_STENCIL_OP_KEEP: + /* no-op */ + break; + case PIPE_STENCIL_OP_ZERO: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = 0; + } + } + break; + case PIPE_STENCIL_OP_REPLACE: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = ref; + } + } + break; + case PIPE_STENCIL_OP_INCR: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + if (data->stencilVals[j] < STENCIL_MAX) { + newstencil[j] = data->stencilVals[j] + 1; + } + } + } + break; + case PIPE_STENCIL_OP_DECR: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + if (data->stencilVals[j] > 0) { + newstencil[j] = data->stencilVals[j] - 1; + } + } + } + break; + case PIPE_STENCIL_OP_INCR_WRAP: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = data->stencilVals[j] + 1; + } + } + break; + case PIPE_STENCIL_OP_DECR_WRAP: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = data->stencilVals[j] - 1; + } + } + break; + case PIPE_STENCIL_OP_INVERT: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = ~data->stencilVals[j]; } } break; @@ -142,6 +390,39 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) assert(0); } + /* + * update the stencil values + */ + if (wrtMask != STENCIL_MAX) { + /* apply bit-wise stencil buffer writemask */ + for (j = 0; j < QUAD_SIZE; j++) { + data->stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & data->stencilVals[j]); + } + } + else { + for (j = 0; j < QUAD_SIZE; j++) { + data->stencilVals[j] = newstencil[j]; + } + } +} + + + +/* + * To increase efficiency, we should probably have multiple versions + * of this function that are specifically for Z16, Z32 and FP Z buffers. + * Try to effectively do that with codegen... + */ + +static boolean +depth_test_quad(struct quad_stage *qs, + struct depth_data *data, + struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + unsigned zmask = 0; + unsigned j; + switch (softpipe->depth_stencil->depth.func) { case PIPE_FUNC_NEVER: /* zmask = 0 */ @@ -151,37 +432,37 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) * Like this: quad->mask &= (quad->outputs.depth < zzzz); */ for (j = 0; j < QUAD_SIZE; j++) { - if (qzzzz[j] < bzzzz[j]) + if (data->qzzzz[j] < data->bzzzz[j]) zmask |= 1 << j; } break; case PIPE_FUNC_EQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (qzzzz[j] == bzzzz[j]) + if (data->qzzzz[j] == data->bzzzz[j]) zmask |= 1 << j; } break; case PIPE_FUNC_LEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (qzzzz[j] <= bzzzz[j]) + if (data->qzzzz[j] <= data->bzzzz[j]) zmask |= (1 << j); } break; case PIPE_FUNC_GREATER: for (j = 0; j < QUAD_SIZE; j++) { - if (qzzzz[j] > bzzzz[j]) + if (data->qzzzz[j] > data->bzzzz[j]) zmask |= (1 << j); } break; case PIPE_FUNC_NOTEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (qzzzz[j] != bzzzz[j]) + if (data->qzzzz[j] != data->bzzzz[j]) zmask |= (1 << j); } break; case PIPE_FUNC_GEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (qzzzz[j] >= bzzzz[j]) + if (data->qzzzz[j] >= data->bzzzz[j]) zmask |= (1 << j); } break; @@ -196,83 +477,231 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) if (quad->inout.mask == 0) return FALSE; + /* Update our internal copy only if writemask set. Even if + * depth.writemask is FALSE, may still need to write out buffer + * data due to stencil changes. + */ if (softpipe->depth_stencil->depth.writemask) { - - /* This is also efficient with sse / spe instructions: - */ for (j = 0; j < QUAD_SIZE; j++) { - if (quad->inout.mask & (1 << j)) { - bzzzz[j] = qzzzz[j]; - } + if (quad->inout.mask & (1 << j)) { + data->bzzzz[j] = data->qzzzz[j]; + } } + } - /* put updated Z values back into cached tile */ - switch (format) { - case PIPE_FORMAT_Z16_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - tile->data.depth16[y][x] = (ushort) bzzzz[j]; - } - break; - case PIPE_FORMAT_X8Z24_UNORM: - /* fall-through */ - /* (yes, this falls through to a different case than above) */ - case PIPE_FORMAT_Z32_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - tile->data.depth32[y][x] = bzzzz[j]; - } - break; - case PIPE_FORMAT_S8Z24_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - uint s8z24 = tile->data.depth32[y][x]; - s8z24 = (s8z24 & 0xff000000) | bzzzz[j]; - tile->data.depth32[y][x] = s8z24; - } - break; - case PIPE_FORMAT_Z24S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - uint z24s8 = tile->data.depth32[y][x]; - z24s8 = (z24s8 & 0xff) | (bzzzz[j] << 8); - tile->data.depth32[y][x] = z24s8; + return TRUE; +} + + + +/** + * Do stencil (and depth) testing. Stenciling depends on the outcome of + * depth testing. + */ +static boolean +depth_stencil_test_quad(struct quad_stage *qs, + struct depth_data *data, + struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + unsigned func, zFailOp, zPassOp, failOp; + ubyte ref, wrtMask, valMask; + uint face = quad->input.facing; + + if (!softpipe->depth_stencil->stencil[1].enabled) { + /* single-sided stencil test, use front (face=0) state */ + face = 0; + } + + /* choose front or back face function, operator, etc */ + /* XXX we could do these initializations once per primitive */ + func = softpipe->depth_stencil->stencil[face].func; + failOp = softpipe->depth_stencil->stencil[face].fail_op; + zFailOp = softpipe->depth_stencil->stencil[face].zfail_op; + zPassOp = softpipe->depth_stencil->stencil[face].zpass_op; + ref = softpipe->depth_stencil->stencil[face].ref_value; + wrtMask = softpipe->depth_stencil->stencil[face].writemask; + valMask = softpipe->depth_stencil->stencil[face].valuemask; + + + /* do the stencil test first */ + { + unsigned passMask, failMask; + passMask = do_stencil_test(data, func, ref, valMask); + failMask = quad->inout.mask & ~passMask; + quad->inout.mask &= passMask; + + if (failOp != PIPE_STENCIL_OP_KEEP) { + apply_stencil_op(data, failMask, failOp, ref, wrtMask); + } + } + + if (quad->inout.mask) { + /* now the pixels that passed the stencil test are depth tested */ + if (softpipe->depth_stencil->depth.enabled) { + const unsigned origMask = quad->inout.mask; + + depth_test_quad(qs, data, quad); /* quad->mask is updated */ + + /* update stencil buffer values according to z pass/fail result */ + if (zFailOp != PIPE_STENCIL_OP_KEEP) { + const unsigned failMask = origMask & ~quad->inout.mask; + apply_stencil_op(data, failMask, zFailOp, ref, wrtMask); } - break; - case PIPE_FORMAT_Z24X8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - tile->data.depth32[y][x] = bzzzz[j] << 8; + + if (zPassOp != PIPE_STENCIL_OP_KEEP) { + const unsigned passMask = origMask & quad->inout.mask; + apply_stencil_op(data, passMask, zPassOp, ref, wrtMask); } - break; - default: - assert(0); + } + else { + /* no depth test, apply Zpass operator to stencil buffer values */ + apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask); } } - return TRUE; + return quad->inout.mask != 0; } +#define ALPHATEST( FUNC, COMP ) \ + static int \ + alpha_test_quads_##FUNC( struct quad_stage *qs, \ + struct quad_header *quads[], \ + unsigned nr ) \ + { \ + const float ref = qs->softpipe->depth_stencil->alpha.ref_value; \ + const uint cbuf = 0; /* only output[0].alpha is tested */ \ + unsigned pass_nr = 0; \ + unsigned i; \ + \ + for (i = 0; i < nr; i++) { \ + const float *aaaa = quads[i]->output.color[cbuf][3]; \ + unsigned passMask = 0; \ + \ + if (aaaa[0] COMP ref) passMask |= (1 << 0); \ + if (aaaa[1] COMP ref) passMask |= (1 << 1); \ + if (aaaa[2] COMP ref) passMask |= (1 << 2); \ + if (aaaa[3] COMP ref) passMask |= (1 << 3); \ + \ + quads[i]->inout.mask &= passMask; \ + \ + if (quads[i]->inout.mask) \ + quads[pass_nr++] = quads[i]; \ + } \ + \ + return pass_nr; \ + } + + +ALPHATEST( LESS, < ) +ALPHATEST( EQUAL, == ) +ALPHATEST( LEQUAL, <= ) +ALPHATEST( GREATER, > ) +ALPHATEST( NOTEQUAL, != ) +ALPHATEST( GEQUAL, >= ) + + +/* XXX: Incorporate into shader using KILP. + */ +static int +alpha_test_quads(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + switch (qs->softpipe->depth_stencil->alpha.func) { + case PIPE_FUNC_LESS: + return alpha_test_quads_LESS( qs, quads, nr ); + case PIPE_FUNC_EQUAL: + return alpha_test_quads_EQUAL( qs, quads, nr ); + break; + case PIPE_FUNC_LEQUAL: + return alpha_test_quads_LEQUAL( qs, quads, nr ); + case PIPE_FUNC_GREATER: + return alpha_test_quads_GREATER( qs, quads, nr ); + case PIPE_FUNC_NOTEQUAL: + return alpha_test_quads_NOTEQUAL( qs, quads, nr ); + case PIPE_FUNC_GEQUAL: + return alpha_test_quads_GEQUAL( qs, quads, nr ); + case PIPE_FUNC_ALWAYS: + return nr; + case PIPE_FUNC_NEVER: + default: + return 0; + } +} + +static unsigned mask_count[0x8] = +{ + 0, /* 0x0 */ + 1, /* 0x1 */ + 1, /* 0x2 */ + 2, /* 0x3 */ + 1, /* 0x4 */ + 2, /* 0x5 */ + 2, /* 0x6 */ + 3, /* 0x7 */ +}; + + + static void depth_test_quads(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) { unsigned i, pass = 0; + const struct sp_fragment_shader *fs = qs->softpipe->fs; + boolean interp_depth = !fs->info.writes_z; + struct depth_data data; - for (i = 0; i < nr; i++) { - if (sp_depth_test_quad(qs, quads[i])) + + if (qs->softpipe->depth_stencil->alpha.enabled) { + nr = alpha_test_quads(qs, quads, nr); + } + + if (qs->softpipe->framebuffer.zsbuf && + (qs->softpipe->depth_stencil->depth.enabled || + qs->softpipe->depth_stencil->stencil[0].enabled)) { + + data.ps = qs->softpipe->framebuffer.zsbuf; + data.format = data.ps->format; + data.tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, + quads[0]->input.x0, + quads[0]->input.y0); + + for (i = 0; i < nr; i++) { + get_depth_stencil_values(&data, quads[i]); + + if (qs->softpipe->depth_stencil->depth.enabled) { + if (interp_depth) + interpolate_quad_depth(quads[i]); + + convert_quad_depth(&data, quads[i]); + } + + if (qs->softpipe->depth_stencil->stencil[0].enabled) { + if (!depth_stencil_test_quad(qs, &data, quads[i])) + continue; + } + else { + if (!depth_test_quad(qs, &data, quads[i])) + continue; + } + + if (qs->softpipe->depth_stencil->stencil[0].enabled || + qs->softpipe->depth_stencil->depth.writemask) + write_depth_stencil_values(&data, quads[i]); + + qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask]; quads[pass++] = quads[i]; + } + + nr = pass; } - - if (pass) - qs->next->run(qs->next, quads, pass); + + if (nr) + qs->next->run(qs->next, quads, nr); } diff --git a/src/gallium/drivers/softpipe/sp_quad_earlyz.c b/src/gallium/drivers/softpipe/sp_quad_earlyz.c deleted file mode 100644 index 1048d44984..0000000000 --- a/src/gallium/drivers/softpipe/sp_quad_earlyz.c +++ /dev/null @@ -1,94 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \brief Quad early-z testing - */ - -#include "pipe/p_defines.h" -#include "util/u_memory.h" -#include "sp_quad.h" -#include "sp_quad_pipe.h" - - -/** - * All this stage does is compute the quad's Z values (which is normally - * done by the shading stage). - * The next stage will do the actual depth test. - */ -static void -earlyz_quad( - struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr ) -{ - const float a0z = quads[0]->posCoef->a0[2]; - const float dzdx = quads[0]->posCoef->dadx[2]; - const float dzdy = quads[0]->posCoef->dady[2]; - unsigned i; - - for (i = 0; i < nr; i++) { - const float fx = (float) quads[i]->input.x0; - const float fy = (float) quads[i]->input.y0; - const float z0 = a0z + dzdx * fx + dzdy * fy; - - quads[i]->output.depth[0] = z0; - quads[i]->output.depth[1] = z0 + dzdx; - quads[i]->output.depth[2] = z0 + dzdy; - quads[i]->output.depth[3] = z0 + dzdx + dzdy; - } - - qs->next->run( qs->next, quads, nr ); -} - -static void -earlyz_begin( - struct quad_stage *qs ) -{ - qs->next->begin( qs->next ); -} - -static void -earlyz_destroy( - struct quad_stage *qs ) -{ - FREE( qs ); -} - -struct quad_stage * -sp_quad_earlyz_stage( - struct softpipe_context *softpipe ) -{ - struct quad_stage *stage = CALLOC_STRUCT( quad_stage ); - - stage->softpipe = softpipe; - stage->begin = earlyz_begin; - stage->run = earlyz_quad; - stage->destroy = earlyz_destroy; - - return stage; -} diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index ea5ed3bbd0..56a8f55d77 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -111,24 +111,31 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad) } } - if (!z_written) { - /* compute Z values now, as in the quad earlyz stage */ - /* XXX we should really only do this if the earlyz stage is not used */ - const float fx = (float) quad->input.x0; - const float fy = (float) quad->input.y0; - const float dzdx = quad->posCoef->dadx[2]; - const float dzdy = quad->posCoef->dady[2]; - const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; - - quad->output.depth[0] = z0; - quad->output.depth[1] = z0 + dzdx; - quad->output.depth[2] = z0 + dzdy; - quad->output.depth[3] = z0 + dzdx + dzdy; - } - return TRUE; } + + +static void +coverage_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + uint cbuf; + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { + float (*quadColor)[4] = quad->output.color[cbuf]; + unsigned j; + for (j = 0; j < QUAD_SIZE; j++) { + assert(quad->input.coverage[j] >= 0.0); + assert(quad->input.coverage[j] <= 1.0); + quadColor[3][j] *= quad->input.coverage[j]; + } + } +} + + + static void shade_quads(struct quad_stage *qs, struct quad_header *quads[], @@ -144,8 +151,13 @@ shade_quads(struct quad_stage *qs, machine->InterpCoefs = quads[0]->coef; for (i = 0; i < nr; i++) { - if (shade_quad(qs, quads[i])) - quads[pass++] = quads[i]; + if (!shade_quad(qs, quads[i])) + continue; + + if (/*do_coverage*/ 0) + coverage_quad( qs, quads[i] ); + + quads[pass++] = quads[i]; } if (pass) diff --git a/src/gallium/drivers/softpipe/sp_quad_occlusion.c b/src/gallium/drivers/softpipe/sp_quad_occlusion.c deleted file mode 100644 index 4adeb16546..0000000000 --- a/src/gallium/drivers/softpipe/sp_quad_occlusion.c +++ /dev/null @@ -1,87 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * \brief Quad occlusion counter stage - * \author Brian Paul - */ - - -#include "pipe/p_defines.h" -#include "util/u_memory.h" -#include "sp_context.h" -#include "sp_quad.h" -#include "sp_surface.h" -#include "sp_quad_pipe.h" - -static unsigned count_bits( unsigned val ) -{ - unsigned i; - - for (i = 0; val ; val >>= 1) - i += (val & 1); - - return i; -} - -static void -occlusion_count_quads(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) -{ - struct softpipe_context *softpipe = qs->softpipe; - unsigned i; - - for (i = 0; i < nr; i++) - softpipe->occlusion_count += count_bits(quads[i]->inout.mask); - - qs->next->run(qs->next, quads, nr); -} - - -static void occlusion_begin(struct quad_stage *qs) -{ - qs->next->begin(qs->next); -} - - -static void occlusion_destroy(struct quad_stage *qs) -{ - FREE( qs ); -} - - -struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe ) -{ - struct quad_stage *stage = CALLOC_STRUCT(quad_stage); - - stage->softpipe = softpipe; - stage->begin = occlusion_begin; - stage->run = occlusion_count_quads; - stage->destroy = occlusion_destroy; - - return stage; -} diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c index d138d417ac..1b5bab4eca 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.c +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c @@ -38,18 +38,6 @@ sp_push_quad_first( struct softpipe_context *sp, sp->quad.first = quad; } -static void -sp_build_depth_stencil( struct softpipe_context *sp ) -{ - if (sp->depth_stencil->stencil[0].enabled || - sp->depth_stencil->stencil[1].enabled) { - sp_push_quad_first( sp, sp->quad.stencil_test ); - } - else if (sp->depth_stencil->depth.enabled && - sp->framebuffer.zsbuf) { - sp_push_quad_first( sp, sp->quad.depth_test ); - } -} void sp_build_quad_pipeline(struct softpipe_context *sp) @@ -61,37 +49,15 @@ sp_build_quad_pipeline(struct softpipe_context *sp) !sp->fs->info.uses_kill && !sp->fs->info.writes_z; - /* build up the pipeline in reverse order... */ - - /* Color combine - */ sp->quad.first = sp->quad.blend; - /* Shade/Depth/Stencil/Alpha - */ - if ((sp->rasterizer->poly_smooth && sp->reduced_prim == PIPE_PRIM_TRIANGLES) || - (sp->rasterizer->line_smooth && sp->reduced_prim == PIPE_PRIM_LINES) || - (sp->rasterizer->point_smooth && sp->reduced_prim == PIPE_PRIM_POINTS)) { - sp_push_quad_first( sp, sp->quad.coverage ); - } - - if (sp->active_query_count) { - sp_push_quad_first( sp, sp->quad.occlusion ); - } - - if (!early_depth_test) { - sp_build_depth_stencil( sp ); - } - - if (sp->depth_stencil->alpha.enabled) { - sp_push_quad_first( sp, sp->quad.alpha_test ); - } - - sp_push_quad_first( sp, sp->quad.shade ); - if (early_depth_test) { - sp_build_depth_stencil( sp ); - sp_push_quad_first( sp, sp->quad.earlyz ); + sp_push_quad_first( sp, sp->quad.shade ); + sp_push_quad_first( sp, sp->quad.depth_test ); + } + else { + sp_push_quad_first( sp, sp->quad.depth_test ); + sp_push_quad_first( sp, sp->quad.shade ); } } diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.h b/src/gallium/drivers/softpipe/sp_quad_pipe.h index add31ba705..c0aa134831 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.h +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.h @@ -69,6 +69,4 @@ struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe ); void sp_build_quad_pipeline(struct softpipe_context *sp); -boolean sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad); - #endif /* SP_QUAD_PIPE_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c deleted file mode 100644 index d9ee80e59a..0000000000 --- a/src/gallium/drivers/softpipe/sp_quad_stencil.c +++ /dev/null @@ -1,363 +0,0 @@ - -/** - * \brief Quad stencil testing - */ - - -#include "sp_context.h" -#include "sp_quad.h" -#include "sp_surface.h" -#include "sp_tile_cache.h" -#include "sp_quad_pipe.h" -#include "pipe/p_defines.h" -#include "util/u_memory.h" - - -/** Only 8-bit stencil supported */ -#define STENCIL_MAX 0xff - - -/** - * Do the basic stencil test (compare stencil buffer values against the - * reference value. - * - * \param stencilVals the stencil values from the stencil buffer - * \param func the stencil func (PIPE_FUNC_x) - * \param ref the stencil reference value - * \param valMask the stencil value mask indicating which bits of the stencil - * values and ref value are to be used. - * \return mask indicating which pixels passed the stencil test - */ -static unsigned -do_stencil_test(const ubyte stencilVals[QUAD_SIZE], unsigned func, - unsigned ref, unsigned valMask) -{ - unsigned passMask = 0x0; - unsigned j; - - ref &= valMask; - - switch (func) { - case PIPE_FUNC_NEVER: - /* passMask = 0x0 */ - break; - case PIPE_FUNC_LESS: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref < (stencilVals[j] & valMask)) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_EQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref == (stencilVals[j] & valMask)) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_LEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref <= (stencilVals[j] & valMask)) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_GREATER: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref > (stencilVals[j] & valMask)) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_NOTEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref != (stencilVals[j] & valMask)) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_GEQUAL: - for (j = 0; j < QUAD_SIZE; j++) { - if (ref >= (stencilVals[j] & valMask)) { - passMask |= (1 << j); - } - } - break; - case PIPE_FUNC_ALWAYS: - passMask = MASK_ALL; - break; - default: - assert(0); - } - - return passMask; -} - - -/** - * Apply the stencil operator to stencil values. - * - * \param stencilVals the stencil buffer values (read and written) - * \param mask indicates which pixels to update - * \param op the stencil operator (PIPE_STENCIL_OP_x) - * \param ref the stencil reference value - * \param wrtMask writemask controlling which bits are changed in the - * stencil values - */ -static void -apply_stencil_op(ubyte stencilVals[QUAD_SIZE], - unsigned mask, unsigned op, ubyte ref, ubyte wrtMask) -{ - unsigned j; - ubyte newstencil[QUAD_SIZE]; - - for (j = 0; j < QUAD_SIZE; j++) { - newstencil[j] = stencilVals[j]; - } - - switch (op) { - case PIPE_STENCIL_OP_KEEP: - /* no-op */ - break; - case PIPE_STENCIL_OP_ZERO: - for (j = 0; j < QUAD_SIZE; j++) { - if (mask & (1 << j)) { - newstencil[j] = 0; - } - } - break; - case PIPE_STENCIL_OP_REPLACE: - for (j = 0; j < QUAD_SIZE; j++) { - if (mask & (1 << j)) { - newstencil[j] = ref; - } - } - break; - case PIPE_STENCIL_OP_INCR: - for (j = 0; j < QUAD_SIZE; j++) { - if (mask & (1 << j)) { - if (stencilVals[j] < STENCIL_MAX) { - newstencil[j] = stencilVals[j] + 1; - } - } - } - break; - case PIPE_STENCIL_OP_DECR: - for (j = 0; j < QUAD_SIZE; j++) { - if (mask & (1 << j)) { - if (stencilVals[j] > 0) { - newstencil[j] = stencilVals[j] - 1; - } - } - } - break; - case PIPE_STENCIL_OP_INCR_WRAP: - for (j = 0; j < QUAD_SIZE; j++) { - if (mask & (1 << j)) { - newstencil[j] = stencilVals[j] + 1; - } - } - break; - case PIPE_STENCIL_OP_DECR_WRAP: - for (j = 0; j < QUAD_SIZE; j++) { - if (mask & (1 << j)) { - newstencil[j] = stencilVals[j] - 1; - } - } - break; - case PIPE_STENCIL_OP_INVERT: - for (j = 0; j < QUAD_SIZE; j++) { - if (mask & (1 << j)) { - newstencil[j] = ~stencilVals[j]; - } - } - break; - default: - assert(0); - } - - /* - * update the stencil values - */ - if (wrtMask != STENCIL_MAX) { - /* apply bit-wise stencil buffer writemask */ - for (j = 0; j < QUAD_SIZE; j++) { - stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & stencilVals[j]); - } - } - else { - for (j = 0; j < QUAD_SIZE; j++) { - stencilVals[j] = newstencil[j]; - } - } -} - - -/** - * Do stencil (and depth) testing. Stenciling depends on the outcome of - * depth testing. - */ -static void -stencil_test_quad(struct quad_stage *qs, struct quad_header *quads[], - unsigned nr) -{ - struct softpipe_context *softpipe = qs->softpipe; - struct pipe_surface *ps = softpipe->framebuffer.zsbuf; - unsigned func, zFailOp, zPassOp, failOp; - ubyte ref, wrtMask, valMask; - ubyte stencilVals[QUAD_SIZE]; - struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe->zsbuf_cache, - quads[0]->input.x0, - quads[0]->input.y0); - uint face = quads[0]->input.facing; - uint pass = 0; - uint j, q; - - if (!softpipe->depth_stencil->stencil[1].enabled) { - /* single-sided stencil test, use front (face=0) state */ - face = 0; - } - - /* choose front or back face function, operator, etc */ - /* XXX we could do these initializations once per primitive */ - func = softpipe->depth_stencil->stencil[face].func; - failOp = softpipe->depth_stencil->stencil[face].fail_op; - zFailOp = softpipe->depth_stencil->stencil[face].zfail_op; - zPassOp = softpipe->depth_stencil->stencil[face].zpass_op; - ref = softpipe->depth_stencil->stencil[face].ref_value; - wrtMask = softpipe->depth_stencil->stencil[face].writemask; - valMask = softpipe->depth_stencil->stencil[face].valuemask; - - assert(ps); /* shouldn't get here if there's no stencil buffer */ - - for (q = 0; q < nr; q++) { - struct quad_header *quad = quads[q]; - - /* get stencil values from cached tile */ - switch (ps->format) { - case PIPE_FORMAT_S8Z24_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - stencilVals[j] = tile->data.depth32[y][x] >> 24; - } - break; - case PIPE_FORMAT_Z24S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - stencilVals[j] = tile->data.depth32[y][x] & 0xff; - } - break; - case PIPE_FORMAT_S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - stencilVals[j] = tile->data.stencil8[y][x]; - } - break; - default: - assert(0); - } - - /* do the stencil test first */ - { - unsigned passMask, failMask; - passMask = do_stencil_test(stencilVals, func, ref, valMask); - failMask = quad->inout.mask & ~passMask; - quad->inout.mask &= passMask; - - if (failOp != PIPE_STENCIL_OP_KEEP) { - apply_stencil_op(stencilVals, failMask, failOp, ref, wrtMask); - } - } - - if (quad->inout.mask) { - /* now the pixels that passed the stencil test are depth tested */ - if (softpipe->depth_stencil->depth.enabled) { - const unsigned origMask = quad->inout.mask; - - sp_depth_test_quad(qs, quad); /* quad->mask is updated */ - - /* update stencil buffer values according to z pass/fail result */ - if (zFailOp != PIPE_STENCIL_OP_KEEP) { - const unsigned failMask = origMask & ~quad->inout.mask; - apply_stencil_op(stencilVals, failMask, zFailOp, ref, wrtMask); - } - - if (zPassOp != PIPE_STENCIL_OP_KEEP) { - const unsigned passMask = origMask & quad->inout.mask; - apply_stencil_op(stencilVals, passMask, zPassOp, ref, wrtMask); - } - } - else { - /* no depth test, apply Zpass operator to stencil buffer values */ - apply_stencil_op(stencilVals, quad->inout.mask, zPassOp, ref, wrtMask); - } - - } - - /* put new stencil values into cached tile */ - switch (ps->format) { - case PIPE_FORMAT_S8Z24_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - uint s8z24 = tile->data.depth32[y][x]; - s8z24 = (stencilVals[j] << 24) | (s8z24 & 0xffffff); - tile->data.depth32[y][x] = s8z24; - } - break; - case PIPE_FORMAT_Z24S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - uint z24s8 = tile->data.depth32[y][x]; - z24s8 = (z24s8 & 0xffffff00) | stencilVals[j]; - tile->data.depth32[y][x] = z24s8; - } - break; - case PIPE_FORMAT_S8_UNORM: - for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->input.x0 % TILE_SIZE + (j & 1); - int y = quad->input.y0 % TILE_SIZE + (j >> 1); - tile->data.stencil8[y][x] = stencilVals[j]; - } - break; - default: - assert(0); - } - - if (quad->inout.mask) - quads[pass++] = quad; - } - - if (pass) - qs->next->run(qs->next, quads, pass); -} - - -static void stencil_begin(struct quad_stage *qs) -{ - qs->next->begin(qs->next); -} - - -static void stencil_destroy(struct quad_stage *qs) -{ - FREE( qs ); -} - - -struct quad_stage *sp_quad_stencil_test_stage( struct softpipe_context *softpipe ) -{ - struct quad_stage *stage = CALLOC_STRUCT(quad_stage); - - stage->softpipe = softpipe; - stage->begin = stencil_begin; - stage->run = stencil_test_quad; - stage->destroy = stencil_destroy; - - return stage; -} diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 0226501267..8654069bde 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -246,9 +246,7 @@ void softpipe_update_derived( struct softpipe_context *softpipe ) if (softpipe->dirty & (SP_NEW_BLEND | SP_NEW_DEPTH_STENCIL_ALPHA | SP_NEW_FRAMEBUFFER | - SP_NEW_RASTERIZER | - SP_NEW_FS | - SP_NEW_QUERY)) + SP_NEW_FS)) sp_build_quad_pipeline(softpipe); softpipe->dirty = 0; -- cgit v1.2.3 From 1078844d18367b4259cd3b6a3a73e3cd72ea019f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 27 Jul 2009 11:23:51 +0100 Subject: softpipe: cope with nr_cbufs == 0 Disable blend code when no color buffer --- src/gallium/drivers/softpipe/sp_quad_blend.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index e1f0e77255..e243c63fa2 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -924,6 +924,13 @@ single_output_color(struct quad_stage *qs, } } +static void +blend_noop(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ +} + static void choose_blend_quad(struct quad_stage *qs, @@ -934,9 +941,12 @@ choose_blend_quad(struct quad_stage *qs, const struct pipe_blend_state *blend = softpipe->blend; qs->run = blend_fallback; - - if (!softpipe->blend->logicop_enable && - softpipe->blend->colormask == 0xf) + + if (softpipe->framebuffer.nr_cbufs == 0) { + qs->run = blend_noop; + } + else if (!softpipe->blend->logicop_enable && + softpipe->blend->colormask == 0xf) { if (!blend->blend_enable) { qs->run = single_output_color; -- cgit v1.2.3 From c61145820556833dccd728eb6df3397bec7f70da Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 27 Jul 2009 12:11:16 +0100 Subject: softpipe: fastpath for interpolated z16 less depthtesting Because this is interpolated (ie. early) depth, we can build in an assumption about the quads emitted by triangle setup, ie that they are actually linear spans. Interpolate z over those spans in z16 format to save on math & conversion. --- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 143 +++++++++++++++++++++- 1 file changed, 139 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index bf65799a81..506867f4d0 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -646,9 +646,9 @@ static unsigned mask_count[0x8] = static void -depth_test_quads(struct quad_stage *qs, - struct quad_header *quads[], - unsigned nr) +depth_test_quads_fallback(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) { unsigned i, pass = 0; const struct sp_fragment_shader *fs = qs->softpipe->fs; @@ -704,9 +704,144 @@ depth_test_quads(struct quad_stage *qs, qs->next->run(qs->next, quads, nr); } +/* XXX: this function assumes setup function actually emits linear + * spans of quads. It seems a lot more natural to do (early) + * depth-testing on spans rather than quads. + */ +static void +depth_interp_z16_less_write(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + unsigned i, pass = 0; + const unsigned ix = quads[0]->input.x0; + const unsigned iy = quads[0]->input.y0; + const float fx = (float) ix; + const float fy = (float) iy; + const float dzdx = quads[0]->posCoef->dadx[2]; + const float dzdy = quads[0]->posCoef->dady[2]; + const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy; + struct softpipe_cached_tile *tile; + ushort (*depth16)[TILE_SIZE]; + ushort idepth[4], depth_step; + const float scale = 65535.0; + + idepth[0] = (ushort)((z0) * scale); + idepth[1] = (ushort)((z0 + dzdx) * scale); + idepth[2] = (ushort)((z0 + dzdy) * scale); + idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale); + + depth_step = (ushort)(dzdx * 2 * scale); + + tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy); + + depth16 = (ushort (*)[TILE_SIZE]) + &tile->data.depth16[iy % TILE_SIZE][ix % TILE_SIZE]; + + for (i = 0; i < nr; i++) { + unsigned outmask = quads[i]->inout.mask; + unsigned mask = 0; + + if ((outmask & 1) && idepth[0] < depth16[0][0]) { + depth16[0][0] = idepth[0]; + mask |= (1 << 0); + } + + if ((outmask & 2) && idepth[1] < depth16[0][1]) { + depth16[0][1] = idepth[1]; + mask |= (1 << 1); + } + + if ((outmask & 4) && idepth[2] < depth16[1][0]) { + depth16[1][0] = idepth[2]; + mask |= (1 << 2); + } + + if ((outmask & 8) && idepth[3] < depth16[1][1]) { + depth16[1][1] = idepth[3]; + mask |= (1 << 3); + } + + idepth[0] += depth_step; + idepth[1] += depth_step; + idepth[2] += depth_step; + idepth[3] += depth_step; + + depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2]; + + quads[i]->inout.mask = mask; + if (quads[i]->inout.mask) + quads[pass++] = quads[i]; + } + + if (pass) + qs->next->run(qs->next, quads, pass); + +} + + +static void +depth_noop(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + qs->next->run(qs->next, quads, nr); +} + + + +static void +choose_depth_test(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + boolean interp_depth = !qs->softpipe->fs->info.writes_z; + + boolean alpha = qs->softpipe->depth_stencil->alpha.enabled; + + boolean depth = (qs->softpipe->framebuffer.zsbuf && + qs->softpipe->depth_stencil->depth.enabled); + + unsigned depthfunc = qs->softpipe->depth_stencil->depth.func; + + boolean stencil = qs->softpipe->depth_stencil->stencil[0].enabled; + + boolean depthwrite = qs->softpipe->depth_stencil->depth.writemask; + + + qs->run = depth_test_quads_fallback; + + if (!alpha && + !depth && + !stencil) { + qs->run = depth_noop; + } + else if (!alpha && + interp_depth && + depth && + depthfunc == PIPE_FUNC_LESS && + depthwrite && + !stencil) + { + switch (qs->softpipe->framebuffer.zsbuf->format) { + case PIPE_FORMAT_Z16_UNORM: + qs->run = depth_interp_z16_less_write; + break; + default: + break; + } + } + + qs->run( qs, quads, nr ); +} + + + + static void depth_test_begin(struct quad_stage *qs) { + qs->run = choose_depth_test; qs->next->begin(qs->next); } @@ -723,7 +858,7 @@ struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe ) stage->softpipe = softpipe; stage->begin = depth_test_begin; - stage->run = depth_test_quads; + stage->run = choose_depth_test; stage->destroy = depth_test_destroy; return stage; -- cgit v1.2.3 From 6142de393fe34ff0866f8489f1292eb473276f11 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 27 Jul 2009 12:44:58 +0100 Subject: softpipe: example fast paths for simple samplers All these fastpaths are examples of the types of things we'd code-generate in a more sophisticated version of softpipe. --- src/gallium/drivers/softpipe/sp_state_derived.c | 1 + src/gallium/drivers/softpipe/sp_tex_sample.c | 327 +++++++++++++++++++++++- src/gallium/drivers/softpipe/sp_tex_sample.h | 10 +- 3 files changed, 333 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 8654069bde..3ed1de7e17 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -208,6 +208,7 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { softpipe->tgsi.frag_samplers[i].sampler = softpipe->sampler[i]; softpipe->tgsi.frag_samplers[i].texture = softpipe->texture[i]; + softpipe->tgsi.frag_samplers[i].base.get_samples = sp_get_samples_fragment; } for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 46c56b0c83..8248576e98 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -662,10 +662,64 @@ choose_mipmap_levels(const struct pipe_texture *texture, * XXX maybe move this into sp_tile_cache.c and merge with the * sp_get_cached_tile_tex() function. Also, get 4 texels instead of 1... */ +static void +get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler, + unsigned face, unsigned level, int x, int y, + const float *out[4]) +{ + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + + const struct softpipe_cached_tile *tile + = sp_get_cached_tile_tex(samp->cache, + tile_address(x, y, 0, face, level)); + + y %= TILE_SIZE; + x %= TILE_SIZE; + + out[0] = &tile->data.color[y ][x ][0]; + out[1] = &tile->data.color[y ][x+1][0]; + out[2] = &tile->data.color[y+1][x ][0]; + out[3] = &tile->data.color[y+1][x+1][0]; +} + +static INLINE const float * +get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler, + unsigned face, unsigned level, int x, int y) +{ + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + + const struct softpipe_cached_tile *tile + = sp_get_cached_tile_tex(samp->cache, + tile_address(x, y, 0, face, level)); + + y %= TILE_SIZE; + x %= TILE_SIZE; + + return &tile->data.color[y][x][0]; +} + + +static void +get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler, + unsigned face, unsigned level, + int x0, int y0, + int x1, int y1, + const float *out[4]) +{ + unsigned i; + + for (i = 0; i < 4; i++) { + unsigned tx = (i & 1) ? x1 : x0; + unsigned ty = (i >> 1) ? y1 : y0; + + out[i] = get_texel_2d_ptr( tgsi_sampler, face, level, tx, ty ); + } +} + static void get_texel(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y, int z, - float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) + unsigned face, unsigned level, int x, int y, int z, + float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) { const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); const struct pipe_texture *texture = samp->texture; @@ -825,6 +879,193 @@ shadow_compare4(const struct pipe_sampler_state *sampler, } + +static void +sp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + unsigned j; + unsigned level = samp->level; + unsigned xpot = 1 << (samp->xpot - level); + unsigned ypot = 1 << (samp->ypot - level); + + for (j = 0; j < QUAD_SIZE; j++) { + int c; + + float u = s[j] * xpot - 0.5F; + float v = t[j] * ypot - 0.5F; + + int uflr = util_ifloor(u); + int vflr = util_ifloor(v); + + float xw = u - (float)uflr; + float yw = v - (float)vflr; + + int x0 = uflr & (xpot - 1); + int y0 = vflr & (ypot - 1); + + const float *tx[4]; + + + /* Can we fetch all four at once: + */ + if (x0 % TILE_SIZE != TILE_SIZE-1 && + y0 % TILE_SIZE != TILE_SIZE-1) + { + get_texel_quad_2d(tgsi_sampler, 0, level, x0, y0, tx); + } + else + { + unsigned x1 = (uflr + 1) & (xpot - 1); + unsigned y1 = (vflr + 1) & (ypot - 1); + get_texel_quad_2d_mt(tgsi_sampler, 0, level, + x0, y0, x1, y1, tx); + } + + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(xw, yw, + tx[0][c], tx[1][c], + tx[2][c], tx[3][c]); + } + } +} + + +static void +sp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + unsigned j; + unsigned level = samp->level; + unsigned xpot = 1 << (samp->xpot - level); + unsigned ypot = 1 << (samp->ypot - level); + + for (j = 0; j < QUAD_SIZE; j++) { + int c; + + float u = s[j] * xpot - 0.5F; + float v = t[j] * ypot - 0.5F; + + int uflr = util_ifloor(u); + int vflr = util_ifloor(v); + + int x0 = uflr & (xpot - 1); + int y0 = vflr & (ypot - 1); + + const float *out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0); + + for (c = 0; c < 4; c++) { + rgba[c][j] = out[c]; + } + } +} + + +static void +sp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + unsigned j; + unsigned level = samp->level; + unsigned xpot = (1<xpot); + unsigned ypot = (1<ypot); + + for (j = 0; j < QUAD_SIZE; j++) { + int c; + + float u = s[j] * xpot - 0.5F; + float v = t[j] * ypot - 0.5F; + + int x0, y0; + const float *out; + + x0 = util_ifloor(u); + if (x0 < 0) + x0 = 0; + else if (x0 > xpot - 1) + x0 = xpot - 1; + + y0 = util_ifloor(v); + if (y0 < 0) + y0 = 0; + else if (y0 > ypot - 1) + y0 = ypot - 1; + + out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0); + + for (c = 0; c < 4; c++) { + rgba[c][j] = out[c]; + } + } +} + + +static void +sp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; + int level0, level1; + float lambda; + + lambda = compute_lambda(texture, sampler, s, t, p, lodbias); + level0 = (int)lambda; + level1 = level0 + 1; + + if (lambda < 0.0) { + samp->level = 0; + sp_get_samples_2d_linear_repeat_POT( tgsi_sampler, + s, t, p, lodbias, rgba ); + } + else if (level0 >= texture->last_level) { + samp->level = texture->last_level; + sp_get_samples_2d_linear_repeat_POT( tgsi_sampler, + s, t, p, lodbias, rgba ); + } + else { + float rgba0[4][4]; + float rgba1[4][4]; + int c,j; + + float levelBlend = lambda - level0; /* blending weight between levels */ + + samp->level = level0; + sp_get_samples_2d_linear_repeat_POT( tgsi_sampler, + s, t, p, lodbias, rgba0 ); + + samp->level++; + sp_get_samples_2d_linear_repeat_POT( tgsi_sampler, + s, t, p, lodbias, rgba1 ); + + for (c = 0; c < 4; c++) + for (j = 0; j < 4; j++) + rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]); + } +} + /** * Common code for sampling 1D/2D/cube textures. * Could probably extend for 3D... @@ -1254,6 +1495,16 @@ sp_get_samples(struct tgsi_sampler *tgsi_sampler, #endif } +static void +sp_get_samples_fallback(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + sp_get_samples(tgsi_sampler, s, t, p, TRUE, lodbias, rgba); +} /** * Called via tgsi_sampler::get_samples() when running a fragment shader. @@ -1267,7 +1518,77 @@ sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler, float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - sp_get_samples(tgsi_sampler, s, t, p, TRUE, lodbias, rgba); + struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; + + tgsi_sampler->get_samples = sp_get_samples_fallback; + + /* Try to hook in a faster sampler. Ultimately we'll have to + * code-generate these. Luckily most of this looks like it is + * orthogonal state within the sampler. + */ + if (texture->target == PIPE_TEXTURE_2D && + sampler->min_img_filter == sampler->mag_img_filter && + sampler->wrap_s == sampler->wrap_t && + sampler->compare_mode == FALSE && + sampler->normalized_coords) + { + samp->xpot = util_unsigned_logbase2( samp->texture->width[0] ); + samp->ypot = util_unsigned_logbase2( samp->texture->height[0] ); + + if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { + samp->level = CLAMP((int) sampler->min_lod, + 0, (int) texture->last_level); + + if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { + switch (sampler->min_img_filter) { + case PIPE_TEX_FILTER_NEAREST: + tgsi_sampler->get_samples = sp_get_samples_2d_nearest_repeat_POT; + break; + case PIPE_TEX_FILTER_LINEAR: + tgsi_sampler->get_samples = sp_get_samples_2d_linear_repeat_POT; + break; + default: + break; + } + } + else if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP) { + switch (sampler->min_img_filter) { + case PIPE_TEX_FILTER_NEAREST: + tgsi_sampler->get_samples = sp_get_samples_2d_nearest_clamp_POT; + break; + default: + break; + } + } + } + else if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { + switch (sampler->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + /* This one not working yet: + */ + if (0) + tgsi_sampler->get_samples = sp_get_samples_2d_linear_mip_linear_repeat_POT; + break; + default: + break; + } + } + } + } + else if (0) { + _debug_printf("target %d/%d min_mip %d/%d min_img %d/%d wrap %d/%d compare %d/%d norm %d/%d\n", + texture->target, PIPE_TEXTURE_2D, + sampler->min_mip_filter, PIPE_TEX_MIPFILTER_NONE, + sampler->min_img_filter, sampler->mag_img_filter, + sampler->wrap_s, sampler->wrap_t, + sampler->compare_mode, FALSE, + sampler->normalized_coords, TRUE); + } + + tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba ); } diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index 3c5beb560f..0650c7830b 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -39,6 +39,12 @@ struct sp_shader_sampler { struct tgsi_sampler base; /**< base class */ + /* For sp_get_samples_2d_linear_POT: + */ + unsigned xpot; + unsigned ypot; + unsigned level; + const struct pipe_texture *texture; const struct pipe_sampler_state *sampler; @@ -47,10 +53,10 @@ struct sp_shader_sampler -static INLINE const struct sp_shader_sampler * +static INLINE struct sp_shader_sampler * sp_shader_sampler(const struct tgsi_sampler *sampler) { - return (const struct sp_shader_sampler *) sampler; + return (struct sp_shader_sampler *) sampler; } -- cgit v1.2.3 From 5fdac2dcea09c654725666b3cab5f59dfc9e31a5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 27 Jul 2009 15:51:15 +0100 Subject: softpipe: fix off-by-one in nearest texcoord routines Stray '- 0.5' copied from linear versions. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 8248576e98..4651d781a9 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -955,8 +955,8 @@ sp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, for (j = 0; j < QUAD_SIZE; j++) { int c; - float u = s[j] * xpot - 0.5F; - float v = t[j] * ypot - 0.5F; + float u = s[j] * xpot; + float v = t[j] * ypot; int uflr = util_ifloor(u); int vflr = util_ifloor(v); @@ -990,8 +990,8 @@ sp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, for (j = 0; j < QUAD_SIZE; j++) { int c; - float u = s[j] * xpot - 0.5F; - float v = t[j] * ypot - 0.5F; + float u = s[j] * xpot; + float v = t[j] * ypot; int x0, y0; const float *out; -- cgit v1.2.3 From 572c2fb5bb6cec71ef42e93416251a6a6c183de0 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 30 Jul 2009 11:34:36 +0100 Subject: softpipe: remove unused variable in shade_quad --- src/gallium/drivers/softpipe/sp_quad_fs.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 56a8f55d77..e1bc0712de 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -74,7 +74,6 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad) struct quad_shade_stage *qss = quad_shade_stage( qs ); struct softpipe_context *softpipe = qs->softpipe; struct tgsi_exec_machine *machine = qss->machine; - boolean z_written; /* run shader */ quad->inout.mask &= softpipe->fs->run( softpipe->fs, machine, quad ); @@ -82,7 +81,6 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad) return FALSE; /* store outputs */ - z_written = FALSE; { const ubyte *sem_name = softpipe->fs->info.output_semantic_name; const ubyte *sem_index = softpipe->fs->info.output_semantic_index; @@ -104,7 +102,6 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad) for (j = 0; j < 4; j++) { quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j]; } - z_written = TRUE; } break; } -- cgit v1.2.3 From 73a6178a73a4cc34195348a537d3f94aab6a43e1 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 30 Jul 2009 11:35:08 +0100 Subject: softpipe: add depth-lequal z16 path --- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 107 ++++++++++++++++++++-- 1 file changed, 100 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 506867f4d0..9cffea2c9e 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -780,6 +780,81 @@ depth_interp_z16_less_write(struct quad_stage *qs, } +static void +depth_interp_z16_lequal_write(struct quad_stage *qs, + struct quad_header *quads[], + unsigned nr) +{ + unsigned i, pass = 0; + const unsigned ix = quads[0]->input.x0; + const unsigned iy = quads[0]->input.y0; + const float fx = (float) ix; + const float fy = (float) iy; + const float dzdx = quads[0]->posCoef->dadx[2]; + const float dzdy = quads[0]->posCoef->dady[2]; + const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy; + struct softpipe_cached_tile *tile; + ushort (*depth16)[TILE_SIZE]; + ushort idepth[4], depth_step; + const float scale = 65535.0; + + idepth[0] = (ushort)((z0) * scale); + idepth[1] = (ushort)((z0 + dzdx) * scale); + idepth[2] = (ushort)((z0 + dzdy) * scale); + idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale); + + depth_step = (ushort)(dzdx * 2 * scale); + + tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy); + + depth16 = (ushort (*)[TILE_SIZE]) + &tile->data.depth16[iy % TILE_SIZE][ix % TILE_SIZE]; + + for (i = 0; i < nr; i++) { + unsigned outmask = quads[i]->inout.mask; + unsigned mask = 0; + + if ((outmask & 1) && idepth[0] <= depth16[0][0]) { + depth16[0][0] = idepth[0]; + mask |= (1 << 0); + } + + if ((outmask & 2) && idepth[1] <= depth16[0][1]) { + depth16[0][1] = idepth[1]; + mask |= (1 << 1); + } + + if ((outmask & 4) && idepth[2] <= depth16[1][0]) { + depth16[1][0] = idepth[2]; + mask |= (1 << 2); + } + + if ((outmask & 8) && idepth[3] <= depth16[1][1]) { + depth16[1][1] = idepth[3]; + mask |= (1 << 3); + } + + idepth[0] += depth_step; + idepth[1] += depth_step; + idepth[2] += depth_step; + idepth[3] += depth_step; + + depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2]; + + quads[i]->inout.mask = mask; + if (quads[i]->inout.mask) + quads[pass++] = quads[i]; + } + + if (pass) + qs->next->run(qs->next, quads, pass); + +} + + + + + static void depth_noop(struct quad_stage *qs, struct quad_header *quads[], @@ -809,8 +884,6 @@ choose_depth_test(struct quad_stage *qs, boolean depthwrite = qs->softpipe->depth_stencil->depth.writemask; - qs->run = depth_test_quads_fallback; - if (!alpha && !depth && !stencil) { @@ -819,18 +892,38 @@ choose_depth_test(struct quad_stage *qs, else if (!alpha && interp_depth && depth && - depthfunc == PIPE_FUNC_LESS && depthwrite && !stencil) { - switch (qs->softpipe->framebuffer.zsbuf->format) { - case PIPE_FORMAT_Z16_UNORM: - qs->run = depth_interp_z16_less_write; + switch (depthfunc) { + case PIPE_FUNC_LESS: + switch (qs->softpipe->framebuffer.zsbuf->format) { + case PIPE_FORMAT_Z16_UNORM: + qs->run = depth_interp_z16_less_write; + break; + default: + qs->run = depth_test_quads_fallback; + break; + } break; - default: + case PIPE_FUNC_LEQUAL: + switch (qs->softpipe->framebuffer.zsbuf->format) { + case PIPE_FORMAT_Z16_UNORM: + qs->run = depth_interp_z16_lequal_write; + break; + default: + qs->run = depth_test_quads_fallback; + break; + } break; + default: + qs->run = depth_test_quads_fallback; } } + else { + qs->run = depth_test_quads_fallback; + } + qs->run( qs, quads, nr ); } -- cgit v1.2.3 From 1295cf423e21dad04a947960782ffa8db2739709 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 30 Jul 2009 11:35:50 +0100 Subject: softpipe: rearrange blend fastpaths --- src/gallium/drivers/softpipe/sp_quad_blend.c | 82 +++++++++------------------- 1 file changed, 27 insertions(+), 55 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index e243c63fa2..b8ed086734 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -793,10 +793,7 @@ blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) { - static const float one[4] = { 1, 1, 1, 1 }; - float one_minus_alpha[QUAD_SIZE]; - float dest[4][QUAD_SIZE]; - float source[4][QUAD_SIZE]; + float source[4]; uint i, j, q; struct softpipe_cached_tile *tile @@ -806,45 +803,26 @@ blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs, for (q = 0; q < nr; q++) { struct quad_header *quad = quads[q]; - float (*quadColor)[4] = quad->output.color[0]; - const float *alpha = quadColor[3]; const int itx = (quad->input.x0 & (TILE_SIZE-1)); const int ity = (quad->input.y0 & (TILE_SIZE-1)); + float (*swzColor)[4] = quad->output.color[0]; - /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - int x = itx + (j & 1); - int y = ity + (j >> 1); - for (i = 0; i < 4; i++) { - dest[i][j] = tile->data.color[y][x][i]; - } - } - - VEC4_MUL(source[0], quadColor[0], alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], alpha); /* B */ - VEC4_MUL(source[3], quadColor[3], alpha); /* A */ + for (j = 0; j < 4; j++) { + if (quad->inout.mask & (1<data.color[ity + (j>>1)][itx + (j&1)]; + const float alpha = swzColor[3][j]; + const float one_minus_alpha = 1.0 - alpha; - VEC4_SUB(one_minus_alpha, one, alpha); - VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ - VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ - VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ - VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* B */ - - VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */ - VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ - VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */ - VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ + for (i = 0; i < 4; i++) { + dest[i] *= one_minus_alpha; + dest[i] += swzColor[i][j] * alpha; - for (j = 0; j < QUAD_SIZE; j++) { - if (quad->inout.mask & (1 << j)) { - int x = itx + (j & 1); - int y = ity + (j >> 1); - for (i = 0; i < 4; i++) { /* loop over color chans */ - tile->data.color[y][x][i] = quadColor[i][j]; + /* XXX: redundant, will be clamped later for argb8 surfaces: + */ + dest[i] = CLAMP(dest[i], 0.0, 1.0); } } - } + } } } @@ -863,33 +841,27 @@ blend_single_add_one_one(struct quad_stage *qs, for (q = 0; q < nr; q++) { struct quad_header *quad = quads[q]; - float (*quadColor)[4] = quad->output.color[0]; const int itx = (quad->input.x0 & (TILE_SIZE-1)); const int ity = (quad->input.y0 & (TILE_SIZE-1)); - + float (*dest)[64][4] = (float (*)[64][4])&tile->data.color[ity][itx]; + float (*swzColor)[4] = quad->output.color[0]; + float quadColor[4][4]; + /* get/swizzle dest colors */ for (j = 0; j < QUAD_SIZE; j++) { - int x = itx + (j & 1); - int y = ity + (j >> 1); for (i = 0; i < 4; i++) { - dest[i][j] = tile->data.color[y][x][i]; + quadColor[i][j] = swzColor[j][i]; } } - VEC4_ADD_SAT(quadColor[0], quadColor[0], dest[0]); /* R */ - VEC4_ADD_SAT(quadColor[1], quadColor[1], dest[1]); /* G */ - VEC4_ADD_SAT(quadColor[2], quadColor[2], dest[2]); /* B */ - VEC4_ADD_SAT(quadColor[3], quadColor[3], dest[3]); /* A */ - - for (j = 0; j < QUAD_SIZE; j++) { - if (quad->inout.mask & (1 << j)) { - int x = itx + (j & 1); - int y = ity + (j >> 1); - for (i = 0; i < 4; i++) { /* loop over color chans */ - tile->data.color[y][x][i] = quadColor[i][j]; - } - } - } + if (quad->inout.mask & 1) + VEC4_ADD_SAT(dest[0][0], quadColor[0], dest[0][0]); + if (quad->inout.mask & 2) + VEC4_ADD_SAT(dest[0][1], quadColor[1], dest[0][1]); + if (quad->inout.mask & 4) + VEC4_ADD_SAT(dest[1][0], quadColor[2], dest[1][0]); + if (quad->inout.mask & 8) + VEC4_ADD_SAT(dest[1][1], quadColor[3], dest[1][1]); } } -- cgit v1.2.3 From 95f7ed4638d4e379783abdd5b250e203b6b1b435 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 30 Jul 2009 11:59:32 +0100 Subject: softpipe: setup quad outputs from with fs->run --- src/gallium/drivers/softpipe/sp_fs_exec.c | 34 ++++++++++++++++++++++++++++- src/gallium/drivers/softpipe/sp_fs_sse.c | 35 +++++++++++++++++++++++++++++- src/gallium/drivers/softpipe/sp_quad_fs.c | 36 ++----------------------------- 3 files changed, 69 insertions(+), 36 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 9ee86fe787..91e04687c5 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -126,7 +126,39 @@ exec_run( const struct sp_fragment_shader *base, (float)quad->input.x0, (float)quad->input.y0, &machine->QuadPos); - return tgsi_exec_machine_run( machine ); + quad->inout.mask &= tgsi_exec_machine_run( machine ); + if (quad->inout.mask == 0) + return FALSE; + + /* store outputs */ + { + const ubyte *sem_name = base->info.output_semantic_name; + const ubyte *sem_index = base->info.output_semantic_index; + const uint n = base->info.num_outputs; + uint i; + for (i = 0; i < n; i++) { + switch (sem_name[i]) { + case TGSI_SEMANTIC_COLOR: + { + uint cbuf = sem_index[i]; + memcpy(quad->output.color[cbuf], + &machine->Outputs[i].xyzw[0].f[0], + sizeof(quad->output.color[0]) ); + } + break; + case TGSI_SEMANTIC_POSITION: + { + uint j; + for (j = 0; j < 4; j++) { + quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j]; + } + } + break; + } + } + } + + return TRUE; } diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index f4fa0905d7..364bb94a5f 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -104,7 +104,40 @@ fs_sse_run( const struct sp_fragment_shader *base, // , &machine->QuadPos ); - return ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]); + quad->inout.mask &= ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]); + if (quad->inout.mask == 0) + return FALSE; + + + /* store outputs */ + { + const ubyte *sem_name = shader->base.info.output_semantic_name; + const ubyte *sem_index = shader->base.info.output_semantic_index; + const uint n = shader->base.info.num_outputs; + uint i; + for (i = 0; i < n; i++) { + switch (sem_name[i]) { + case TGSI_SEMANTIC_COLOR: + { + uint cbuf = sem_index[i]; + memcpy(quad->output.color[cbuf], + &machine->Outputs[i].xyzw[0].f[0], + sizeof(quad->output.color[0]) ); + } + break; + case TGSI_SEMANTIC_POSITION: + { + uint j; + for (j = 0; j < 4; j++) { + quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j]; + } + } + break; + } + } + } + + return TRUE; } diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index e1bc0712de..1e7533d0f9 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -68,7 +68,7 @@ quad_shade_stage(struct quad_stage *qs) /** * Execute fragment shader for the four fragments in the quad. */ -static boolean +static INLINE boolean shade_quad(struct quad_stage *qs, struct quad_header *quad) { struct quad_shade_stage *qss = quad_shade_stage( qs ); @@ -76,39 +76,7 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad) struct tgsi_exec_machine *machine = qss->machine; /* run shader */ - quad->inout.mask &= softpipe->fs->run( softpipe->fs, machine, quad ); - if (quad->inout.mask == 0) - return FALSE; - - /* store outputs */ - { - const ubyte *sem_name = softpipe->fs->info.output_semantic_name; - const ubyte *sem_index = softpipe->fs->info.output_semantic_index; - const uint n = qss->stage.softpipe->fs->info.num_outputs; - uint i; - for (i = 0; i < n; i++) { - switch (sem_name[i]) { - case TGSI_SEMANTIC_COLOR: - { - uint cbuf = sem_index[i]; - memcpy(quad->output.color[cbuf], - &machine->Outputs[i].xyzw[0].f[0], - sizeof(quad->output.color[0]) ); - } - break; - case TGSI_SEMANTIC_POSITION: - { - uint j; - for (j = 0; j < 4; j++) { - quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j]; - } - } - break; - } - } - } - - return TRUE; + return softpipe->fs->run( softpipe->fs, machine, quad ); } -- cgit v1.2.3 From b5c389721aec09c260789e6371910937f15ef1a0 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 11 Aug 2009 18:03:01 +0100 Subject: softpipe: remove gallivm fragment shaders However we do llvm integration, it will be different & more comprehensive than this. --- src/gallium/drivers/softpipe/Makefile | 1 - src/gallium/drivers/softpipe/sp_fs_llvm.c | 205 ----------------------------- src/gallium/drivers/softpipe/sp_state_fs.c | 7 +- 3 files changed, 2 insertions(+), 211 deletions(-) delete mode 100644 src/gallium/drivers/softpipe/sp_fs_llvm.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 48522abe98..a6ed7ea6a2 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -6,7 +6,6 @@ LIBNAME = softpipe C_SOURCES = \ sp_fs_exec.c \ sp_fs_sse.c \ - sp_fs_llvm.c \ sp_clear.c \ sp_flush.c \ sp_query.c \ diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c deleted file mode 100644 index 95c0d982d1..0000000000 --- a/src/gallium/drivers/softpipe/sp_fs_llvm.c +++ /dev/null @@ -1,205 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Execute fragment shader using LLVM code generation. - * Authors: - * Zack Rusin - */ - -#include "sp_context.h" -#include "sp_state.h" -#include "sp_fs.h" - -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "util/u_memory.h" -#include "tgsi/tgsi_sse2.h" - -#if 0 - -/** - * Subclass of sp_fragment_shader - */ -struct sp_llvm_fragment_shader -{ - struct sp_fragment_shader base; - struct gallivm_prog *llvm_prog; -}; - - -static void -shade_quad_llvm(struct quad_stage *qs, - struct quad_header *quad) -{ - struct quad_shade_stage *qss = quad_shade_stage(qs); - struct softpipe_context *softpipe = qs->softpipe; - float dests[4][16][4] ALIGN16_ATTRIB; - float inputs[4][16][4] ALIGN16_ATTRIB; - const float fx = (float) quad->x0; - const float fy = (float) quad->y0; - struct gallivm_prog *llvm = qss->llvm_prog; - - inputs[0][0][0] = fx; - inputs[1][0][0] = fx + 1.0f; - inputs[2][0][0] = fx; - inputs[3][0][0] = fx + 1.0f; - - inputs[0][0][1] = fy; - inputs[1][0][1] = fy; - inputs[2][0][1] = fy + 1.0f; - inputs[3][0][1] = fy + 1.0f; - - - gallivm_prog_inputs_interpolate(llvm, inputs, quad->coef); - -#if DLLVM - debug_printf("MASK = %d\n", quad->mask); - for (int i = 0; i < 4; ++i) { - for (int j = 0; j < 2; ++j) { - debug_printf("IN(%d,%d) [%f %f %f %f]\n", i, j, - inputs[i][j][0], inputs[i][j][1], inputs[i][j][2], inputs[i][j][3]); - } - } -#endif - - quad->mask &= - gallivm_fragment_shader_exec(llvm, fx, fy, dests, inputs, - softpipe->mapped_constants[PIPE_SHADER_FRAGMENT], - qss->samplers); -#if DLLVM - debug_printf("OUT LLVM = 1[%f %f %f %f], 2[%f %f %f %f]\n", - dests[0][0][0], dests[0][0][1], dests[0][0][2], dests[0][0][3], - dests[0][1][0], dests[0][1][1], dests[0][1][2], dests[0][1][3]); -#endif - - /* store result color */ - if (qss->colorOutSlot >= 0) { - unsigned i; - /* XXX need to handle multiple color outputs someday */ - allvmrt(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] - == TGSI_SEMANTIC_COLOR); - for (i = 0; i < QUAD_SIZE; ++i) { - quad->outputs.color[0][0][i] = dests[i][qss->colorOutSlot][0]; - quad->outputs.color[0][1][i] = dests[i][qss->colorOutSlot][1]; - quad->outputs.color[0][2][i] = dests[i][qss->colorOutSlot][2]; - quad->outputs.color[0][3][i] = dests[i][qss->colorOutSlot][3]; - } - } -#if DLLVM - for (int i = 0; i < QUAD_SIZE; ++i) { - debug_printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot, - quad->outputs.color[0][0][i], - quad->outputs.color[0][1][i], - quad->outputs.color[0][2][i], - quad->outputs.color[0][3][i]); - } -#endif - - /* store result Z */ - if (qss->depthOutSlot >= 0) { - /* output[slot] is new Z */ - uint i; - for (i = 0; i < 4; i++) { - quad->outputs.depth[i] = dests[i][0][2]; - } - } - else { - /* copy input Z (which was interpolated by the executor) to output Z */ - uint i; - for (i = 0; i < 4; i++) { - quad->outputs.depth[i] = inputs[i][0][2]; - } - } -#if DLLVM - debug_printf("D [%f, %f, %f, %f] mask = %d\n", - quad->outputs.depth[0], - quad->outputs.depth[1], - quad->outputs.depth[2], - quad->outputs.depth[3], quad->mask); -#endif - - /* shader may cull fragments */ - if( quad->mask ) { - qs->next->run( qs->next, quad ); - } -} - - -unsigned -run_llvm_fs( const struct sp_fragment_shader *base, - struct foo *machine ) -{ -} - - -void -delete_llvm_fs( struct sp_fragment_shader *base ) -{ - FREE(base); -} - - -struct sp_fragment_shader * -softpipe_create_fs_llvm(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ) -{ - struct sp_llvm_fragment_shader *shader = NULL; - - /* LLVM fragment shaders currently disabled: - */ - state = CALLOC_STRUCT(sp_llvm_shader_state); - if (!state) - return NULL; - - state->llvm_prog = 0; - - if (!gallivm_global_cpu_engine()) { - gallivm_cpu_engine_create(state->llvm_prog); - } - else - gallivm_cpu_jit_compile(gallivm_global_cpu_engine(), state->llvm_prog); - - if (shader) { - shader->base.run = run_llvm_fs; - shader->base.delete = delete_llvm_fs; - } - - return shader; -} - - -#else - -struct sp_fragment_shader * -softpipe_create_fs_llvm(struct softpipe_context *softpipe, - const struct pipe_shader_state *templ) -{ - return NULL; -} - -#endif diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 4330c20393..108ac8b9bb 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -51,12 +51,9 @@ softpipe_create_fs_state(struct pipe_context *pipe, tgsi_dump(templ->tokens, 0); /* codegen */ - state = softpipe_create_fs_llvm( softpipe, templ ); + state = softpipe_create_fs_sse( softpipe, templ ); if (!state) { - state = softpipe_create_fs_sse( softpipe, templ ); - if (!state) { - state = softpipe_create_fs_exec( softpipe, templ ); - } + state = softpipe_create_fs_exec( softpipe, templ ); } assert(state); -- cgit v1.2.3 From da319095f2ca8869657ebda0db54eb9b2f7393ce Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 11 Aug 2009 18:06:16 +0100 Subject: softpipe: reduce textual differences between exec and sse shader paths Unshare one function (setup_pos_vector) as we want to push this code into the generated shader in the SSE case. --- src/gallium/drivers/softpipe/sp_fs_exec.c | 51 ++++++++++++++++--------------- src/gallium/drivers/softpipe/sp_fs_sse.c | 50 +++++++++++++++++++++++++----- 2 files changed, 69 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 91e04687c5..c469ac6340 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -59,15 +59,34 @@ sp_exec_fragment_shader(const struct sp_fragment_shader *base) } +static void +exec_prepare( const struct sp_fragment_shader *base, + struct tgsi_exec_machine *machine, + struct tgsi_sampler **samplers ) +{ + /* + * Bind tokens/shader to the interpreter's machine state. + * Avoid redundant binding. + */ + if (machine->Tokens != base->shader.tokens) { + tgsi_exec_machine_bind_shader( machine, + base->shader.tokens, + PIPE_MAX_SAMPLERS, + samplers ); + } +} + + + /** * Compute quad X,Y,Z,W for the four fragments in a quad. * * This should really be part of the compiled shader. */ -void -sp_setup_pos_vector(const struct tgsi_interp_coef *coef, - float x, float y, - struct tgsi_exec_vector *quadpos) +static void +setup_pos_vector(const struct tgsi_interp_coef *coef, + float x, float y, + struct tgsi_exec_vector *quadpos) { uint chan; /* do X */ @@ -95,24 +114,6 @@ sp_setup_pos_vector(const struct tgsi_interp_coef *coef, } -static void -exec_prepare( const struct sp_fragment_shader *base, - struct tgsi_exec_machine *machine, - struct tgsi_sampler **samplers ) -{ - /* - * Bind tokens/shader to the interpreter's machine state. - * Avoid redundant binding. - */ - if (machine->Tokens != base->shader.tokens) { - tgsi_exec_machine_bind_shader( machine, - base->shader.tokens, - PIPE_MAX_SAMPLERS, - samplers ); - } -} - - /* TODO: hide the machine struct in here somewhere, remove from this * interface: */ @@ -122,9 +123,9 @@ exec_run( const struct sp_fragment_shader *base, struct quad_header *quad ) { /* Compute X, Y, Z, W vals for this quad */ - sp_setup_pos_vector(quad->posCoef, - (float)quad->input.x0, (float)quad->input.y0, - &machine->QuadPos); + setup_pos_vector(quad->posCoef, + (float)quad->input.x0, (float)quad->input.y0, + &machine->QuadPos); quad->inout.mask &= tgsi_exec_machine_run( machine ); if (quad->inout.mask == 0) diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 364bb94a5f..9d3e4670ee 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -76,6 +76,43 @@ fs_sse_prepare( const struct sp_fragment_shader *base, } + +/** + * Compute quad X,Y,Z,W for the four fragments in a quad. + * + * This should really be part of the compiled shader. + */ +static void +setup_pos_vector(const struct tgsi_interp_coef *coef, + float x, float y, + struct tgsi_exec_vector *quadpos) +{ + uint chan; + /* do X */ + quadpos->xyzw[0].f[0] = x; + quadpos->xyzw[0].f[1] = x + 1; + quadpos->xyzw[0].f[2] = x; + quadpos->xyzw[0].f[3] = x + 1; + + /* do Y */ + quadpos->xyzw[1].f[0] = y; + quadpos->xyzw[1].f[1] = y; + quadpos->xyzw[1].f[2] = y + 1; + quadpos->xyzw[1].f[3] = y + 1; + + /* do Z and W for all fragments in the quad */ + for (chan = 2; chan < 4; chan++) { + const float dadx = coef->dadx[chan]; + const float dady = coef->dady[chan]; + const float a0 = coef->a0[chan] + dadx * x + dady * y; + quadpos->xyzw[chan].f[0] = a0; + quadpos->xyzw[chan].f[1] = a0 + dadx; + quadpos->xyzw[chan].f[2] = a0 + dady; + quadpos->xyzw[chan].f[3] = a0 + dadx + dady; + } +} + + /* TODO: codegenerate the whole run function, skip this wrapper. * TODO: break dependency on tgsi_exec_machine struct * TODO: push Position calculation into the generated shader @@ -89,9 +126,9 @@ fs_sse_run( const struct sp_fragment_shader *base, struct sp_sse_fragment_shader *shader = sp_sse_fragment_shader(base); /* Compute X, Y, Z, W vals for this quad -- place in temp[0] for now */ - sp_setup_pos_vector(quad->posCoef, - (float)quad->input.x0, (float)quad->input.y0, - machine->Temps); + setup_pos_vector(quad->posCoef, + (float)quad->input.x0, (float)quad->input.y0, + machine->Temps); /* init kill mask */ tgsi_set_kill_mask(machine, 0x0); @@ -108,12 +145,11 @@ fs_sse_run( const struct sp_fragment_shader *base, if (quad->inout.mask == 0) return FALSE; - /* store outputs */ { - const ubyte *sem_name = shader->base.info.output_semantic_name; - const ubyte *sem_index = shader->base.info.output_semantic_index; - const uint n = shader->base.info.num_outputs; + const ubyte *sem_name = base->info.output_semantic_name; + const ubyte *sem_index = base->info.output_semantic_index; + const uint n = base->info.num_outputs; uint i; for (i = 0; i < n; i++) { switch (sem_name[i]) { -- cgit v1.2.3 From 99ec78d9462d2a553982d0ea15d538b36b1c123b Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 11 Aug 2009 18:23:28 +0100 Subject: Revert "softpipe: rearrange blend fastpaths" This reverts commit 1295cf423e21dad04a947960782ffa8db2739709. The original formulation was easier to understand & work with. Will revisit this later. --- src/gallium/drivers/softpipe/sp_quad_blend.c | 82 +++++++++++++++++++--------- 1 file changed, 55 insertions(+), 27 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index b8ed086734..e243c63fa2 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -793,7 +793,10 @@ blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs, struct quad_header *quads[], unsigned nr) { - float source[4]; + static const float one[4] = { 1, 1, 1, 1 }; + float one_minus_alpha[QUAD_SIZE]; + float dest[4][QUAD_SIZE]; + float source[4][QUAD_SIZE]; uint i, j, q; struct softpipe_cached_tile *tile @@ -803,26 +806,45 @@ blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs, for (q = 0; q < nr; q++) { struct quad_header *quad = quads[q]; + float (*quadColor)[4] = quad->output.color[0]; + const float *alpha = quadColor[3]; const int itx = (quad->input.x0 & (TILE_SIZE-1)); const int ity = (quad->input.y0 & (TILE_SIZE-1)); - float (*swzColor)[4] = quad->output.color[0]; - for (j = 0; j < 4; j++) { - if (quad->inout.mask & (1<data.color[ity + (j>>1)][itx + (j&1)]; - const float alpha = swzColor[3][j]; - const float one_minus_alpha = 1.0 - alpha; + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } + } - for (i = 0; i < 4; i++) { - dest[i] *= one_minus_alpha; - dest[i] += swzColor[i][j] * alpha; + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + VEC4_MUL(source[3], quadColor[3], alpha); /* A */ + + VEC4_SUB(one_minus_alpha, one, alpha); + VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ + VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ + VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ + VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* B */ + + VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */ + VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */ + VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */ + VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */ - /* XXX: redundant, will be clamped later for argb8 surfaces: - */ - dest[i] = CLAMP(dest[i], 0.0, 1.0); + for (j = 0; j < QUAD_SIZE; j++) { + if (quad->inout.mask & (1 << j)) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + for (i = 0; i < 4; i++) { /* loop over color chans */ + tile->data.color[y][x][i] = quadColor[i][j]; } } - } + } } } @@ -841,27 +863,33 @@ blend_single_add_one_one(struct quad_stage *qs, for (q = 0; q < nr; q++) { struct quad_header *quad = quads[q]; + float (*quadColor)[4] = quad->output.color[0]; const int itx = (quad->input.x0 & (TILE_SIZE-1)); const int ity = (quad->input.y0 & (TILE_SIZE-1)); - float (*dest)[64][4] = (float (*)[64][4])&tile->data.color[ity][itx]; - float (*swzColor)[4] = quad->output.color[0]; - float quadColor[4][4]; - + /* get/swizzle dest colors */ for (j = 0; j < QUAD_SIZE; j++) { + int x = itx + (j & 1); + int y = ity + (j >> 1); for (i = 0; i < 4; i++) { - quadColor[i][j] = swzColor[j][i]; + dest[i][j] = tile->data.color[y][x][i]; } } - if (quad->inout.mask & 1) - VEC4_ADD_SAT(dest[0][0], quadColor[0], dest[0][0]); - if (quad->inout.mask & 2) - VEC4_ADD_SAT(dest[0][1], quadColor[1], dest[0][1]); - if (quad->inout.mask & 4) - VEC4_ADD_SAT(dest[1][0], quadColor[2], dest[1][0]); - if (quad->inout.mask & 8) - VEC4_ADD_SAT(dest[1][1], quadColor[3], dest[1][1]); + VEC4_ADD_SAT(quadColor[0], quadColor[0], dest[0]); /* R */ + VEC4_ADD_SAT(quadColor[1], quadColor[1], dest[1]); /* G */ + VEC4_ADD_SAT(quadColor[2], quadColor[2], dest[2]); /* B */ + VEC4_ADD_SAT(quadColor[3], quadColor[3], dest[3]); /* A */ + + for (j = 0; j < QUAD_SIZE; j++) { + if (quad->inout.mask & (1 << j)) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + for (i = 0; i < 4; i++) { /* loop over color chans */ + tile->data.color[y][x][i] = quadColor[i][j]; + } + } + } } } -- cgit v1.2.3 From d12bae9368e0c44a9943d9b37ab848ea307d70c7 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 18 Aug 2009 16:21:12 +0100 Subject: softpipe: move flatshade-first check out of loop --- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 80 ++++++++++++++++++----------- 1 file changed, 50 insertions(+), 30 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 1dd63d99ff..76524a8d41 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -239,14 +239,16 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) break; case PIPE_PRIM_TRIANGLES: - for (i = 2; i < nr; i += 3) { - if (softpipe->rasterizer->flatshade_first) { + if (softpipe->rasterizer->flatshade_first) { + for (i = 2; i < nr; i += 3) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-2], stride) ); } - else { + } + else { + for (i = 2; i < nr; i += 3) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-1], stride), @@ -256,14 +258,16 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) break; case PIPE_PRIM_TRIANGLE_STRIP: - for (i = 2; i < nr; i += 1) { - if (softpipe->rasterizer->flatshade_first) { + if (softpipe->rasterizer->flatshade_first) { + for (i = 2; i < nr; i += 1) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i+(i&1)-1], stride), get_vert(vertex_buffer, indices[i-(i&1)], stride), get_vert(vertex_buffer, indices[i-2], stride) ); } - else { + } + else { + for (i = 2; i < nr; i += 1) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i+(i&1)-2], stride), get_vert(vertex_buffer, indices[i-(i&1)-1], stride), @@ -273,14 +277,16 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) break; case PIPE_PRIM_TRIANGLE_FAN: - for (i = 2; i < nr; i += 1) { - if (softpipe->rasterizer->flatshade_first) { + if (softpipe->rasterizer->flatshade_first) { + for (i = 2; i < nr; i += 1) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[0], stride), get_vert(vertex_buffer, indices[i-1], stride) ); } - else { + } + else { + for (i = 2; i < nr; i += 1) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[0], stride), get_vert(vertex_buffer, indices[i-1], stride), @@ -290,8 +296,8 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) break; case PIPE_PRIM_QUADS: - for (i = 3; i < nr; i += 4) { - if (softpipe->rasterizer->flatshade_first) { + if (softpipe->rasterizer->flatshade_first) { + for (i = 3; i < nr; i += 4) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-1], stride), @@ -301,7 +307,9 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-3], stride) ); } - else { + } + else { + for (i = 3; i < nr; i += 4) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-2], stride), @@ -316,8 +324,8 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) break; case PIPE_PRIM_QUAD_STRIP: - for (i = 3; i < nr; i += 2) { - if (softpipe->rasterizer->flatshade_first) { + if (softpipe->rasterizer->flatshade_first) { + for (i = 3; i < nr; i += 2) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-1], stride), @@ -327,7 +335,9 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-3], stride) ); } - else { + } + else { + for (i = 3; i < nr; i += 2) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-2], stride), @@ -423,14 +433,16 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) break; case PIPE_PRIM_TRIANGLES: - for (i = 2; i < nr; i += 3) { - if (softpipe->rasterizer->flatshade_first) { + if (softpipe->rasterizer->flatshade_first) { + for (i = 2; i < nr; i += 3) { setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-2, stride) ); } - else { + } + else { + for (i = 2; i < nr; i += 3) { setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-1, stride), @@ -440,14 +452,16 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) break; case PIPE_PRIM_TRIANGLE_STRIP: - for (i = 2; i < nr; i++) { - if (softpipe->rasterizer->flatshade_first) { + if (softpipe->rasterizer->flatshade_first) { + for (i = 2; i < nr; i++) { setup_tri( setup_ctx, get_vert(vertex_buffer, i+(i&1)-1, stride), get_vert(vertex_buffer, i-(i&1), stride), get_vert(vertex_buffer, i-2, stride) ); } - else { + } + else { + for (i = 2; i < nr; i++) { setup_tri( setup_ctx, get_vert(vertex_buffer, i+(i&1)-2, stride), get_vert(vertex_buffer, i-(i&1)-1, stride), @@ -457,14 +471,16 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) break; case PIPE_PRIM_TRIANGLE_FAN: - for (i = 2; i < nr; i += 1) { - if (softpipe->rasterizer->flatshade_first) { + if (softpipe->rasterizer->flatshade_first) { + for (i = 2; i < nr; i += 1) { setup_tri( setup_ctx, get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, 0, stride), get_vert(vertex_buffer, i-1, stride) ); } - else { + } + else { + for (i = 2; i < nr; i += 1) { setup_tri( setup_ctx, get_vert(vertex_buffer, 0, stride), get_vert(vertex_buffer, i-1, stride), @@ -474,8 +490,8 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) break; case PIPE_PRIM_QUADS: - for (i = 3; i < nr; i += 4) { - if (softpipe->rasterizer->flatshade_first) { + if (softpipe->rasterizer->flatshade_first) { + for (i = 3; i < nr; i += 4) { setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-1, stride), @@ -485,7 +501,9 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-3, stride) ); } - else { + } + else { + for (i = 3; i < nr; i += 4) { setup_tri( setup_ctx, get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-2, stride), @@ -499,8 +517,8 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) break; case PIPE_PRIM_QUAD_STRIP: - for (i = 3; i < nr; i += 2) { - if (softpipe->rasterizer->flatshade_first) { + if (softpipe->rasterizer->flatshade_first) { + for (i = 3; i < nr; i += 2) { setup_tri( setup_ctx, get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-1, stride), @@ -510,7 +528,9 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-3, stride) ); } - else { + } + else { + for (i = 3; i < nr; i += 2) { setup_tri( setup_ctx, get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-2, stride), -- cgit v1.2.3 From 80c78472ad43f4288c9ef5076074ba9d31a39885 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 29 Jul 2009 07:40:50 +0100 Subject: softpipe: split texture and surface tile caches These do similar jobs but with largely disjoint code. Will want to evolve them separately going forward. --- src/gallium/drivers/softpipe/Makefile | 1 + src/gallium/drivers/softpipe/SConscript | 1 + src/gallium/drivers/softpipe/sp_context.c | 4 +- src/gallium/drivers/softpipe/sp_context.h | 3 +- src/gallium/drivers/softpipe/sp_flush.c | 3 +- src/gallium/drivers/softpipe/sp_state_derived.c | 2 +- src/gallium/drivers/softpipe/sp_state_sampler.c | 4 +- src/gallium/drivers/softpipe/sp_tex_sample.c | 16 +- src/gallium/drivers/softpipe/sp_tex_sample.h | 2 +- src/gallium/drivers/softpipe/sp_tex_tile_cache.c | 274 +++++++++++++++++++++++ src/gallium/drivers/softpipe/sp_tex_tile_cache.h | 161 +++++++++++++ src/gallium/drivers/softpipe/sp_tile_cache.c | 171 +------------- src/gallium/drivers/softpipe/sp_tile_cache.h | 42 +--- 13 files changed, 460 insertions(+), 224 deletions(-) create mode 100644 src/gallium/drivers/softpipe/sp_tex_tile_cache.c create mode 100644 src/gallium/drivers/softpipe/sp_tex_tile_cache.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index a6ed7ea6a2..3da9be6957 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -30,6 +30,7 @@ C_SOURCES = \ sp_state_vertex.c \ sp_texture.c \ sp_tex_sample.c \ + sp_tex_tile_cache.c \ sp_tile_cache.c \ sp_surface.c diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index dcc25732ba..30c099813e 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -37,6 +37,7 @@ softpipe = env.ConvenienceLibrary( 'sp_state_vertex.c', 'sp_surface.c', 'sp_tex_sample.c', + 'sp_tex_tile_cache.c', 'sp_texture.c', 'sp_tile_cache.c', ]) diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index e35c6b3aec..396d4c6557 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -97,7 +97,7 @@ static void softpipe_destroy( struct pipe_context *pipe ) sp_destroy_tile_cache(softpipe->zsbuf_cache); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - sp_destroy_tile_cache(softpipe->tex_cache[i]); + sp_destroy_tex_tile_cache(softpipe->tex_cache[i]); for (i = 0; i < Elements(softpipe->constants); i++) { if (softpipe->constants[i].buffer) { @@ -220,7 +220,7 @@ softpipe_create( struct pipe_screen *screen ) softpipe->zsbuf_cache = sp_create_tile_cache( screen ); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - softpipe->tex_cache[i] = sp_create_tile_cache( screen ); + softpipe->tex_cache[i] = sp_create_tex_tile_cache( screen ); /* setup quad rendering stages */ diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index fa3306c020..683c3aef9b 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -43,6 +43,7 @@ struct softpipe_vbuf_render; struct draw_context; struct draw_stage; struct softpipe_tile_cache; +struct softpipe_tex_tile_cache; struct sp_fragment_shader; struct sp_vertex_shader; @@ -141,7 +142,7 @@ struct softpipe_context { struct softpipe_tile_cache *zsbuf_cache; unsigned tex_timestamp; - struct softpipe_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; + struct softpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; unsigned use_sse : 1; unsigned dump_fs : 1; diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index 679ad0cd3d..e38b767cf2 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -37,6 +37,7 @@ #include "sp_surface.h" #include "sp_state.h" #include "sp_tile_cache.h" +#include "sp_tex_tile_cache.h" #include "sp_winsys.h" @@ -52,7 +53,7 @@ softpipe_flush( struct pipe_context *pipe, if (flags & PIPE_FLUSH_TEXTURE_CACHE) { for (i = 0; i < softpipe->num_textures; i++) { - sp_flush_tile_cache(softpipe->tex_cache[i]); + sp_flush_tex_tile_cache(softpipe->tex_cache[i]); } } diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 3ed1de7e17..88a6e4fbdf 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -212,7 +212,7 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) } for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - sp_tile_cache_validate_texture( softpipe->tex_cache[i] ); + sp_tex_tile_cache_validate_texture( softpipe->tex_cache[i] ); } } diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index aa2f3f2ccd..a725925264 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -37,7 +37,7 @@ #include "sp_context.h" #include "sp_state.h" #include "sp_texture.h" -#include "sp_tile_cache.h" +#include "sp_tex_tile_cache.h" #include "draw/draw_context.h" @@ -97,7 +97,7 @@ softpipe_set_sampler_textures(struct pipe_context *pipe, struct pipe_texture *tex = i < num ? texture[i] : NULL; pipe_texture_reference(&softpipe->texture[i], tex); - sp_tile_cache_set_texture(softpipe->tex_cache[i], tex); + sp_tex_tile_cache_set_texture(softpipe->tex_cache[i], tex); } softpipe->num_textures = num; diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 4651d781a9..8bed573e8c 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -38,7 +38,7 @@ #include "sp_surface.h" #include "sp_texture.h" #include "sp_tex_sample.h" -#include "sp_tile_cache.h" +#include "sp_tex_tile_cache.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "util/u_math.h" @@ -659,7 +659,7 @@ choose_mipmap_levels(const struct pipe_texture *texture, * \param rgba the quad to put the texel/color into * \param j which element of the rgba quad to write to * - * XXX maybe move this into sp_tile_cache.c and merge with the + * XXX maybe move this into sp_tex_tile_cache.c and merge with the * sp_get_cached_tile_tex() function. Also, get 4 texels instead of 1... */ static void @@ -669,9 +669,9 @@ get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler, { const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); - const struct softpipe_cached_tile *tile + const struct softpipe_tex_cached_tile *tile = sp_get_cached_tile_tex(samp->cache, - tile_address(x, y, 0, face, level)); + tex_tile_address(x, y, 0, face, level)); y %= TILE_SIZE; x %= TILE_SIZE; @@ -688,9 +688,9 @@ get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler, { const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); - const struct softpipe_cached_tile *tile + const struct softpipe_tex_cached_tile *tile = sp_get_cached_tile_tex(samp->cache, - tile_address(x, y, 0, face, level)); + tex_tile_address(x, y, 0, face, level)); y %= TILE_SIZE; x %= TILE_SIZE; @@ -736,10 +736,10 @@ get_texel(const struct tgsi_sampler *tgsi_sampler, else { const unsigned tx = x % TILE_SIZE; const unsigned ty = y % TILE_SIZE; - const struct softpipe_cached_tile *tile; + const struct softpipe_tex_cached_tile *tile; tile = sp_get_cached_tile_tex(samp->cache, - tile_address(x, y, z, face, level)); + tex_tile_address(x, y, z, face, level)); rgba[0][j] = tile->data.color[ty][tx][0]; rgba[1][j] = tile->data.color[ty][tx][1]; diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index 0650c7830b..1756d3a4ee 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -48,7 +48,7 @@ struct sp_shader_sampler const struct pipe_texture *texture; const struct pipe_sampler_state *sampler; - struct softpipe_tile_cache *cache; + struct softpipe_tex_tile_cache *cache; }; diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c new file mode 100644 index 0000000000..c2dd68c7a2 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c @@ -0,0 +1,274 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Texture tile caching. + * + * Author: + * Brian Paul + */ + +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "util/u_tile.h" +#include "sp_context.h" +#include "sp_surface.h" +#include "sp_texture.h" +#include "sp_tex_tile_cache.h" + + + +struct softpipe_tex_tile_cache * +sp_create_tex_tile_cache( struct pipe_screen *screen ) +{ + struct softpipe_tex_tile_cache *tc; + uint pos; + + tc = CALLOC_STRUCT( softpipe_tex_tile_cache ); + if (tc) { + tc->screen = screen; + for (pos = 0; pos < NUM_ENTRIES; pos++) { + tc->entries[pos].addr.bits.invalid = 1; + } + tc->last_tile = &tc->entries[0]; /* any tile */ + } + return tc; +} + + +void +sp_destroy_tex_tile_cache(struct softpipe_tex_tile_cache *tc) +{ + struct pipe_screen *screen; + uint pos; + + for (pos = 0; pos < NUM_ENTRIES; pos++) { + /*assert(tc->entries[pos].x < 0);*/ + } + if (tc->transfer) { + screen = tc->transfer->texture->screen; + screen->tex_transfer_destroy(tc->transfer); + } + if (tc->tex_trans) { + screen = tc->tex_trans->texture->screen; + screen->tex_transfer_destroy(tc->tex_trans); + } + + FREE( tc ); +} + + + + +void +sp_tex_tile_cache_map_transfers(struct softpipe_tex_tile_cache *tc) +{ + if (tc->tex_trans && !tc->tex_trans_map) + tc->tex_trans_map = tc->screen->transfer_map(tc->screen, tc->tex_trans); +} + + +void +sp_tex_tile_cache_unmap_transfers(struct softpipe_tex_tile_cache *tc) +{ + if (tc->tex_trans_map) { + tc->screen->transfer_unmap(tc->screen, tc->tex_trans); + tc->tex_trans_map = NULL; + } +} + +void +sp_tex_tile_cache_validate_texture(struct softpipe_tex_tile_cache *tc) +{ + if (tc->texture) { + struct softpipe_texture *spt = softpipe_texture(tc->texture); + if (spt->timestamp != tc->timestamp) { + /* texture was modified, invalidate all cached tiles */ + uint i; + _debug_printf("INV %d %d\n", tc->timestamp, spt->timestamp); + for (i = 0; i < NUM_ENTRIES; i++) { + tc->entries[i].addr.bits.invalid = 1; + } + + tc->timestamp = spt->timestamp; + } + } +} + +/** + * Specify the texture to cache. + */ +void +sp_tex_tile_cache_set_texture(struct softpipe_tex_tile_cache *tc, + struct pipe_texture *texture) +{ + uint i; + + assert(!tc->transfer); + + if (tc->texture != texture) { + pipe_texture_reference(&tc->texture, texture); + + if (tc->tex_trans) { + struct pipe_screen *screen = tc->tex_trans->texture->screen; + + if (tc->tex_trans_map) { + screen->transfer_unmap(screen, tc->tex_trans); + tc->tex_trans_map = NULL; + } + + screen->tex_transfer_destroy(tc->tex_trans); + tc->tex_trans = NULL; + } + + /* mark as entries as invalid/empty */ + /* XXX we should try to avoid this when the teximage hasn't changed */ + for (i = 0; i < NUM_ENTRIES; i++) { + tc->entries[i].addr.bits.invalid = 1; + } + + tc->tex_face = -1; /* any invalid value here */ + } +} + + + + +/** + * Flush the tile cache: write all dirty tiles back to the transfer. + * any tiles "flagged" as cleared will be "really" cleared. + */ +void +sp_flush_tex_tile_cache(struct softpipe_tex_tile_cache *tc) +{ + int pos; + + if (tc->texture) { + /* caching a texture, mark all entries as empty */ + for (pos = 0; pos < NUM_ENTRIES; pos++) { + tc->entries[pos].addr.bits.invalid = 1; + } + tc->tex_face = -1; + } + +} + + +/** + * Given the texture face, level, zslice, x and y values, compute + * the cache entry position/index where we'd hope to find the + * cached texture tile. + * This is basically a direct-map cache. + * XXX There's probably lots of ways in which we can improve this. + */ +static INLINE uint +tex_cache_pos( union tex_tile_address addr ) +{ + uint entry = (addr.bits.x + + addr.bits.y * 9 + + addr.bits.z * 3 + + addr.bits.face + + addr.bits.level * 7); + + return entry % NUM_ENTRIES; +} + +/** + * Similar to sp_get_cached_tile() but for textures. + * Tiles are read-only and indexed with more params. + */ +const struct softpipe_tex_cached_tile * +sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, + union tex_tile_address addr ) +{ + struct pipe_screen *screen = tc->screen; + struct softpipe_tex_cached_tile *tile; + + tile = tc->entries + tex_cache_pos( addr ); + + if (addr.value != tile->addr.value) { + + /* cache miss. Most misses are because we've invaldiated the + * texture cache previously -- most commonly on binding a new + * texture. Currently we effectively flush the cache on texture + * bind. + */ +#if 0 + _debug_printf("miss at %u: x=%d y=%d z=%d face=%d level=%d\n" + " tile %u: x=%d y=%d z=%d face=%d level=%d\n", + pos, x/TILE_SIZE, y/TILE_SIZE, z, face, level, + pos, tile->addr.bits.x, tile->addr.bits.y, tile->z, tile->face, tile->level); +#endif + + /* check if we need to get a new transfer */ + if (!tc->tex_trans || + tc->tex_face != addr.bits.face || + tc->tex_level != addr.bits.level || + tc->tex_z != addr.bits.z) { + /* get new transfer (view into texture) */ + + if (tc->tex_trans) { + if (tc->tex_trans_map) { + tc->screen->transfer_unmap(tc->screen, tc->tex_trans); + tc->tex_trans_map = NULL; + } + + screen->tex_transfer_destroy(tc->tex_trans); + tc->tex_trans = NULL; + } + + tc->tex_trans = + screen->get_tex_transfer(screen, tc->texture, + addr.bits.face, + addr.bits.level, + addr.bits.z, + PIPE_TRANSFER_READ, 0, 0, + tc->texture->width[addr.bits.level], + tc->texture->height[addr.bits.level]); + + tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans); + + tc->tex_face = addr.bits.face; + tc->tex_level = addr.bits.level; + tc->tex_z = addr.bits.z; + } + + /* get tile from the transfer (view into texture) */ + pipe_get_tile_rgba(tc->tex_trans, + addr.bits.x * TILE_SIZE, + addr.bits.y * TILE_SIZE, + TILE_SIZE, TILE_SIZE, + (float *) tile->data.color); + tile->addr = addr; + } + + tc->last_tile = tile; + return tile; +} + + + diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h new file mode 100644 index 0000000000..c6003f3550 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h @@ -0,0 +1,161 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SP_TEX_TILE_CACHE_H +#define SP_TEX_TILE_CACHE_H + + +#include "pipe/p_compiler.h" + + +struct softpipe_context; +struct softpipe_tex_tile_cache; + + +/** + * Cache tile size (width and height). This needs to be a power of two. + */ +#define TILE_SIZE 64 + + +/* If we need to support > 4096, just expand this to be a 64 bit + * union, or consider tiling in Z as well. + */ +union tex_tile_address { + struct { + unsigned x:6; /* 4096 / TILE_SIZE */ + unsigned y:6; /* 4096 / TILE_SIZE */ + unsigned z:12; /* 4096 -- z not tiled */ + unsigned face:3; + unsigned level:4; + unsigned invalid:1; + } bits; + unsigned value; +}; + + +struct softpipe_tex_cached_tile +{ + union tex_tile_address addr; + union { + float color[TILE_SIZE][TILE_SIZE][4]; + } data; +}; + +#define NUM_ENTRIES 50 + + +/** XXX move these */ +#define MAX_WIDTH 2048 +#define MAX_HEIGHT 2048 + + +struct softpipe_tex_tile_cache +{ + struct pipe_screen *screen; + struct pipe_transfer *transfer; + void *transfer_map; + + struct pipe_texture *texture; /**< if caching a texture */ + unsigned timestamp; + + struct softpipe_tex_cached_tile entries[NUM_ENTRIES]; + + struct pipe_transfer *tex_trans; + void *tex_trans_map; + int tex_face, tex_level, tex_z; + + struct softpipe_tex_cached_tile *last_tile; /**< most recently retrieved tile */ +}; + + +extern struct softpipe_tex_tile_cache * +sp_create_tex_tile_cache( struct pipe_screen *screen ); + +extern void +sp_destroy_tex_tile_cache(struct softpipe_tex_tile_cache *tc); + + +extern void +sp_tex_tile_cache_map_transfers(struct softpipe_tex_tile_cache *tc); + +extern void +sp_tex_tile_cache_unmap_transfers(struct softpipe_tex_tile_cache *tc); + +extern void +sp_tex_tile_cache_set_texture(struct softpipe_tex_tile_cache *tc, + struct pipe_texture *texture); + +void +sp_tex_tile_cache_validate_texture(struct softpipe_tex_tile_cache *tc); + +extern void +sp_flush_tex_tile_cache(struct softpipe_tex_tile_cache *tc); + + + +extern const struct softpipe_tex_cached_tile * +sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, + union tex_tile_address addr ); + +static INLINE const union tex_tile_address +tex_tile_address( unsigned x, + unsigned y, + unsigned z, + unsigned face, + unsigned level ) +{ + union tex_tile_address addr; + + addr.value = 0; + addr.bits.x = x / TILE_SIZE; + addr.bits.y = y / TILE_SIZE; + addr.bits.z = z; + addr.bits.face = face; + addr.bits.level = level; + + return addr; +} + +/* Quickly retrieve tile if it matches last lookup. + */ +static INLINE const struct softpipe_tex_cached_tile * +sp_get_cached_tile_tex(struct softpipe_tex_tile_cache *tc, + union tex_tile_address addr ) +{ + if (tc->last_tile->addr.value == addr.value) + return tc->last_tile; + + return sp_find_cached_tile_tex( tc, addr ); +} + + + + + +#endif /* SP_TEX_TILE_CACHE_H */ + diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 77d02fa3e7..2d82badcec 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -26,7 +26,7 @@ **************************************************************************/ /** - * Texture tile caching. + * Render target tile caching. * * Author: * Brian Paul @@ -37,7 +37,6 @@ #include "util/u_tile.h" #include "sp_context.h" #include "sp_surface.h" -#include "sp_texture.h" #include "sp_tile_cache.h" @@ -111,10 +110,6 @@ sp_destroy_tile_cache(struct softpipe_tile_cache *tc) screen = tc->transfer->texture->screen; screen->tex_transfer_destroy(tc->transfer); } - if (tc->tex_trans) { - screen = tc->tex_trans->texture->screen; - screen->tex_transfer_destroy(tc->tex_trans); - } FREE( tc ); } @@ -127,8 +122,6 @@ void sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, struct pipe_surface *ps) { - assert(!tc->texture); - if (tc->transfer) { struct pipe_screen *screen = tc->transfer->texture->screen; @@ -180,9 +173,6 @@ sp_tile_cache_map_transfers(struct softpipe_tile_cache *tc) { if (tc->transfer && !tc->transfer_map) tc->transfer_map = tc->screen->transfer_map(tc->screen, tc->transfer); - - if (tc->tex_trans && !tc->tex_trans_map) - tc->tex_trans_map = tc->screen->transfer_map(tc->screen, tc->tex_trans); } @@ -193,68 +183,8 @@ sp_tile_cache_unmap_transfers(struct softpipe_tile_cache *tc) tc->screen->transfer_unmap(tc->screen, tc->transfer); tc->transfer_map = NULL; } - - if (tc->tex_trans_map) { - tc->screen->transfer_unmap(tc->screen, tc->tex_trans); - tc->tex_trans_map = NULL; - } -} - -void -sp_tile_cache_validate_texture(struct softpipe_tile_cache *tc) -{ - if (tc->texture) { - struct softpipe_texture *spt = softpipe_texture(tc->texture); - if (spt->timestamp != tc->timestamp) { - /* texture was modified, invalidate all cached tiles */ - uint i; - _debug_printf("INV %d %d\n", tc->timestamp, spt->timestamp); - for (i = 0; i < NUM_ENTRIES; i++) { - tc->entries[i].addr.bits.invalid = 1; - } - - tc->timestamp = spt->timestamp; - } - } -} - -/** - * Specify the texture to cache. - */ -void -sp_tile_cache_set_texture(struct softpipe_tile_cache *tc, - struct pipe_texture *texture) -{ - uint i; - - assert(!tc->transfer); - - if (tc->texture != texture) { - pipe_texture_reference(&tc->texture, texture); - - if (tc->tex_trans) { - struct pipe_screen *screen = tc->tex_trans->texture->screen; - - if (tc->tex_trans_map) { - screen->transfer_unmap(screen, tc->tex_trans); - tc->tex_trans_map = NULL; - } - - screen->tex_transfer_destroy(tc->tex_trans); - tc->tex_trans = NULL; - } - - /* mark as entries as invalid/empty */ - /* XXX we should try to avoid this when the teximage hasn't changed */ - for (i = 0; i < NUM_ENTRIES; i++) { - tc->entries[i].addr.bits.invalid = 1; - } - - tc->tex_face = -1; /* any invalid value here */ - } } - /** * Set pixels in a tile to the given clear color/value, float. */ @@ -345,7 +275,7 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc) /* push the tile to all positions marked as clear */ for (y = 0; y < h; y += TILE_SIZE) { for (x = 0; x < w; x += TILE_SIZE) { - union tile_address addr = tile_address(x, y, 0, 0, 0); + union tile_address addr = tile_address(x, y); if (is_clear_flag_set(tc->clear_flags, addr)) { pipe_put_tile_raw(pt, @@ -403,13 +333,6 @@ sp_flush_tile_cache(struct softpipe_tile_cache *tc) sp_tile_cache_flush_clear(tc); #endif } - else if (tc->texture) { - /* caching a texture, mark all entries as empty */ - for (pos = 0; pos < NUM_ENTRIES; pos++) { - tc->entries[pos].addr.bits.invalid = 1; - } - tc->tex_face = -1; - } #if 0 debug_printf("flushed tiles in use: %d\n", inuse); @@ -488,97 +411,7 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc, } -/** - * Given the texture face, level, zslice, x and y values, compute - * the cache entry position/index where we'd hope to find the - * cached texture tile. - * This is basically a direct-map cache. - * XXX There's probably lots of ways in which we can improve this. - */ -static INLINE uint -tex_cache_pos( union tile_address addr ) -{ - uint entry = (addr.bits.x + - addr.bits.y * 9 + - addr.bits.z * 3 + - addr.bits.face + - addr.bits.level * 7); - - return entry % NUM_ENTRIES; -} - -/** - * Similar to sp_get_cached_tile() but for textures. - * Tiles are read-only and indexed with more params. - */ -const struct softpipe_cached_tile * -sp_find_cached_tile_tex(struct softpipe_tile_cache *tc, - union tile_address addr ) -{ - struct pipe_screen *screen = tc->screen; - struct softpipe_cached_tile *tile; - - tile = tc->entries + tex_cache_pos( addr ); - - if (addr.value != tile->addr.value) { - - /* cache miss. Most misses are because we've invaldiated the - * texture cache previously -- most commonly on binding a new - * texture. Currently we effectively flush the cache on texture - * bind. - */ -#if 0 - _debug_printf("miss at %u: x=%d y=%d z=%d face=%d level=%d\n" - " tile %u: x=%d y=%d z=%d face=%d level=%d\n", - pos, x/TILE_SIZE, y/TILE_SIZE, z, face, level, - pos, tile->addr.bits.x, tile->addr.bits.y, tile->z, tile->face, tile->level); -#endif - - /* check if we need to get a new transfer */ - if (!tc->tex_trans || - tc->tex_face != addr.bits.face || - tc->tex_level != addr.bits.level || - tc->tex_z != addr.bits.z) { - /* get new transfer (view into texture) */ - - if (tc->tex_trans) { - if (tc->tex_trans_map) { - tc->screen->transfer_unmap(tc->screen, tc->tex_trans); - tc->tex_trans_map = NULL; - } - - screen->tex_transfer_destroy(tc->tex_trans); - tc->tex_trans = NULL; - } - - tc->tex_trans = - screen->get_tex_transfer(screen, tc->texture, - addr.bits.face, - addr.bits.level, - addr.bits.z, - PIPE_TRANSFER_READ, 0, 0, - tc->texture->width[addr.bits.level], - tc->texture->height[addr.bits.level]); - - tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans); - tc->tex_face = addr.bits.face; - tc->tex_level = addr.bits.level; - tc->tex_z = addr.bits.z; - } - - /* get tile from the transfer (view into texture) */ - pipe_get_tile_rgba(tc->tex_trans, - addr.bits.x * TILE_SIZE, - addr.bits.y * TILE_SIZE, - TILE_SIZE, TILE_SIZE, - (float *) tile->data.color); - tile->addr = addr; - } - - tc->last_tile = tile; - return tile; -} /** diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index ac2aae5875..3b0be274d5 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -51,10 +51,8 @@ union tile_address { struct { unsigned x:6; /* 4096 / TILE_SIZE */ unsigned y:6; /* 4096 / TILE_SIZE */ - unsigned z:12; /* 4096 -- z not tiled */ - unsigned face:3; - unsigned level:4; unsigned invalid:1; + unsigned pad:19; } bits; unsigned value; }; @@ -88,19 +86,12 @@ struct softpipe_tile_cache struct pipe_transfer *transfer; void *transfer_map; - struct pipe_texture *texture; /**< if caching a texture */ - unsigned timestamp; - struct softpipe_cached_tile entries[NUM_ENTRIES]; uint clear_flags[(MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32]; float clear_color[4]; /**< for color bufs */ uint clear_val; /**< for z+stencil, or packed color clear value */ boolean depth_stencil; /**< Is the surface a depth/stencil format? */ - struct pipe_transfer *tex_trans; - void *tex_trans_map; - int tex_face, tex_level, tex_z; - struct softpipe_cached_tile tile; /**< scratch tile for clears */ struct softpipe_cached_tile *last_tile; /**< most recently retrieved tile */ @@ -126,13 +117,6 @@ sp_tile_cache_map_transfers(struct softpipe_tile_cache *tc); extern void sp_tile_cache_unmap_transfers(struct softpipe_tile_cache *tc); -extern void -sp_tile_cache_set_texture(struct softpipe_tile_cache *tc, - struct pipe_texture *texture); - -void -sp_tile_cache_validate_texture(struct softpipe_tile_cache *tc); - extern void sp_flush_tile_cache(struct softpipe_tile_cache *tc); @@ -144,47 +128,27 @@ extern struct softpipe_cached_tile * sp_find_cached_tile(struct softpipe_tile_cache *tc, union tile_address addr ); -extern const struct softpipe_cached_tile * -sp_find_cached_tile_tex(struct softpipe_tile_cache *tc, - union tile_address addr ); static INLINE const union tile_address tile_address( unsigned x, - unsigned y, - unsigned z, - unsigned face, - unsigned level ) + unsigned y ) { union tile_address addr; addr.value = 0; addr.bits.x = x / TILE_SIZE; addr.bits.y = y / TILE_SIZE; - addr.bits.z = z; - addr.bits.face = face; - addr.bits.level = level; return addr; } /* Quickly retrieve tile if it matches last lookup. */ -static INLINE const struct softpipe_cached_tile * -sp_get_cached_tile_tex(struct softpipe_tile_cache *tc, - union tile_address addr ) -{ - if (tc->last_tile->addr.value == addr.value) - return tc->last_tile; - - return sp_find_cached_tile_tex( tc, addr ); -} - - static INLINE struct softpipe_cached_tile * sp_get_cached_tile(struct softpipe_tile_cache *tc, int x, int y ) { - union tile_address addr = tile_address( x, y, 0, 0, 0 ); + union tile_address addr = tile_address( x, y ); if (tc->last_tile->addr.value == addr.value) return tc->last_tile; -- cgit v1.2.3 From c84abe36a93312cfa061ce1bd005e43eb9f6a6df Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 29 Jul 2009 23:06:22 +0100 Subject: softpipe: fix typo in clear_tile --- src/gallium/drivers/softpipe/sp_tile_cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 2d82badcec..c520aef44f 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -225,7 +225,7 @@ clear_tile(struct softpipe_cached_tile *tile, switch (pf_get_size(format)) { case 1: - memset(tile->data.any, 0, TILE_SIZE * TILE_SIZE); + memset(tile->data.any, clear_value, TILE_SIZE * TILE_SIZE); break; case 2: if (clear_value == 0) { -- cgit v1.2.3 From 4f409da3456070946eda2d8ff5153b3b4306bb46 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 20 Aug 2009 11:25:20 +0100 Subject: softpipe: optimized path for simple mipmap sampling linear-mip-linear-repeat-POT sampling faspath, provides a very nice speedup to apps that do this common type of texturing. Test case: demos/terrain, turn fog off, turn texturing on. Without patch: 12 fps With patch: 20 fps. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 95 +++++++++++++++++++++------- 1 file changed, 71 insertions(+), 24 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 4651d781a9..b32ac9dabb 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -921,8 +921,8 @@ sp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, } else { - unsigned x1 = (uflr + 1) & (xpot - 1); - unsigned y1 = (vflr + 1) & (ypot - 1); + unsigned x1 = (x0 + 1) & (xpot - 1); + unsigned y1 = (y0 + 1) & (ypot - 1); get_texel_quad_2d_mt(tgsi_sampler, 0, level, x0, y0, x1, y1, tx); } @@ -1028,42 +1028,92 @@ sp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); const struct pipe_texture *texture = samp->texture; const struct pipe_sampler_state *sampler = samp->sampler; - int level0, level1; + int level0; float lambda; lambda = compute_lambda(texture, sampler, s, t, p, lodbias); level0 = (int)lambda; - level1 = level0 + 1; if (lambda < 0.0) { samp->level = 0; sp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, lodbias, rgba ); + s, t, p, 0, rgba ); } else if (level0 >= texture->last_level) { samp->level = texture->last_level; sp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, lodbias, rgba ); + s, t, p, 0, rgba ); } else { - float rgba0[4][4]; - float rgba1[4][4]; - int c,j; - float levelBlend = lambda - level0; /* blending weight between levels */ + unsigned xpot = 1 << (samp->xpot - level0); + unsigned ypot = 1 << (samp->ypot - level0); + unsigned xpot1 = 1 << (samp->xpot - (level0+1)); + unsigned ypot1 = 1 << (samp->ypot - (level0+1)); + unsigned j; - samp->level = level0; - sp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, lodbias, rgba0 ); + for (j = 0; j < QUAD_SIZE; j++) { + int c; + + float u = s[j] * xpot - 0.5F; + float v = t[j] * ypot - 0.5F; - samp->level++; - sp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, lodbias, rgba1 ); + int uflr = util_ifloor(u); + int vflr = util_ifloor(v); + + float xw = u - (float)uflr; + float yw = v - (float)vflr; + + int x0 = uflr & (xpot - 1); + int y0 = vflr & (ypot - 1); + + const float *tx0[4]; + const float *tx1[4]; + + if (x0 % TILE_SIZE != TILE_SIZE-1 && + y0 % TILE_SIZE != TILE_SIZE-1) + { + get_texel_quad_2d(tgsi_sampler, 0, level0, x0, y0, tx0); + } + else + { + unsigned x1 = (x0 + 1) & (xpot - 1); + unsigned y1 = (y0 + 1) & (ypot - 1); + get_texel_quad_2d_mt(tgsi_sampler, 0, level0, + x0, y0, x1, y1, tx0); + } - for (c = 0; c < 4; c++) - for (j = 0; j < 4; j++) - rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]); - } + x0 /= 2; + y0 /= 2; + /* also need to adjust xw, yw?? */ + + if (x0 % TILE_SIZE != TILE_SIZE-1 && + y0 % TILE_SIZE != TILE_SIZE-1) + { + get_texel_quad_2d(tgsi_sampler, 0, level0+1, x0, y0, tx1); + } + else + { + unsigned x1 = (x0 + 1) & (xpot1 - 1); + unsigned y1 = (y0 + 1) & (ypot1 - 1); + get_texel_quad_2d_mt(tgsi_sampler, 0, level0+1, + x0, y0, x1, y1, tx1); + } + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + float rgba0 = lerp_2d(xw, yw, + tx0[0][c], tx0[1][c], + tx0[2][c], tx0[3][c]); + + float rgba1 = lerp_2d(xw, yw, + tx1[0][c], tx1[1][c], + tx1[2][c], tx1[3][c]); + + rgba[c][j] = lerp(levelBlend, rgba0, rgba1); + } + } + } } /** @@ -1567,10 +1617,7 @@ sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler, if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { switch (sampler->min_img_filter) { case PIPE_TEX_FILTER_LINEAR: - /* This one not working yet: - */ - if (0) - tgsi_sampler->get_samples = sp_get_samples_2d_linear_mip_linear_repeat_POT; + tgsi_sampler->get_samples = sp_get_samples_2d_linear_mip_linear_repeat_POT; break; default: break; -- cgit v1.2.3 From 79a7ddb57a04cde5a4a0c27eb4a9b6889d12622a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 20 Aug 2009 15:46:51 +0100 Subject: softpipe: fix glitch in texel lookups on fastpaths Fixes two issues - firstly for mipmap levels with one or more dimensions smaller than tilesize, the code was sampling off the edge of the texture (but still within the tile). Secondly, in the linear_mipmap_linear case, both the default code and new fastpath were incorrect. This change fixes the fastpath and adds a comment to the default path, which still needs to be fixed. Basically the issue is that the coordinates in the smaller texture level are/were being computed by just dividing thecoordinates from the larger texture level by two, as in: x0[j] /= 2; y0[j] /= 2; x1[j] /= 2; y1[j] /= 2; The issues with this are signficant. Initially x1 is most often equal to x0+1, but after this, it will likely be equal to x0, so we will not actually be performing the linear blend within the smaller mipmap. The fastpath code avoided this (recalculated x1), but was still using the weighting factors from the larger mipmap level (xw, yw), which were incorrect. Change the fastpath code to do two full, independent linear samples of the two mipmap levels before blending. The default code needs to do the same thing. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 87 ++++++++-------------------- 1 file changed, 23 insertions(+), 64 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index b32ac9dabb..24f3311772 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -894,6 +894,9 @@ sp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, unsigned xpot = 1 << (samp->xpot - level); unsigned ypot = 1 << (samp->ypot - level); + unsigned xmax = MIN2(TILE_SIZE, xpot) - 1; + unsigned ymax = MIN2(TILE_SIZE, ypot) - 1; + for (j = 0; j < QUAD_SIZE; j++) { int c; @@ -914,8 +917,7 @@ sp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, /* Can we fetch all four at once: */ - if (x0 % TILE_SIZE != TILE_SIZE-1 && - y0 % TILE_SIZE != TILE_SIZE-1) + if (x0 < xmax && y0 < ymax) { get_texel_quad_2d(tgsi_sampler, 0, level, x0, y0, tx); } @@ -1045,72 +1047,22 @@ sp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler s, t, p, 0, rgba ); } else { - float levelBlend = lambda - level0; /* blending weight between levels */ - unsigned xpot = 1 << (samp->xpot - level0); - unsigned ypot = 1 << (samp->ypot - level0); - unsigned xpot1 = 1 << (samp->xpot - (level0+1)); - unsigned ypot1 = 1 << (samp->ypot - (level0+1)); - unsigned j; - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot - 0.5F; - float v = t[j] * ypot - 0.5F; - - int uflr = util_ifloor(u); - int vflr = util_ifloor(v); - - float xw = u - (float)uflr; - float yw = v - (float)vflr; - - int x0 = uflr & (xpot - 1); - int y0 = vflr & (ypot - 1); - - const float *tx0[4]; - const float *tx1[4]; - - if (x0 % TILE_SIZE != TILE_SIZE-1 && - y0 % TILE_SIZE != TILE_SIZE-1) - { - get_texel_quad_2d(tgsi_sampler, 0, level0, x0, y0, tx0); - } - else - { - unsigned x1 = (x0 + 1) & (xpot - 1); - unsigned y1 = (y0 + 1) & (ypot - 1); - get_texel_quad_2d_mt(tgsi_sampler, 0, level0, - x0, y0, x1, y1, tx0); - } + float levelBlend = lambda - level0; + float rgba0[4][4]; + float rgba1[4][4]; + int c,j; - x0 /= 2; - y0 /= 2; - /* also need to adjust xw, yw?? */ + samp->level = level0; + sp_get_samples_2d_linear_repeat_POT( tgsi_sampler, + s, t, p, 0, rgba0 ); - if (x0 % TILE_SIZE != TILE_SIZE-1 && - y0 % TILE_SIZE != TILE_SIZE-1) - { - get_texel_quad_2d(tgsi_sampler, 0, level0+1, x0, y0, tx1); - } - else - { - unsigned x1 = (x0 + 1) & (xpot1 - 1); - unsigned y1 = (y0 + 1) & (ypot1 - 1); - get_texel_quad_2d_mt(tgsi_sampler, 0, level0+1, - x0, y0, x1, y1, tx1); - } + samp->level = level0+1; + sp_get_samples_2d_linear_repeat_POT( tgsi_sampler, + s, t, p, 0, rgba1 ); - /* interpolate R, G, B, A */ + for (j = 0; j < QUAD_SIZE; j++) { for (c = 0; c < 4; c++) { - float rgba0 = lerp_2d(xw, yw, - tx0[0][c], tx0[1][c], - tx0[2][c], tx0[3][c]); - - float rgba1 = lerp_2d(xw, yw, - tx1[0][c], tx1[1][c], - tx1[2][c], tx1[3][c]); - - rgba[c][j] = lerp(levelBlend, rgba0, rgba1); + rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]); } } } @@ -1209,6 +1161,13 @@ sp_get_samples_2d_common(const struct tgsi_sampler *tgsi_sampler, if (level0 != level1) { /* get texels from second mipmap level and blend */ float rgba2[4][4]; + + /* XXX: This is incorrect -- will often end up with (x0 + * == x1 && y0 == y1), meaning that we fetch the same + * texel four times and linearly interpolate between + * identical values. The correct approach would be to + * call linear_texcoord again for the second level. + */ x0[j] /= 2; y0[j] /= 2; x1[j] /= 2; -- cgit v1.2.3 From 1fd40e506c2207664f0c3f435e4614472ea4c540 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 20 Aug 2009 18:12:44 +0100 Subject: softpipe: slightly optimized tiling calculation --- src/gallium/drivers/softpipe/sp_tex_sample.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 24f3311772..90371d6353 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -893,9 +893,8 @@ sp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, unsigned level = samp->level; unsigned xpot = 1 << (samp->xpot - level); unsigned ypot = 1 << (samp->ypot - level); - - unsigned xmax = MIN2(TILE_SIZE, xpot) - 1; - unsigned ymax = MIN2(TILE_SIZE, ypot) - 1; + unsigned xmax = (xpot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, xpot) - 1; */ + unsigned ymax = (ypot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, ypot) - 1; */ for (j = 0; j < QUAD_SIZE; j++) { int c; -- cgit v1.2.3 From 0d9979d9ec5b931856d29c4ec44edb1f4931d1ac Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 20 Aug 2009 18:13:25 +0100 Subject: softpipe: fix xpot calculation typo in sp_get_samples_2d_nearest_clamp_POT --- src/gallium/drivers/softpipe/sp_tex_sample.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 90371d6353..2987548fb3 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -985,8 +985,8 @@ sp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); unsigned j; unsigned level = samp->level; - unsigned xpot = (1<xpot); - unsigned ypot = (1<ypot); + unsigned xpot = 1 << (samp->xpot - level); + unsigned ypot = 1 << (samp->ypot - level); for (j = 0; j < QUAD_SIZE; j++) { int c; -- cgit v1.2.3 From 00c835918259f8d41c3f74eca679a972713b11b2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 20 Aug 2009 18:36:57 +0100 Subject: softpipe: allow the existing sampler routines to be hooked up directly Let eg. sp_get_samples_rect be hooked directly in as the tgsi sampler routine. Add a field to determine whether this is a vertex or fragment sampling call, and massage parameters to match the tgsi call. --- src/gallium/drivers/softpipe/sp_context.c | 6 +- src/gallium/drivers/softpipe/sp_state_derived.c | 3 +- src/gallium/drivers/softpipe/sp_tex_sample.c | 193 ++++++++++-------------- src/gallium/drivers/softpipe/sp_tex_sample.h | 21 +-- 4 files changed, 94 insertions(+), 129 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index e35c6b3aec..a0196955c8 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -230,14 +230,16 @@ softpipe_create( struct pipe_screen *screen ) /* vertex shader samplers */ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - softpipe->tgsi.vert_samplers[i].base.get_samples = sp_get_samples_vertex; + softpipe->tgsi.vert_samplers[i].base.get_samples = sp_get_samples; + softpipe->tgsi.vert_samplers[i].processor = TGSI_PROCESSOR_VERTEX; softpipe->tgsi.vert_samplers[i].cache = softpipe->tex_cache[i]; softpipe->tgsi.vert_samplers_list[i] = &softpipe->tgsi.vert_samplers[i]; } /* fragment shader samplers */ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - softpipe->tgsi.frag_samplers[i].base.get_samples = sp_get_samples_fragment; + softpipe->tgsi.frag_samplers[i].base.get_samples = sp_get_samples; + softpipe->tgsi.frag_samplers[i].processor = TGSI_PROCESSOR_FRAGMENT; softpipe->tgsi.frag_samplers[i].cache = softpipe->tex_cache[i]; softpipe->tgsi.frag_samplers_list[i] = &softpipe->tgsi.frag_samplers[i]; } diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 3ed1de7e17..1f6e2ccb83 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -202,13 +202,14 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { softpipe->tgsi.vert_samplers[i].sampler = softpipe->sampler[i]; softpipe->tgsi.vert_samplers[i].texture = softpipe->texture[i]; + softpipe->tgsi.frag_samplers[i].base.get_samples = sp_get_samples; } /* fragment shader samplers */ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { softpipe->tgsi.frag_samplers[i].sampler = softpipe->sampler[i]; softpipe->tgsi.frag_samplers[i].texture = softpipe->texture[i]; - softpipe->tgsi.frag_samplers[i].base.get_samples = sp_get_samples_fragment; + softpipe->tgsi.frag_samplers[i].base.get_samples = sp_get_samples; } for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 2987548fb3..6c75158d59 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -41,6 +41,7 @@ #include "sp_tile_cache.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -521,15 +522,20 @@ choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) * This is only done for fragment shaders, not vertex shaders. */ static float -compute_lambda(const struct pipe_texture *tex, - const struct pipe_sampler_state *sampler, +compute_lambda(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], float lodbias) { + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; float rho, lambda; + if (samp->processor == TGSI_PROCESSOR_VERTEX) + return lodbias; + assert(sampler->normalized_coords); assert(s); @@ -538,7 +544,7 @@ compute_lambda(const struct pipe_texture *tex, float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; dsdx = fabsf(dsdx); dsdy = fabsf(dsdy); - rho = MAX2(dsdx, dsdy) * tex->width[0]; + rho = MAX2(dsdx, dsdy) * texture->width[0]; } if (t) { float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; @@ -546,7 +552,7 @@ compute_lambda(const struct pipe_texture *tex, float max; dtdx = fabsf(dtdx); dtdy = fabsf(dtdy); - max = MAX2(dtdx, dtdy) * tex->height[0]; + max = MAX2(dtdx, dtdy) * texture->height[0]; rho = MAX2(rho, max); } if (p) { @@ -555,7 +561,7 @@ compute_lambda(const struct pipe_texture *tex, float max; dpdx = fabsf(dpdx); dpdy = fabsf(dpdy); - max = MAX2(dpdx, dpdy) * tex->depth[0]; + max = MAX2(dpdx, dpdy) * texture->depth[0]; rho = MAX2(rho, max); } @@ -579,16 +585,18 @@ compute_lambda(const struct pipe_texture *tex, * \param imgFilter Returns either the min or mag filter, depending on lambda */ static void -choose_mipmap_levels(const struct pipe_texture *texture, - const struct pipe_sampler_state *sampler, +choose_mipmap_levels(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - boolean computeLambda, float lodbias, unsigned *level0, unsigned *level1, float *levelBlend, unsigned *imgFilter) { + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; + if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { /* no mipmap selection needed */ *level0 = *level1 = CLAMP((int) sampler->min_lod, @@ -598,7 +606,7 @@ choose_mipmap_levels(const struct pipe_texture *texture, /* non-mipmapped texture, but still need to determine if doing * minification or magnification. */ - float lambda = compute_lambda(texture, sampler, s, t, p, lodbias); + float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); if (lambda <= 0.0) { *imgFilter = sampler->mag_img_filter; } @@ -611,14 +619,7 @@ choose_mipmap_levels(const struct pipe_texture *texture, } } else { - float lambda; - - if (computeLambda) - /* fragment shader */ - lambda = compute_lambda(texture, sampler, s, t, p, lodbias); - else - /* vertex shader */ - lambda = lodbias; /* not really a bias, but absolute LOD */ + float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); if (lambda <= 0.0) { /* XXX threshold depends on the filter */ /* magnifying */ @@ -1028,11 +1029,10 @@ sp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler { struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; int level0; float lambda; - lambda = compute_lambda(texture, sampler, s, t, p, lodbias); + lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); level0 = (int)lambda; if (lambda < 0.0) { @@ -1072,11 +1072,10 @@ sp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler * Could probably extend for 3D... */ static void -sp_get_samples_2d_common(const struct tgsi_sampler *tgsi_sampler, +sp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - boolean computeLambda, float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE], const unsigned faces[4]) @@ -1088,7 +1087,8 @@ sp_get_samples_2d_common(const struct tgsi_sampler *tgsi_sampler, int width, height; float levelBlend; - choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias, + choose_mipmap_levels(tgsi_sampler, s, t, p, + lodbias, &level0, &level1, &levelBlend, &imgFilter); assert(sampler->normalized_coords); @@ -1199,42 +1199,39 @@ sp_get_samples_2d_common(const struct tgsi_sampler *tgsi_sampler, static INLINE void -sp_get_samples_1d(const struct tgsi_sampler *sampler, +sp_get_samples_1d(struct tgsi_sampler *sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - boolean computeLambda, float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE]) { static const unsigned faces[4] = {0, 0, 0, 0}; static const float tzero[4] = {0, 0, 0, 0}; sp_get_samples_2d_common(sampler, s, tzero, NULL, - computeLambda, lodbias, rgba, faces); + lodbias, rgba, faces); } static INLINE void -sp_get_samples_2d(const struct tgsi_sampler *sampler, +sp_get_samples_2d(struct tgsi_sampler *sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - boolean computeLambda, float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE]) { static const unsigned faces[4] = {0, 0, 0, 0}; sp_get_samples_2d_common(sampler, s, t, p, - computeLambda, lodbias, rgba, faces); + lodbias, rgba, faces); } static INLINE void -sp_get_samples_3d(const struct tgsi_sampler *tgsi_sampler, +sp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - boolean computeLambda, float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE]) { @@ -1247,7 +1244,8 @@ sp_get_samples_3d(const struct tgsi_sampler *tgsi_sampler, float levelBlend; const uint face = 0; - choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias, + choose_mipmap_levels(tgsi_sampler, s, t, p, + lodbias, &level0, &level1, &levelBlend, &imgFilter); assert(sampler->normalized_coords); @@ -1356,11 +1354,10 @@ sp_get_samples_3d(const struct tgsi_sampler *tgsi_sampler, static void -sp_get_samples_cube(const struct tgsi_sampler *sampler, +sp_get_samples_cube(struct tgsi_sampler *sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - boolean computeLambda, float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE]) { @@ -1370,16 +1367,15 @@ sp_get_samples_cube(const struct tgsi_sampler *sampler, faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j); } sp_get_samples_2d_common(sampler, ssss, tttt, NULL, - computeLambda, lodbias, rgba, faces); + lodbias, rgba, faces); } static void -sp_get_samples_rect(const struct tgsi_sampler *tgsi_sampler, +sp_get_samples_rect(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - boolean computeLambda, float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE]) { @@ -1391,7 +1387,8 @@ sp_get_samples_rect(const struct tgsi_sampler *tgsi_sampler, int width, height; float levelBlend; - choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias, + choose_mipmap_levels(tgsi_sampler, s, t, p, + lodbias, &level0, &level1, &levelBlend, &imgFilter); /* texture RECTS cannot be mipmapped */ @@ -1447,90 +1444,77 @@ sp_get_samples_rect(const struct tgsi_sampler *tgsi_sampler, /** - * Common code for vertex/fragment program texture sampling. + * Error condition handler */ static INLINE void +sp_get_samples_null(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + int i,j; + + for (i = 0; i < 4; i++) + for (j = 0; j < 4; j++) + rgba[i][j] = 1.0; +} + +/** + * Called via tgsi_sampler::get_samples() when using a sampler for the + * first time. Determine the actual sampler function, link it in and + * call it. + */ +void sp_get_samples(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - boolean computeLambda, float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); const struct pipe_texture *texture = samp->texture; const struct pipe_sampler_state *sampler = samp->sampler; - if (!texture) - return; + /* Default to the 'undefined' case: + */ + tgsi_sampler->get_samples = sp_get_samples_null; + + if (!texture) { + assert(0); /* is this legal?? */ + goto out; + } + + if (!sampler->normalized_coords) { + assert (texture->target == PIPE_TEXTURE_2D); + tgsi_sampler->get_samples = sp_get_samples_rect; + goto out; + } switch (texture->target) { case PIPE_TEXTURE_1D: - assert(sampler->normalized_coords); - sp_get_samples_1d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); + tgsi_sampler->get_samples = sp_get_samples_1d; break; case PIPE_TEXTURE_2D: - if (sampler->normalized_coords) - sp_get_samples_2d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); - else - sp_get_samples_rect(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); + tgsi_sampler->get_samples = sp_get_samples_2d; break; case PIPE_TEXTURE_3D: - assert(sampler->normalized_coords); - sp_get_samples_3d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); + tgsi_sampler->get_samples = sp_get_samples_3d; break; case PIPE_TEXTURE_CUBE: - assert(sampler->normalized_coords); - sp_get_samples_cube(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); + tgsi_sampler->get_samples = sp_get_samples_cube; break; default: assert(0); + break; } -#if 0 /* DEBUG */ - { - int i; - printf("Sampled at %f, %f, %f:\n", s[0], t[0], p[0]); - for (i = 0; i < 4; i++) { - printf("Frag %d: %f %f %f %f\n", i, - rgba[0][i], - rgba[1][i], - rgba[2][i], - rgba[3][i]); - } - } -#endif -} - -static void -sp_get_samples_fallback(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - sp_get_samples(tgsi_sampler, s, t, p, TRUE, lodbias, rgba); -} - -/** - * Called via tgsi_sampler::get_samples() when running a fragment shader. - * Get four filtered RGBA values from the sampler's texture. - */ -void -sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - tgsi_sampler->get_samples = sp_get_samples_fallback; + /* Do this elsewhere: + */ + samp->xpot = util_unsigned_logbase2( samp->texture->width[0] ); + samp->ypot = util_unsigned_logbase2( samp->texture->height[0] ); /* Try to hook in a faster sampler. Ultimately we'll have to * code-generate these. Luckily most of this looks like it is @@ -1542,9 +1526,6 @@ sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler, sampler->compare_mode == FALSE && sampler->normalized_coords) { - samp->xpot = util_unsigned_logbase2( samp->texture->width[0] ); - samp->ypot = util_unsigned_logbase2( samp->texture->height[0] ); - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { samp->level = CLAMP((int) sampler->min_lod, 0, (int) texture->last_level); @@ -1593,21 +1574,7 @@ sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler, sampler->normalized_coords, TRUE); } +out: tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba ); } - -/** - * Called via tgsi_sampler::get_samples() when running a vertex shader. - * Get four filtered RGBA values from the sampler's texture. - */ -void -sp_get_samples_vertex(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - sp_get_samples(tgsi_sampler, s, t, p, FALSE, lodbias, rgba); -} diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index 0650c7830b..c73ae44131 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -39,6 +39,8 @@ struct sp_shader_sampler { struct tgsi_sampler base; /**< base class */ + unsigned processor; + /* For sp_get_samples_2d_linear_POT: */ unsigned xpot; @@ -60,21 +62,14 @@ sp_shader_sampler(const struct tgsi_sampler *sampler) } -extern void -sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]); extern void -sp_get_samples_vertex(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]); +sp_get_samples(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]); #endif /* SP_TEX_SAMPLE_H */ -- cgit v1.2.3 From 4fc7d0345a18042a79686940fb7cc4e698cc9192 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 21 Aug 2009 17:13:11 +0100 Subject: softpipe: rework texture sampling code Split into component pieces, stitch together at runtime using function pointers. Make it possible to utilize the existing fastpaths as image-level filters for generic mip-filtering routines. Remove special case for rectangle filtering, as it can now be handled by the 2d path. As most of the mesa demo texturing was already covered by fast paths, its harder to find examples of speedups, but tunnel gets a boost as mip-nearest filtering is now able to access the img_2d_linear_wrap_POT functions for sampling within a mipmap level. --- src/gallium/drivers/softpipe/sp_context.c | 15 - src/gallium/drivers/softpipe/sp_context.h | 23 +- src/gallium/drivers/softpipe/sp_state.h | 1 + src/gallium/drivers/softpipe/sp_state_blend.c | 4 +- src/gallium/drivers/softpipe/sp_state_derived.c | 18 +- src/gallium/drivers/softpipe/sp_state_fs.c | 3 + src/gallium/drivers/softpipe/sp_state_sampler.c | 104 +- src/gallium/drivers/softpipe/sp_tex_sample.c | 2198 ++++++++++++----------- src/gallium/drivers/softpipe/sp_tex_sample.h | 94 +- src/gallium/drivers/softpipe/sp_texture.c | 8 +- src/gallium/drivers/softpipe/sp_texture.h | 4 + 11 files changed, 1361 insertions(+), 1111 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index a0196955c8..ef8faab3bd 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -228,21 +228,6 @@ softpipe_create( struct pipe_screen *screen ) softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe); softpipe->quad.blend = sp_quad_blend_stage(softpipe); - /* vertex shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - softpipe->tgsi.vert_samplers[i].base.get_samples = sp_get_samples; - softpipe->tgsi.vert_samplers[i].processor = TGSI_PROCESSOR_VERTEX; - softpipe->tgsi.vert_samplers[i].cache = softpipe->tex_cache[i]; - softpipe->tgsi.vert_samplers_list[i] = &softpipe->tgsi.vert_samplers[i]; - } - - /* fragment shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - softpipe->tgsi.frag_samplers[i].base.get_samples = sp_get_samples; - softpipe->tgsi.frag_samplers[i].processor = TGSI_PROCESSOR_FRAGMENT; - softpipe->tgsi.frag_samplers[i].cache = softpipe->tex_cache[i]; - softpipe->tgsi.frag_samplers_list[i] = &softpipe->tgsi.frag_samplers[i]; - } /* * Create drawing context and plug our rendering stage into it. diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index fa3306c020..068a892f25 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -36,7 +36,6 @@ #include "draw/draw_vertex.h" #include "sp_quad_pipe.h" -#include "sp_tex_sample.h" struct softpipe_vbuf_render; @@ -51,12 +50,12 @@ struct softpipe_context { struct pipe_context pipe; /**< base class */ /** Constant state objects */ - const struct pipe_blend_state *blend; - const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; - const struct pipe_depth_stencil_alpha_state *depth_stencil; - const struct pipe_rasterizer_state *rasterizer; - const struct sp_fragment_shader *fs; - const struct sp_vertex_shader *vs; + struct pipe_blend_state *blend; + struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_depth_stencil_alpha_state *depth_stencil; + struct pipe_rasterizer_state *rasterizer; + struct sp_fragment_shader *fs; + struct sp_vertex_shader *vs; /** Other rendering state */ struct pipe_blend_color blend_color; @@ -123,10 +122,8 @@ struct softpipe_context { /** TGSI exec things */ struct { - struct sp_shader_sampler vert_samplers[PIPE_MAX_SAMPLERS]; - struct sp_shader_sampler *vert_samplers_list[PIPE_MAX_SAMPLERS]; - struct sp_shader_sampler frag_samplers[PIPE_MAX_SAMPLERS]; - struct sp_shader_sampler *frag_samplers_list[PIPE_MAX_SAMPLERS]; + struct sp_sampler_varient *vert_samplers_list[PIPE_MAX_SAMPLERS]; + struct sp_sampler_varient *frag_samplers_list[PIPE_MAX_SAMPLERS]; } tgsi; /** The primitive drawing context */ @@ -155,5 +152,9 @@ softpipe_context( struct pipe_context *pipe ) return (struct softpipe_context *)pipe; } +void +softpipe_reset_sampler_varients(struct softpipe_context *softpipe); + + #endif /* SP_CONTEXT_H */ diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 9776e978e3..77ee3c1136 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -87,6 +87,7 @@ struct sp_fragment_shader { struct sp_vertex_shader { struct pipe_shader_state shader; struct draw_vertex_shader *draw_data; + int max_sampler; /* -1 if no samplers */ }; diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c index 384fe559af..efed082f82 100644 --- a/src/gallium/drivers/softpipe/sp_state_blend.c +++ b/src/gallium/drivers/softpipe/sp_state_blend.c @@ -45,7 +45,7 @@ void softpipe_bind_blend_state( struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); - softpipe->blend = (const struct pipe_blend_state *)blend; + softpipe->blend = (struct pipe_blend_state *)blend; softpipe->dirty |= SP_NEW_BLEND; } @@ -86,7 +86,7 @@ softpipe_bind_depth_stencil_state(struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); - softpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + softpipe->depth_stencil = (struct pipe_depth_stencil_alpha_state *)depth_stencil; softpipe->dirty |= SP_NEW_DEPTH_STENCIL_ALPHA; } diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 1f6e2ccb83..5310928332 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -198,19 +198,7 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) { unsigned i; - /* vertex shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - softpipe->tgsi.vert_samplers[i].sampler = softpipe->sampler[i]; - softpipe->tgsi.vert_samplers[i].texture = softpipe->texture[i]; - softpipe->tgsi.frag_samplers[i].base.get_samples = sp_get_samples; - } - - /* fragment shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - softpipe->tgsi.frag_samplers[i].sampler = softpipe->sampler[i]; - softpipe->tgsi.frag_samplers[i].texture = softpipe->texture[i]; - softpipe->tgsi.frag_samplers[i].base.get_samples = sp_get_samples; - } + softpipe_reset_sampler_varients( softpipe ); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { sp_tile_cache_validate_texture( softpipe->tex_cache[i] ); @@ -232,7 +220,9 @@ void softpipe_update_derived( struct softpipe_context *softpipe ) } if (softpipe->dirty & (SP_NEW_SAMPLER | - SP_NEW_TEXTURE)) + SP_NEW_TEXTURE | + SP_NEW_FS | + SP_NEW_VS)) update_tgsi_samplers( softpipe ); if (softpipe->dirty & (SP_NEW_RASTERIZER | diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 108ac8b9bb..3a45321923 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -34,6 +34,7 @@ #include "pipe/internal/p_winsys_screen.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" +#include "draw/draw_vs.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_parse.h" @@ -108,6 +109,8 @@ softpipe_create_vs_state(struct pipe_context *pipe, if (state->draw_data == NULL) goto fail; + state->max_sampler = state->draw_data->info.file_max[TGSI_FILE_SAMPLER]; + return state; fail: diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index aa2f3f2ccd..714e638048 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -38,15 +38,32 @@ #include "sp_state.h" #include "sp_texture.h" #include "sp_tile_cache.h" +#include "sp_tex_sample.h" #include "draw/draw_context.h" +struct sp_sampler { + struct pipe_sampler_state base; + struct sp_sampler_varient *varients; + struct sp_sampler_varient *current; +}; + +static struct sp_sampler *sp_sampler( struct pipe_sampler_state *sampler ) +{ + return (struct sp_sampler *)sampler; +} + void * softpipe_create_sampler_state(struct pipe_context *pipe, const struct pipe_sampler_state *sampler) { - return mem_dup(sampler, sizeof(*sampler)); + struct sp_sampler *sp_sampler = CALLOC_STRUCT(sp_sampler); + + sp_sampler->base = *sampler; + sp_sampler->varients = NULL; + + return (void *)sp_sampler; } @@ -106,10 +123,95 @@ softpipe_set_sampler_textures(struct pipe_context *pipe, } + +static struct sp_sampler_varient * +get_sampler_varient( struct sp_sampler *sampler, + struct pipe_texture *texture, + unsigned processor ) +{ + struct softpipe_texture *sp_texture = softpipe_texture(texture); + struct sp_sampler_varient *v = NULL; + union sp_sampler_key key; + + key.bits.target = sp_texture->base.target; + key.bits.is_pot = sp_texture->pot; + key.bits.processor = processor; + key.bits.pad = 0; + + if (sampler->current && + key.value == sampler->current->key.value) { + v = sampler->current; + } + + if (v == NULL) { + for (v = sampler->varients; v; v = v->next) + if (v->key.value == key.value) + break; + + if (v == NULL) { + v = sp_create_sampler_varient( &sampler->base, key ); + v->next = sampler->varients; + sampler->varients = v; + } + } + + sampler->current = v; + return v; +} + + + + +void +softpipe_reset_sampler_varients(struct softpipe_context *softpipe) +{ + int i; + + /* It's a bit hard to build these samplers ahead of time -- don't + * really know which samplers are going to be used for vertex and + * fragment programs. + */ + for (i = 0; i <= softpipe->vs->max_sampler; i++) { + if (softpipe->sampler[i]) { + softpipe->tgsi.vert_samplers_list[i] = + get_sampler_varient( sp_sampler(softpipe->sampler[i]), + softpipe->texture[i], + TGSI_PROCESSOR_VERTEX ); + + sp_sampler_varient_bind_texture( softpipe->tgsi.vert_samplers_list[i], + softpipe->tex_cache[i], + softpipe->texture[i] ); + } + } + + for (i = 0; i <= softpipe->fs->info.file_max[TGSI_FILE_SAMPLER]; i++) { + if (softpipe->sampler[i]) { + softpipe->tgsi.frag_samplers_list[i] = + get_sampler_varient( sp_sampler(softpipe->sampler[i]), + softpipe->texture[i], + TGSI_PROCESSOR_FRAGMENT ); + + sp_sampler_varient_bind_texture( softpipe->tgsi.frag_samplers_list[i], + softpipe->tex_cache[i], + softpipe->texture[i] ); + } + } +} + + + void softpipe_delete_sampler_state(struct pipe_context *pipe, void *sampler) { + struct sp_sampler *sp_sampler = (struct sp_sampler *)sampler; + struct sp_sampler_varient *v, *tmp; + + for (v = sp_sampler->varients; v; v = tmp) { + tmp = v->next; + sp_sampler_varient_destroy(v); + } + FREE( sampler ); } diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 6c75158d59..7bc689a298 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -31,6 +31,7 @@ * * Authors: * Brian Paul + * Keith Whitwell */ #include "sp_context.h" @@ -116,133 +117,157 @@ lerp_3d(float a, float b, float c, * \param icoord returns the integer texcoords * \return integer texture index */ -static INLINE void -nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, +static void +wrap_nearest_repeat(const float s[4], unsigned size, + int icoord[4]) +{ + uint ch; + + /* s limited to [0,1) */ + /* i limited to [0,size-1] */ + for (ch = 0; ch < 4; ch++) { + int i = util_ifloor(s[ch] * size); + icoord[ch] = REMAINDER(i, size); + } +} + + +static void +wrap_nearest_clamp(const float s[4], unsigned size, int icoord[4]) { uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_REPEAT: - /* s limited to [0,1) */ - /* i limited to [0,size-1] */ - for (ch = 0; ch < 4; ch++) { - int i = util_ifloor(s[ch] * size); - icoord[ch] = REMAINDER(i, size); - } - return; - case PIPE_TEX_WRAP_CLAMP: + /* s limited to [0,1] */ + /* i limited to [0,size-1] */ + for (ch = 0; ch < 4; ch++) { + if (s[ch] <= 0.0F) + icoord[ch] = 0; + else if (s[ch] >= 1.0F) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(s[ch] * size); + } +} + + +static void +wrap_nearest_clamp_to_edge(const float s[4], unsigned size, + int icoord[4]) +{ + uint ch; + /* s limited to [min,max] */ + /* i limited to [0, size-1] */ + const float min = 1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + if (s[ch] < min) + icoord[ch] = 0; + else if (s[ch] > max) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(s[ch] * size); + } +} + + +static void +wrap_nearest_clamp_to_border(const float s[4], unsigned size, + int icoord[4]) +{ + uint ch; + /* s limited to [min,max] */ + /* i limited to [-1, size] */ + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + if (s[ch] <= min) + icoord[ch] = -1; + else if (s[ch] >= max) + icoord[ch] = size; + else + icoord[ch] = util_ifloor(s[ch] * size); + } +} + +static void +wrap_nearest_mirror_repeat(const float s[4], unsigned size, + int icoord[4]) +{ + uint ch; + const float min = 1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + const int flr = util_ifloor(s[ch]); + float u; + if (flr & 1) + u = 1.0F - (s[ch] - (float) flr); + else + u = s[ch] - (float) flr; + if (u < min) + icoord[ch] = 0; + else if (u > max) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(u * size); + } +} + +static void +wrap_nearest_mirror_clamp(const float s[4], unsigned size, + int icoord[4]) +{ + uint ch; + for (ch = 0; ch < 4; ch++) { /* s limited to [0,1] */ /* i limited to [0,size-1] */ - for (ch = 0; ch < 4; ch++) { - if (s[ch] <= 0.0F) - icoord[ch] = 0; - else if (s[ch] >= 1.0F) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - if (s[ch] < min) - icoord[ch] = 0; - else if (s[ch] > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - { - /* s limited to [min,max] */ - /* i limited to [-1, size] */ - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - if (s[ch] <= min) - icoord[ch] = -1; - else if (s[ch] >= max) - icoord[ch] = size; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - { - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const int flr = util_ifloor(s[ch]); - float u; - if (flr & 1) - u = 1.0F - (s[ch] - (float) flr); - else - u = s[ch] - (float) flr; - if (u < min) - icoord[ch] = 0; - else if (u > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - for (ch = 0; ch < 4; ch++) { - /* s limited to [0,1] */ - /* i limited to [0,size-1] */ - const float u = fabsf(s[ch]); - if (u <= 0.0F) - icoord[ch] = 0; - else if (u >= 1.0F) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const float u = fabsf(s[ch]); - if (u < min) - icoord[ch] = 0; - else if (u > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const float u = fabsf(s[ch]); - if (u < min) - icoord[ch] = -1; - else if (u > max) - icoord[ch] = size; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - default: - assert(0); + const float u = fabsf(s[ch]); + if (u <= 0.0F) + icoord[ch] = 0; + else if (u >= 1.0F) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(u * size); + } +} + +static void +wrap_nearest_mirror_clamp_to_edge(const float s[4], unsigned size, + int icoord[4]) +{ + uint ch; + /* s limited to [min,max] */ + /* i limited to [0, size-1] */ + const float min = 1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + const float u = fabsf(s[ch]); + if (u < min) + icoord[ch] = 0; + else if (u > max) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(u * size); + } +} + + +static void +wrap_nearest_mirror_clamp_to_border(const float s[4], unsigned size, + int icoord[4]) +{ + uint ch; + /* s limited to [min,max] */ + /* i limited to [0, size-1] */ + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + const float u = fabsf(s[ch]); + if (u < min) + icoord[ch] = -1; + else if (u > max) + icoord[ch] = size; + else + icoord[ch] = util_ifloor(u * size); } } @@ -257,125 +282,151 @@ nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, * \param w returns blend factor/weight between texture indexes * \param icoord returns the computed integer texture coords */ -static INLINE void -linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, +static void +wrap_linear_repeat(const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) +{ + uint ch; + + for (ch = 0; ch < 4; ch++) { + float u = s[ch] * size - 0.5F; + icoord0[ch] = REMAINDER(util_ifloor(u), size); + icoord1[ch] = REMAINDER(icoord0[ch] + 1, size); + w[ch] = FRAC(u); + } +} + +static void +wrap_linear_clamp(const float s[4], unsigned size, int icoord0[4], int icoord1[4], float w[4]) { uint ch; + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], 0.0F, 1.0F); + u = u * size - 0.5f; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } +} - switch (wrapMode) { - case PIPE_TEX_WRAP_REPEAT: - for (ch = 0; ch < 4; ch++) { - float u = s[ch] * size - 0.5F; - icoord0[ch] = REMAINDER(util_ifloor(u), size); - icoord1[ch] = REMAINDER(icoord0[ch] + 1, size); - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.0F, 1.0F); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.0F, 1.0F); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - { - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], min, max); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - } - break;; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - for (ch = 0; ch < 4; ch++) { - const int flr = util_ifloor(s[ch]); - float u; - if (flr & 1) - u = 1.0F - (s[ch] - (float) flr); - else - u = s[ch] - (float) flr; - u = u * size - 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u >= 1.0F) - u = (float) size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u >= 1.0F) - u = (float) size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - { - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u <= min) - u = min * size; - else if (u >= max) - u = max * size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - } - break;; - default: - assert(0); +static void +wrap_linear_clamp_to_edge(const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) +{ + uint ch; + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], 0.0F, 1.0F); + u = u * size - 0.5f; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord0[ch] < 0) + icoord0[ch] = 0; + if (icoord1[ch] >= (int) size) + icoord1[ch] = size - 1; + w[ch] = FRAC(u); + } +} + +static void +wrap_linear_clamp_to_border(const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) +{ + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + uint ch; + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], min, max); + u = u * size - 0.5f; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } +} + + +static void +wrap_linear_mirror_repeat(const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) +{ + uint ch; + for (ch = 0; ch < 4; ch++) { + const int flr = util_ifloor(s[ch]); + float u; + if (flr & 1) + u = 1.0F - (s[ch] - (float) flr); + else + u = s[ch] - (float) flr; + u = u * size - 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord0[ch] < 0) + icoord0[ch] = 0; + if (icoord1[ch] >= (int) size) + icoord1[ch] = size - 1; + w[ch] = FRAC(u); + } +} + +static void +wrap_linear_mirror_clamp(const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) +{ + uint ch; + for (ch = 0; ch < 4; ch++) { + float u = fabsf(s[ch]); + if (u >= 1.0F) + u = (float) size; + else + u *= size; + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } +} + +static void +wrap_linear_mirror_clamp_to_edge(const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) +{ + uint ch; + for (ch = 0; ch < 4; ch++) { + float u = fabsf(s[ch]); + if (u >= 1.0F) + u = (float) size; + else + u *= size; + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord0[ch] < 0) + icoord0[ch] = 0; + if (icoord1[ch] >= (int) size) + icoord1[ch] = size - 1; + w[ch] = FRAC(u); + } +} + +static void +wrap_linear_mirror_clamp_to_border(const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) +{ + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + uint ch; + for (ch = 0; ch < 4; ch++) { + float u = fabsf(s[ch]); + if (u <= min) + u = min * size; + else if (u >= max) + u = max * size; + else + u *= size; + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); } } @@ -384,27 +435,26 @@ linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, * For RECT textures / unnormalized texcoords * Only a subset of wrap modes supported. */ -static INLINE void -nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, +static void +wrap_nearest_unorm_clamp(const float s[4], unsigned size, int icoord[4]) { uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - int i = util_ifloor(s[ch]); - icoord[ch]= CLAMP(i, 0, (int) size-1); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - /* fall-through */ - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - for (ch = 0; ch < 4; ch++) { - icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) ); - } - return; - default: - assert(0); + for (ch = 0; ch < 4; ch++) { + int i = util_ifloor(s[ch]); + icoord[ch]= CLAMP(i, 0, (int) size-1); + } +} + +/* Handles clamp_to_edge and clamp_to_border: + */ +static void +wrap_nearest_unorm_clamp_to_border(const float s[4], unsigned size, + int icoord[4]) +{ + uint ch; + for (ch = 0; ch < 4; ch++) { + icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) ); } } @@ -413,157 +463,82 @@ nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, * For RECT textures / unnormalized texcoords. * Only a subset of wrap modes supported. */ -static INLINE void -linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, +static void +wrap_linear_unorm_clamp(const float s[4], unsigned size, int icoord0[4], int icoord1[4], float w[4]) { uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - /* Not exactly what the spec says, but it matches NVIDIA output */ - float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f); - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - /* fall-through */ - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F); - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord1[ch] > (int) size - 1) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break; - default: - assert(0); + for (ch = 0; ch < 4; ch++) { + /* Not exactly what the spec says, but it matches NVIDIA output */ + float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f); + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); } } - -static unsigned -choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) +static void +wrap_linear_unorm_clamp_to_border( const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) { - /* - major axis - direction target sc tc ma - ---------- ------------------------------- --- --- --- - +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx - -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx - +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry - -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry - +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz - -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz - */ - const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz); - unsigned face; - float sc, tc, ma; - - if (arx > ary && arx > arz) { - if (rx >= 0.0F) { - face = PIPE_TEX_FACE_POS_X; - sc = -rz; - tc = -ry; - ma = arx; - } - else { - face = PIPE_TEX_FACE_NEG_X; - sc = rz; - tc = -ry; - ma = arx; - } - } - else if (ary > arx && ary > arz) { - if (ry >= 0.0F) { - face = PIPE_TEX_FACE_POS_Y; - sc = rx; - tc = rz; - ma = ary; - } - else { - face = PIPE_TEX_FACE_NEG_Y; - sc = rx; - tc = -rz; - ma = ary; - } - } - else { - if (rz > 0.0F) { - face = PIPE_TEX_FACE_POS_Z; - sc = rx; - tc = -ry; - ma = arz; - } - else { - face = PIPE_TEX_FACE_NEG_Z; - sc = -rx; - tc = -ry; - ma = arz; - } + uint ch; + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F); + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord1[ch] > (int) size - 1) + icoord1[ch] = size - 1; + w[ch] = FRAC(u); } +} + - *newS = ( sc / ma + 1.0F ) * 0.5F; - *newT = ( tc / ma + 1.0F ) * 0.5F; - return face; -} /** * Examine the quad's texture coordinates to compute the partial * derivatives w.r.t X and Y, then compute lambda (level of detail). - * - * This is only done for fragment shaders, not vertex shaders. */ static float -compute_lambda(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) +compute_lambda_1d(const struct sp_sampler_varient *samp, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias) { - const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); const struct pipe_texture *texture = samp->texture; const struct pipe_sampler_state *sampler = samp->sampler; - float rho, lambda; + float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]); + float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); + float rho = MAX2(dsdx, dsdy) * texture->width[0]; + float lambda; - if (samp->processor == TGSI_PROCESSOR_VERTEX) - return lodbias; + lambda = util_fast_log2(rho); + lambda += lodbias + sampler->lod_bias; + lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); - assert(sampler->normalized_coords); + return lambda; +} - assert(s); - { - float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]; - float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; - dsdx = fabsf(dsdx); - dsdy = fabsf(dsdy); - rho = MAX2(dsdx, dsdy) * texture->width[0]; - } - if (t) { - float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; - float dtdy = t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]; - float max; - dtdx = fabsf(dtdx); - dtdy = fabsf(dtdy); - max = MAX2(dtdx, dtdy) * texture->height[0]; - rho = MAX2(rho, max); - } - if (p) { - float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]; - float dpdy = p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]; - float max; - dpdx = fabsf(dpdx); - dpdy = fabsf(dpdy); - max = MAX2(dpdx, dpdy) * texture->depth[0]; - rho = MAX2(rho, max); - } +static float +compute_lambda_2d(const struct sp_sampler_varient *samp, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias) +{ + const struct pipe_texture *texture = samp->texture; + const struct pipe_sampler_state *sampler = samp->sampler; + float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]); + float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); + float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]); + float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]); + float maxx = MAX2(dsdx, dsdy) * texture->width[0]; + float maxy = MAX2(dtdx, dtdy) * texture->height[0]; + float rho = MAX2(maxx, maxy); + float lambda; lambda = util_fast_log2(rho); lambda += lodbias + sampler->lod_bias; @@ -573,88 +548,56 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler, } -/** - * Do several things here: - * 1. Compute lambda from the texcoords, if needed - * 2. Determine if we're minifying or magnifying - * 3. If minifying, choose mipmap levels - * 4. Return image filter to use within mipmap images - * \param level0 Returns first mipmap level to sample from - * \param level1 Returns second mipmap level to sample from - * \param levelBlend Returns blend factor between levels, in [0,1] - * \param imgFilter Returns either the min or mag filter, depending on lambda - */ -static void -choose_mipmap_levels(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - unsigned *level0, unsigned *level1, float *levelBlend, - unsigned *imgFilter) +static float +compute_lambda_3d(const struct sp_sampler_varient *samp, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias) { - const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); const struct pipe_texture *texture = samp->texture; const struct pipe_sampler_state *sampler = samp->sampler; + float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]); + float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); + float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]); + float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]); + float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]); + float dpdy = fabsf(p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]); + float maxx = MAX2(dsdx, dsdy) * texture->width[0]; + float maxy = MAX2(dtdx, dtdy) * texture->height[0]; + float maxz = MAX2(dpdx, dpdy) * texture->depth[0]; + float rho, lambda; - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { - /* no mipmap selection needed */ - *level0 = *level1 = CLAMP((int) sampler->min_lod, - 0, (int) texture->last_level); - - if (sampler->min_img_filter != sampler->mag_img_filter) { - /* non-mipmapped texture, but still need to determine if doing - * minification or magnification. - */ - float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - if (lambda <= 0.0) { - *imgFilter = sampler->mag_img_filter; - } - else { - *imgFilter = sampler->min_img_filter; - } - } - else { - *imgFilter = sampler->mag_img_filter; - } - } - else { - float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); + rho = MAX2(maxx, maxy); + rho = MAX2(rho, maxz); - if (lambda <= 0.0) { /* XXX threshold depends on the filter */ - /* magnifying */ - *imgFilter = sampler->mag_img_filter; - *level0 = *level1 = 0; - } - else { - /* minifying */ - *imgFilter = sampler->min_img_filter; - - /* choose mipmap level(s) and compute the blend factor between them */ - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { - /* Nearest mipmap level */ - const int lvl = (int) (lambda + 0.5); - *level0 = - *level1 = CLAMP(lvl, 0, (int) texture->last_level); - } - else { - /* Linear interpolation between mipmap levels */ - const int lvl = (int) lambda; - *level0 = CLAMP(lvl, 0, (int) texture->last_level); - *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level); - *levelBlend = FRAC(lambda); /* blending weight between levels */ - } - } - } + lambda = util_fast_log2(rho); + lambda += lodbias + sampler->lod_bias; + lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); + + return lambda; } -/** - * Get a texel from a texture, using the texture tile cache. - * - * \param face the cube face in 0..5 - * \param level the mipmap level - * \param x the x coord of texel within 2D image + +static float +compute_lambda_vert(const struct sp_sampler_varient *samp, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias) +{ + return lodbias; +} + + + +/** + * Get a texel from a texture, using the texture tile cache. + * + * \param face the cube face in 0..5 + * \param level the mipmap level + * \param x the x coord of texel within 2D image * \param y the y coord of texel within 2D image * \param z which slice of a 3D texture * \param rgba the quad to put the texel/color into @@ -663,12 +606,12 @@ choose_mipmap_levels(struct tgsi_sampler *tgsi_sampler, * XXX maybe move this into sp_tile_cache.c and merge with the * sp_get_cached_tile_tex() function. Also, get 4 texels instead of 1... */ -static void +static INLINE void get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler, unsigned face, unsigned level, int x, int y, const float *out[4]) { - const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct softpipe_cached_tile *tile = sp_get_cached_tile_tex(samp->cache, @@ -687,7 +630,7 @@ static INLINE const float * get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler, unsigned face, unsigned level, int x, int y) { - const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct softpipe_cached_tile *tile = sp_get_cached_tile_tex(samp->cache, @@ -700,7 +643,7 @@ get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler, } -static void +static INLINE void get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler, unsigned face, unsigned level, int x0, int y0, @@ -717,12 +660,12 @@ get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler, } } -static void +static INLINE void get_texel(const struct tgsi_sampler *tgsi_sampler, unsigned face, unsigned level, int x, int y, int z, float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) { - const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_texture *texture = samp->texture; const struct pipe_sampler_state *sampler = samp->sampler; @@ -756,140 +699,18 @@ get_texel(const struct tgsi_sampler *tgsi_sampler, } -/** - * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' - * When we sampled the depth texture, the depth value was put into all - * RGBA channels. We look at the red channel here. - * \param rgba quad of (depth) texel values - * \param p texture 'P' components for four pixels in quad - * \param j which pixel in the quad to test [0..3] - */ -static INLINE void -shadow_compare(const struct pipe_sampler_state *sampler, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const float p[QUAD_SIZE], - uint j) -{ - int k; - switch (sampler->compare_func) { - case PIPE_FUNC_LESS: - k = p[j] < rgba[0][j]; - break; - case PIPE_FUNC_LEQUAL: - k = p[j] <= rgba[0][j]; - break; - case PIPE_FUNC_GREATER: - k = p[j] > rgba[0][j]; - break; - case PIPE_FUNC_GEQUAL: - k = p[j] >= rgba[0][j]; - break; - case PIPE_FUNC_EQUAL: - k = p[j] == rgba[0][j]; - break; - case PIPE_FUNC_NOTEQUAL: - k = p[j] != rgba[0][j]; - break; - case PIPE_FUNC_ALWAYS: - k = 1; - break; - case PIPE_FUNC_NEVER: - k = 0; - break; - default: - k = 0; - assert(0); - break; - } - /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ - rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k; - rgba[3][j] = 1.0F; -} -/** - * As above, but do four z/texture comparisons. - */ static INLINE void -shadow_compare4(const struct pipe_sampler_state *sampler, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const float p[QUAD_SIZE]) -{ - int j, k0, k1, k2, k3; - float val; - - /* compare four texcoords vs. four texture samples */ - switch (sampler->compare_func) { - case PIPE_FUNC_LESS: - k0 = p[0] < rgba[0][0]; - k1 = p[1] < rgba[0][1]; - k2 = p[2] < rgba[0][2]; - k3 = p[3] < rgba[0][3]; - break; - case PIPE_FUNC_LEQUAL: - k0 = p[0] <= rgba[0][0]; - k1 = p[1] <= rgba[0][1]; - k2 = p[2] <= rgba[0][2]; - k3 = p[3] <= rgba[0][3]; - break; - case PIPE_FUNC_GREATER: - k0 = p[0] > rgba[0][0]; - k1 = p[1] > rgba[0][1]; - k2 = p[2] > rgba[0][2]; - k3 = p[3] > rgba[0][3]; - break; - case PIPE_FUNC_GEQUAL: - k0 = p[0] >= rgba[0][0]; - k1 = p[1] >= rgba[0][1]; - k2 = p[2] >= rgba[0][2]; - k3 = p[3] >= rgba[0][3]; - break; - case PIPE_FUNC_EQUAL: - k0 = p[0] == rgba[0][0]; - k1 = p[1] == rgba[0][1]; - k2 = p[2] == rgba[0][2]; - k3 = p[3] == rgba[0][3]; - break; - case PIPE_FUNC_NOTEQUAL: - k0 = p[0] != rgba[0][0]; - k1 = p[1] != rgba[0][1]; - k2 = p[2] != rgba[0][2]; - k3 = p[3] != rgba[0][3]; - break; - case PIPE_FUNC_ALWAYS: - k0 = k1 = k2 = k3 = 1; - break; - case PIPE_FUNC_NEVER: - k0 = k1 = k2 = k3 = 0; - break; - default: - k0 = k1 = k2 = k3 = 0; - assert(0); - break; - } - - /* convert four pass/fail values to an intensity in [0,1] */ - val = 0.25F * (k0 + k1 + k2 + k3); - - /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ - for (j = 0; j < 4; j++) { - rgba[0][j] = rgba[1][j] = rgba[2][j] = val; - rgba[3][j] = 1.0F; - } -} - - - -static void -sp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) +img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); unsigned j; unsigned level = samp->level; unsigned xpot = 1 << (samp->xpot - level); @@ -940,15 +761,15 @@ sp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, } -static void -sp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) +static INLINE void +img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); unsigned j; unsigned level = samp->level; unsigned xpot = 1 << (samp->xpot - level); @@ -975,15 +796,15 @@ sp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, } -static void -sp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) +static INLINE void +img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); unsigned j; unsigned level = samp->level; unsigned xpot = 1 << (samp->xpot - level); @@ -1018,238 +839,78 @@ sp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, } } - static void -sp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) +img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) { - struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_texture *texture = samp->texture; - int level0; - float lambda; - - lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - level0 = (int)lambda; + unsigned level0, j; + int width; + int x[4]; - if (lambda < 0.0) { - samp->level = 0; - sp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba ); - } - else if (level0 >= texture->last_level) { - samp->level = texture->last_level; - sp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba ); - } - else { - float levelBlend = lambda - level0; - float rgba0[4][4]; - float rgba1[4][4]; - int c,j; + level0 = samp->level; + width = texture->width[level0]; - samp->level = level0; - sp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba0 ); + assert(width > 0); - samp->level = level0+1; - sp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba1 ); + samp->nearest_texcoord_s(s, width, x); - for (j = 0; j < QUAD_SIZE; j++) { - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]); - } - } + for (j = 0; j < QUAD_SIZE; j++) { + get_texel(tgsi_sampler, 0, level0, x[j], 0, 0, rgba, j); } } -/** - * Common code for sampling 1D/2D/cube textures. - * Could probably extend for 3D... - */ + static void -sp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const unsigned faces[4]) +img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - unsigned level0, level1, j, imgFilter; + const unsigned *faces = samp->faces; /* zero when not cube-mapping */ + unsigned level0, j; int width, height; - float levelBlend; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - assert(sampler->normalized_coords); + int x[4], y[4]; + level0 = samp->level; width = texture->width[level0]; height = texture->height[level0]; assert(width > 0); - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4]; - nearest_texcoord_4(sampler->wrap_s, s, width, x); - nearest_texcoord_4(sampler->wrap_t, t, height, y); - - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(sampler, rgba, p, j); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - unsigned c; - x[j] /= 2; - y[j] /= 2; - get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0, - rgba2, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ - shadow_compare(sampler, rgba2, p, j); - } - - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], y0[4], x1[4], y1[4]; - float xw[4], yw[4]; /* weights */ - - linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); - - for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4]; /* texels */ - int c; - get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare4(sampler, tx, p); - } - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], - tx[c][2], tx[c][3]); - } + samp->nearest_texcoord_s(s, width, x); + samp->nearest_texcoord_t(t, height, y); - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - - /* XXX: This is incorrect -- will often end up with (x0 - * == x1 && y0 == y1), meaning that we fetch the same - * texel four times and linearly interpolate between - * identical values. The correct approach would be to - * call linear_texcoord again for the second level. - */ - x0[j] /= 2; - y0[j] /= 2; - x1[j] /= 2; - y1[j] /= 2; - get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ - shadow_compare4(sampler, tx, p); - } - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba2[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], tx[c][2], tx[c][3]); - } - - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - default: - assert(0); + for (j = 0; j < QUAD_SIZE; j++) { + get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j); } } -static INLINE void -sp_get_samples_1d(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - static const unsigned faces[4] = {0, 0, 0, 0}; - static const float tzero[4] = {0, 0, 0, 0}; - sp_get_samples_2d_common(sampler, s, tzero, NULL, - lodbias, rgba, faces); -} - - -static INLINE void -sp_get_samples_2d(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - static const unsigned faces[4] = {0, 0, 0, 0}; - sp_get_samples_2d_common(sampler, s, t, p, - lodbias, rgba, faces); -} - - -static INLINE void -sp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) +static void +img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - /* get/map pipe_surfaces corresponding to 3D tex slices */ - unsigned level0, level1, j, imgFilter; + unsigned level0, j; int width, height, depth; - float levelBlend; - const uint face = 0; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - assert(sampler->normalized_coords); + int x[4], y[4], z[4]; + level0 = samp->level; width = texture->width[level0]; height = texture->height[level0]; depth = texture->depth[level0]; @@ -1258,323 +919,746 @@ sp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, assert(height > 0); assert(depth > 0); - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4], z[4]; - nearest_texcoord_4(sampler->wrap_s, s, width, x); - nearest_texcoord_4(sampler->wrap_t, t, height, y); - nearest_texcoord_4(sampler->wrap_r, p, depth, z); - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j); - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - unsigned c; - x[j] /= 2; - y[j] /= 2; - z[j] /= 2; - get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j); - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]); - } - } - } - } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4]; - float xw[4], yw[4], zw[4]; /* interpolation weights */ - linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); - linear_texcoord_4(sampler->wrap_r, p, depth, z0, z1, zw); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - float tx0[4][4], tx1[4][4]; - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3); - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3); - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j], - tx0[c][0], tx0[c][1], - tx0[c][2], tx0[c][3], - tx1[c][0], tx1[c][1], - tx1[c][2], tx1[c][3]); - } + samp->nearest_texcoord_s(s, width, x); + samp->nearest_texcoord_t(t, height, y); + samp->nearest_texcoord_p(p, depth, z); - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - x0[j] /= 2; - y0[j] /= 2; - z0[j] /= 2; - x1[j] /= 2; - y1[j] /= 2; - z1[j] /= 2; - get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0); - get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1); - get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2); - get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3); - get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0); - get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1); - get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2); - get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3); - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j], - tx0[c][0], tx0[c][1], - tx0[c][2], tx0[c][3], - tx1[c][0], tx1[c][1], - tx1[c][2], tx1[c][3]); - } - - /* blend mipmap levels */ - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - default: - assert(0); + for (j = 0; j < QUAD_SIZE; j++) { + get_texel(tgsi_sampler, 0, level0, x[j], y[j], z[j], rgba, j); } } static void -sp_get_samples_cube(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) +img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) { - unsigned faces[QUAD_SIZE], j; - float ssss[4], tttt[4]; + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + unsigned level0, j; + int width; + int x0[4], x1[4]; + float xw[4]; /* weights */ + + + level0 = samp->level; + width = texture->width[level0]; + + assert(width > 0); + + samp->linear_texcoord_s(s, width, x0, x1, xw); + + for (j = 0; j < QUAD_SIZE; j++) { - faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j); + float tx[4][4]; /* texels */ + int c; + get_texel(tgsi_sampler, 0, level0, x0[j], 0, 0, tx, 0); + get_texel(tgsi_sampler, 0, level0, x1[j], 0, 0, tx, 1); + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp(xw[j], tx[c][0], tx[c][1]); + } } - sp_get_samples_2d_common(sampler, ssss, tttt, NULL, - lodbias, rgba, faces); } - static void -sp_get_samples_rect(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) +img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) { - const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - const uint face = 0; - unsigned level0, level1, j, imgFilter; + const unsigned *faces = samp->faces; /* zero when not cube-mapping */ + unsigned level0, j; int width, height; - float levelBlend; + int x0[4], y0[4], x1[4], y1[4]; + float xw[4], yw[4]; /* weights */ - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - /* texture RECTS cannot be mipmapped */ - assert(level0 == level1); + level0 = samp->level; width = texture->width[level0]; height = texture->height[level0]; assert(width > 0); - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4]; - nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x); - nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y); - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(sampler, rgba, p, j); - } - } + samp->linear_texcoord_s(s, width, x0, x1, xw); + samp->linear_texcoord_s(t, height, y0, y1, yw); + + for (j = 0; j < QUAD_SIZE; j++) { + float tx[4][4]; /* texels */ + int c; + get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0); + get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1); + get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2); + get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3); + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(xw[j], yw[j], + tx[c][0], tx[c][1], + tx[c][2], tx[c][3]); } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], y0[4], x1[4], y1[4]; - float xw[4], yw[4]; /* weights */ - linear_texcoord_unnorm_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw); - for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4]; /* texels */ - int c; - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare4(sampler, tx, p); - } - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], tx[c][2], tx[c][3]); - } + } +} + + +static void +img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + unsigned level0, j; + int width, height, depth; + int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4]; + float xw[4], yw[4], zw[4]; /* interpolation weights */ + + level0 = samp->level; + width = texture->width[level0]; + height = texture->height[level0]; + depth = texture->depth[level0]; + + assert(width > 0); + assert(height > 0); + assert(depth > 0); + + samp->linear_texcoord_s(s, width, x0, x1, xw); + samp->linear_texcoord_s(t, height, y0, y1, yw); + samp->linear_texcoord_s(p, depth, z0, z1, zw); + + for (j = 0; j < QUAD_SIZE; j++) { + float tx0[4][4], tx1[4][4]; + int c; + + get_texel(tgsi_sampler, 0, level0, x0[j], y0[j], z0[j], tx0, 0); + get_texel(tgsi_sampler, 0, level0, x1[j], y0[j], z0[j], tx0, 1); + get_texel(tgsi_sampler, 0, level0, x0[j], y1[j], z0[j], tx0, 2); + get_texel(tgsi_sampler, 0, level0, x1[j], y1[j], z0[j], tx0, 3); + get_texel(tgsi_sampler, 0, level0, x0[j], y0[j], z1[j], tx1, 0); + get_texel(tgsi_sampler, 0, level0, x1[j], y0[j], z1[j], tx1, 1); + get_texel(tgsi_sampler, 0, level0, x0[j], y1[j], z1[j], tx1, 2); + get_texel(tgsi_sampler, 0, level0, x1[j], y1[j], z1[j], tx1, 3); + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j], + tx0[c][0], tx0[c][1], + tx0[c][2], tx0[c][3], + tx1[c][0], tx1[c][1], + tx1[c][2], tx1[c][3]); + } + } +} + + + + + + + +static void +mip_filter_linear(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + int level0; + float lambda; + + lambda = samp->compute_lambda(samp, s, t, p, lodbias); + level0 = (int)lambda; + + if (lambda < 0.0) { + samp->level = 0; + samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + } + else if (level0 >= texture->last_level) { + samp->level = texture->last_level; + samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + } + else { + float levelBlend = lambda - level0; + float rgba0[4][4]; + float rgba1[4][4]; + int c,j; + + samp->level = level0; + samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba0 ); + + samp->level = level0+1; + samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba1 ); + + for (j = 0; j < QUAD_SIZE; j++) { + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]); } } - break; - default: - assert(0); } } -/** - * Error condition handler + +static void +mip_filter_nearest(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + float lambda; + + lambda = samp->compute_lambda(samp, s, t, p, lodbias); + + if (lambda < 0.0) { + samp->level = 0; + samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + } + else { + samp->level = (int)(lambda + 0.5) ; + samp->level = MIN2(samp->level, (int)texture->last_level); + samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + } +} + + +static void +mip_filter_none(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + float lambda = samp->compute_lambda(samp, s, t, p, lodbias); + + if (lambda < 0.0) { + samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + } + else { + samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + } +} + + + +/* Specialized version of mip_filter_linear with hard-wired calls to + * 2d lambda calculation and 2d_linear_repeat_POT img filters. */ -static INLINE void -sp_get_samples_null(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) +static void +mip_filter_linear_2d_linear_repeat_POT( + struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) { - int i,j; + struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + int level0; + float lambda; - for (i = 0; i < 4; i++) - for (j = 0; j < 4; j++) - rgba[i][j] = 1.0; + lambda = compute_lambda_2d(samp, s, t, p, lodbias); + level0 = (int)lambda; + + /* Catches both negative and large values of level0: + */ + if ((unsigned)level0 >= texture->last_level) { + if (level0 < 0) + samp->level = 0; + else + samp->level = texture->last_level; + + img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba ); + } + else { + float levelBlend = lambda - level0; + float rgba0[4][4]; + float rgba1[4][4]; + int c,j; + + samp->level = level0; + img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba0 ); + + samp->level = level0+1; + img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba1 ); + + for (j = 0; j < QUAD_SIZE; j++) { + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]); + } + } + } } -/** - * Called via tgsi_sampler::get_samples() when using a sampler for the - * first time. Determine the actual sampler function, link it in and - * call it. + + +/* Compare stage in the little sampling pipeline. */ -void -sp_get_samples(struct tgsi_sampler *tgsi_sampler, +static void +sample_compare(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; + struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_sampler_state *sampler = samp->sampler; + int j, k0, k1, k2, k3; + float val; - /* Default to the 'undefined' case: - */ - tgsi_sampler->get_samples = sp_get_samples_null; + samp->mip_filter( tgsi_sampler, s, t, p, lodbias, rgba ); - if (!texture) { - assert(0); /* is this legal?? */ - goto out; - } - if (!sampler->normalized_coords) { - assert (texture->target == PIPE_TEXTURE_2D); - tgsi_sampler->get_samples = sp_get_samples_rect; - goto out; - } + /** + * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' + * When we sampled the depth texture, the depth value was put into all + * RGBA channels. We look at the red channel here. + */ - switch (texture->target) { - case PIPE_TEXTURE_1D: - tgsi_sampler->get_samples = sp_get_samples_1d; + /* compare four texcoords vs. four texture samples */ + switch (sampler->compare_func) { + case PIPE_FUNC_LESS: + k0 = p[0] < rgba[0][0]; + k1 = p[1] < rgba[0][1]; + k2 = p[2] < rgba[0][2]; + k3 = p[3] < rgba[0][3]; break; - case PIPE_TEXTURE_2D: - tgsi_sampler->get_samples = sp_get_samples_2d; + case PIPE_FUNC_LEQUAL: + k0 = p[0] <= rgba[0][0]; + k1 = p[1] <= rgba[0][1]; + k2 = p[2] <= rgba[0][2]; + k3 = p[3] <= rgba[0][3]; break; - case PIPE_TEXTURE_3D: - tgsi_sampler->get_samples = sp_get_samples_3d; + case PIPE_FUNC_GREATER: + k0 = p[0] > rgba[0][0]; + k1 = p[1] > rgba[0][1]; + k2 = p[2] > rgba[0][2]; + k3 = p[3] > rgba[0][3]; break; - case PIPE_TEXTURE_CUBE: - tgsi_sampler->get_samples = sp_get_samples_cube; + case PIPE_FUNC_GEQUAL: + k0 = p[0] >= rgba[0][0]; + k1 = p[1] >= rgba[0][1]; + k2 = p[2] >= rgba[0][2]; + k3 = p[3] >= rgba[0][3]; + break; + case PIPE_FUNC_EQUAL: + k0 = p[0] == rgba[0][0]; + k1 = p[1] == rgba[0][1]; + k2 = p[2] == rgba[0][2]; + k3 = p[3] == rgba[0][3]; + break; + case PIPE_FUNC_NOTEQUAL: + k0 = p[0] != rgba[0][0]; + k1 = p[1] != rgba[0][1]; + k2 = p[2] != rgba[0][2]; + k3 = p[3] != rgba[0][3]; + break; + case PIPE_FUNC_ALWAYS: + k0 = k1 = k2 = k3 = 1; + break; + case PIPE_FUNC_NEVER: + k0 = k1 = k2 = k3 = 0; break; default: + k0 = k1 = k2 = k3 = 0; assert(0); break; } - /* Do this elsewhere: - */ - samp->xpot = util_unsigned_logbase2( samp->texture->width[0] ); - samp->ypot = util_unsigned_logbase2( samp->texture->height[0] ); + /* convert four pass/fail values to an intensity in [0,1] */ + val = 0.25F * (k0 + k1 + k2 + k3); + + /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ + for (j = 0; j < 4; j++) { + rgba[0][j] = rgba[1][j] = rgba[2][j] = val; + rgba[3][j] = 1.0F; + } +} + +/* Calculate cube faces. + */ +static void +sample_cube(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + unsigned j; + float ssss[4], tttt[4]; + + /* + major axis + direction target sc tc ma + ---------- ------------------------------- --- --- --- + +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx + -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx + +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry + -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry + +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz + -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz + */ + for (j = 0; j < QUAD_SIZE; j++) { + float rx = s[j]; + float ry = t[j]; + float rz = p[j]; + const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz); + unsigned face; + float sc, tc, ma; + + if (arx > ary && arx > arz) { + if (rx >= 0.0F) { + face = PIPE_TEX_FACE_POS_X; + sc = -rz; + tc = -ry; + ma = arx; + } + else { + face = PIPE_TEX_FACE_NEG_X; + sc = rz; + tc = -ry; + ma = arx; + } + } + else if (ary > arx && ary > arz) { + if (ry >= 0.0F) { + face = PIPE_TEX_FACE_POS_Y; + sc = rx; + tc = rz; + ma = ary; + } + else { + face = PIPE_TEX_FACE_NEG_Y; + sc = rx; + tc = -rz; + ma = ary; + } + } + else { + if (rz > 0.0F) { + face = PIPE_TEX_FACE_POS_Z; + sc = rx; + tc = -ry; + ma = arz; + } + else { + face = PIPE_TEX_FACE_NEG_Z; + sc = -rx; + tc = -ry; + ma = arz; + } + } + + ssss[j] = ( sc / ma + 1.0F ) * 0.5F; + tttt[j] = ( tc / ma + 1.0F ) * 0.5F; + samp->faces[j] = face; + } - /* Try to hook in a faster sampler. Ultimately we'll have to - * code-generate these. Luckily most of this looks like it is - * orthogonal state within the sampler. + /* In our little pipeline, the compare stage is next. If compare + * is not active, this will point somewhere deeper into the + * pipeline, eg. to mip_filter or even img_filter. */ - if (texture->target == PIPE_TEXTURE_2D && - sampler->min_img_filter == sampler->mag_img_filter && - sampler->wrap_s == sampler->wrap_t && - sampler->compare_mode == FALSE && - sampler->normalized_coords) - { - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { - samp->level = CLAMP((int) sampler->min_lod, - 0, (int) texture->last_level); - - if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { - switch (sampler->min_img_filter) { + samp->compare(tgsi_sampler, ssss, tttt, NULL, lodbias, rgba); +} + + + + +static wrap_nearest_func get_nearest_unorm_wrap( unsigned mode ) +{ + switch (mode) { + case PIPE_TEX_WRAP_CLAMP: + return wrap_nearest_unorm_clamp; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return wrap_nearest_unorm_clamp_to_border; + default: + assert(0); + return wrap_nearest_unorm_clamp; + } +} + + +static wrap_nearest_func get_nearest_wrap( unsigned mode ) +{ + switch (mode) { + case PIPE_TEX_WRAP_REPEAT: + return wrap_nearest_repeat; + case PIPE_TEX_WRAP_CLAMP: + return wrap_nearest_clamp; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return wrap_nearest_clamp_to_edge; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return wrap_nearest_clamp_to_border; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return wrap_nearest_mirror_repeat; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return wrap_nearest_mirror_clamp; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return wrap_nearest_mirror_clamp_to_edge; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return wrap_nearest_mirror_clamp_to_border; + default: + assert(0); + return wrap_nearest_repeat; + } +} + +static wrap_linear_func get_linear_unorm_wrap( unsigned mode ) +{ + switch (mode) { + case PIPE_TEX_WRAP_CLAMP: + return wrap_linear_unorm_clamp; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return wrap_linear_unorm_clamp_to_border; + default: + assert(0); + return wrap_linear_unorm_clamp; + } +} + +static wrap_linear_func get_linear_wrap( unsigned mode ) +{ + switch (mode) { + case PIPE_TEX_WRAP_REPEAT: + return wrap_linear_repeat; + case PIPE_TEX_WRAP_CLAMP: + return wrap_linear_clamp; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return wrap_linear_clamp_to_edge; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return wrap_linear_clamp_to_border; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return wrap_linear_mirror_repeat; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return wrap_linear_mirror_clamp; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return wrap_linear_mirror_clamp_to_edge; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return wrap_linear_mirror_clamp_to_border; + default: + assert(0); + return wrap_linear_repeat; + } +} + +static compute_lambda_func get_lambda_func( const union sp_sampler_key key ) +{ + if (key.bits.processor == TGSI_PROCESSOR_VERTEX) + return compute_lambda_vert; + + switch (key.bits.target) { + case PIPE_TEXTURE_1D: + return compute_lambda_1d; + case PIPE_TEXTURE_2D: + return compute_lambda_2d; + case PIPE_TEXTURE_3D: + return compute_lambda_3d; + default: + assert(0); + return compute_lambda_1d; + } +} + +static filter_func get_img_filter( const union sp_sampler_key key, + unsigned filter, + const struct pipe_sampler_state *sampler ) +{ + switch (key.bits.target) { + case PIPE_TEXTURE_1D: + if (filter == PIPE_TEX_FILTER_NEAREST) + return img_filter_1d_nearest; + else + return img_filter_1d_linear; + break; + case PIPE_TEXTURE_2D: + /* Try for fast path: + */ + if (key.bits.is_pot && + sampler->wrap_s == sampler->wrap_t && + sampler->normalized_coords) + { + switch (sampler->wrap_s) { + case PIPE_TEX_WRAP_REPEAT: + switch (filter) { case PIPE_TEX_FILTER_NEAREST: - tgsi_sampler->get_samples = sp_get_samples_2d_nearest_repeat_POT; - break; + return img_filter_2d_nearest_repeat_POT; case PIPE_TEX_FILTER_LINEAR: - tgsi_sampler->get_samples = sp_get_samples_2d_linear_repeat_POT; - break; + return img_filter_2d_linear_repeat_POT; default: break; } - } - else if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP) { - switch (sampler->min_img_filter) { + break; + case PIPE_TEX_WRAP_CLAMP: + switch (filter) { case PIPE_TEX_FILTER_NEAREST: - tgsi_sampler->get_samples = sp_get_samples_2d_nearest_clamp_POT; - break; + return img_filter_2d_nearest_clamp_POT; default: break; } } } - else if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { - switch (sampler->min_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - tgsi_sampler->get_samples = sp_get_samples_2d_linear_mip_linear_repeat_POT; - break; - default: - break; - } - } + /* Fallthrough to default versions: + */ + case PIPE_TEXTURE_CUBE: + if (filter == PIPE_TEX_FILTER_NEAREST) + return img_filter_2d_nearest; + else + return img_filter_2d_linear; + break; + case PIPE_TEXTURE_3D: + if (filter == PIPE_TEX_FILTER_NEAREST) + return img_filter_3d_nearest; + else + return img_filter_3d_linear; + break; + default: + assert(0); + return img_filter_1d_nearest; + } +} + + +void +sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp, + struct softpipe_tile_cache *tex_cache, + const struct pipe_texture *texture ) +{ + const struct pipe_sampler_state *sampler = samp->sampler; + + samp->texture = texture; + samp->cache = tex_cache; + samp->xpot = util_unsigned_logbase2( texture->width[0] ); + samp->ypot = util_unsigned_logbase2( texture->height[0] ); + samp->level = CLAMP((int) sampler->min_lod, 0, (int) texture->last_level); +} + +/* Create a sampler varient for a given set of non-orthogonal state. Currently the + */ +struct sp_sampler_varient * +sp_create_sampler_varient( const struct pipe_sampler_state *sampler, + const union sp_sampler_key key ) +{ + struct sp_sampler_varient *samp = CALLOC_STRUCT(sp_sampler_varient); + if (!samp) + return NULL; + + samp->sampler = sampler; + samp->key = key; + + /* Note that (for instance) linear_texcoord_s and + * nearest_texcoord_s may be active at the same time, if the + * sampler min_img_filter differs from its mag_img_filter. + */ + if (sampler->normalized_coords) { + samp->linear_texcoord_s = get_linear_wrap( sampler->wrap_s ); + samp->linear_texcoord_t = get_linear_wrap( sampler->wrap_t ); + samp->linear_texcoord_p = get_linear_wrap( sampler->wrap_r ); + + samp->nearest_texcoord_s = get_nearest_wrap( sampler->wrap_s ); + samp->nearest_texcoord_t = get_nearest_wrap( sampler->wrap_t ); + samp->nearest_texcoord_p = get_nearest_wrap( sampler->wrap_r ); + } + else { + samp->linear_texcoord_s = get_linear_unorm_wrap( sampler->wrap_s ); + samp->linear_texcoord_t = get_linear_unorm_wrap( sampler->wrap_t ); + samp->linear_texcoord_p = get_linear_unorm_wrap( sampler->wrap_r ); + + samp->nearest_texcoord_s = get_nearest_unorm_wrap( sampler->wrap_s ); + samp->nearest_texcoord_t = get_nearest_unorm_wrap( sampler->wrap_t ); + samp->nearest_texcoord_p = get_nearest_unorm_wrap( sampler->wrap_r ); + } + + samp->compute_lambda = get_lambda_func( key ); + + samp->min_img_filter = get_img_filter(key, sampler->min_img_filter, sampler); + samp->mag_img_filter = get_img_filter(key, sampler->min_img_filter, sampler); + + switch (sampler->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NONE: + if (sampler->min_img_filter == sampler->mag_img_filter) + samp->mip_filter = samp->min_img_filter; + else + samp->mip_filter = mip_filter_none; + break; + + case PIPE_TEX_MIPFILTER_NEAREST: + samp->mip_filter = mip_filter_nearest; + break; + + case PIPE_TEX_MIPFILTER_LINEAR: + if (key.bits.is_pot && + sampler->min_img_filter == sampler->mag_img_filter && + sampler->wrap_s == sampler->wrap_t && + sampler->normalized_coords && + sampler->wrap_s == sampler->wrap_t && + sampler->wrap_s == PIPE_TEX_WRAP_REPEAT && + sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR) + { + samp->mip_filter = mip_filter_linear_2d_linear_repeat_POT; + } + else + { + samp->mip_filter = mip_filter_linear; } + break; + } + + if (sampler->compare_mode != FALSE) { + samp->compare = sample_compare; } - else if (0) { - _debug_printf("target %d/%d min_mip %d/%d min_img %d/%d wrap %d/%d compare %d/%d norm %d/%d\n", - texture->target, PIPE_TEXTURE_2D, - sampler->min_mip_filter, PIPE_TEX_MIPFILTER_NONE, - sampler->min_img_filter, sampler->mag_img_filter, - sampler->wrap_s, sampler->wrap_t, - sampler->compare_mode, FALSE, - sampler->normalized_coords, TRUE); + else { + /* Skip compare operation by promoting the mip_filter function + * pointer: + */ + samp->compare = samp->mip_filter; + } + + if (key.bits.target == PIPE_TEXTURE_CUBE) { + samp->base.get_samples = sample_cube; + } + else { + samp->faces[0] = 0; + samp->faces[1] = 0; + samp->faces[2] = 0; + samp->faces[3] = 0; + + /* Skip cube face determination by promoting the compare + * function pointer: + */ + samp->base.get_samples = samp->compare; } -out: - tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba ); + return samp; } + + + + + diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index c73ae44131..26f80eb88a 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -31,14 +31,61 @@ #include "tgsi/tgsi_exec.h" +struct sp_sampler_varient; + +typedef void (*wrap_nearest_func)(const float s[4], + unsigned size, + int icoord[4]); + +typedef void (*wrap_linear_func)(const float s[4], + unsigned size, + int icoord0[4], + int icoord1[4], + float w[4]); + +typedef float (*compute_lambda_func)(const struct sp_sampler_varient *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias); + +typedef void (*filter_func)(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]); + + +union sp_sampler_key { + struct { + unsigned target:3; + unsigned is_pot:1; + unsigned processor:2; + unsigned pad:26; + } bits; + unsigned value; +}; /** * Subclass of tgsi_sampler */ -struct sp_shader_sampler +struct sp_sampler_varient { struct tgsi_sampler base; /**< base class */ + union sp_sampler_key key; + + /* The owner of this struct: + */ + const struct pipe_sampler_state *sampler; + + + /* Currently bound texture: + */ + const struct pipe_texture *texture; + struct softpipe_tile_cache *cache; + unsigned processor; /* For sp_get_samples_2d_linear_POT: @@ -47,22 +94,51 @@ struct sp_shader_sampler unsigned ypot; unsigned level; - const struct pipe_texture *texture; - const struct pipe_sampler_state *sampler; + unsigned faces[4]; + + wrap_nearest_func nearest_texcoord_s; + wrap_nearest_func nearest_texcoord_t; + wrap_nearest_func nearest_texcoord_p; - struct softpipe_tile_cache *cache; + wrap_linear_func linear_texcoord_s; + wrap_linear_func linear_texcoord_t; + wrap_linear_func linear_texcoord_p; + + filter_func min_img_filter; + filter_func mag_img_filter; + + compute_lambda_func compute_lambda; + + filter_func mip_filter; + filter_func compare; + + /* Linked list: + */ + struct sp_sampler_varient *next; }; +struct sp_sampler; + +/* Create a sampler varient for a given set of non-orthogonal state. Currently the + */ +struct sp_sampler_varient * +sp_create_sampler_varient( const struct pipe_sampler_state *sampler, + const union sp_sampler_key key ); +void sp_sampler_varient_bind_texture( struct sp_sampler_varient *varient, + struct softpipe_tile_cache *tex_cache, + const struct pipe_texture *tex ); -static INLINE struct sp_shader_sampler * -sp_shader_sampler(const struct tgsi_sampler *sampler) -{ - return (struct sp_shader_sampler *) sampler; -} +void sp_sampler_varient_destroy( struct sp_sampler_varient * ); +static INLINE struct sp_sampler_varient * +sp_sampler_varient(const struct tgsi_sampler *sampler) +{ + return (struct sp_sampler_varient *) sampler; +} + extern void sp_get_samples(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 0c84375bf1..a3a54dada4 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -120,16 +120,20 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, static struct pipe_texture * softpipe_texture_create(struct pipe_screen *screen, - const struct pipe_texture *templat) + const struct pipe_texture *template) { struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture); if (!spt) return NULL; - spt->base = *templat; + spt->base = *template; pipe_reference_init(&spt->base.reference, 1); spt->base.screen = screen; + spt->pot = (util_is_power_of_two(template->width[0]) && + util_is_power_of_two(template->height[0]) && + util_is_power_of_two(template->depth[0])); + if (spt->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { if (!softpipe_displaytarget_layout(screen, spt)) goto fail; diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index 42df722a2d..4dd0c1239e 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -48,6 +48,10 @@ struct softpipe_texture */ struct pipe_buffer *buffer; + /* True if texture images are power-of-two in all dimensions: + */ + boolean pot; + unsigned timestamp; }; -- cgit v1.2.3 From 4e5c385d2183e7006c9d7ac0823919156bd4b8e6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 21 Aug 2009 11:40:33 -0600 Subject: softpipe: fix s/t/p typos --- src/gallium/drivers/softpipe/sp_tex_sample.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 7bc689a298..a626731105 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -990,7 +990,7 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, assert(width > 0); samp->linear_texcoord_s(s, width, x0, x1, xw); - samp->linear_texcoord_s(t, height, y0, y1, yw); + samp->linear_texcoord_t(t, height, y0, y1, yw); for (j = 0; j < QUAD_SIZE; j++) { float tx[4][4]; /* texels */ @@ -1035,8 +1035,8 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler, assert(depth > 0); samp->linear_texcoord_s(s, width, x0, x1, xw); - samp->linear_texcoord_s(t, height, y0, y1, yw); - samp->linear_texcoord_s(p, depth, z0, z1, zw); + samp->linear_texcoord_t(t, height, y0, y1, yw); + samp->linear_texcoord_p(p, depth, z0, z1, zw); for (j = 0; j < QUAD_SIZE; j++) { float tx0[4][4], tx1[4][4]; -- cgit v1.2.3 From 41483627f0fd3dc9df2cc55dfd5f3e5987fcfd22 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 21 Aug 2009 11:41:29 -0600 Subject: softpipe: fix min/mag filter typo --- src/gallium/drivers/softpipe/sp_tex_sample.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index a626731105..9502b60479 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1597,7 +1597,7 @@ sp_create_sampler_varient( const struct pipe_sampler_state *sampler, samp->compute_lambda = get_lambda_func( key ); samp->min_img_filter = get_img_filter(key, sampler->min_img_filter, sampler); - samp->mag_img_filter = get_img_filter(key, sampler->min_img_filter, sampler); + samp->mag_img_filter = get_img_filter(key, sampler->mag_img_filter, sampler); switch (sampler->min_mip_filter) { case PIPE_TEX_MIPFILTER_NONE: -- cgit v1.2.3 From cf102b031e7ef33c8e3ffce2f9dcd064f44e8190 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 21 Aug 2009 11:43:48 -0600 Subject: softpipe: remove redundant comparison, make test easier to understand --- src/gallium/drivers/softpipe/sp_tex_sample.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 9502b60479..51118ae38b 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1614,10 +1614,9 @@ sp_create_sampler_varient( const struct pipe_sampler_state *sampler, case PIPE_TEX_MIPFILTER_LINEAR: if (key.bits.is_pot && sampler->min_img_filter == sampler->mag_img_filter && - sampler->wrap_s == sampler->wrap_t && sampler->normalized_coords && - sampler->wrap_s == sampler->wrap_t && sampler->wrap_s == PIPE_TEX_WRAP_REPEAT && + sampler->wrap_t == PIPE_TEX_WRAP_REPEAT && sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR) { samp->mip_filter = mip_filter_linear_2d_linear_repeat_POT; -- cgit v1.2.3 From ecfa8be150ed276af816467b467e76e026f5b541 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 21 Aug 2009 18:44:27 +0100 Subject: softpipe: add missing sp_sampler_varient_destroy --- src/gallium/drivers/softpipe/sp_tex_sample.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 51118ae38b..a2e2a221e4 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1558,6 +1558,14 @@ sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp, samp->level = CLAMP((int) sampler->min_lod, 0, (int) texture->last_level); } + +void +sp_sampler_varient_destroy( struct sp_sampler_varient *samp ) +{ + FREE(samp); +} + + /* Create a sampler varient for a given set of non-orthogonal state. Currently the */ struct sp_sampler_varient * -- cgit v1.2.3 From 87ec83afd58536c31bf02c307f1d5488abc84861 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 21 Aug 2009 11:47:27 -0600 Subject: softpipe: add missing PIPE_TEXTURE_CUBE case in get_lambda_func() Fixes progs/demos/cubemap --- src/gallium/drivers/softpipe/sp_tex_sample.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index a2e2a221e4..f371003708 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1476,6 +1476,7 @@ static compute_lambda_func get_lambda_func( const union sp_sampler_key key ) case PIPE_TEXTURE_1D: return compute_lambda_1d; case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_CUBE: return compute_lambda_2d; case PIPE_TEXTURE_3D: return compute_lambda_3d; -- cgit v1.2.3 From a29447c33d44b3427e0c40a761067c0cc6e71c39 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 21 Aug 2009 12:11:44 -0600 Subject: softpipe: per-unit sampler varients Can't share sampler varients across multiple tex units because the texture pointer is in the sampler varient. That prevents different textures per unit. Fixes progs/demos/multiarb, progs/glsl/samplers, etc. --- src/gallium/drivers/softpipe/sp_state_sampler.c | 24 ++++++++++++++++++++---- src/gallium/drivers/softpipe/sp_tex_sample.c | 3 +++ src/gallium/drivers/softpipe/sp_tex_sample.h | 3 ++- 3 files changed, 25 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 714e638048..53210812f4 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -123,9 +123,19 @@ softpipe_set_sampler_textures(struct pipe_context *pipe, } - +/** + * Find/create an sp_sampler_varient object for sampling the given texture, + * sampler and tex unit. + * + * Note that the tex unit is significant. We can't re-use a sampler + * varient for multiple texture units because the sampler varient contains + * the texture object pointer. If the texture object pointer were stored + * somewhere outside the sampler varient, we could re-use samplers for + * multiple texture units. + */ static struct sp_sampler_varient * -get_sampler_varient( struct sp_sampler *sampler, +get_sampler_varient( unsigned unit, + struct sp_sampler *sampler, struct pipe_texture *texture, unsigned processor ) { @@ -133,9 +143,13 @@ get_sampler_varient( struct sp_sampler *sampler, struct sp_sampler_varient *v = NULL; union sp_sampler_key key; + /* if this fails, widen the key.unit field and update this assertion */ + assert(PIPE_MAX_SAMPLERS <= 16); + key.bits.target = sp_texture->base.target; key.bits.is_pot = sp_texture->pot; key.bits.processor = processor; + key.bits.unit = unit; key.bits.pad = 0; if (sampler->current && @@ -174,7 +188,8 @@ softpipe_reset_sampler_varients(struct softpipe_context *softpipe) for (i = 0; i <= softpipe->vs->max_sampler; i++) { if (softpipe->sampler[i]) { softpipe->tgsi.vert_samplers_list[i] = - get_sampler_varient( sp_sampler(softpipe->sampler[i]), + get_sampler_varient( i, + sp_sampler(softpipe->sampler[i]), softpipe->texture[i], TGSI_PROCESSOR_VERTEX ); @@ -187,7 +202,8 @@ softpipe_reset_sampler_varients(struct softpipe_context *softpipe) for (i = 0; i <= softpipe->fs->info.file_max[TGSI_FILE_SAMPLER]; i++) { if (softpipe->sampler[i]) { softpipe->tgsi.frag_samplers_list[i] = - get_sampler_varient( sp_sampler(softpipe->sampler[i]), + get_sampler_varient( i, + sp_sampler(softpipe->sampler[i]), softpipe->texture[i], TGSI_PROCESSOR_FRAGMENT ); diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index f371003708..8f3dc12d0f 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1545,6 +1545,9 @@ static filter_func get_img_filter( const union sp_sampler_key key, } +/** + * Bind the given texture object and texture cache to the sampler varient. + */ void sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp, struct softpipe_tile_cache *tex_cache, diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index 26f80eb88a..f6cd57ec0a 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -62,7 +62,8 @@ union sp_sampler_key { unsigned target:3; unsigned is_pot:1; unsigned processor:2; - unsigned pad:26; + unsigned unit:4; + unsigned pad:22; } bits; unsigned value; }; -- cgit v1.2.3 From 46fbc872881081ffcf0b526f8c4a909fd915ad78 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 21 Aug 2009 13:45:16 -0600 Subject: softpipe: remove unneeded const qualifier --- src/gallium/drivers/softpipe/sp_state_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 3a45321923..a055d6295f 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -128,7 +128,7 @@ softpipe_bind_vs_state(struct pipe_context *pipe, void *vs) { struct softpipe_context *softpipe = softpipe_context(pipe); - softpipe->vs = (const struct sp_vertex_shader *)vs; + softpipe->vs = (struct sp_vertex_shader *)vs; draw_bind_vertex_shader(softpipe->draw, (softpipe->vs ? softpipe->vs->draw_data : NULL)); -- cgit v1.2.3 From 4256c5829f8c23f8bd5c7c29491210f0f7813bf9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 21 Aug 2009 13:47:50 -0600 Subject: softpipe: remove unused #includes, white-space clean-up --- src/gallium/drivers/softpipe/sp_state_fs.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index a055d6295f..256faa94b8 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -31,8 +31,6 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" -#include "pipe/internal/p_winsys_screen.h" -#include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "draw/draw_vs.h" #include "tgsi/tgsi_dump.h" @@ -128,7 +126,7 @@ softpipe_bind_vs_state(struct pipe_context *pipe, void *vs) { struct softpipe_context *softpipe = softpipe_context(pipe); - softpipe->vs = (struct sp_vertex_shader *)vs; + softpipe->vs = (struct sp_vertex_shader *) vs; draw_bind_vertex_shader(softpipe->draw, (softpipe->vs ? softpipe->vs->draw_data : NULL)); @@ -142,8 +140,7 @@ softpipe_delete_vs_state(struct pipe_context *pipe, void *vs) { struct softpipe_context *softpipe = softpipe_context(pipe); - struct sp_vertex_shader *state = - (struct sp_vertex_shader *)vs; + struct sp_vertex_shader *state = (struct sp_vertex_shader *) vs; draw_delete_vertex_shader(softpipe->draw, state->draw_data); FREE( state ); -- cgit v1.2.3 From 3adc8c3779895c483ba8a1004939e7dd7d76fa9a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 21 Aug 2009 14:01:58 -0600 Subject: softpipe: minor code refactoring to remove softpipe/tile cache dependencies The tile cache code now has no hard dependencies on softpipe. --- src/gallium/drivers/softpipe/sp_state_derived.c | 13 ++++++++++++- src/gallium/drivers/softpipe/sp_tile_cache.c | 26 ++++++++++++------------- src/gallium/drivers/softpipe/sp_tile_cache.h | 1 - 3 files changed, 24 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 5310928332..202a2bc94c 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -34,6 +34,8 @@ #include "sp_context.h" #include "sp_screen.h" #include "sp_state.h" +#include "sp_texture.h" +#include "sp_tile_cache.h" /** @@ -201,10 +203,19 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) softpipe_reset_sampler_varients( softpipe ); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - sp_tile_cache_validate_texture( softpipe->tex_cache[i] ); + struct softpipe_tile_cache *tc = softpipe->tex_cache[i]; + if (tc->texture) { + struct softpipe_texture *spt = softpipe_texture(tc->texture); + if (spt->timestamp != tc->timestamp) { + sp_tile_cache_validate_texture( tc ); + _debug_printf("INV %d %d\n", tc->timestamp, spt->timestamp); + tc->timestamp = spt->timestamp; + } + } } } + /* Hopefully this will remain quite simple, otherwise need to pull in * something like the state tracker mechanism. */ diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 77d02fa3e7..e075ab6290 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -35,9 +35,6 @@ #include "pipe/p_inlines.h" #include "util/u_memory.h" #include "util/u_tile.h" -#include "sp_context.h" -#include "sp_surface.h" -#include "sp_texture.h" #include "sp_tile_cache.h" @@ -200,24 +197,25 @@ sp_tile_cache_unmap_transfers(struct softpipe_tile_cache *tc) } } + +/** + * Invalidate all cached tiles for the cached texture. + * Should be called when the texture is modified. + */ void sp_tile_cache_validate_texture(struct softpipe_tile_cache *tc) { - if (tc->texture) { - struct softpipe_texture *spt = softpipe_texture(tc->texture); - if (spt->timestamp != tc->timestamp) { - /* texture was modified, invalidate all cached tiles */ - uint i; - _debug_printf("INV %d %d\n", tc->timestamp, spt->timestamp); - for (i = 0; i < NUM_ENTRIES; i++) { - tc->entries[i].addr.bits.invalid = 1; - } + uint i; - tc->timestamp = spt->timestamp; - } + assert(tc); + assert(tc->texture); + + for (i = 0; i < NUM_ENTRIES; i++) { + tc->entries[i].addr.bits.invalid = 1; } } + /** * Specify the texture to cache. */ diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index ac2aae5875..1596cd0ae7 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -34,7 +34,6 @@ #include "pipe/p_compiler.h" -struct softpipe_context; struct softpipe_tile_cache; -- cgit v1.2.3 From d204659c8c725c02212ad4a49275c7447f2d02a6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 21 Aug 2009 14:04:47 -0600 Subject: softpipe: remove tex sample dependencies on softpipe The texture sampling code doesn't really have any dependencies on the rest of softpipe, just the tile cache. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 8f3dc12d0f..a9efb82491 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -34,17 +34,14 @@ * Keith Whitwell */ -#include "sp_context.h" -#include "sp_quad.h" -#include "sp_surface.h" -#include "sp_texture.h" -#include "sp_tex_sample.h" -#include "sp_tile_cache.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "sp_quad.h" /* only for #define QUAD_* tokens */ +#include "sp_tex_sample.h" +#include "sp_tile_cache.h" -- cgit v1.2.3 From 0f24886f922df3e00094a53b5b37b1588ea84bc0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 21 Aug 2009 14:07:37 -0600 Subject: softpipe: remove duplicate #include, move another --- src/gallium/drivers/softpipe/sp_state_sampler.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 53210812f4..afc6e1d2eb 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -31,15 +31,14 @@ #include "util/u_memory.h" +#include "draw/draw_context.h" #include "draw/draw_context.h" -#include "sp_context.h" #include "sp_context.h" #include "sp_state.h" #include "sp_texture.h" #include "sp_tile_cache.h" #include "sp_tex_sample.h" -#include "draw/draw_context.h" struct sp_sampler { -- cgit v1.2.3 From 47800c572f199e7857e02e0f999b410c727a275d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 23 Aug 2009 11:13:20 +0100 Subject: softpipe: add missing header --- src/gallium/drivers/softpipe/sp_context.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 48ec540ebf..3c465c95a5 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -42,6 +42,7 @@ #include "sp_state.h" #include "sp_surface.h" #include "sp_tile_cache.h" +#include "sp_tex_tile_cache.h" #include "sp_texture.h" #include "sp_winsys.h" #include "sp_query.h" -- cgit v1.2.3 From 4fe0fc3eba1f79beda890a5016359d549bab6ad4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 23 Aug 2009 11:22:41 +0100 Subject: softpipe: remove old prim_setup draw stage Everything now goes through the draw_vbuf handler, the same as regular drivers. --- src/gallium/drivers/softpipe/Makefile | 1 - src/gallium/drivers/softpipe/SConscript | 1 - src/gallium/drivers/softpipe/sp_context.c | 26 ++-- src/gallium/drivers/softpipe/sp_context.h | 5 +- src/gallium/drivers/softpipe/sp_prim_setup.c | 190 ------------------------ src/gallium/drivers/softpipe/sp_prim_setup.h | 85 ----------- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 107 ++++--------- src/gallium/drivers/softpipe/sp_prim_vbuf.h | 4 +- src/gallium/drivers/softpipe/sp_setup.c | 1 - src/gallium/drivers/softpipe/sp_state_derived.c | 25 ++-- 10 files changed, 59 insertions(+), 386 deletions(-) delete mode 100644 src/gallium/drivers/softpipe/sp_prim_setup.c delete mode 100644 src/gallium/drivers/softpipe/sp_prim_setup.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 3da9be6957..6ab3753762 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -11,7 +11,6 @@ C_SOURCES = \ sp_query.c \ sp_context.c \ sp_draw_arrays.c \ - sp_prim_setup.c \ sp_prim_vbuf.c \ sp_quad_pipe.c \ sp_quad_stipple.c \ diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index 30c099813e..153fe44546 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -12,7 +12,6 @@ softpipe = env.ConvenienceLibrary( 'sp_context.c', 'sp_draw_arrays.c', 'sp_flush.c', - 'sp_prim_setup.c', 'sp_prim_vbuf.c', 'sp_setup.c', 'sp_quad_alpha_test.c', diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 3c465c95a5..6b75ee6002 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -31,13 +31,13 @@ */ #include "draw/draw_context.h" +#include "draw/draw_vbuf.h" #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "sp_clear.h" #include "sp_context.h" #include "sp_flush.h" -#include "sp_prim_setup.h" #include "sp_prim_vbuf.h" #include "sp_state.h" #include "sp_surface.h" @@ -242,21 +242,21 @@ softpipe_create( struct pipe_screen *screen ) (struct tgsi_sampler **) softpipe->tgsi.vert_samplers_list); - softpipe->setup = sp_draw_render_stage(softpipe); - if (!softpipe->setup) - goto fail; - if (debug_get_bool_option( "SP_NO_RAST", FALSE )) softpipe->no_rast = TRUE; - if (debug_get_bool_option( "SP_NO_VBUF", FALSE )) { - /* Deprecated path -- vbuf is the intended interface to the draw module: - */ - draw_set_rasterize_stage(softpipe->draw, softpipe->setup); - } - else { - sp_init_vbuf(softpipe); - } + softpipe->vbuf_backend = sp_create_vbuf_backend(softpipe); + if (!softpipe->vbuf_backend) + goto fail; + + softpipe->vbuf = draw_vbuf_stage(softpipe->draw, softpipe->vbuf_backend); + if (!softpipe->vbuf) + goto fail; + + draw_set_rasterize_stage(softpipe->draw, softpipe->vbuf); + draw_set_render(softpipe->draw, softpipe->vbuf_backend); + + /* plug in AA line/point stages */ draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index df45d2249f..43a195c8ef 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -129,9 +129,10 @@ struct softpipe_context { /** The primitive drawing context */ struct draw_context *draw; - struct draw_stage *setup; + + /** Draw module backend */ + struct vbuf_render *vbuf_backend; struct draw_stage *vbuf; - struct softpipe_vbuf_render *vbuf_render; boolean dirty_render_cache; diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c deleted file mode 100644 index 038ff04d4f..0000000000 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ /dev/null @@ -1,190 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \brief A draw stage that drives our triangle setup routines from - * within the draw pipeline. One of two ways to drive setup, the - * other being in sp_prim_vbuf.c. - * - * \author Keith Whitwell - * \author Brian Paul - */ - - -#include "sp_context.h" -#include "sp_setup.h" -#include "sp_state.h" -#include "sp_prim_setup.h" -#include "draw/draw_pipe.h" -#include "draw/draw_vertex.h" -#include "util/u_memory.h" - -/** - * Triangle setup info (derived from draw_stage). - * Also used for line drawing (taking some liberties). - */ -struct setup_stage { - struct draw_stage stage; /**< This must be first (base class) */ - - struct setup_context *setup; -}; - - - -/** - * Basically a cast wrapper. - */ -static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) -{ - return (struct setup_stage *)stage; -} - - -typedef const float (*cptrf4)[4]; - -static void -do_tri(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - setup_tri( setup->setup, - (cptrf4)prim->v[0]->data, - (cptrf4)prim->v[1]->data, - (cptrf4)prim->v[2]->data ); -} - -static void -do_line(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - setup_line( setup->setup, - (cptrf4)prim->v[0]->data, - (cptrf4)prim->v[1]->data ); -} - -static void -do_point(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - setup_point( setup->setup, - (cptrf4)prim->v[0]->data ); -} - - - - -static void setup_begin( struct draw_stage *stage ) -{ - struct setup_stage *setup = setup_stage(stage); - - setup_prepare( setup->setup ); - - stage->point = do_point; - stage->line = do_line; - stage->tri = do_tri; -} - - -static void setup_first_point( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->point( stage, header ); -} - -static void setup_first_line( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->line( stage, header ); -} - - -static void setup_first_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->tri( stage, header ); -} - - - -static void setup_flush( struct draw_stage *stage, - unsigned flags ) -{ - stage->point = setup_first_point; - stage->line = setup_first_line; - stage->tri = setup_first_tri; -} - - -static void reset_stipple_counter( struct draw_stage *stage ) -{ -} - - -static void render_destroy( struct draw_stage *stage ) -{ - struct setup_stage *ssetup = setup_stage(stage); - setup_destroy_context(ssetup->setup); - FREE( stage ); -} - - -/** - * Create a new primitive setup/render stage. - */ -struct draw_stage *sp_draw_render_stage( struct softpipe_context *softpipe ) -{ - struct setup_stage *sstage = CALLOC_STRUCT(setup_stage); - - sstage->setup = setup_create_context(softpipe); - sstage->stage.draw = softpipe->draw; - sstage->stage.point = setup_first_point; - sstage->stage.line = setup_first_line; - sstage->stage.tri = setup_first_tri; - sstage->stage.flush = setup_flush; - sstage->stage.reset_stipple_counter = reset_stipple_counter; - sstage->stage.destroy = render_destroy; - - return (struct draw_stage *)sstage; -} - -struct setup_context * -sp_draw_setup_context( struct draw_stage *stage ) -{ - struct setup_stage *ssetup = setup_stage(stage); - return ssetup->setup; -} - -void -sp_draw_flush( struct draw_stage *stage ) -{ - stage->flush( stage, 0 ); -} diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.h b/src/gallium/drivers/softpipe/sp_prim_setup.h deleted file mode 100644 index 49bdd98ed8..0000000000 --- a/src/gallium/drivers/softpipe/sp_prim_setup.h +++ /dev/null @@ -1,85 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef SP_PRIM_SETUP_H -#define SP_PRIM_SETUP_H - - -/** - * vbuf is a special stage to gather the stream of triangles, lines, points - * together and reconstruct vertex buffers for hardware upload. - * - * First attempt, work in progress. - * - * TODO: - * - separate out vertex buffer building and primitive emit, ie >1 draw per vb. - * - tell vbuf stage how to build hw vertices directly - * - pass vbuf stage a buffer pointer for direct emit to agp/vram. - * - * - * - * Vertices are just an array of floats, with all the attributes - * packed. We currently assume a layout like: - * - * attr[0][0..3] - window position - * attr[1..n][0..3] - remaining attributes. - * - * Attributes are assumed to be 4 floats wide but are packed so that - * all the enabled attributes run contiguously. - */ - - -struct draw_stage; -struct softpipe_context; - - -typedef void (*vbuf_draw_func)( struct pipe_context *pipe, - unsigned prim, - const ushort *elements, - unsigned nr_elements, - const void *vertex_buffer, - unsigned nr_vertices ); - - -extern struct draw_stage * -sp_draw_render_stage( struct softpipe_context *softpipe ); - -extern struct setup_context * -sp_draw_setup_context( struct draw_stage * ); - -extern void -sp_draw_flush( struct draw_stage * ); - - -extern struct draw_stage * -sp_draw_vbuf_stage( struct draw_context *draw_context, - struct pipe_context *pipe, - vbuf_draw_func draw ); - - -#endif /* SP_PRIM_SETUP_H */ diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 76524a8d41..e603c20fc4 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -37,10 +37,9 @@ #include "sp_context.h" +#include "sp_setup.h" #include "sp_state.h" #include "sp_prim_vbuf.h" -#include "sp_prim_setup.h" -#include "sp_setup.h" #include "draw/draw_context.h" #include "draw/draw_vbuf.h" #include "util/u_memory.h" @@ -59,6 +58,8 @@ struct softpipe_vbuf_render { struct vbuf_render base; struct softpipe_context *softpipe; + struct setup_context *setup; + uint prim; uint vertex_size; uint nr_vertices; @@ -75,6 +76,11 @@ softpipe_vbuf_render(struct vbuf_render *vbr) } + + + + + static const struct vertex_info * sp_vbuf_get_vertex_info(struct vbuf_render *vbr) { @@ -105,36 +111,6 @@ sp_vbuf_allocate_vertices(struct vbuf_render *vbr, static void sp_vbuf_release_vertices(struct vbuf_render *vbr) { -#if 0 - { - struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); - const struct vertex_info *info = - softpipe_get_vbuf_vertex_info(cvbr->softpipe); - const float *vtx = (const float *) cvbr->vertex_buffer; - uint i, j; - debug_printf("%s (vtx_size = %u, vtx_used = %u)\n", - __FUNCTION__, cvbr->vertex_size, cvbr->nr_vertices); - for (i = 0; i < cvbr->nr_vertices; i++) { - for (j = 0; j < info->num_attribs; j++) { - uint k; - switch (info->attrib[j].emit) { - case EMIT_4F: k = 4; break; - case EMIT_3F: k = 3; break; - case EMIT_2F: k = 2; break; - case EMIT_1F: k = 1; break; - default: assert(0); - } - debug_printf("Vert %u attr %u: ", i, j); - while (k-- > 0) { - debug_printf("%g ", vtx[0]); - vtx++; - } - debug_printf("\n"); - } - } - } -#endif - /* keep the old allocation for next time */ } @@ -160,11 +136,7 @@ static boolean sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); - - /* XXX: break this dependency - make setup_context live under - * softpipe, rename the old "setup" draw stage to something else. - */ - struct setup_context *setup_ctx = sp_draw_setup_context(cvbr->softpipe->setup); + struct setup_context *setup_ctx = cvbr->setup; setup_prepare( setup_ctx ); @@ -193,14 +165,9 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) struct softpipe_context *softpipe = cvbr->softpipe; const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); const void *vertex_buffer = cvbr->vertex_buffer; + struct setup_context *setup_ctx = cvbr->setup; unsigned i; - /* XXX: break this dependency - make setup_context live under - * softpipe, rename the old "setup" draw stage to something else. - */ - struct draw_stage *setup = softpipe->setup; - struct setup_context *setup_ctx = sp_draw_setup_context(setup); - switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { @@ -367,11 +334,6 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) default: assert(0); } - - /* XXX: why are we calling this??? If we had to call something, it - * would be a function in sp_setup.c: - */ - sp_draw_flush( setup ); } @@ -384,17 +346,12 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); struct softpipe_context *softpipe = cvbr->softpipe; + struct setup_context *setup_ctx = cvbr->setup; const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); const void *vertex_buffer = (void *) get_vert(cvbr->vertex_buffer, start, stride); unsigned i; - /* XXX: break this dependency - make setup_context live under - * softpipe, rename the old "setup" draw stage to something else. - */ - struct draw_stage *setup = softpipe->setup; - struct setup_context *setup_ctx = sp_draw_setup_context(setup); - switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { @@ -568,40 +525,38 @@ static void sp_vbuf_destroy(struct vbuf_render *vbr) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); - cvbr->softpipe->vbuf_render = NULL; + setup_destroy_context(cvbr->setup); FREE(cvbr); } /** - * Initialize the post-transform vertex buffer information for the given - * context. + * Create the post-transform vertex handler for the given context. */ -void -sp_init_vbuf(struct softpipe_context *sp) +struct vbuf_render * +sp_create_vbuf_backend(struct softpipe_context *sp) { - assert(sp->draw); + struct softpipe_vbuf_render *cvbr = CALLOC_STRUCT(softpipe_vbuf_render); - sp->vbuf_render = CALLOC_STRUCT(softpipe_vbuf_render); + assert(sp->draw); - sp->vbuf_render->base.max_indices = SP_MAX_VBUF_INDEXES; - sp->vbuf_render->base.max_vertex_buffer_bytes = SP_MAX_VBUF_SIZE; - sp->vbuf_render->base.get_vertex_info = sp_vbuf_get_vertex_info; - sp->vbuf_render->base.allocate_vertices = sp_vbuf_allocate_vertices; - sp->vbuf_render->base.map_vertices = sp_vbuf_map_vertices; - sp->vbuf_render->base.unmap_vertices = sp_vbuf_unmap_vertices; - sp->vbuf_render->base.set_primitive = sp_vbuf_set_primitive; - sp->vbuf_render->base.draw = sp_vbuf_draw; - sp->vbuf_render->base.draw_arrays = sp_vbuf_draw_arrays; - sp->vbuf_render->base.release_vertices = sp_vbuf_release_vertices; - sp->vbuf_render->base.destroy = sp_vbuf_destroy; + cvbr->base.max_indices = SP_MAX_VBUF_INDEXES; + cvbr->base.max_vertex_buffer_bytes = SP_MAX_VBUF_SIZE; - sp->vbuf_render->softpipe = sp; + cvbr->base.get_vertex_info = sp_vbuf_get_vertex_info; + cvbr->base.allocate_vertices = sp_vbuf_allocate_vertices; + cvbr->base.map_vertices = sp_vbuf_map_vertices; + cvbr->base.unmap_vertices = sp_vbuf_unmap_vertices; + cvbr->base.set_primitive = sp_vbuf_set_primitive; + cvbr->base.draw = sp_vbuf_draw; + cvbr->base.draw_arrays = sp_vbuf_draw_arrays; + cvbr->base.release_vertices = sp_vbuf_release_vertices; + cvbr->base.destroy = sp_vbuf_destroy; - sp->vbuf = draw_vbuf_stage(sp->draw, &sp->vbuf_render->base); + cvbr->softpipe = sp; - draw_set_rasterize_stage(sp->draw, sp->vbuf); + cvbr->setup = setup_create_context(cvbr->softpipe); - draw_set_render(sp->draw, &sp->vbuf_render->base); + return &cvbr->base; } diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.h b/src/gallium/drivers/softpipe/sp_prim_vbuf.h index 1de9cc2a89..ad01cc2f28 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.h +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.h @@ -31,8 +31,8 @@ struct softpipe_context; -extern void -sp_init_vbuf(struct softpipe_context *softpipe); +extern struct vbuf_render * +sp_create_vbuf_backend(struct softpipe_context *softpipe); #endif /* SP_VBUF_H */ diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index a132911c99..bc8366b0e6 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -33,7 +33,6 @@ */ #include "sp_context.h" -#include "sp_prim_setup.h" #include "sp_quad.h" #include "sp_quad_pipe.h" #include "sp_setup.h" diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 04fc125e3d..2a40589e84 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -68,24 +68,19 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) const struct sp_fragment_shader *spfs = softpipe->fs; const enum interp_mode colorInterp = softpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; + struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; + const uint num = draw_num_vs_outputs(softpipe->draw); uint i; - if (softpipe->vbuf) { - /* if using the post-transform vertex buffer, tell draw_vbuf to - * simply emit the whole post-xform vertex as-is: - */ - struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; - const uint num = draw_num_vs_outputs(softpipe->draw); - uint i; - - /* No longer any need to try and emit draw vertex_header info. - */ - vinfo_vbuf->num_attribs = 0; - for (i = 0; i < num; i++) { - draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); - } - draw_compute_vertex_size(vinfo_vbuf); + /* Tell draw_vbuf to simply emit the whole post-xform vertex + * as-is. No longer any need to try and emit draw vertex_header + * info. + */ + vinfo_vbuf->num_attribs = 0; + for (i = 0; i < num; i++) { + draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); } + draw_compute_vertex_size(vinfo_vbuf); /* * Loop over fragment shader inputs, searching for the matching output -- cgit v1.2.3 From 153e474d22d1b440bb6bd7b04dabf244d7455582 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 23 Aug 2009 13:38:10 +0100 Subject: softpipe: lift tex_address construction up to img_filter For fastpaths at least, can avoid recalculating this sometimes. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 60 ++++++++++++++++------------ 1 file changed, 35 insertions(+), 25 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index d233924565..f2d4f7eb8c 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -605,14 +605,14 @@ compute_lambda_vert(const struct sp_sampler_varient *samp, */ static INLINE void get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y, + union tex_tile_address addr, + unsigned x, unsigned y, const float *out[4]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct softpipe_tex_cached_tile *tile - = sp_get_cached_tile_tex(samp->cache, - tex_tile_address(x, y, 0, face, level)); + = sp_get_cached_tile_tex(samp->cache, addr); y %= TILE_SIZE; x %= TILE_SIZE; @@ -625,36 +625,33 @@ get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler, static INLINE const float * get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y) + union tex_tile_address addr, int x, int y) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct softpipe_tex_cached_tile *tile; - const struct softpipe_tex_cached_tile *tile - = sp_get_cached_tile_tex(samp->cache, - tex_tile_address(x, y, 0, face, level)); - + addr.bits.x = x / TILE_SIZE; + addr.bits.y = y / TILE_SIZE; y %= TILE_SIZE; x %= TILE_SIZE; + tile = sp_get_cached_tile_tex(samp->cache, addr); + return &tile->data.color[y][x][0]; } static INLINE void get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, + union tex_tile_address addr, int x0, int y0, int x1, int y1, const float *out[4]) { - unsigned i; - - for (i = 0; i < 4; i++) { - unsigned tx = (i & 1) ? x1 : x0; - unsigned ty = (i >> 1) ? y1 : y0; - - out[i] = get_texel_2d_ptr( tgsi_sampler, face, level, tx, ty ); - } + out[0] = get_texel_2d_ptr( tgsi_sampler, addr, x0, y0 ); + out[1] = get_texel_2d_ptr( tgsi_sampler, addr, x1, y0 ); + out[2] = get_texel_2d_ptr( tgsi_sampler, addr, x0, y1 ); + out[3] = get_texel_2d_ptr( tgsi_sampler, addr, x1, y1 ); } static INLINE void @@ -714,7 +711,12 @@ img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, unsigned ypot = 1 << (samp->ypot - level); unsigned xmax = (xpot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, xpot) - 1; */ unsigned ymax = (ypot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, ypot) - 1; */ - + union tex_tile_address addr; + + addr.value = 0; + addr.bits.level = samp->level; + + for (j = 0; j < QUAD_SIZE; j++) { int c; @@ -730,21 +732,21 @@ img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, int x0 = uflr & (xpot - 1); int y0 = vflr & (ypot - 1); - const float *tx[4]; + const float *tx[4]; - /* Can we fetch all four at once: */ if (x0 < xmax && y0 < ymax) { - get_texel_quad_2d(tgsi_sampler, 0, level, x0, y0, tx); + addr.bits.x = x0 / TILE_SIZE; + addr.bits.y = y0 / TILE_SIZE; + get_texel_quad_2d(tgsi_sampler, addr, x0, y0, tx); } else { unsigned x1 = (x0 + 1) & (xpot - 1); unsigned y1 = (y0 + 1) & (ypot - 1); - get_texel_quad_2d_mt(tgsi_sampler, 0, level, - x0, y0, x1, y1, tx); + get_texel_quad_2d_mt(tgsi_sampler, addr, x0, y0, x1, y1, tx); } @@ -771,6 +773,10 @@ img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, unsigned level = samp->level; unsigned xpot = 1 << (samp->xpot - level); unsigned ypot = 1 << (samp->ypot - level); + union tex_tile_address addr; + + addr.value = 0; + addr.bits.level = samp->level; for (j = 0; j < QUAD_SIZE; j++) { int c; @@ -784,7 +790,7 @@ img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, int x0 = uflr & (xpot - 1); int y0 = vflr & (ypot - 1); - const float *out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0); + const float *out = get_texel_2d_ptr(tgsi_sampler, addr, x0, y0); for (c = 0; c < 4; c++) { rgba[c][j] = out[c]; @@ -806,6 +812,10 @@ img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, unsigned level = samp->level; unsigned xpot = 1 << (samp->xpot - level); unsigned ypot = 1 << (samp->ypot - level); + union tex_tile_address addr; + + addr.value = 0; + addr.bits.level = samp->level; for (j = 0; j < QUAD_SIZE; j++) { int c; @@ -828,7 +838,7 @@ img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, else if (y0 > ypot - 1) y0 = ypot - 1; - out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0); + out = get_texel_2d_ptr(tgsi_sampler, addr, x0, y0); for (c = 0; c < 4; c++) { rgba[c][j] = out[c]; -- cgit v1.2.3 From 81601d85ef6b82297b046d5aab1b70e75168c2fa Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 23 Aug 2009 19:14:09 +0100 Subject: softpipe: make the various get_texel routines more similar Remove arguments, return const float * by default. Add specialized 3d versions and remove 3d texture support from the others. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 268 ++++++++++++++++++--------- 1 file changed, 176 insertions(+), 92 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index f2d4f7eb8c..8283010740 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -592,30 +592,68 @@ compute_lambda_vert(const struct sp_sampler_varient *samp, /** * Get a texel from a texture, using the texture tile cache. * - * \param face the cube face in 0..5 - * \param level the mipmap level + * \param addr the template tex address containing cube, z, face info. * \param x the x coord of texel within 2D image * \param y the y coord of texel within 2D image - * \param z which slice of a 3D texture * \param rgba the quad to put the texel/color into - * \param j which element of the rgba quad to write to * * XXX maybe move this into sp_tex_tile_cache.c and merge with the * sp_get_cached_tile_tex() function. Also, get 4 texels instead of 1... */ -static INLINE void -get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler, - union tex_tile_address addr, - unsigned x, unsigned y, - const float *out[4]) + + + + +static INLINE const float * +get_texel_2d_no_border(const struct sp_sampler_varient *samp, + union tex_tile_address addr, int x, int y) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct softpipe_tex_cached_tile *tile; + + addr.bits.x = x / TILE_SIZE; + addr.bits.y = y / TILE_SIZE; + y %= TILE_SIZE; + x %= TILE_SIZE; + + tile = sp_get_cached_tile_tex(samp->cache, addr); + + return &tile->data.color[y][x][0]; +} + + +static INLINE const float * +get_texel_2d(const struct sp_sampler_varient *samp, + union tex_tile_address addr, int x, int y) +{ + const struct pipe_texture *texture = samp->texture; + unsigned level = addr.bits.level; + + if (x < 0 || x >= (int) texture->width[level] || + y < 0 || y >= (int) texture->height[level]) { + return samp->sampler->border_color; + } + else { + return get_texel_2d_no_border( samp, addr, x, y ); + } +} - const struct softpipe_tex_cached_tile *tile - = sp_get_cached_tile_tex(samp->cache, addr); +/* Gather a quad of adjacent texels within a tile: + */ +static INLINE void +get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_varient *samp, + union tex_tile_address addr, + unsigned x, unsigned y, + const float *out[4]) +{ + const struct softpipe_tex_cached_tile *tile; + + addr.bits.x = x / TILE_SIZE; + addr.bits.y = y / TILE_SIZE; y %= TILE_SIZE; x %= TILE_SIZE; + + tile = sp_get_cached_tile_tex(samp->cache, addr); out[0] = &tile->data.color[y ][x ][0]; out[1] = &tile->data.color[y ][x+1][0]; @@ -623,15 +661,50 @@ get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler, out[3] = &tile->data.color[y+1][x+1][0]; } + +/* Gather a quad of potentially non-adjacent texels: + */ +static INLINE void +get_texel_quad_2d_no_border(const struct sp_sampler_varient *samp, + union tex_tile_address addr, + int x0, int y0, + int x1, int y1, + const float *out[4]) +{ + out[0] = get_texel_2d_no_border( samp, addr, x0, y0 ); + out[1] = get_texel_2d_no_border( samp, addr, x1, y0 ); + out[2] = get_texel_2d_no_border( samp, addr, x0, y1 ); + out[3] = get_texel_2d_no_border( samp, addr, x1, y1 ); +} + +/* Can involve a lot of unnecessary checks for border color: + */ +static INLINE void +get_texel_quad_2d(const struct sp_sampler_varient *samp, + union tex_tile_address addr, + int x0, int y0, + int x1, int y1, + const float *out[4]) +{ + out[0] = get_texel_2d( samp, addr, x0, y0 ); + out[1] = get_texel_2d( samp, addr, x1, y0 ); + out[3] = get_texel_2d( samp, addr, x1, y1 ); + out[2] = get_texel_2d( samp, addr, x0, y1 ); +} + + + +/* 3d varients: + */ static INLINE const float * -get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler, - union tex_tile_address addr, int x, int y) +get_texel_3d_no_border(const struct sp_sampler_varient *samp, + union tex_tile_address addr, int x, int y, int z) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct softpipe_tex_cached_tile *tile; addr.bits.x = x / TILE_SIZE; addr.bits.y = y / TILE_SIZE; + addr.bits.z = z; y %= TILE_SIZE; x %= TILE_SIZE; @@ -641,61 +714,26 @@ get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler, } -static INLINE void -get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler, - union tex_tile_address addr, - int x0, int y0, - int x1, int y1, - const float *out[4]) -{ - out[0] = get_texel_2d_ptr( tgsi_sampler, addr, x0, y0 ); - out[1] = get_texel_2d_ptr( tgsi_sampler, addr, x1, y0 ); - out[2] = get_texel_2d_ptr( tgsi_sampler, addr, x0, y1 ); - out[3] = get_texel_2d_ptr( tgsi_sampler, addr, x1, y1 ); -} - -static INLINE void -get_texel(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y, int z, - float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) +static INLINE const float * +get_texel_3d(const struct sp_sampler_varient *samp, + union tex_tile_address addr, int x, int y, int z ) { - const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; + unsigned level = addr.bits.level; if (x < 0 || x >= (int) texture->width[level] || y < 0 || y >= (int) texture->height[level] || z < 0 || z >= (int) texture->depth[level]) { - rgba[0][j] = sampler->border_color[0]; - rgba[1][j] = sampler->border_color[1]; - rgba[2][j] = sampler->border_color[2]; - rgba[3][j] = sampler->border_color[3]; + return samp->sampler->border_color; } else { - const unsigned tx = x % TILE_SIZE; - const unsigned ty = y % TILE_SIZE; - const struct softpipe_tex_cached_tile *tile; - - tile = sp_get_cached_tile_tex(samp->cache, - tex_tile_address(x, y, z, face, level)); - - rgba[0][j] = tile->data.color[ty][tx][0]; - rgba[1][j] = tile->data.color[ty][tx][1]; - rgba[2][j] = tile->data.color[ty][tx][2]; - rgba[3][j] = tile->data.color[ty][tx][3]; - if (0) - { - debug_printf("Get texel %f %f %f %f from %s\n", - rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j], - pf_name(texture->format)); - } + return get_texel_3d_no_border( samp, addr, x, y, z ); } } - - - +/* Some image-filter fastpaths: + */ static INLINE void img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], @@ -738,15 +776,13 @@ img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, */ if (x0 < xmax && y0 < ymax) { - addr.bits.x = x0 / TILE_SIZE; - addr.bits.y = y0 / TILE_SIZE; - get_texel_quad_2d(tgsi_sampler, addr, x0, y0, tx); + get_texel_quad_2d_no_border_single_tile(samp, addr, x0, y0, tx); } else { unsigned x1 = (x0 + 1) & (xpot - 1); unsigned y1 = (y0 + 1) & (ypot - 1); - get_texel_quad_2d_mt(tgsi_sampler, addr, x0, y0, x1, y1, tx); + get_texel_quad_2d_no_border(samp, addr, x0, y0, x1, y1, tx); } @@ -790,7 +826,7 @@ img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, int x0 = uflr & (xpot - 1); int y0 = vflr & (ypot - 1); - const float *out = get_texel_2d_ptr(tgsi_sampler, addr, x0, y0); + const float *out = get_texel_2d_no_border(samp, addr, x0, y0); for (c = 0; c < 4; c++) { rgba[c][j] = out[c]; @@ -838,7 +874,7 @@ img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, else if (y0 > ypot - 1) y0 = ypot - 1; - out = get_texel_2d_ptr(tgsi_sampler, addr, x0, y0); + out = get_texel_2d_no_border(samp, addr, x0, y0); for (c = 0; c < 4; c++) { rgba[c][j] = out[c]; @@ -859,20 +895,37 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler, unsigned level0, j; int width; int x[4]; + union tex_tile_address addr; level0 = samp->level; width = texture->width[level0]; assert(width > 0); + addr.value = 0; + addr.bits.level = samp->level; + samp->nearest_texcoord_s(s, width, x); for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, 0, level0, x[j], 0, 0, rgba, j); + const float *out = get_texel_2d(samp, addr, x[j], 0); + int c; + for (c = 0; c < 4; c++) { + rgba[c][j] = out[c]; + } } } + + +static inline union tex_tile_address face( union tex_tile_address addr, + unsigned face ) +{ + addr.bits.face = face; + return addr; +} + static void img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], @@ -887,18 +940,27 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, unsigned level0, j; int width, height; int x[4], y[4]; + union tex_tile_address addr; + level0 = samp->level; width = texture->width[level0]; height = texture->height[level0]; assert(width > 0); + + addr.value = 0; + addr.bits.level = samp->level; samp->nearest_texcoord_s(s, width, x); samp->nearest_texcoord_t(t, height, y); for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j); + const float *out = get_texel_2d(samp, face(addr, faces[j]), x[j], y[j]); + int c; + for (c = 0; c < 4; c++) { + rgba[c][j] = out[c]; + } } } @@ -916,6 +978,7 @@ img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler, unsigned level0, j; int width, height, depth; int x[4], y[4], z[4]; + union tex_tile_address addr; level0 = samp->level; width = texture->width[level0]; @@ -930,8 +993,15 @@ img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler, samp->nearest_texcoord_t(t, height, y); samp->nearest_texcoord_p(p, depth, z); + addr.value = 0; + addr.bits.level = samp->level; + for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, 0, level0, x[j], y[j], z[j], rgba, j); + const float *out = get_texel_3d(samp, addr, x[j], y[j], z[j]); + int c; + for (c = 0; c < 4; c++) { + rgba[c][j] = out[c]; + } } } @@ -950,6 +1020,7 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler, int width; int x0[4], x1[4]; float xw[4]; /* weights */ + union tex_tile_address addr; level0 = samp->level; @@ -957,22 +1028,27 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler, assert(width > 0); + addr.value = 0; + addr.bits.level = samp->level; + samp->linear_texcoord_s(s, width, x0, x1, xw); for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4]; /* texels */ + const float *tx0 = get_texel_2d(samp, addr, x0[j], 0); + const float *tx1 = get_texel_2d(samp, addr, x1[j], 0); int c; - get_texel(tgsi_sampler, 0, level0, x0[j], 0, 0, tx, 0); - get_texel(tgsi_sampler, 0, level0, x1[j], 0, 0, tx, 1); /* interpolate R, G, B, A */ for (c = 0; c < 4; c++) { - rgba[c][j] = lerp(xw[j], tx[c][0], tx[c][1]); + rgba[c][j] = lerp(xw[j], tx0[c], tx1[c]); } } } + + + static void img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], @@ -988,6 +1064,7 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, int width, height; int x0[4], y0[4], x1[4], y1[4]; float xw[4], yw[4]; /* weights */ + union tex_tile_address addr; level0 = samp->level; @@ -996,22 +1073,25 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, assert(width > 0); + addr.value = 0; + addr.bits.level = samp->level; + samp->linear_texcoord_s(s, width, x0, x1, xw); samp->linear_texcoord_t(t, height, y0, y1, yw); for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4]; /* texels */ + union tex_tile_address addrj = face(addr, faces[j]); + const float *tx0 = get_texel_2d(samp, addrj, x0[j], y0[j]); + const float *tx1 = get_texel_2d(samp, addrj, x1[j], y0[j]); + const float *tx2 = get_texel_2d(samp, addrj, x0[j], y1[j]); + const float *tx3 = get_texel_2d(samp, addrj, x1[j], y1[j]); int c; - get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3); /* interpolate R, G, B, A */ for (c = 0; c < 4; c++) { rgba[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], - tx[c][2], tx[c][3]); + tx0[c], tx1[c], + tx2[c], tx3[c]); } } } @@ -1031,12 +1111,16 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler, int width, height, depth; int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4]; float xw[4], yw[4], zw[4]; /* interpolation weights */ + union tex_tile_address addr; level0 = samp->level; width = texture->width[level0]; height = texture->height[level0]; depth = texture->depth[level0]; + addr.value = 0; + addr.bits.level = level0; + assert(width > 0); assert(height > 0); assert(depth > 0); @@ -1046,25 +1130,25 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler, samp->linear_texcoord_p(p, depth, z0, z1, zw); for (j = 0; j < QUAD_SIZE; j++) { - float tx0[4][4], tx1[4][4]; int c; - - get_texel(tgsi_sampler, 0, level0, x0[j], y0[j], z0[j], tx0, 0); - get_texel(tgsi_sampler, 0, level0, x1[j], y0[j], z0[j], tx0, 1); - get_texel(tgsi_sampler, 0, level0, x0[j], y1[j], z0[j], tx0, 2); - get_texel(tgsi_sampler, 0, level0, x1[j], y1[j], z0[j], tx0, 3); - get_texel(tgsi_sampler, 0, level0, x0[j], y0[j], z1[j], tx1, 0); - get_texel(tgsi_sampler, 0, level0, x1[j], y0[j], z1[j], tx1, 1); - get_texel(tgsi_sampler, 0, level0, x0[j], y1[j], z1[j], tx1, 2); - get_texel(tgsi_sampler, 0, level0, x1[j], y1[j], z1[j], tx1, 3); + const float *tx00 = get_texel_3d(samp, addr, x0[j], y0[j], z0[j]); + const float *tx01 = get_texel_3d(samp, addr, x1[j], y0[j], z0[j]); + const float *tx02 = get_texel_3d(samp, addr, x0[j], y1[j], z0[j]); + const float *tx03 = get_texel_3d(samp, addr, x1[j], y1[j], z0[j]); + + const float *tx10 = get_texel_3d(samp, addr, x0[j], y0[j], z1[j]); + const float *tx11 = get_texel_3d(samp, addr, x1[j], y0[j], z1[j]); + const float *tx12 = get_texel_3d(samp, addr, x0[j], y1[j], z1[j]); + const float *tx13 = get_texel_3d(samp, addr, x1[j], y1[j], z1[j]); + /* interpolate R, G, B, A */ for (c = 0; c < 4; c++) { rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j], - tx0[c][0], tx0[c][1], - tx0[c][2], tx0[c][3], - tx1[c][0], tx1[c][1], - tx1[c][2], tx1[c][3]); + tx00[c], tx01[c], + tx02[c], tx03[c], + tx10[c], tx11[c], + tx12[c], tx13[c]); } } } -- cgit v1.2.3 From 60adc15ba5633190fc8a68e7c182f06dc7909df4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 23 Aug 2009 19:17:35 +0100 Subject: softpipe: separate out 2d and cube img filter functions --- src/gallium/drivers/softpipe/sp_tex_sample.c | 92 ++++++++++++++++++++++++++-- 1 file changed, 88 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 8283010740..3bc4599e04 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -917,8 +917,43 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler, } +static void +img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + unsigned level0, j; + int width, height; + int x[4], y[4]; + union tex_tile_address addr; + level0 = samp->level; + width = texture->width[level0]; + height = texture->height[level0]; + + assert(width > 0); + + addr.value = 0; + addr.bits.level = samp->level; + + samp->nearest_texcoord_s(s, width, x); + samp->nearest_texcoord_t(t, height, y); + + for (j = 0; j < QUAD_SIZE; j++) { + const float *out = get_texel_2d(samp, addr, x[j], y[j]); + int c; + for (c = 0; c < 4; c++) { + rgba[c][j] = out[c]; + } + } +} + static inline union tex_tile_address face( union tex_tile_address addr, unsigned face ) { @@ -927,7 +962,7 @@ static inline union tex_tile_address face( union tex_tile_address addr, } static void -img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, +img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], @@ -1047,10 +1082,54 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler, } +static void +img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); + const struct pipe_texture *texture = samp->texture; + unsigned level0, j; + int width, height; + int x0[4], y0[4], x1[4], y1[4]; + float xw[4], yw[4]; /* weights */ + union tex_tile_address addr; + + + level0 = samp->level; + width = texture->width[level0]; + height = texture->height[level0]; + + assert(width > 0); + + addr.value = 0; + addr.bits.level = samp->level; + + samp->linear_texcoord_s(s, width, x0, x1, xw); + samp->linear_texcoord_t(t, height, y0, y1, yw); + + for (j = 0; j < QUAD_SIZE; j++) { + const float *tx0 = get_texel_2d(samp, addr, x0[j], y0[j]); + const float *tx1 = get_texel_2d(samp, addr, x1[j], y0[j]); + const float *tx2 = get_texel_2d(samp, addr, x0[j], y1[j]); + const float *tx3 = get_texel_2d(samp, addr, x1[j], y1[j]); + int c; + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(xw[j], yw[j], + tx0[c], tx1[c], + tx2[c], tx3[c]); + } + } +} static void -img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, +img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], @@ -1615,14 +1694,19 @@ static filter_func get_img_filter( const union sp_sampler_key key, } } } - /* Fallthrough to default versions: + /* Otherwise use default versions: */ - case PIPE_TEXTURE_CUBE: if (filter == PIPE_TEX_FILTER_NEAREST) return img_filter_2d_nearest; else return img_filter_2d_linear; break; + case PIPE_TEXTURE_CUBE: + if (filter == PIPE_TEX_FILTER_NEAREST) + return img_filter_cube_nearest; + else + return img_filter_cube_linear; + break; case PIPE_TEXTURE_3D: if (filter == PIPE_TEX_FILTER_NEAREST) return img_filter_3d_nearest; -- cgit v1.2.3 From fd19e8adcd82e88d0fc8d187360b528100fed244 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 23 Aug 2009 19:28:34 +0100 Subject: softpipe: use one fewer divide in sample_cube GCC won't do this for us. Makes a bigger difference to cubemap fps than previous set of compilcated rearrangements. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 3bc4599e04..50460df7cd 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1543,9 +1543,12 @@ sample_cube(struct tgsi_sampler *tgsi_sampler, } } - ssss[j] = ( sc / ma + 1.0F ) * 0.5F; - tttt[j] = ( tc / ma + 1.0F ) * 0.5F; - samp->faces[j] = face; + { + const float ima = 1.0 / ma; + ssss[j] = ( sc * ima + 1.0F ) * 0.5F; + tttt[j] = ( tc * ima + 1.0F ) * 0.5F; + samp->faces[j] = face; + } } /* In our little pipeline, the compare stage is next. If compare -- cgit v1.2.3 From d1b4351e603522be11061522cb6b685da9ef1fee Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Sun, 30 Aug 2009 18:51:29 +0200 Subject: r300: Remove all Mesa dependencies from the shader compiler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In particular, this removes the dependency on prog_instruction, which unfortunately creates some code duplication, but also opens a path towards adding some hardware-specific things in there. Signed-off-by: Nicolai Hähnle --- src/gallium/drivers/r300/Makefile | 4 +- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 277 ++++++----- src/mesa/drivers/dri/r300/compiler/Makefile | 1 + src/mesa/drivers/dri/r300/compiler/r300_fragprog.c | 118 ++--- src/mesa/drivers/dri/r300/compiler/r300_fragprog.h | 5 +- .../drivers/dri/r300/compiler/r300_fragprog_emit.c | 96 ++-- .../dri/r300/compiler/r300_fragprog_swizzle.c | 84 ++-- .../dri/r300/compiler/r300_fragprog_swizzle.h | 11 +- src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 48 +- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 321 ++++++------- src/mesa/drivers/dri/r300/compiler/r500_fragprog.c | 166 +++---- src/mesa/drivers/dri/r300/compiler/r500_fragprog.h | 9 +- .../drivers/dri/r300/compiler/r500_fragprog_emit.c | 96 ++-- src/mesa/drivers/dri/r300/compiler/radeon_code.c | 14 +- src/mesa/drivers/dri/r300/compiler/radeon_code.h | 23 +- .../drivers/dri/r300/compiler/radeon_compiler.c | 92 ++-- .../drivers/dri/r300/compiler/radeon_compiler.h | 31 +- .../drivers/dri/r300/compiler/radeon_nqssadce.c | 159 +++---- .../drivers/dri/r300/compiler/radeon_nqssadce.h | 13 +- .../drivers/dri/r300/compiler/radeon_opcodes.c | 318 +++++++++++++ .../drivers/dri/r300/compiler/radeon_opcodes.h | 202 ++++++++ .../drivers/dri/r300/compiler/radeon_program.c | 203 ++++++-- .../drivers/dri/r300/compiler/radeon_program.h | 230 +++++++-- .../drivers/dri/r300/compiler/radeon_program_alu.c | 521 ++++++++++----------- .../drivers/dri/r300/compiler/radeon_program_alu.h | 10 +- .../dri/r300/compiler/radeon_program_pair.c | 361 +++++++------- .../dri/r300/compiler/radeon_program_pair.h | 68 +-- src/mesa/drivers/dri/r300/r300_fragprog_common.c | 4 +- src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c | 143 +++++- 29 files changed, 2268 insertions(+), 1360 deletions(-) create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index d7a2c8c462..ec58eeacc2 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -24,9 +24,7 @@ C_SOURCES = \ r300_tgsi_to_rc.c LIBRARY_INCLUDES = \ - -I$(TOP)/src/mesa/drivers/dri/r300/compiler \ - -I$(TOP)/src/mesa \ - -I$(TOP)/include + -I$(TOP)/src/mesa/drivers/dri/r300/compiler COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 3adbb715f3..2c63a46c27 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -33,152 +33,151 @@ static unsigned translate_opcode(unsigned opcode) { switch(opcode) { - case TGSI_OPCODE_ARL: return OPCODE_ARL; - case TGSI_OPCODE_MOV: return OPCODE_MOV; - case TGSI_OPCODE_LIT: return OPCODE_LIT; - case TGSI_OPCODE_RCP: return OPCODE_RCP; - case TGSI_OPCODE_RSQ: return OPCODE_RSQ; - case TGSI_OPCODE_EXP: return OPCODE_EXP; - case TGSI_OPCODE_LOG: return OPCODE_LOG; - case TGSI_OPCODE_MUL: return OPCODE_MUL; - case TGSI_OPCODE_ADD: return OPCODE_ADD; - case TGSI_OPCODE_DP3: return OPCODE_DP3; - case TGSI_OPCODE_DP4: return OPCODE_DP4; - case TGSI_OPCODE_DST: return OPCODE_DST; - case TGSI_OPCODE_MIN: return OPCODE_MIN; - case TGSI_OPCODE_MAX: return OPCODE_MAX; - case TGSI_OPCODE_SLT: return OPCODE_SLT; - case TGSI_OPCODE_SGE: return OPCODE_SGE; - case TGSI_OPCODE_MAD: return OPCODE_MAD; - case TGSI_OPCODE_SUB: return OPCODE_SUB; - case TGSI_OPCODE_LRP: return OPCODE_LRP; - /* case TGSI_OPCODE_CND: return OPCODE_CND; */ - /* case TGSI_OPCODE_CND0: return OPCODE_CND0; */ - case TGSI_OPCODE_DP2A: return OPCODE_DP2A; + case TGSI_OPCODE_ARL: return RC_OPCODE_ARL; + case TGSI_OPCODE_MOV: return RC_OPCODE_MOV; + case TGSI_OPCODE_LIT: return RC_OPCODE_LIT; + case TGSI_OPCODE_RCP: return RC_OPCODE_RCP; + case TGSI_OPCODE_RSQ: return RC_OPCODE_RSQ; + case TGSI_OPCODE_EXP: return RC_OPCODE_EXP; + case TGSI_OPCODE_LOG: return RC_OPCODE_LOG; + case TGSI_OPCODE_MUL: return RC_OPCODE_MUL; + case TGSI_OPCODE_ADD: return RC_OPCODE_ADD; + case TGSI_OPCODE_DP3: return RC_OPCODE_DP3; + case TGSI_OPCODE_DP4: return RC_OPCODE_DP4; + case TGSI_OPCODE_DST: return RC_OPCODE_DST; + case TGSI_OPCODE_MIN: return RC_OPCODE_MIN; + case TGSI_OPCODE_MAX: return RC_OPCODE_MAX; + case TGSI_OPCODE_SLT: return RC_OPCODE_SLT; + case TGSI_OPCODE_SGE: return RC_OPCODE_SGE; + case TGSI_OPCODE_MAD: return RC_OPCODE_MAD; + case TGSI_OPCODE_SUB: return RC_OPCODE_SUB; + case TGSI_OPCODE_LRP: return RC_OPCODE_LRP; + /* case TGSI_OPCODE_CND: return RC_OPCODE_CND; */ + /* case TGSI_OPCODE_CND0: return RC_OPCODE_CND0; */ + /* case TGSI_OPCODE_DP2A: return RC_OPCODE_DP2A; */ /* gap */ - case TGSI_OPCODE_FRC: return OPCODE_FRC; - /* case TGSI_OPCODE_CLAMP: return OPCODE_CLAMP; */ - case TGSI_OPCODE_FLR: return OPCODE_FLR; - /* case TGSI_OPCODE_ROUND: return OPCODE_ROUND; */ - case TGSI_OPCODE_EX2: return OPCODE_EX2; - case TGSI_OPCODE_LG2: return OPCODE_LG2; - case TGSI_OPCODE_POW: return OPCODE_POW; - case TGSI_OPCODE_XPD: return OPCODE_XPD; + case TGSI_OPCODE_FRC: return RC_OPCODE_FRC; + /* case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP; */ + case TGSI_OPCODE_FLR: return RC_OPCODE_FLR; + /* case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND; */ + case TGSI_OPCODE_EX2: return RC_OPCODE_EX2; + case TGSI_OPCODE_LG2: return RC_OPCODE_LG2; + case TGSI_OPCODE_POW: return RC_OPCODE_POW; + case TGSI_OPCODE_XPD: return RC_OPCODE_XPD; /* gap */ - case TGSI_OPCODE_ABS: return OPCODE_ABS; - case TGSI_OPCODE_RCC: return OPCODE_RCC; - case TGSI_OPCODE_DPH: return OPCODE_DPH; - case TGSI_OPCODE_COS: return OPCODE_COS; - case TGSI_OPCODE_DDX: return OPCODE_DDX; - case TGSI_OPCODE_DDY: return OPCODE_DDY; - /* case TGSI_OPCODE_KILP: return OPCODE_KILP; */ - case TGSI_OPCODE_PK2H: return OPCODE_PK2H; - case TGSI_OPCODE_PK2US: return OPCODE_PK2US; - case TGSI_OPCODE_PK4B: return OPCODE_PK4B; - case TGSI_OPCODE_PK4UB: return OPCODE_PK4UB; - case TGSI_OPCODE_RFL: return OPCODE_RFL; - case TGSI_OPCODE_SEQ: return OPCODE_SEQ; - case TGSI_OPCODE_SFL: return OPCODE_SFL; - case TGSI_OPCODE_SGT: return OPCODE_SGT; - case TGSI_OPCODE_SIN: return OPCODE_SIN; - case TGSI_OPCODE_SLE: return OPCODE_SLE; - case TGSI_OPCODE_SNE: return OPCODE_SNE; - case TGSI_OPCODE_STR: return OPCODE_STR; - case TGSI_OPCODE_TEX: return OPCODE_TEX; - case TGSI_OPCODE_TXD: return OPCODE_TXD; - case TGSI_OPCODE_TXP: return OPCODE_TXP; - case TGSI_OPCODE_UP2H: return OPCODE_UP2H; - case TGSI_OPCODE_UP2US: return OPCODE_UP2US; - case TGSI_OPCODE_UP4B: return OPCODE_UP4B; - case TGSI_OPCODE_UP4UB: return OPCODE_UP4UB; - case TGSI_OPCODE_X2D: return OPCODE_X2D; - case TGSI_OPCODE_ARA: return OPCODE_ARA; - case TGSI_OPCODE_ARR: return OPCODE_ARR; - case TGSI_OPCODE_BRA: return OPCODE_BRA; - case TGSI_OPCODE_CAL: return OPCODE_CAL; - case TGSI_OPCODE_RET: return OPCODE_RET; - case TGSI_OPCODE_SSG: return OPCODE_SSG; - case TGSI_OPCODE_CMP: return OPCODE_CMP; - case TGSI_OPCODE_SCS: return OPCODE_SCS; - case TGSI_OPCODE_TXB: return OPCODE_TXB; - /* case TGSI_OPCODE_NRM: return OPCODE_NRM; */ - /* case TGSI_OPCODE_DIV: return OPCODE_DIV; */ - case TGSI_OPCODE_DP2: return OPCODE_DP2; - case TGSI_OPCODE_TXL: return OPCODE_TXL; - case TGSI_OPCODE_BRK: return OPCODE_BRK; - case TGSI_OPCODE_IF: return OPCODE_IF; - /* case TGSI_OPCODE_LOOP: return OPCODE_LOOP; */ - /* case TGSI_OPCODE_REP: return OPCODE_REP; */ - case TGSI_OPCODE_ELSE: return OPCODE_ELSE; - case TGSI_OPCODE_ENDIF: return OPCODE_ENDIF; - case TGSI_OPCODE_ENDLOOP: return OPCODE_ENDLOOP; - /* case TGSI_OPCODE_ENDREP: return OPCODE_ENDREP; */ - case TGSI_OPCODE_PUSHA: return OPCODE_PUSHA; - case TGSI_OPCODE_POPA: return OPCODE_POPA; - /* case TGSI_OPCODE_CEIL: return OPCODE_CEIL; */ - /* case TGSI_OPCODE_I2F: return OPCODE_I2F; */ - case TGSI_OPCODE_NOT: return OPCODE_NOT; - case TGSI_OPCODE_TRUNC: return OPCODE_TRUNC; - /* case TGSI_OPCODE_SHL: return OPCODE_SHL; */ - /* case TGSI_OPCODE_SHR: return OPCODE_SHR; */ - case TGSI_OPCODE_AND: return OPCODE_AND; - case TGSI_OPCODE_OR: return OPCODE_OR; - /* case TGSI_OPCODE_MOD: return OPCODE_MOD; */ - case TGSI_OPCODE_XOR: return OPCODE_XOR; - /* case TGSI_OPCODE_SAD: return OPCODE_SAD; */ - /* case TGSI_OPCODE_TXF: return OPCODE_TXF; */ - /* case TGSI_OPCODE_TXQ: return OPCODE_TXQ; */ - case TGSI_OPCODE_CONT: return OPCODE_CONT; - /* case TGSI_OPCODE_EMIT: return OPCODE_EMIT; */ - /* case TGSI_OPCODE_ENDPRIM: return OPCODE_ENDPRIM; */ - /* case TGSI_OPCODE_BGNLOOP2: return OPCODE_BGNLOOP2; */ - case TGSI_OPCODE_BGNSUB: return OPCODE_BGNSUB; - /* case TGSI_OPCODE_ENDLOOP2: return OPCODE_ENDLOOP2; */ - case TGSI_OPCODE_ENDSUB: return OPCODE_ENDSUB; - case TGSI_OPCODE_NOISE1: return OPCODE_NOISE1; - case TGSI_OPCODE_NOISE2: return OPCODE_NOISE2; - case TGSI_OPCODE_NOISE3: return OPCODE_NOISE3; - case TGSI_OPCODE_NOISE4: return OPCODE_NOISE4; - case TGSI_OPCODE_NOP: return OPCODE_NOP; + case TGSI_OPCODE_ABS: return RC_OPCODE_ABS; + /* case TGSI_OPCODE_RCC: return RC_OPCODE_RCC; */ + case TGSI_OPCODE_DPH: return RC_OPCODE_DPH; + case TGSI_OPCODE_COS: return RC_OPCODE_COS; + case TGSI_OPCODE_DDX: return RC_OPCODE_DDX; + case TGSI_OPCODE_DDY: return RC_OPCODE_DDY; + /* case TGSI_OPCODE_KILP: return RC_OPCODE_KILP; */ + /* case TGSI_OPCODE_PK2H: return RC_OPCODE_PK2H; */ + /* case TGSI_OPCODE_PK2US: return RC_OPCODE_PK2US; */ + /* case TGSI_OPCODE_PK4B: return RC_OPCODE_PK4B; */ + /* case TGSI_OPCODE_PK4UB: return RC_OPCODE_PK4UB; */ + /* case TGSI_OPCODE_RFL: return RC_OPCODE_RFL; */ + case TGSI_OPCODE_SEQ: return RC_OPCODE_SEQ; + case TGSI_OPCODE_SFL: return RC_OPCODE_SFL; + case TGSI_OPCODE_SGT: return RC_OPCODE_SGT; + case TGSI_OPCODE_SIN: return RC_OPCODE_SIN; + case TGSI_OPCODE_SLE: return RC_OPCODE_SLE; + case TGSI_OPCODE_SNE: return RC_OPCODE_SNE; + /* case TGSI_OPCODE_STR: return RC_OPCODE_STR; */ + case TGSI_OPCODE_TEX: return RC_OPCODE_TEX; + case TGSI_OPCODE_TXD: return RC_OPCODE_TXD; + case TGSI_OPCODE_TXP: return RC_OPCODE_TXP; + /* case TGSI_OPCODE_UP2H: return RC_OPCODE_UP2H; */ + /* case TGSI_OPCODE_UP2US: return RC_OPCODE_UP2US; */ + /* case TGSI_OPCODE_UP4B: return RC_OPCODE_UP4B; */ + /* case TGSI_OPCODE_UP4UB: return RC_OPCODE_UP4UB; */ + /* case TGSI_OPCODE_X2D: return RC_OPCODE_X2D; */ + /* case TGSI_OPCODE_ARA: return RC_OPCODE_ARA; */ + /* case TGSI_OPCODE_ARR: return RC_OPCODE_ARR; */ + /* case TGSI_OPCODE_BRA: return RC_OPCODE_BRA; */ + /* case TGSI_OPCODE_CAL: return RC_OPCODE_CAL; */ + /* case TGSI_OPCODE_RET: return RC_OPCODE_RET; */ + /* case TGSI_OPCODE_SSG: return RC_OPCODE_SSG; */ + case TGSI_OPCODE_CMP: return RC_OPCODE_CMP; + case TGSI_OPCODE_SCS: return RC_OPCODE_SCS; + case TGSI_OPCODE_TXB: return RC_OPCODE_TXB; + /* case TGSI_OPCODE_NRM: return RC_OPCODE_NRM; */ + /* case TGSI_OPCODE_DIV: return RC_OPCODE_DIV; */ + /* case TGSI_OPCODE_DP2: return RC_OPCODE_DP2; */ + case TGSI_OPCODE_TXL: return RC_OPCODE_TXL; + /* case TGSI_OPCODE_BRK: return RC_OPCODE_BRK; */ + /* case TGSI_OPCODE_IF: return RC_OPCODE_IF; */ + /* case TGSI_OPCODE_LOOP: return RC_OPCODE_LOOP; */ + /* case TGSI_OPCODE_REP: return RC_OPCODE_REP; */ + /* case TGSI_OPCODE_ELSE: return RC_OPCODE_ELSE; */ + /* case TGSI_OPCODE_ENDIF: return RC_OPCODE_ENDIF; */ + /* case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP; */ + /* case TGSI_OPCODE_ENDREP: return RC_OPCODE_ENDREP; */ + /* case TGSI_OPCODE_PUSHA: return RC_OPCODE_PUSHA; */ + /* case TGSI_OPCODE_POPA: return RC_OPCODE_POPA; */ + /* case TGSI_OPCODE_CEIL: return RC_OPCODE_CEIL; */ + /* case TGSI_OPCODE_I2F: return RC_OPCODE_I2F; */ + /* case TGSI_OPCODE_NOT: return RC_OPCODE_NOT; */ + /* case TGSI_OPCODE_TRUNC: return RC_OPCODE_TRUNC; */ + /* case TGSI_OPCODE_SHL: return RC_OPCODE_SHL; */ + /* case TGSI_OPCODE_SHR: return RC_OPCODE_SHR; */ + /* case TGSI_OPCODE_AND: return RC_OPCODE_AND; */ + /* case TGSI_OPCODE_OR: return RC_OPCODE_OR; */ + /* case TGSI_OPCODE_MOD: return RC_OPCODE_MOD; */ + /* case TGSI_OPCODE_XOR: return RC_OPCODE_XOR; */ + /* case TGSI_OPCODE_SAD: return RC_OPCODE_SAD; */ + /* case TGSI_OPCODE_TXF: return RC_OPCODE_TXF; */ + /* case TGSI_OPCODE_TXQ: return RC_OPCODE_TXQ; */ + /* case TGSI_OPCODE_CONT: return RC_OPCODE_CONT; */ + /* case TGSI_OPCODE_EMIT: return RC_OPCODE_EMIT; */ + /* case TGSI_OPCODE_ENDPRIM: return RC_OPCODE_ENDPRIM; */ + /* case TGSI_OPCODE_BGNLOOP2: return RC_OPCODE_BGNLOOP2; */ + /* case TGSI_OPCODE_BGNSUB: return RC_OPCODE_BGNSUB; */ + /* case TGSI_OPCODE_ENDLOOP2: return RC_OPCODE_ENDLOOP2; */ + /* case TGSI_OPCODE_ENDSUB: return RC_OPCODE_ENDSUB; */ + /* case TGSI_OPCODE_NOISE1: return RC_OPCODE_NOISE1; */ + /* case TGSI_OPCODE_NOISE2: return RC_OPCODE_NOISE2; */ + /* case TGSI_OPCODE_NOISE3: return RC_OPCODE_NOISE3; */ + /* case TGSI_OPCODE_NOISE4: return RC_OPCODE_NOISE4; */ + case TGSI_OPCODE_NOP: return RC_OPCODE_NOP; /* gap */ - case TGSI_OPCODE_NRM4: return OPCODE_NRM4; - /* case TGSI_OPCODE_CALLNZ: return OPCODE_CALLNZ; */ - /* case TGSI_OPCODE_IFC: return OPCODE_IFC; */ - /* case TGSI_OPCODE_BREAKC: return OPCODE_BREAKC; */ - case TGSI_OPCODE_KIL: return OPCODE_KIL; - case TGSI_OPCODE_END: return OPCODE_END; - case TGSI_OPCODE_SWZ: return OPCODE_SWZ; + /* case TGSI_OPCODE_NRM4: return RC_OPCODE_NRM4; */ + /* case TGSI_OPCODE_CALLNZ: return RC_OPCODE_CALLNZ; */ + /* case TGSI_OPCODE_IFC: return RC_OPCODE_IFC; */ + /* case TGSI_OPCODE_BREAKC: return RC_OPCODE_BREAKC; */ + case TGSI_OPCODE_KIL: return RC_OPCODE_KIL; + case TGSI_OPCODE_SWZ: return RC_OPCODE_SWZ; } fprintf(stderr, "Unknown opcode: %i\n", opcode); - abort(); + return RC_OPCODE_ILLEGAL_OPCODE; } static unsigned translate_saturate(unsigned saturate) { switch(saturate) { - case TGSI_SAT_NONE: return SATURATE_OFF; - case TGSI_SAT_ZERO_ONE: return SATURATE_ZERO_ONE; - case TGSI_SAT_MINUS_PLUS_ONE: return SATURATE_PLUS_MINUS_ONE; + default: + fprintf(stderr, "Unknown saturate mode: %i\n", saturate); + /* fall-through */ + case TGSI_SAT_NONE: return RC_SATURATE_NONE; + case TGSI_SAT_ZERO_ONE: return RC_SATURATE_ZERO_ONE; + case TGSI_SAT_MINUS_PLUS_ONE: return RC_SATURATE_MINUS_PLUS_ONE; } - - fprintf(stderr, "Unknown saturate mode: %i\n", saturate); - abort(); } static unsigned translate_register_file(unsigned file) { switch(file) { - case TGSI_FILE_CONSTANT: return PROGRAM_CONSTANT; - case TGSI_FILE_IMMEDIATE: return PROGRAM_CONSTANT; - case TGSI_FILE_INPUT: return PROGRAM_INPUT; - case TGSI_FILE_OUTPUT: return PROGRAM_OUTPUT; - case TGSI_FILE_TEMPORARY: return PROGRAM_TEMPORARY; - case TGSI_FILE_ADDRESS: return PROGRAM_ADDRESS; + case TGSI_FILE_CONSTANT: return RC_FILE_CONSTANT; + case TGSI_FILE_IMMEDIATE: return RC_FILE_CONSTANT; + case TGSI_FILE_INPUT: return RC_FILE_INPUT; + case TGSI_FILE_OUTPUT: return RC_FILE_OUTPUT; + default: + fprintf(stderr, "Unhandled register file: %i\n", file); + /* fall-through */ + case TGSI_FILE_TEMPORARY: return RC_FILE_TEMPORARY; + case TGSI_FILE_ADDRESS: return RC_FILE_ADDRESS; } - - fprintf(stderr, "Unhandled register file: %i\n", file); - abort(); } static int translate_register_index( @@ -194,7 +193,7 @@ static int translate_register_index( static void transform_dstreg( struct tgsi_to_rc * ttr, - struct prog_dst_register * dst, + struct rc_dst_register * dst, struct tgsi_full_dst_register * src) { dst->File = translate_register_file(src->DstRegister.File); @@ -205,7 +204,7 @@ static void transform_dstreg( static void transform_srcreg( struct tgsi_to_rc * ttr, - struct prog_src_register * dst, + struct rc_src_register * dst, struct tgsi_full_src_register * src) { dst->File = translate_register_file(src->SrcRegister.File); @@ -221,37 +220,37 @@ static void transform_srcreg( (src->SrcRegisterExtSwz.NegateY << 1) | (src->SrcRegisterExtSwz.NegateZ << 2) | (src->SrcRegisterExtSwz.NegateW << 3); - dst->Negate ^= src->SrcRegister.Negate ? NEGATE_XYZW : 0; + dst->Negate ^= src->SrcRegister.Negate ? RC_MASK_XYZW : 0; } static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_ext_texture src) { switch(src.Texture) { case TGSI_TEXTURE_1D: - dst->I.TexSrcTarget = TEXTURE_1D_INDEX; + dst->I.TexSrcTarget = RC_TEXTURE_1D; break; case TGSI_TEXTURE_2D: - dst->I.TexSrcTarget = TEXTURE_2D_INDEX; + dst->I.TexSrcTarget = RC_TEXTURE_2D; break; case TGSI_TEXTURE_3D: - dst->I.TexSrcTarget = TEXTURE_3D_INDEX; + dst->I.TexSrcTarget = RC_TEXTURE_3D; break; case TGSI_TEXTURE_CUBE: - dst->I.TexSrcTarget = TEXTURE_CUBE_INDEX; + dst->I.TexSrcTarget = RC_TEXTURE_CUBE; break; case TGSI_TEXTURE_RECT: - dst->I.TexSrcTarget = TEXTURE_RECT_INDEX; + dst->I.TexSrcTarget = RC_TEXTURE_RECT; break; case TGSI_TEXTURE_SHADOW1D: - dst->I.TexSrcTarget = TEXTURE_1D_INDEX; + dst->I.TexSrcTarget = RC_TEXTURE_1D; dst->I.TexShadow = 1; break; case TGSI_TEXTURE_SHADOW2D: - dst->I.TexSrcTarget = TEXTURE_2D_INDEX; + dst->I.TexSrcTarget = RC_TEXTURE_2D; dst->I.TexShadow = 1; break; case TGSI_TEXTURE_SHADOWRECT: - dst->I.TexSrcTarget = TEXTURE_RECT_INDEX; + dst->I.TexSrcTarget = RC_TEXTURE_RECT; dst->I.TexShadow = 1; break; } diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index d973844192..080c79898b 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -10,6 +10,7 @@ C_SOURCES = \ radeon_compiler.c \ radeon_nqssadce.c \ radeon_program.c \ + radeon_opcodes.c \ radeon_program_alu.c \ radeon_program_pair.c \ r3xx_fragprog.c \ diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c index 6c9fba4914..94f8fdfea3 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c @@ -27,17 +27,17 @@ #include "r300_fragprog.h" -#include "shader/prog_parameter.h" +#include #include "../r300_reg.h" -static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu) +static struct rc_src_register shadow_ambient(struct radeon_compiler * c, int tmu) { - struct prog_src_register reg = { 0, }; + struct rc_src_register reg = { 0, }; - reg.File = PROGRAM_STATE_VAR; + reg.File = RC_FILE_CONSTANT; reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu); - reg.Swizzle = SWIZZLE_WWWW; + reg.Swizzle = RC_SWIZZLE_WWWW; return reg; } @@ -47,7 +47,7 @@ static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int t * - extract operand swizzles * - introduce a temporary register when write masks are needed */ -GLboolean r300_transform_TEX( +int r300_transform_TEX( struct radeon_compiler * c, struct rc_instruction* inst, void* data) @@ -55,77 +55,77 @@ GLboolean r300_transform_TEX( struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)data; - if (inst->I.Opcode != OPCODE_TEX && - inst->I.Opcode != OPCODE_TXB && - inst->I.Opcode != OPCODE_TXP && - inst->I.Opcode != OPCODE_KIL) - return GL_FALSE; + if (inst->I.Opcode != RC_OPCODE_TEX && + inst->I.Opcode != RC_OPCODE_TXB && + inst->I.Opcode != RC_OPCODE_TXP && + inst->I.Opcode != RC_OPCODE_KIL) + return 0; /* ARB_shadow & EXT_shadow_funcs */ - if (inst->I.Opcode != OPCODE_KIL && + if (inst->I.Opcode != RC_OPCODE_KIL && c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + rc_compare_func comparefunc = compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; - if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { - inst->I.Opcode = OPCODE_MOV; + if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->I.Opcode = RC_OPCODE_MOV; - if (comparefunc == GL_ALWAYS) { - inst->I.SrcReg[0].File = PROGRAM_BUILTIN; - inst->I.SrcReg[0].Swizzle = SWIZZLE_1111; + if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->I.SrcReg[0].File = RC_FILE_NONE; + inst->I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; } else { inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit); } - return GL_TRUE; + return 1; } else { - GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; - GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode; + rc_compare_func comparefunc = compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + unsigned int depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode; struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); int pass, fail; - inst_rcp->I.Opcode = OPCODE_RCP; - inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY; + inst_rcp->I.Opcode = RC_OPCODE_RCP; + inst_rcp->I.DstReg.File = RC_FILE_TEMPORARY; inst_rcp->I.DstReg.Index = rc_find_free_temporary(c); - inst_rcp->I.DstReg.WriteMask = WRITEMASK_W; + inst_rcp->I.DstReg.WriteMask = RC_MASK_W; inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0]; - inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW; + inst_rcp->I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; inst_cmp->I.DstReg = inst->I.DstReg; - inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.File = RC_FILE_TEMPORARY; inst->I.DstReg.Index = rc_find_free_temporary(c); - inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + inst->I.DstReg.WriteMask = RC_MASK_XYZW; - inst_mad->I.Opcode = OPCODE_MAD; - inst_mad->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mad->I.Opcode = RC_OPCODE_MAD; + inst_mad->I.DstReg.File = RC_FILE_TEMPORARY; inst_mad->I.DstReg.Index = rc_find_free_temporary(c); inst_mad->I.SrcReg[0] = inst->I.SrcReg[0]; - inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ; - inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ; + inst_mad->I.SrcReg[1].File = RC_FILE_TEMPORARY; inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index; - inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW; - inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + inst_mad->I.SrcReg[2].File = RC_FILE_TEMPORARY; inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index; if (depthmode == 0) /* GL_LUMINANCE */ - inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + inst_mad->I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z); else if (depthmode == 2) /* GL_ALPHA */ - inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW; + inst_mad->I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW; /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: * r < tex <=> -tex+r < 0 * r >= tex <=> not (-tex+r < 0 */ - if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW; + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL) + inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ RC_MASK_XYZW; else - inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW; + inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ RC_MASK_XYZW; - inst_cmp->I.Opcode = OPCODE_CMP; + inst_cmp->I.Opcode = RC_OPCODE_CMP; /* DstReg has been filled out above */ - inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_cmp->I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index; - if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) { pass = 1; fail = 2; } else { @@ -133,8 +133,8 @@ GLboolean r300_transform_TEX( fail = 1; } - inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN; - inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111; + inst_cmp->I.SrcReg[pass].File = RC_FILE_NONE; + inst_cmp->I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit); } } @@ -143,52 +143,52 @@ GLboolean r300_transform_TEX( * instead of [0..Width]x[0..Height]. * Add a scaling instruction. */ - if (inst->I.Opcode != OPCODE_KIL && inst->I.TexSrcTarget == TEXTURE_RECT_INDEX) { + if (inst->I.Opcode != RC_OPCODE_KIL && inst->I.TexSrcTarget == RC_TEXTURE_RECT) { struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst->Prev); - inst_mul->I.Opcode = OPCODE_MUL; - inst_mul->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mul->I.Opcode = RC_OPCODE_MUL; + inst_mul->I.DstReg.File = RC_FILE_TEMPORARY; inst_mul->I.DstReg.Index = rc_find_free_temporary(c); inst_mul->I.SrcReg[0] = inst->I.SrcReg[0]; - inst_mul->I.SrcReg[1].File = PROGRAM_STATE_VAR; + inst_mul->I.SrcReg[1].File = RC_FILE_CONSTANT; inst_mul->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->I.TexSrcUnit); reset_srcreg(&inst->I.SrcReg[0]); - inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->I.SrcReg[0].Index = inst_mul->I.DstReg.Index; } /* Cannot write texture to output registers or with masks */ - if (inst->I.Opcode != OPCODE_KIL && - (inst->I.DstReg.File != PROGRAM_TEMPORARY || inst->I.DstReg.WriteMask != WRITEMASK_XYZW)) { + if (inst->I.Opcode != RC_OPCODE_KIL && + (inst->I.DstReg.File != RC_FILE_TEMPORARY || inst->I.DstReg.WriteMask != RC_MASK_XYZW)) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); - inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.Opcode = RC_OPCODE_MOV; inst_mov->I.DstReg = inst->I.DstReg; - inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_mov->I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c); - inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.File = RC_FILE_TEMPORARY; inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; - inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + inst->I.DstReg.WriteMask = RC_MASK_XYZW; } /* Cannot read texture coordinate from constants file */ - if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) { + if (inst->I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->I.SrcReg[0].File != RC_FILE_INPUT) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->I.Opcode = OPCODE_MOV; - inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.Opcode = RC_OPCODE_MOV; + inst_mov->I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->I.DstReg.Index = rc_find_free_temporary(c); inst_mov->I.SrcReg[0] = inst->I.SrcReg[0]; reset_srcreg(&inst->I.SrcReg[0]); - inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index; } - return GL_TRUE; + return 1; } /* just some random things... */ diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h index 0ac46dbd9c..418df36c93 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h @@ -33,9 +33,6 @@ #ifndef __R300_FRAGPROG_H_ #define __R300_FRAGPROG_H_ -#include "shader/program.h" -#include "shader/prog_instruction.h" - #include "radeon_compiler.h" #include "radeon_program.h" @@ -44,6 +41,6 @@ extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c); -extern GLboolean r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data); +extern int r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data); #endif diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index 305dc074ee..58953f6b04 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -69,64 +69,64 @@ struct r300_emit_state { /** * Mark a temporary register as used. */ -static void use_temporary(struct r300_fragment_program_code *code, GLuint index) +static void use_temporary(struct r300_fragment_program_code *code, unsigned int index) { if (index > code->pixsize) code->pixsize = index; } -static GLuint translate_rgb_opcode(struct r300_fragment_program_compiler * c, GLuint opcode) +static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) { switch(opcode) { - case OPCODE_CMP: return R300_ALU_OUTC_CMP; - case OPCODE_DP3: return R300_ALU_OUTC_DP3; - case OPCODE_DP4: return R300_ALU_OUTC_DP4; - case OPCODE_FRC: return R300_ALU_OUTC_FRC; + case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP; + case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3; + case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4; + case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC; default: error("translate_rgb_opcode(%i): Unknown opcode", opcode); /* fall through */ - case OPCODE_NOP: + case RC_OPCODE_NOP: /* fall through */ - case OPCODE_MAD: return R300_ALU_OUTC_MAD; - case OPCODE_MAX: return R300_ALU_OUTC_MAX; - case OPCODE_MIN: return R300_ALU_OUTC_MIN; - case OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; + case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD; + case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX; + case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN; + case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; } } -static GLuint translate_alpha_opcode(struct r300_fragment_program_compiler * c, GLuint opcode) +static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) { switch(opcode) { - case OPCODE_CMP: return R300_ALU_OUTA_CMP; - case OPCODE_DP3: return R300_ALU_OUTA_DP4; - case OPCODE_DP4: return R300_ALU_OUTA_DP4; - case OPCODE_EX2: return R300_ALU_OUTA_EX2; - case OPCODE_FRC: return R300_ALU_OUTA_FRC; - case OPCODE_LG2: return R300_ALU_OUTA_LG2; + case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP; + case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4; + case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4; + case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2; + case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC; + case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2; default: error("translate_rgb_opcode(%i): Unknown opcode", opcode); /* fall through */ - case OPCODE_NOP: + case RC_OPCODE_NOP: /* fall through */ - case OPCODE_MAD: return R300_ALU_OUTA_MAD; - case OPCODE_MAX: return R300_ALU_OUTA_MAX; - case OPCODE_MIN: return R300_ALU_OUTA_MIN; - case OPCODE_RCP: return R300_ALU_OUTA_RCP; - case OPCODE_RSQ: return R300_ALU_OUTA_RSQ; + case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD; + case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX; + case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN; + case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP; + case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ; } } /** * Emit one paired ALU instruction. */ -static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) +static int emit_alu(void* data, struct radeon_pair_instruction* inst) { PROG_CODE; if (code->alu.length >= R300_PFS_MAX_ALU_INST) { error("Too many ALU instructions"); - return GL_FALSE; + return 0; } int ip = code->alu.length++; @@ -136,7 +136,7 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); for(j = 0; j < 3; ++j) { - GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5); + unsigned int src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5); if (!inst->RGB.Src[j].Constant) use_temporary(code, inst->RGB.Src[j].Index); code->alu.inst[ip].rgb_addr |= src << (6*j); @@ -146,7 +146,7 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) use_temporary(code, inst->Alpha.Src[j].Index); code->alu.inst[ip].alpha_addr |= src << (6*j); - GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); + unsigned int arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); arg |= inst->RGB.Arg[j].Abs << 6; arg |= inst->RGB.Arg[j].Negate << 5; code->alu.inst[ip].rgb_inst |= arg << (7*j); @@ -186,17 +186,17 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) if (inst->Alpha.DepthWriteMask) { code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH; emit->node_flags |= R300_W_OUT; - c->code->writes_depth = GL_TRUE; + c->code->writes_depth = 1; } - return GL_TRUE; + return 1; } /** * Finish the current node without advancing to the next one. */ -static GLboolean finish_node(struct r300_emit_state * emit) +static int finish_node(struct r300_emit_state * emit) { struct r300_fragment_program_compiler * c = emit->compiler; struct r300_fragment_program_code *code = &emit->compiler->code->code.r300; @@ -204,9 +204,9 @@ static GLboolean finish_node(struct r300_emit_state * emit) if (code->alu.length == emit->node_first_alu) { /* Generate a single NOP for this node */ struct radeon_pair_instruction inst; - _mesa_bzero(&inst, sizeof(inst)); + memset(&inst, 0, sizeof(inst)); if (!emit_alu(emit, &inst)) - return GL_FALSE; + return 0; } unsigned alu_offset = emit->node_first_alu; @@ -217,7 +217,7 @@ static GLboolean finish_node(struct r300_emit_state * emit) if (code->tex.length == emit->node_first_tex) { if (emit->current_node > 0) { error("Node %i has no TEX instructions", emit->current_node); - return GL_FALSE; + return 0; } tex_end = 0; @@ -240,7 +240,7 @@ static GLboolean finish_node(struct r300_emit_state * emit) (tex_end << R300_TEX_SIZE_SHIFT) | emit->node_flags; - return GL_TRUE; + return 1; } @@ -248,43 +248,43 @@ static GLboolean finish_node(struct r300_emit_state * emit) * Begin a block of texture instructions. * Create the necessary indirection. */ -static GLboolean begin_tex(void* data) +static int begin_tex(void* data) { PROG_CODE; if (code->alu.length == emit->node_first_alu && code->tex.length == emit->node_first_tex) { - return GL_TRUE; + return 1; } if (emit->current_node == 3) { error("Too many texture indirections"); - return GL_FALSE; + return 0; } if (!finish_node(emit)) - return GL_FALSE; + return 0; emit->current_node++; emit->node_first_tex = code->tex.length; emit->node_first_alu = code->alu.length; emit->node_flags = 0; - return GL_TRUE; + return 1; } -static GLboolean emit_tex(void* data, struct radeon_pair_texture_instruction* inst) +static int emit_tex(void* data, struct radeon_pair_texture_instruction* inst) { PROG_CODE; if (code->tex.length >= R300_PFS_MAX_TEX_INST) { error("Too many TEX instructions"); - return GL_FALSE; + return 0; } - GLuint unit = inst->TexSrcUnit; - GLuint dest = inst->DestIndex; - GLuint opcode; + unsigned int unit = inst->TexSrcUnit; + unsigned int dest = inst->DestIndex; + unsigned int opcode; switch(inst->Opcode) { case RADEON_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; @@ -293,7 +293,7 @@ static GLboolean emit_tex(void* data, struct radeon_pair_texture_instruction* in case RADEON_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; default: error("Unknown texture opcode %i", inst->Opcode); - return GL_FALSE; + return 0; } if (inst->Opcode == RADEON_OPCODE_KIL) { @@ -310,7 +310,7 @@ static GLboolean emit_tex(void* data, struct radeon_pair_texture_instruction* in (dest << R300_DST_ADDR_SHIFT) | (unit << R300_TEX_ID_SHIFT) | (opcode << R300_TEX_INST_SHIFT); - return GL_TRUE; + return 1; } @@ -333,7 +333,7 @@ void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi memset(&emit, 0, sizeof(emit)); emit.compiler = compiler; - _mesa_bzero(code, sizeof(struct r300_fragment_program_code)); + memset(code, 0, sizeof(struct r300_fragment_program_code)); radeonPairProgram(compiler, &pair_handler, &emit); if (compiler->Base.Error) diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c index 1b14cc3888..ded6966d08 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -33,16 +33,18 @@ #include "r300_fragprog_swizzle.h" +#include + #include "../r300_reg.h" #include "radeon_nqssadce.h" #include "radeon_compiler.h" -#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO)) +#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO)) struct swizzle_data { - GLuint hash; /**< swizzle value this matches */ - GLuint base; /**< base value for hw swizzle */ - GLuint stride; /**< difference in base between arg0/1/2 */ + unsigned int hash; /**< swizzle value this matches */ + unsigned int base; /**< base value for hw swizzle */ + unsigned int stride; /**< difference in base between arg0/1/2 */ }; static const struct swizzle_data native_swizzles[] = { @@ -65,15 +67,15 @@ static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swi * Find a native RGB swizzle that matches the given swizzle. * Returns 0 if none found. */ -static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle) +static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle) { int i, comp; for(i = 0; i < num_native_swizzles; ++i) { const struct swizzle_data* sd = &native_swizzles[i]; for(comp = 0; comp < 3; ++comp) { - GLuint swz = GET_SWZ(swizzle, comp); - if (swz == SWIZZLE_NIL) + unsigned int swz = GET_SWZ(swizzle, comp); + if (swz == RC_SWIZZLE_UNUSED) continue; if (swz != GET_SWZ(sd->hash, comp)) break; @@ -90,71 +92,71 @@ static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle) * Check whether the given instruction supports the swizzle and negate * combinations in the given source register. */ -GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) +int r300FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg) { if (reg.Abs) - reg.Negate = NEGATE_NONE; + reg.Negate = RC_MASK_NONE; - if (opcode == OPCODE_KIL || - opcode == OPCODE_TEX || - opcode == OPCODE_TXB || - opcode == OPCODE_TXP) { + if (opcode == RC_OPCODE_KIL || + opcode == RC_OPCODE_TEX || + opcode == RC_OPCODE_TXB || + opcode == RC_OPCODE_TXP) { int j; if (reg.Abs || reg.Negate) - return GL_FALSE; + return 0; for(j = 0; j < 4; ++j) { - GLuint swz = GET_SWZ(reg.Swizzle, j); - if (swz == SWIZZLE_NIL) + unsigned int swz = GET_SWZ(reg.Swizzle, j); + if (swz == RC_SWIZZLE_UNUSED) continue; if (swz != j) - return GL_FALSE; + return 0; } - return GL_TRUE; + return 1; } - GLuint relevant = 0; + unsigned int relevant = 0; int j; for(j = 0; j < 3; ++j) - if (GET_SWZ(reg.Swizzle, j) != SWIZZLE_NIL) + if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED) relevant |= 1 << j; if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) - return GL_FALSE; + return 0; if (!lookup_native_swizzle(reg.Swizzle)) - return GL_FALSE; + return 0; - return GL_TRUE; + return 1; } /** * Generate MOV dst, src using only native swizzles. */ -void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) +void r300FPBuildSwizzle(struct nqssadce_state *s, struct rc_dst_register dst, struct rc_src_register src) { if (src.Abs) - src.Negate = NEGATE_NONE; + src.Negate = RC_MASK_NONE; while(dst.WriteMask) { const struct swizzle_data *best_swizzle = 0; - GLuint best_matchcount = 0; - GLuint best_matchmask = 0; + unsigned int best_matchcount = 0; + unsigned int best_matchmask = 0; int i, comp; for(i = 0; i < num_native_swizzles; ++i) { const struct swizzle_data *sd = &native_swizzles[i]; - GLuint matchcount = 0; - GLuint matchmask = 0; + unsigned int matchcount = 0; + unsigned int matchmask = 0; for(comp = 0; comp < 3; ++comp) { if (!GET_BIT(dst.WriteMask, comp)) continue; - GLuint swz = GET_SWZ(src.Swizzle, comp); - if (swz == SWIZZLE_NIL) + unsigned int swz = GET_SWZ(src.Swizzle, comp); + if (swz == RC_SWIZZLE_UNUSED) continue; if (swz == GET_SWZ(sd->hash, comp)) { /* check if the negate bit of current component @@ -170,17 +172,17 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, best_swizzle = sd; best_matchcount = matchcount; best_matchmask = matchmask; - if (matchmask == (dst.WriteMask & WRITEMASK_XYZ)) + if (matchmask == (dst.WriteMask & RC_MASK_XYZ)) break; } } struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev); - inst->I.Opcode = OPCODE_MOV; + inst->I.Opcode = RC_OPCODE_MOV; inst->I.DstReg = dst; - inst->I.DstReg.WriteMask &= (best_matchmask | WRITEMASK_W); + inst->I.DstReg.WriteMask &= (best_matchmask | RC_MASK_W); inst->I.SrcReg[0] = src; - inst->I.SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE; + inst->I.SrcReg[0].Negate = (best_matchmask & src.Negate) ? RC_MASK_XYZW : RC_MASK_NONE; /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */ dst.WriteMask &= ~inst->I.DstReg.WriteMask; @@ -192,12 +194,12 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, * Translate an RGB (XYZ) swizzle into the hardware code for the given * instruction source. */ -GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle) +unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle) { const struct swizzle_data* sd = lookup_native_swizzle(swizzle); if (!sd) { - _mesa_printf("Not a native swizzle: %08x\n", swizzle); + fprintf(stderr, "Not a native swizzle: %08x\n", swizzle); return 0; } @@ -209,15 +211,15 @@ GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle) * Translate an Alpha (W) swizzle into the hardware code for the given * instruction source. */ -GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle) +unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle) { if (swizzle < 3) return swizzle + 3*src; switch(swizzle) { - case SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; - case SWIZZLE_ONE: return R300_ALU_ARGA_ONE; - case SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; + case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; + case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE; + case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; default: return R300_ALU_ARGA_ONE; } } diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h index 231bf4eef5..728c2cd972 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h @@ -28,15 +28,14 @@ #ifndef __R300_FRAGPROG_SWIZZLE_H_ #define __R300_FRAGPROG_SWIZZLE_H_ -#include "main/glheader.h" -#include "shader/prog_instruction.h" +#include "radeon_program.h" struct nqssadce_state; -GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); -void r300FPBuildSwizzle(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); +int r300FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg); +void r300FPBuildSwizzle(struct nqssadce_state*, struct rc_dst_register dst, struct rc_src_register src); -GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle); -GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle); +unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle); +unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle); #endif /* __R300_FRAGPROG_SWIZZLE_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 76c3a7ecfd..0aa40c0587 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -22,9 +22,7 @@ #include "radeon_compiler.h" -#include "shader/prog_parameter.h" -#include "shader/prog_print.h" -#include "shader/prog_statevars.h" +#include #include "radeon_nqssadce.h" #include "radeon_program_alu.h" @@ -36,8 +34,8 @@ static void nqssadce_init(struct nqssadce_state* s) { struct r300_fragment_program_compiler * c = s->UserData; - s->Outputs[c->OutputColor].Sourced = WRITEMASK_XYZW; - s->Outputs[c->OutputDepth].Sourced = WRITEMASK_W; + s->Outputs[c->OutputColor].Sourced = RC_MASK_XYZW; + s->Outputs[c->OutputDepth].Sourced = RC_MASK_W; } static void rewrite_depth_out(struct r300_fragment_program_compiler * c) @@ -45,35 +43,35 @@ static void rewrite_depth_out(struct r300_fragment_program_compiler * c) struct rc_instruction *rci; for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) { - struct prog_instruction * inst = &rci->I; + struct rc_sub_instruction * inst = &rci->I; - if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != c->OutputDepth) + if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth) continue; - if (inst->DstReg.WriteMask & WRITEMASK_Z) { - inst->DstReg.WriteMask = WRITEMASK_W; + if (inst->DstReg.WriteMask & RC_MASK_Z) { + inst->DstReg.WriteMask = RC_MASK_W; } else { inst->DstReg.WriteMask = 0; continue; } switch (inst->Opcode) { - case OPCODE_FRC: - case OPCODE_MOV: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + case RC_OPCODE_FRC: + case RC_OPCODE_MOV: + inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); break; - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + case RC_OPCODE_ADD: + case RC_OPCODE_MAX: + case RC_OPCODE_MIN: + case RC_OPCODE_MUL: + inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]); break; - case OPCODE_CMP: - case OPCODE_MAD: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); - inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); + case RC_OPCODE_CMP: + case RC_OPCODE_MAD: + inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]); + inst->SrcReg[2] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[2]); break; default: // Scalar instructions needn't be reswizzled @@ -104,7 +102,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) } if (c->Base.Debug) { - _mesa_printf("Fragment Program: After native rewrite:\n"); + fprintf(stderr, "Fragment Program: After native rewrite:\n"); rc_print_program(&c->Base.Program); fflush(stderr); } @@ -126,7 +124,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) } if (c->Base.Debug) { - _mesa_printf("Compiler: after NqSSA-DCE:\n"); + fprintf(stderr, "Compiler: after NqSSA-DCE:\n"); rc_print_program(&c->Base.Program); fflush(stderr); } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 93a516105e..0efd2c91e6 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -22,14 +22,14 @@ #include "radeon_compiler.h" +#include + #include "../r300_reg.h" #include "radeon_nqssadce.h" #include "radeon_program.h" #include "radeon_program_alu.h" -#include "shader/prog_print.h" - /* * Take an already-setup and valid source then swizzle it appropriately to @@ -42,103 +42,82 @@ t_swizzle(y), \ t_swizzle(y), \ t_src_class(vpi->SrcReg[x].File), \ - NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4)) + RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4)) -static unsigned long t_dst_mask(GLuint mask) +static unsigned long t_dst_mask(unsigned int mask) { - /* WRITEMASK_* is equivalent to VSF_FLAG_* */ - return mask & WRITEMASK_XYZW; + /* RC_MASK_* is equivalent to VSF_FLAG_* */ + return mask & RC_MASK_XYZW; } -static unsigned long t_dst_class(gl_register_file file) +static unsigned long t_dst_class(rc_register_file file) { - switch (file) { - case PROGRAM_TEMPORARY: + default: + fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file); + /* fall-through */ + case RC_FILE_TEMPORARY: return PVS_DST_REG_TEMPORARY; - case PROGRAM_OUTPUT: + case RC_FILE_OUTPUT: return PVS_DST_REG_OUT; - case PROGRAM_ADDRESS: + case RC_FILE_ADDRESS: return PVS_DST_REG_A0; - /* - case PROGRAM_INPUT: - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_STATE_VAR: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; } } static unsigned long t_dst_index(struct r300_vertex_program_code *vp, - struct prog_dst_register *dst) + struct rc_dst_register *dst) { - if (dst->File == PROGRAM_OUTPUT) + if (dst->File == RC_FILE_OUTPUT) return vp->outputs[dst->Index]; return dst->Index; } -static unsigned long t_src_class(gl_register_file file) +static unsigned long t_src_class(rc_register_file file) { switch (file) { - case PROGRAM_TEMPORARY: + default: + fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file); + /* fall-through */ + case RC_FILE_TEMPORARY: return PVS_SRC_REG_TEMPORARY; - case PROGRAM_INPUT: + case RC_FILE_INPUT: return PVS_SRC_REG_INPUT; - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - case PROGRAM_STATE_VAR: + case RC_FILE_CONSTANT: return PVS_SRC_REG_CONSTANT; - /* - case PROGRAM_OUTPUT: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; } } -static GLboolean t_src_conflict(struct prog_src_register a, struct prog_src_register b) +static int t_src_conflict(struct rc_src_register a, struct rc_src_register b) { unsigned long aclass = t_src_class(a.File); unsigned long bclass = t_src_class(b.File); if (aclass != bclass) - return GL_FALSE; + return 0; if (aclass == PVS_SRC_REG_TEMPORARY) - return GL_FALSE; + return 0; if (a.RelAddr || b.RelAddr) - return GL_TRUE; + return 1; if (a.Index != b.Index) - return GL_TRUE; + return 1; - return GL_FALSE; + return 0; } -static INLINE unsigned long t_swizzle(GLubyte swizzle) +static inline unsigned long t_swizzle(unsigned int swizzle) { - /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + /* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ return swizzle; } static unsigned long t_src_index(struct r300_vertex_program_code *vp, - struct prog_src_register *src) + struct rc_src_register *src) { - if (src->File == PROGRAM_INPUT) { + if (src->File == RC_FILE_INPUT) { assert(vp->inputs[src->Index] != -1); return vp->inputs[src->Index]; } else { @@ -154,9 +133,9 @@ static unsigned long t_src_index(struct r300_vertex_program_code *vp, /* these two functions should probably be merged... */ static unsigned long t_src(struct r300_vertex_program_code *vp, - struct prog_src_register *src) + struct rc_src_register *src) { - /* src->Negate uses the NEGATE_ flags from program_instruction.h, + /* src->Negate uses the RC_MASK_ flags from program_instruction.h, * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. */ return PVS_SRC_OPERAND(t_src_index(vp, src), @@ -169,9 +148,9 @@ static unsigned long t_src(struct r300_vertex_program_code *vp, } static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, - struct prog_src_register *src) + struct rc_src_register *src) { - /* src->Negate uses the NEGATE_ flags from program_instruction.h, + /* src->Negate uses the RC_MASK_ flags from program_instruction.h, * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. */ return PVS_SRC_OPERAND(t_src_index(vp, src), @@ -180,79 +159,79 @@ static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, t_swizzle(GET_SWZ(src->Swizzle, 0)), t_swizzle(GET_SWZ(src->Swizzle, 0)), t_src_class(src->File), - src->Negate ? NEGATE_XYZW : NEGATE_NONE) | + src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) | (src->RelAddr << 4); } -static GLboolean valid_dst(struct r300_vertex_program_code *vp, - struct prog_dst_register *dst) +static int valid_dst(struct r300_vertex_program_code *vp, + struct rc_dst_register *dst) { - if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { - return GL_FALSE; - } else if (dst->File == PROGRAM_ADDRESS) { + if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) { + return 0; + } else if (dst->File == RC_FILE_ADDRESS) { assert(dst->Index == 0); } - return GL_TRUE; + return 1; } static void ei_vector1(struct r300_vertex_program_code *vp, - GLuint hw_opcode, - struct prog_instruction *vpi, - GLuint * inst) + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) { inst[0] = PVS_OP_DST_OPERAND(hw_opcode, - GL_FALSE, - GL_FALSE, + 0, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); inst[1] = t_src(vp, &vpi->SrcReg[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = __CONST(0, RC_SWIZZLE_ZERO); } static void ei_vector2(struct r300_vertex_program_code *vp, - GLuint hw_opcode, - struct prog_instruction *vpi, - GLuint * inst) + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) { inst[0] = PVS_OP_DST_OPERAND(hw_opcode, - GL_FALSE, - GL_FALSE, + 0, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); inst[1] = t_src(vp, &vpi->SrcReg[0]); inst[2] = t_src(vp, &vpi->SrcReg[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); + inst[3] = __CONST(1, RC_SWIZZLE_ZERO); } static void ei_math1(struct r300_vertex_program_code *vp, - GLuint hw_opcode, - struct prog_instruction *vpi, - GLuint * inst) + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) { inst[0] = PVS_OP_DST_OPERAND(hw_opcode, - GL_TRUE, - GL_FALSE, + 1, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = __CONST(0, RC_SWIZZLE_ZERO); } static void ei_lit(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst) + struct rc_sub_instruction *vpi, + unsigned int * inst) { //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, - GL_TRUE, - GL_FALSE, + 1, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); @@ -262,27 +241,27 @@ static void ei_lit(struct r300_vertex_program_code *vp, PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y t_src_class(vpi->SrcReg[0].File), - vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | (vpi->SrcReg[0].RelAddr << 4); inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X t_src_class(vpi->SrcReg[0].File), - vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | (vpi->SrcReg[0].RelAddr << 4); inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W t_src_class(vpi->SrcReg[0].File), - vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | (vpi->SrcReg[0].RelAddr << 4); } static void ei_mad(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst) + struct rc_sub_instruction *vpi, + unsigned int * inst) { /* Remarks about hardware limitations of MAD * (please preserve this comment, as this information is _NOT_ @@ -310,22 +289,22 @@ static void ei_mad(struct r300_vertex_program_code *vp, * according to AMD docs, this should improve performance by one clock * as a nice side bonus. */ - if (vpi->SrcReg[0].File == PROGRAM_TEMPORARY && - vpi->SrcReg[1].File == PROGRAM_TEMPORARY && - vpi->SrcReg[2].File == PROGRAM_TEMPORARY && + if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY && + vpi->SrcReg[1].File == RC_FILE_TEMPORARY && + vpi->SrcReg[2].File == RC_FILE_TEMPORARY && vpi->SrcReg[0].Index != vpi->SrcReg[1].Index && vpi->SrcReg[0].Index != vpi->SrcReg[2].Index && vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) { inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, - GL_FALSE, - GL_TRUE, + 0, + 1, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); } else { inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, + 0, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); @@ -336,17 +315,17 @@ static void ei_mad(struct r300_vertex_program_code *vp, } static void ei_pow(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst) + struct rc_sub_instruction *vpi, + unsigned int * inst) { inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, - GL_TRUE, - GL_FALSE, + 1, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); } @@ -361,8 +340,8 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi compiler->SetHwInputOutput(compiler); for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) { - struct prog_instruction *vpi = &rci->I; - GLuint *inst = compiler->code->body.d + compiler->code->length; + struct rc_sub_instruction *vpi = &rci->I; + unsigned int *inst = compiler->code->body.d + compiler->code->length; /* Skip instructions writing to non-existing destination */ if (!valid_dst(compiler->code, &vpi->DstReg)) @@ -374,26 +353,26 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi } switch (vpi->Opcode) { - case OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; - case OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; - case OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; - case OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; - case OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; - case OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; - case OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; - case OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; - case OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; - case OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; - case OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break; - case OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break; - case OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break; - case OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break; - case OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break; - case OPCODE_POW: ei_pow(compiler->code, vpi, inst); break; - case OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break; - case OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; - case OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; - case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; + case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; + case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; + case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; + case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; + case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; + case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; + case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; + case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; + case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; + case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; + case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break; + case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break; + case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break; + case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break; + case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break; + case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break; + case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break; + case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; + case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; + case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; default: rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode); return; @@ -407,36 +386,35 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi } struct temporary_allocation { - GLuint Allocated:1; - GLuint HwTemp:15; + unsigned int Allocated:1; + unsigned int HwTemp:15; struct rc_instruction * LastRead; }; static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler) { struct rc_instruction *inst; - GLuint num_orig_temps = 0; - GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS]; + unsigned int num_orig_temps = 0; + char hwtemps[VSF_MAX_FRAGMENT_TEMPS]; struct temporary_allocation * ta; - GLuint i, j; + unsigned int i, j; compiler->code->num_temporaries = 0; memset(hwtemps, 0, sizeof(hwtemps)); /* Pass 1: Count original temporaries and allocate structures */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { - GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); - GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); - for (i = 0; i < numsrcs; ++i) { - if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) { + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->I.SrcReg[i].File == RC_FILE_TEMPORARY) { if (inst->I.SrcReg[i].Index >= num_orig_temps) num_orig_temps = inst->I.SrcReg[i].Index + 1; } } - if (numdsts) { - if (inst->I.DstReg.File == PROGRAM_TEMPORARY) { + if (opcode->HasDstReg) { + if (inst->I.DstReg.File == RC_FILE_TEMPORARY) { if (inst->I.DstReg.Index >= num_orig_temps) num_orig_temps = inst->I.DstReg.Index + 1; } @@ -449,32 +427,31 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c /* Pass 2: Determine original temporary lifetimes */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { - GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); - for (i = 0; i < numsrcs; ++i) { - if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->I.SrcReg[i].File == RC_FILE_TEMPORARY) ta[inst->I.SrcReg[i].Index].LastRead = inst; } } /* Pass 3: Register allocation */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { - GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); - GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); - for (i = 0; i < numsrcs; ++i) { - if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) { - GLuint orig = inst->I.SrcReg[i].Index; + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->I.SrcReg[i].File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->I.SrcReg[i].Index; inst->I.SrcReg[i].Index = ta[orig].HwTemp; if (ta[orig].Allocated && inst == ta[orig].LastRead) - hwtemps[ta[orig].HwTemp] = GL_FALSE; + hwtemps[ta[orig].HwTemp] = 0; } } - if (numdsts) { - if (inst->I.DstReg.File == PROGRAM_TEMPORARY) { - GLuint orig = inst->I.DstReg.Index; + if (opcode->HasDstReg) { + if (inst->I.DstReg.File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->I.DstReg.Index; if (!ta[orig].Allocated) { for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) { @@ -484,9 +461,9 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c if (j >= VSF_MAX_FRAGMENT_TEMPS) { fprintf(stderr, "Out of hw temporaries\n"); } else { - ta[orig].Allocated = GL_TRUE; + ta[orig].Allocated = 1; ta[orig].HwTemp = j; - hwtemps[j] = GL_TRUE; + hwtemps[j] = 1; if (j >= compiler->code->num_temporaries) compiler->code->num_temporaries = j + 1; @@ -504,45 +481,45 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c * Vertex engine cannot read two inputs or two constants at the same time. * Introduce intermediate MOVs to temporary registers to account for this. */ -static GLboolean transform_source_conflicts( +static int transform_source_conflicts( struct radeon_compiler *c, struct rc_instruction* inst, void* unused) { - GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); - if (num_operands == 3) { + if (opcode->NumSrcRegs == 3) { if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2]) || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) { int tmpreg = rc_find_free_temporary(c); struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->I.Opcode = OPCODE_MOV; - inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.Opcode = RC_OPCODE_MOV; + inst_mov->I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->I.DstReg.Index = tmpreg; inst_mov->I.SrcReg[0] = inst->I.SrcReg[2]; reset_srcreg(&inst->I.SrcReg[2]); - inst->I.SrcReg[2].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[2].File = RC_FILE_TEMPORARY; inst->I.SrcReg[2].Index = tmpreg; } } - if (num_operands >= 2) { + if (opcode->NumSrcRegs >= 2) { if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) { int tmpreg = rc_find_free_temporary(c); struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->I.Opcode = OPCODE_MOV; - inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.Opcode = RC_OPCODE_MOV; + inst_mov->I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->I.DstReg.Index = tmpreg; inst_mov->I.SrcReg[0] = inst->I.SrcReg[1]; reset_srcreg(&inst->I.SrcReg[1]); - inst->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[1].File = RC_FILE_TEMPORARY; inst->I.SrcReg[1].Index = tmpreg; } } - return GL_TRUE; + return 1; } static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) @@ -553,15 +530,15 @@ static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) if ((compiler->RequiredOutputs & (1 << i)) && !(compiler->Base.Program.OutputsWritten & (1 << i))) { struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev); - inst->I.Opcode = OPCODE_MOV; + inst->I.Opcode = RC_OPCODE_MOV; - inst->I.DstReg.File = PROGRAM_OUTPUT; + inst->I.DstReg.File = RC_FILE_OUTPUT; inst->I.DstReg.Index = i; - inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + inst->I.DstReg.WriteMask = RC_MASK_XYZW; - inst->I.SrcReg[0].File = PROGRAM_CONSTANT; + inst->I.SrcReg[0].File = RC_FILE_CONSTANT; inst->I.SrcReg[0].Index = 0; - inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + inst->I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; compiler->Base.Program.OutputsWritten |= 1 << i; } @@ -573,18 +550,18 @@ static void nqssadceInit(struct nqssadce_state* s) struct r300_vertex_program_compiler * compiler = s->UserData; int i; - for(i = 0; i < VERT_RESULT_MAX; ++i) { + for(i = 0; i < 32; ++i) { if (compiler->RequiredOutputs & (1 << i)) - s->Outputs[i].Sourced = WRITEMASK_XYZW; + s->Outputs[i].Sourced = RC_MASK_XYZW; } } -static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) +static int swizzleIsNative(rc_opcode opcode, struct rc_src_register reg) { (void) opcode; (void) reg; - return GL_TRUE; + return 1; } diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index 7e2faed690..3e994ebd1b 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -27,15 +27,17 @@ #include "r500_fragprog.h" +#include + #include "../r300_reg.h" -static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu) +static struct rc_src_register shadow_ambient(struct radeon_compiler * c, int tmu) { - struct prog_src_register reg = { 0, }; + struct rc_src_register reg = { 0, }; - reg.File = PROGRAM_STATE_VAR; + reg.File = RC_FILE_CONSTANT; reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu); - reg.Swizzle = SWIZZLE_WWWW; + reg.Swizzle = RC_SWIZZLE_WWWW; return reg; } @@ -44,7 +46,7 @@ static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int t * - implement texture compare (shadow extensions) * - extract non-native source / destination operands */ -GLboolean r500_transform_TEX( +int r500_transform_TEX( struct radeon_compiler * c, struct rc_instruction * inst, void* data) @@ -52,77 +54,77 @@ GLboolean r500_transform_TEX( struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)data; - if (inst->I.Opcode != OPCODE_TEX && - inst->I.Opcode != OPCODE_TXB && - inst->I.Opcode != OPCODE_TXP && - inst->I.Opcode != OPCODE_KIL) - return GL_FALSE; + if (inst->I.Opcode != RC_OPCODE_TEX && + inst->I.Opcode != RC_OPCODE_TXB && + inst->I.Opcode != RC_OPCODE_TXP && + inst->I.Opcode != RC_OPCODE_KIL) + return 0; /* ARB_shadow & EXT_shadow_funcs */ - if (inst->I.Opcode != OPCODE_KIL && + if (inst->I.Opcode != RC_OPCODE_KIL && c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + rc_compare_func comparefunc = compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; - if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { - inst->I.Opcode = OPCODE_MOV; + if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->I.Opcode = RC_OPCODE_MOV; - if (comparefunc == GL_ALWAYS) { - inst->I.SrcReg[0].File = PROGRAM_BUILTIN; - inst->I.SrcReg[0].Swizzle = SWIZZLE_1111; + if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->I.SrcReg[0].File = RC_FILE_NONE; + inst->I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; } else { inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit); } - return GL_TRUE; + return 1; } else { - GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; - GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode; + rc_compare_func comparefunc = compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + unsigned int depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode; struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); int pass, fail; - inst_rcp->I.Opcode = OPCODE_RCP; - inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY; + inst_rcp->I.Opcode = RC_OPCODE_RCP; + inst_rcp->I.DstReg.File = RC_FILE_TEMPORARY; inst_rcp->I.DstReg.Index = rc_find_free_temporary(c); - inst_rcp->I.DstReg.WriteMask = WRITEMASK_W; + inst_rcp->I.DstReg.WriteMask = RC_MASK_W; inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0]; - inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW; + inst_rcp->I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; inst_cmp->I.DstReg = inst->I.DstReg; - inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.File = RC_FILE_TEMPORARY; inst->I.DstReg.Index = rc_find_free_temporary(c); - inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + inst->I.DstReg.WriteMask = RC_MASK_XYZW; - inst_mad->I.Opcode = OPCODE_MAD; - inst_mad->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mad->I.Opcode = RC_OPCODE_MAD; + inst_mad->I.DstReg.File = RC_FILE_TEMPORARY; inst_mad->I.DstReg.Index = rc_find_free_temporary(c); inst_mad->I.SrcReg[0] = inst->I.SrcReg[0]; - inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ; - inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ; + inst_mad->I.SrcReg[1].File = RC_FILE_TEMPORARY; inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index; - inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW; - inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + inst_mad->I.SrcReg[2].File = RC_FILE_TEMPORARY; inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index; if (depthmode == 0) /* GL_LUMINANCE */ - inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + inst_mad->I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z); else if (depthmode == 2) /* GL_ALPHA */ - inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW; + inst_mad->I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW; /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: * r < tex <=> -tex+r < 0 * r >= tex <=> not (-tex+r < 0 */ - if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW; + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL) + inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ RC_MASK_XYZW; else - inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW; + inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ RC_MASK_XYZW; - inst_cmp->I.Opcode = OPCODE_CMP; + inst_cmp->I.Opcode = RC_OPCODE_CMP; /* DstReg has been filled out above */ - inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_cmp->I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index; - if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) { pass = 1; fail = 2; } else { @@ -130,97 +132,97 @@ GLboolean r500_transform_TEX( fail = 1; } - inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN; - inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111; + inst_cmp->I.SrcReg[pass].File = RC_FILE_NONE; + inst_cmp->I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit); } } /* Cannot write texture to output registers */ - if (inst->I.Opcode != OPCODE_KIL && inst->I.DstReg.File != PROGRAM_TEMPORARY) { + if (inst->I.Opcode != RC_OPCODE_KIL && inst->I.DstReg.File != RC_FILE_TEMPORARY) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); - inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.Opcode = RC_OPCODE_MOV; inst_mov->I.DstReg = inst->I.DstReg; - inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_mov->I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c); - inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.File = RC_FILE_TEMPORARY; inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; - inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + inst->I.DstReg.WriteMask = RC_MASK_XYZW; } /* Cannot read texture coordinate from constants file */ - if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) { + if (inst->I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->I.SrcReg[0].File != RC_FILE_INPUT) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->I.Opcode = OPCODE_MOV; - inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.Opcode = RC_OPCODE_MOV; + inst_mov->I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->I.DstReg.Index = rc_find_free_temporary(c); inst_mov->I.SrcReg[0] = inst->I.SrcReg[0]; reset_srcreg(&inst->I.SrcReg[0]); - inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index; } - return GL_TRUE; + return 1; } -GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) +int r500FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg) { - GLuint relevant; + unsigned int relevant; int i; - if (opcode == OPCODE_TEX || - opcode == OPCODE_TXB || - opcode == OPCODE_TXP || - opcode == OPCODE_KIL) { + if (opcode == RC_OPCODE_TEX || + opcode == RC_OPCODE_TXB || + opcode == RC_OPCODE_TXP || + opcode == RC_OPCODE_KIL) { if (reg.Abs) - return GL_FALSE; + return 0; - if (opcode == OPCODE_KIL && (reg.Swizzle != SWIZZLE_NOOP || reg.Negate != NEGATE_NONE)) - return GL_FALSE; + if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE)) + return 0; if (reg.Negate) - reg.Negate ^= NEGATE_XYZW; + reg.Negate ^= RC_MASK_XYZW; for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(reg.Swizzle, i); - if (swz == SWIZZLE_NIL) { + unsigned int swz = GET_SWZ(reg.Swizzle, i); + if (swz == RC_SWIZZLE_UNUSED) { reg.Negate &= ~(1 << i); continue; } if (swz >= 4) - return GL_FALSE; + return 0; } if (reg.Negate) - return GL_FALSE; + return 0; - return GL_TRUE; - } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) { + return 1; + } else if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY) { /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; * if it doesn't fit perfectly into a .xyzw case... */ - if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs && !reg.Negate) - return GL_TRUE; + if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate) + return 1; - return GL_FALSE; + return 0; } else { /* ALU instructions support almost everything */ if (reg.Abs) - return GL_TRUE; + return 1; relevant = 0; for(i = 0; i < 3; ++i) { - GLuint swz = GET_SWZ(reg.Swizzle, i); - if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO) + unsigned int swz = GET_SWZ(reg.Swizzle, i); + if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO) relevant |= 1 << i; } if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) - return GL_FALSE; + return 0; - return GL_TRUE; + return 1; } } @@ -230,14 +232,14 @@ GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) * The only thing we *cannot* do in an ALU instruction is per-component * negation. Therefore, we split the MOV into two instructions when necessary. */ -void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) +void r500FPBuildSwizzle(struct nqssadce_state *s, struct rc_dst_register dst, struct rc_src_register src) { - GLuint negatebase[2] = { 0, 0 }; + unsigned int negatebase[2] = { 0, 0 }; int i; for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(src.Swizzle, i); - if (swz == SWIZZLE_NIL) + unsigned int swz = GET_SWZ(src.Swizzle, i); + if (swz == RC_SWIZZLE_UNUSED) continue; negatebase[GET_BIT(src.Negate, i)] |= 1 << i; } @@ -247,11 +249,11 @@ void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, continue; struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev); - inst->I.Opcode = OPCODE_MOV; + inst->I.Opcode = RC_OPCODE_MOV; inst->I.DstReg = dst; inst->I.DstReg.WriteMask = negatebase[i]; inst->I.SrcReg[0] = src; - inst->I.SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW; + inst->I.SrcReg[0].Negate = (i == 0) ? RC_MASK_NONE : RC_MASK_XYZW; } } diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h index 9091f65cd2..887d4abbd2 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h @@ -33,9 +33,6 @@ #ifndef __R500_FRAGPROG_H_ #define __R500_FRAGPROG_H_ -#include "shader/prog_parameter.h" -#include "shader/prog_instruction.h" - #include "radeon_compiler.h" #include "radeon_nqssadce.h" @@ -43,11 +40,11 @@ extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c); -extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); +extern int r500FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg); -extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src); +extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct rc_dst_register dst, struct rc_src_register src); -extern GLboolean r500_transform_TEX( +extern int r500_transform_TEX( struct radeon_compiler * c, struct rc_instruction * inst, void* data); diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index d694725c9b..2f0c0d5283 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -60,63 +60,63 @@ } while(0) -static GLuint translate_rgb_op(struct r300_fragment_program_compiler *c, GLuint opcode) +static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) { switch(opcode) { - case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; - case OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; - case OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; - case OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; - case OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; - case OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; + case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; + case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; + case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; + case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; + case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; + case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; default: error("translate_rgb_op(%d): unknown opcode\n", opcode); /* fall through */ - case OPCODE_NOP: + case RC_OPCODE_NOP: /* fall through */ - case OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; - case OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; - case OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; - case OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; + case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; + case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; + case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; + case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; } } -static GLuint translate_alpha_op(struct r300_fragment_program_compiler *c, GLuint opcode) +static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) { switch(opcode) { - case OPCODE_CMP: return R500_ALPHA_OP_CMP; - case OPCODE_COS: return R500_ALPHA_OP_COS; - case OPCODE_DDX: return R500_ALPHA_OP_MDH; - case OPCODE_DDY: return R500_ALPHA_OP_MDV; - case OPCODE_DP3: return R500_ALPHA_OP_DP; - case OPCODE_DP4: return R500_ALPHA_OP_DP; - case OPCODE_EX2: return R500_ALPHA_OP_EX2; - case OPCODE_FRC: return R500_ALPHA_OP_FRC; - case OPCODE_LG2: return R500_ALPHA_OP_LN2; + case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP; + case RC_OPCODE_COS: return R500_ALPHA_OP_COS; + case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH; + case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV; + case RC_OPCODE_DP3: return R500_ALPHA_OP_DP; + case RC_OPCODE_DP4: return R500_ALPHA_OP_DP; + case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2; + case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC; + case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2; default: error("translate_alpha_op(%d): unknown opcode\n", opcode); /* fall through */ - case OPCODE_NOP: + case RC_OPCODE_NOP: /* fall through */ - case OPCODE_MAD: return R500_ALPHA_OP_MAD; - case OPCODE_MAX: return R500_ALPHA_OP_MAX; - case OPCODE_MIN: return R500_ALPHA_OP_MIN; - case OPCODE_RCP: return R500_ALPHA_OP_RCP; - case OPCODE_RSQ: return R500_ALPHA_OP_RSQ; - case OPCODE_SIN: return R500_ALPHA_OP_SIN; + case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD; + case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX; + case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN; + case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP; + case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ; + case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN; } } -static GLuint fix_hw_swizzle(GLuint swz) +static unsigned int fix_hw_swizzle(unsigned int swz) { if (swz == 5) swz = 6; - if (swz == SWIZZLE_NIL) swz = 4; + if (swz == RC_SWIZZLE_UNUSED) swz = 4; return swz; } -static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg) +static unsigned int translate_arg_rgb(struct radeon_pair_instruction *inst, int arg) { - GLuint t = inst->RGB.Arg[arg].Source; + unsigned int t = inst->RGB.Arg[arg].Source; int comp; t |= inst->RGB.Arg[arg].Negate << 11; t |= inst->RGB.Arg[arg].Abs << 12; @@ -127,22 +127,22 @@ static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg) return t; } -static GLuint translate_arg_alpha(struct radeon_pair_instruction *inst, int i) +static unsigned int translate_arg_alpha(struct radeon_pair_instruction *inst, int i) { - GLuint t = inst->Alpha.Arg[i].Source; + unsigned int t = inst->Alpha.Arg[i].Source; t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2; t |= inst->Alpha.Arg[i].Negate << 5; t |= inst->Alpha.Arg[i].Abs << 6; return t; } -static void use_temporary(struct r500_fragment_program_code* code, GLuint index) +static void use_temporary(struct r500_fragment_program_code* code, unsigned int index) { if (index > code->max_temp_idx) code->max_temp_idx = index; } -static GLuint use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src) +static unsigned int use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src) { if (!src.Constant) use_temporary(code, src.Index); @@ -153,13 +153,13 @@ static GLuint use_source(struct r500_fragment_program_code* code, struct radeon_ /** * Emit a paired ALU instruction. */ -static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) +static int emit_paired(void *data, struct radeon_pair_instruction *inst) { PROG_CODE; if (code->inst_end >= 511) { error("emit_alu: Too many instructions"); - return GL_FALSE; + return 0; } int ip = ++code->inst_end; @@ -177,7 +177,7 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); if (inst->Alpha.DepthWriteMask) { code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; - c->code->writes_depth = GL_TRUE; + c->code->writes_depth = 1; } code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); @@ -206,12 +206,12 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; - return GL_TRUE; + return 1; } -static GLuint translate_strq_swizzle(GLuint swizzle) +static unsigned int translate_strq_swizzle(unsigned int swizzle) { - GLuint swiz = 0; + unsigned int swiz = 0; int i; for (i = 0; i < 4; i++) swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2; @@ -221,13 +221,13 @@ static GLuint translate_strq_swizzle(GLuint swizzle) /** * Emit a single TEX instruction */ -static GLboolean emit_tex(void *data, struct radeon_pair_texture_instruction *inst) +static int emit_tex(void *data, struct radeon_pair_texture_instruction *inst) { PROG_CODE; if (code->inst_end >= 511) { error("emit_tex: Too many instructions"); - return GL_FALSE; + return 0; } int ip = ++code->inst_end; @@ -238,7 +238,7 @@ static GLboolean emit_tex(void *data, struct radeon_pair_texture_instruction *in code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; - if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) + if (inst->TexSrcTarget == RC_TEXTURE_RECT) code->inst[ip].inst1 |= R500_TEX_UNSCALED; switch (inst->Opcode) { @@ -264,7 +264,7 @@ static GLboolean emit_tex(void *data, struct radeon_pair_texture_instruction *in | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; - return GL_TRUE; + return 1; } static const struct radeon_pair_handler pair_handler = { @@ -277,7 +277,7 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi { struct r500_fragment_program_code *code = &compiler->code->code.r500; - _mesa_bzero(code, sizeof(*code)); + memset(code, 0, sizeof(*code)); code->max_temp_idx = 1; code->inst_end = -1; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.c b/src/mesa/drivers/dri/r300/compiler/radeon_code.c index c7923004df..1a3d8bb641 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.c @@ -25,11 +25,13 @@ * */ -#include "main/mtypes.h" -#include "shader/prog_instruction.h" - #include "radeon_code.h" +#include +#include + +#include "radeon_program.h" + void rc_constants_init(struct rc_constant_list * c) { memset(c, 0, sizeof(*c)); @@ -143,7 +145,7 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { for(unsigned comp = 0; comp < c->Constants[index].Size; ++comp) { if (c->Constants[index].u.Immediate[comp] == data) { - *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp); + *swizzle = RC_MAKE_SWIZZLE(comp, comp, comp, comp); return index; } } @@ -156,7 +158,7 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da if (free_index >= 0) { unsigned comp = c->Constants[free_index].Size++; c->Constants[free_index].u.Immediate[comp] = data; - *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp); + *swizzle = RC_MAKE_SWIZZLE(comp, comp, comp, comp); return free_index; } @@ -164,7 +166,7 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da constant.Type = RC_CONSTANT_IMMEDIATE; constant.Size = 1; constant.u.Immediate[0] = data; - *swizzle = SWIZZLE_XXXX; + *swizzle = RC_SWIZZLE_XXXX; return rc_constants_add(c, &constant); } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index 0806fb1b5c..de0c88ed51 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -88,6 +88,23 @@ unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, un unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data); unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle); +/** + * Compare functions. + * + * \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you + * the correct GL compare function. + */ +typedef enum { + RC_COMPARE_FUNC_NEVER = 0, + RC_COMPARE_FUNC_LESS, + RC_COMPARE_FUNC_EQUAL, + RC_COMPARE_FUNC_LEQUAL, + RC_COMPARE_FUNC_GREATER, + RC_COMPARE_FUNC_NOTEQUAL, + RC_COMPARE_FUNC_GEQUAL, + RC_COMPARE_FUNC_ALWAYS +} rc_compare_func; + /** * Stores state that influences the compilation of a fragment program. */ @@ -105,10 +122,12 @@ struct r300_fragment_program_external_state { /** * If the sampler is used as a shadow sampler, - * this field is (texture_compare_func - GL_NEVER). - * [e.g. if compare function is GL_LEQUAL, this field is 3] + * this field specifies the compare function. + * + * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0). * * Otherwise, this field is 0. + * \sa rc_compare_func */ unsigned texture_compare_func : 3; } unit[16]; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index da950d5289..babdcffd3a 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -23,6 +23,8 @@ #include "radeon_compiler.h" #include +#include +#include #include "radeon_program.h" @@ -34,7 +36,7 @@ void rc_init(struct radeon_compiler * c) memory_pool_init(&c->Pool); c->Program.Instructions.Prev = &c->Program.Instructions; c->Program.Instructions.Next = &c->Program.Instructions; - c->Program.Instructions.I.Opcode = OPCODE_END; + c->Program.Instructions.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; } void rc_destroy(struct radeon_compiler * c) @@ -60,7 +62,7 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...) { va_list ap; - c->Error = GL_TRUE; + c->Error = 1; if (!c->ErrorMsg) { /* Only remember the first error */ @@ -95,18 +97,18 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...) * Rewrite the program such that everything that source the given input * register will source new_input instead. */ -void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input) +void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input) { struct rc_instruction * inst; c->Program.InputsRead &= ~(1 << input); for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); unsigned i; - for(i = 0; i < numsrcs; ++i) { - if (inst->I.SrcReg[i].File == PROGRAM_INPUT && inst->I.SrcReg[i].Index == input) { + for(i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->I.SrcReg[i].File == RC_FILE_INPUT && inst->I.SrcReg[i].Index == input) { inst->I.SrcReg[i].File = new_input.File; inst->I.SrcReg[i].Index = new_input.Index; inst->I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->I.SrcReg[i].Swizzle); @@ -134,10 +136,10 @@ void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_ou c->Program.OutputsWritten &= ~(1 << output); for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); - if (numdsts) { - if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) { + if (opcode->HasDstReg) { + if (inst->I.DstReg.File == RC_FILE_OUTPUT && inst->I.DstReg.Index == output) { inst->I.DstReg.Index = new_output; inst->I.DstReg.WriteMask &= writemask; @@ -157,33 +159,33 @@ void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_ou struct rc_instruction * inst; for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); - if (numdsts) { - if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) { - inst->I.DstReg.File = PROGRAM_TEMPORARY; + if (opcode->HasDstReg) { + if (inst->I.DstReg.File == RC_FILE_OUTPUT && inst->I.DstReg.Index == output) { + inst->I.DstReg.File = RC_FILE_TEMPORARY; inst->I.DstReg.Index = tempreg; } } } inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); - inst->I.Opcode = OPCODE_MOV; - inst->I.DstReg.File = PROGRAM_OUTPUT; + inst->I.Opcode = RC_OPCODE_MOV; + inst->I.DstReg.File = RC_FILE_OUTPUT; inst->I.DstReg.Index = output; - inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->I.SrcReg[0].Index = tempreg; - inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + inst->I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); - inst->I.Opcode = OPCODE_MOV; - inst->I.DstReg.File = PROGRAM_OUTPUT; + inst->I.Opcode = RC_OPCODE_MOV; + inst->I.DstReg.File = RC_FILE_OUTPUT; inst->I.DstReg.Index = dup_output; - inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].File = RC_FILE_TEMPORARY; inst->I.SrcReg[0].Index = tempreg; - inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + inst->I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; c->Program.OutputsWritten |= 1 << dup_output; } @@ -201,59 +203,59 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig /* perspective divide */ struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); - inst_rcp->I.Opcode = OPCODE_RCP; + inst_rcp->I.Opcode = RC_OPCODE_RCP; - inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY; + inst_rcp->I.DstReg.File = RC_FILE_TEMPORARY; inst_rcp->I.DstReg.Index = tempregi; - inst_rcp->I.DstReg.WriteMask = WRITEMASK_W; + inst_rcp->I.DstReg.WriteMask = RC_MASK_W; - inst_rcp->I.SrcReg[0].File = PROGRAM_INPUT; + inst_rcp->I.SrcReg[0].File = RC_FILE_INPUT; inst_rcp->I.SrcReg[0].Index = new_input; - inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW; + inst_rcp->I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst_rcp); - inst_mul->I.Opcode = OPCODE_MUL; + inst_mul->I.Opcode = RC_OPCODE_MUL; - inst_mul->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mul->I.DstReg.File = RC_FILE_TEMPORARY; inst_mul->I.DstReg.Index = tempregi; - inst_mul->I.DstReg.WriteMask = WRITEMASK_XYZ; + inst_mul->I.DstReg.WriteMask = RC_MASK_XYZ; - inst_mul->I.SrcReg[0].File = PROGRAM_INPUT; + inst_mul->I.SrcReg[0].File = RC_FILE_INPUT; inst_mul->I.SrcReg[0].Index = new_input; - inst_mul->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst_mul->I.SrcReg[1].File = RC_FILE_TEMPORARY; inst_mul->I.SrcReg[1].Index = tempregi; - inst_mul->I.SrcReg[1].Swizzle = SWIZZLE_WWWW; + inst_mul->I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; /* viewport transformation */ struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_mul); - inst_mad->I.Opcode = OPCODE_MAD; + inst_mad->I.Opcode = RC_OPCODE_MAD; - inst_mad->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mad->I.DstReg.File = RC_FILE_TEMPORARY; inst_mad->I.DstReg.Index = tempregi; - inst_mad->I.DstReg.WriteMask = WRITEMASK_XYZ; + inst_mad->I.DstReg.WriteMask = RC_MASK_XYZ; - inst_mad->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_mad->I.SrcReg[0].Index = tempregi; - inst_mad->I.SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + inst_mad->I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); - inst_mad->I.SrcReg[1].File = PROGRAM_STATE_VAR; + inst_mad->I.SrcReg[1].File = RC_FILE_CONSTANT; inst_mad->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); - inst_mad->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + inst_mad->I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); - inst_mad->I.SrcReg[2].File = PROGRAM_STATE_VAR; + inst_mad->I.SrcReg[2].File = RC_FILE_CONSTANT; inst_mad->I.SrcReg[2].Index = inst_mad->I.SrcReg[1].Index; - inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + inst_mad->I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); struct rc_instruction * inst; for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { - const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); unsigned i; - for(i = 0; i < numsrcs; i++) { - if (inst->I.SrcReg[i].File == PROGRAM_INPUT && + for(i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->I.SrcReg[i].File == RC_FILE_INPUT && inst->I.SrcReg[i].Index == wpos) { - inst->I.SrcReg[i].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[i].File = RC_FILE_TEMPORARY; inst->I.SrcReg[i].Index = tempregi; } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index fbe7c37dd1..018f9bba06 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -23,36 +23,11 @@ #ifndef RADEON_COMPILER_H #define RADEON_COMPILER_H -#include "main/mtypes.h" -#include "shader/prog_instruction.h" - #include "memory_pool.h" #include "radeon_code.h" +#include "radeon_program.h" -struct rc_instruction { - struct rc_instruction * Prev; - struct rc_instruction * Next; - struct prog_instruction I; -}; - -struct rc_program { - /** - * Instructions.Next points to the first instruction, - * Instructions.Prev points to the last instruction. - */ - struct rc_instruction Instructions; - - /* Long term, we should probably remove InputsRead & OutputsWritten, - * since updating dependent state can be fragile, and they aren't - * actually used very often. */ - uint32_t InputsRead; - uint32_t OutputsWritten; - uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */ - - struct rc_constant_list Constants; -}; - struct radeon_compiler { struct memory_pool Pool; struct rc_program Program; @@ -69,7 +44,7 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...); void rc_calculate_inputs_outputs(struct radeon_compiler * c); -void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input); +void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input); void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask); void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output); void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input); @@ -95,7 +70,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c); struct r300_vertex_program_compiler { struct radeon_compiler Base; struct r300_vertex_program_code *code; - GLbitfield RequiredOutputs; + uint32_t RequiredOutputs; void * UserData; void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c index aaaa50ad1f..3e02ebee81 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c @@ -29,9 +29,6 @@ * @file * * "Not-quite SSA" and Dead-Code Elimination. - * - * @note This code uses SWIZZLE_NIL in a source register to indicate that - * the corresponding component is ignored by the corresponding instruction. */ #include "radeon_nqssadce.h" @@ -43,76 +40,55 @@ * Return the @ref register_state for the given register (or 0 for untracked * registers, i.e. constants). */ -static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index) +static struct register_state *get_reg_state(struct nqssadce_state* s, rc_register_file file, unsigned int index) { + if (index >= RC_REGISTER_MAX_INDEX) + return 0; + switch(file) { - case PROGRAM_TEMPORARY: return &s->Temps[index]; - case PROGRAM_OUTPUT: return &s->Outputs[index]; - case PROGRAM_ADDRESS: return &s->Address; + case RC_FILE_TEMPORARY: return &s->Temps[index]; + case RC_FILE_OUTPUT: return &s->Outputs[index]; + case RC_FILE_ADDRESS: return &s->Address; default: return 0; } } -/** - * Left multiplication of a register with a swizzle - * - * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles. - */ -struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg) -{ - struct prog_src_register tmp = srcreg; - int i; - tmp.Swizzle = 0; - tmp.Negate = NEGATE_NONE; - for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(swizzle, i); - if (swz < 4) { - tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); - tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; - } else { - tmp.Swizzle |= swz << (i*3); - } - } - return tmp; -} - - static void track_used_srcreg(struct nqssadce_state* s, - GLint src, GLuint sourced) + int src, unsigned int sourced) { - struct prog_instruction * inst = &s->IP->I; + struct rc_sub_instruction * inst = &s->IP->I; int i; - GLuint deswz_source = 0; + unsigned int deswz_source = 0; for(i = 0; i < 4; ++i) { if (GET_BIT(sourced, i)) { - GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i); + unsigned int swz = GET_SWZ(inst->SrcReg[src].Swizzle, i); deswz_source |= 1 << swz; } else { inst->SrcReg[src].Swizzle &= ~(7 << (3*i)); - inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); + inst->SrcReg[src].Swizzle |= RC_SWIZZLE_UNUSED << (3*i); } } if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) { - struct prog_dst_register dstreg = inst->DstReg; - dstreg.File = PROGRAM_TEMPORARY; + struct rc_dst_register dstreg = inst->DstReg; + dstreg.File = RC_FILE_TEMPORARY; dstreg.Index = rc_find_free_temporary(s->Compiler); dstreg.WriteMask = sourced; s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]); - inst->SrcReg[src].File = PROGRAM_TEMPORARY; + inst->SrcReg[src].File = RC_FILE_TEMPORARY; inst->SrcReg[src].Index = dstreg.Index; inst->SrcReg[src].Swizzle = 0; - inst->SrcReg[src].Negate = NEGATE_NONE; + inst->SrcReg[src].Negate = RC_MASK_NONE; inst->SrcReg[src].Abs = 0; for(i = 0; i < 4; ++i) { if (GET_BIT(sourced, i)) inst->SrcReg[src].Swizzle |= i << (3*i); else - inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); + inst->SrcReg[src].Swizzle |= RC_SWIZZLE_UNUSED << (3*i); } deswz_source = sourced; } @@ -120,9 +96,9 @@ static void track_used_srcreg(struct nqssadce_state* s, struct register_state *regstate; if (inst->SrcReg[src].RelAddr) { - regstate = get_reg_state(s, PROGRAM_ADDRESS, 0); + regstate = get_reg_state(s, RC_FILE_ADDRESS, 0); if (regstate) - regstate->Sourced |= WRITEMASK_X; + regstate->Sourced |= RC_MASK_X; } else { regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index); if (regstate) @@ -130,21 +106,21 @@ static void track_used_srcreg(struct nqssadce_state* s, } } -static void unalias_srcregs(struct rc_instruction *inst, GLuint oldindex, GLuint newindex) +static void unalias_srcregs(struct rc_instruction *inst, unsigned int oldindex, unsigned int newindex) { - int nsrc = _mesa_num_inst_src_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); int i; - for(i = 0; i < nsrc; ++i) - if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY && inst->I.SrcReg[i].Index == oldindex) + for(i = 0; i < opcode->NumSrcRegs; ++i) + if (inst->I.SrcReg[i].File == RC_FILE_TEMPORARY && inst->I.SrcReg[i].Index == oldindex) inst->I.SrcReg[i].Index = newindex; } -static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex) +static void unalias_temporary(struct nqssadce_state* s, unsigned int oldindex) { - GLuint newindex = rc_find_free_temporary(s->Compiler); + unsigned int newindex = rc_find_free_temporary(s->Compiler); struct rc_instruction * inst; for(inst = s->Compiler->Program.Instructions.Next; inst != s->IP; inst = inst->Next) { - if (inst->I.DstReg.File == PROGRAM_TEMPORARY && inst->I.DstReg.Index == oldindex) + if (inst->I.DstReg.File == RC_FILE_TEMPORARY && inst->I.DstReg.Index == oldindex) inst->I.DstReg.Index = newindex; unalias_srcregs(inst, oldindex, newindex); } @@ -157,13 +133,10 @@ static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex) */ static void process_instruction(struct nqssadce_state* s) { - struct prog_instruction *inst = &s->IP->I; - GLuint WriteMask; - - if (inst->Opcode == OPCODE_END) - return; + struct rc_sub_instruction *inst = &s->IP->I; + unsigned int WriteMask; - if (inst->Opcode != OPCODE_KIL) { + if (inst->Opcode != RC_OPCODE_KIL) { struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index); if (!regstate) { rc_error(s->Compiler, "NqssaDce: bad destination register (%i[%i])\n", @@ -181,67 +154,67 @@ static void process_instruction(struct nqssadce_state* s) return; } - if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced) + if (inst->DstReg.File == RC_FILE_TEMPORARY && !regstate->Sourced) unalias_temporary(s, inst->DstReg.Index); } WriteMask = inst->DstReg.WriteMask; switch (inst->Opcode) { - case OPCODE_ARL: - case OPCODE_DDX: - case OPCODE_DDY: - case OPCODE_FRC: - case OPCODE_MOV: + case RC_OPCODE_ARL: + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + case RC_OPCODE_FRC: + case RC_OPCODE_MOV: track_used_srcreg(s, 0, WriteMask); break; - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: - case OPCODE_SGE: - case OPCODE_SLT: + case RC_OPCODE_ADD: + case RC_OPCODE_MAX: + case RC_OPCODE_MIN: + case RC_OPCODE_MUL: + case RC_OPCODE_SGE: + case RC_OPCODE_SLT: track_used_srcreg(s, 0, WriteMask); track_used_srcreg(s, 1, WriteMask); break; - case OPCODE_CMP: - case OPCODE_MAD: + case RC_OPCODE_CMP: + case RC_OPCODE_MAD: track_used_srcreg(s, 0, WriteMask); track_used_srcreg(s, 1, WriteMask); track_used_srcreg(s, 2, WriteMask); break; - case OPCODE_COS: - case OPCODE_EX2: - case OPCODE_LG2: - case OPCODE_RCP: - case OPCODE_RSQ: - case OPCODE_SIN: + case RC_OPCODE_COS: + case RC_OPCODE_EX2: + case RC_OPCODE_LG2: + case RC_OPCODE_RCP: + case RC_OPCODE_RSQ: + case RC_OPCODE_SIN: track_used_srcreg(s, 0, 0x1); break; - case OPCODE_DP3: + case RC_OPCODE_DP3: track_used_srcreg(s, 0, 0x7); track_used_srcreg(s, 1, 0x7); break; - case OPCODE_DP4: + case RC_OPCODE_DP4: track_used_srcreg(s, 0, 0xf); track_used_srcreg(s, 1, 0xf); break; - case OPCODE_KIL: - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXP: + case RC_OPCODE_KIL: + case RC_OPCODE_TEX: + case RC_OPCODE_TXB: + case RC_OPCODE_TXP: track_used_srcreg(s, 0, 0xf); break; - case OPCODE_DST: + case RC_OPCODE_DST: track_used_srcreg(s, 0, 0x6); track_used_srcreg(s, 1, 0xa); break; - case OPCODE_EXP: - case OPCODE_LOG: - case OPCODE_POW: + case RC_OPCODE_EXP: + case RC_OPCODE_LOG: + case RC_OPCODE_POW: track_used_srcreg(s, 0, 0x3); break; - case OPCODE_LIT: + case RC_OPCODE_LIT: track_used_srcreg(s, 0, 0xb); break; default: @@ -261,16 +234,16 @@ void rc_calculate_inputs_outputs(struct radeon_compiler * c) for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); int i; - int num_src_regs = _mesa_num_inst_src_regs(inst->I.Opcode); - for (i = 0; i < num_src_regs; ++i) { - if (inst->I.SrcReg[i].File == PROGRAM_INPUT) + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->I.SrcReg[i].File == RC_FILE_INPUT) c->Program.InputsRead |= 1 << inst->I.SrcReg[i].Index; } - if (_mesa_num_inst_dst_regs(inst->I.Opcode)) { - if (inst->I.DstReg.File == PROGRAM_OUTPUT) + if (opcode->HasDstReg) { + if (inst->I.DstReg.File == RC_FILE_OUTPUT) c->Program.OutputsWritten |= 1 << inst->I.DstReg.Index; } } @@ -280,7 +253,7 @@ void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* de { struct nqssadce_state s; - _mesa_bzero(&s, sizeof(s)); + memset(&s, 0, sizeof(s)); s.Compiler = c; s.Descr = descr; s.UserData = data; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h index b3fc77a35a..a2aa1eb8ca 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h @@ -35,7 +35,7 @@ struct register_state { * Bitmask indicating which components of the register are sourced * by later instructions. */ - GLuint Sourced : 4; + unsigned int Sourced : 4; }; /** @@ -54,8 +54,8 @@ struct nqssadce_state { /** * Which registers are read by subsequent instructions? */ - struct register_state Temps[MAX_PROGRAM_TEMPS]; - struct register_state Outputs[VERT_RESULT_MAX]; + struct register_state Temps[RC_REGISTER_MAX_INDEX]; + struct register_state Outputs[RC_REGISTER_MAX_INDEX]; struct register_state Address; void * UserData; @@ -75,17 +75,18 @@ struct radeon_nqssadce_descr { /** * Check whether the given swizzle, absolute and negate combination * can be implemented natively by the hardware for this opcode. + * + * \return 1 if the swizzle is native for the given opcode */ - GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg); + int (*IsNativeSwizzle)(rc_opcode opcode, struct rc_src_register reg); /** * Emit (at the current IP) the instruction MOV dst, src; * The transformation will work recursively on the emitted instruction(s). */ - void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); + void (*BuildSwizzle)(struct nqssadce_state*, struct rc_dst_register dst, struct rc_src_register src); }; void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data); -struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg); #endif /* __RADEON_PROGRAM_NQSSADCE_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c new file mode 100644 index 0000000000..ffe2de1a87 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -0,0 +1,318 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_opcodes.h" + +struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { + { + .Opcode = RC_OPCODE_NOP, + .Name = "NOP" + }, + { + .Opcode = RC_OPCODE_ILLEGAL_OPCODE, + .Name = "ILLEGAL OPCODE" + }, + { + .Opcode = RC_OPCODE_ABS, + .Name = "ABS", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_ADD, + .Name = "ADD", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_ARL, + .Name = "ARL", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_CMP, + .Name = "CMP", + .NumSrcRegs = 3, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_COS, + .Name = "COS", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DDX, + .Name = "DDX", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DDY, + .Name = "DDY", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DP3, + .Name = "DP3", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DP4, + .Name = "DP4", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DPH, + .Name = "DPH", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DST, + .Name = "DST", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_EX2, + .Name = "EX2", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_EXP, + .Name = "EXP", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_FLR, + .Name = "FLR", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_FRC, + .Name = "FRC", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_KIL, + .Name = "KIL", + .NumSrcRegs = 1 + }, + { + .Opcode = RC_OPCODE_LG2, + .Name = "LG2", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_LIT, + .Name = "LIT", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_LOG, + .Name = "LOG", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_LRP, + .Name = "LRP", + .NumSrcRegs = 3, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_MAD, + .Name = "MAD", + .NumSrcRegs = 3, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_MAX, + .Name = "MAX", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_MIN, + .Name = "MIN", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_MOV, + .Name = "MOV", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_MUL, + .Name = "MUL", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_POW, + .Name = "POW", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_RCP, + .Name = "RCP", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_RSQ, + .Name = "RSQ", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_SCS, + .Name = "SCS", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_SEQ, + .Name = "SEQ", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_SFL, + .Name = "SFL", + .NumSrcRegs = 0, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_SGE, + .Name = "SGE", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_SGT, + .Name = "SGT", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_SIN, + .Name = "SIN", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_SLE, + .Name = "SLE", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_SLT, + .Name = "SLT", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_SNE, + .Name = "SNE", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_SUB, + .Name = "SUB", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_SWZ, + .Name = "SWZ", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_XPD, + .Name = "XPD", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TEX, + .Name = "TEX", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXB, + .Name = "TXB", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXD, + .Name = "TXD", + .HasTexture = 1, + .NumSrcRegs = 3, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXL, + .Name = "TXL", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXP, + .Name = "TXP", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_REPL_ALPHA, + .Name = "REPL_ALPHA", + .HasDstReg = 1 + } +}; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h new file mode 100644 index 0000000000..4eb9be3e55 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -0,0 +1,202 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_OPCODES_H +#define RADEON_OPCODES_H + +#include + +/** + * Opcodes understood by the Radeon compiler. + */ +typedef enum { + RC_OPCODE_NOP = 0, + RC_OPCODE_ILLEGAL_OPCODE, + + /** vec4 instruction: dst.c = abs(src0.c); */ + RC_OPCODE_ABS, + + /** vec4 instruction: dst.c = src0.c + src1.c; */ + RC_OPCODE_ADD, + + /** special instruction: load address register + * dst.x = floor(src.x), where dst must be an address register */ + RC_OPCODE_ARL, + + /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */ + RC_OPCODE_CMP, + + /** scalar instruction: dst = cos(src0.x) */ + RC_OPCODE_COS, + + /** special instruction: take vec4 partial derivative in X direction + * dst.c = d src0.c / dx */ + RC_OPCODE_DDX, + + /** special instruction: take vec4 partial derivative in Y direction + * dst.c = d src0.c / dy */ + RC_OPCODE_DDY, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */ + RC_OPCODE_DP3, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */ + RC_OPCODE_DP4, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w */ + RC_OPCODE_DPH, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_DST, + + /** scalar instruction: dst = 2**src0.x */ + RC_OPCODE_EX2, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_EXP, + + /** vec4 instruction: dst.c = floor(src0.c) */ + RC_OPCODE_FLR, + + /** vec4 instruction: dst.c = src0.c - floor(src0.c) */ + RC_OPCODE_FRC, + + /** special instruction: stop execution if any component of src0 is negative */ + RC_OPCODE_KIL, + + /** scalar instruction: dst = log_2(src0.x) */ + RC_OPCODE_LG2, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_LIT, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_LOG, + + /** vec4 instruction: dst.c = src0.c*src1.c + (1 - src0.c)*src2.c */ + RC_OPCODE_LRP, + + /** vec4 instruction: dst.c = src0.c*src1.c + src2.c */ + RC_OPCODE_MAD, + + /** vec4 instruction: dst.c = max(src0.c, src1.c) */ + RC_OPCODE_MAX, + + /** vec4 instruction: dst.c = min(src0.c, src1.c) */ + RC_OPCODE_MIN, + + /** vec4 instruction: dst.c = src0.c */ + RC_OPCODE_MOV, + + /** vec4 instruction: dst.c = src0.c*src1.c */ + RC_OPCODE_MUL, + + /** scalar instruction: dst = src0.x ** src1.x */ + RC_OPCODE_POW, + + /** scalar instruction: dst = 1 / src0.x */ + RC_OPCODE_RCP, + + /** scalar instruction: dst = 1 / sqrt(src0.x) */ + RC_OPCODE_RSQ, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_SCS, + + /** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SEQ, + + /** vec4 instruction: dst.c = 0.0 */ + RC_OPCODE_SFL, + + /** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SGE, + + /** vec4 instruction: dst.c = (src0.c > src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SGT, + + /** scalar instruction: dst = sin(src0.x) */ + RC_OPCODE_SIN, + + /** vec4 instruction: dst.c = (src0.c <= src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SLE, + + /** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SLT, + + /** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SNE, + + /** vec4 instruction: dst.c = src0.c - src1.c */ + RC_OPCODE_SUB, + + /** vec4 instruction: dst.c = src0.c */ + RC_OPCODE_SWZ, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_XPD, + + RC_OPCODE_TEX, + RC_OPCODE_TXB, + RC_OPCODE_TXD, + RC_OPCODE_TXL, + RC_OPCODE_TXP, + + /** special instruction, used in R300-R500 fragment program pair instructions + * indicates that the result of the alpha operation shall be replicated + * across all other channels */ + RC_OPCODE_REPL_ALPHA, + + MAX_RC_OPCODE +} rc_opcode; + + +struct rc_opcode_info { + rc_opcode Opcode; + const char * Name; + + /** true if the instruction reads from a texture. + * + * \note This is false for the KIL instruction, even though KIL is + * a texture instruction from a hardware point of view. */ + unsigned int HasTexture:1; + + unsigned int NumSrcRegs:2; + unsigned int HasDstReg:1; +}; + +extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE]; + +static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode) +{ + assert((unsigned int)opcode < MAX_RC_OPCODE); + assert(rc_opcodes[opcode].Opcode == opcode); + + return &rc_opcodes[opcode]; +} + +#endif /* RADEON_OPCODES_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c index bf7f9ac92c..0e0c1f68e6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -27,9 +27,9 @@ #include "radeon_program.h" +#include + #include "radeon_compiler.h" -#include "shader/prog_parameter.h" -#include "shader/prog_print.h" /** @@ -69,37 +69,57 @@ void radeonLocalTransform( } } +/** + * Left multiplication of a register with a swizzle + */ +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg) +{ + struct rc_src_register tmp = srcreg; + int i; + tmp.Swizzle = 0; + tmp.Negate = 0; + for(i = 0; i < 4; ++i) { + rc_swizzle swz = GET_SWZ(swizzle, i); + if (swz < 4) { + tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); + tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; + } else { + tmp.Swizzle |= swz << (i*3); + } + } + return tmp; +} -GLint rc_find_free_temporary(struct radeon_compiler * c) +unsigned int rc_find_free_temporary(struct radeon_compiler * c) { - GLboolean used[MAX_PROGRAM_TEMPS]; - GLuint i; + char used[RC_REGISTER_MAX_INDEX]; + unsigned int i; memset(used, 0, sizeof(used)); for (struct rc_instruction * rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) { - const struct prog_instruction *inst = &rcinst->I; - const GLuint nsrc = _mesa_num_inst_src_regs(inst->Opcode); - const GLuint ndst = _mesa_num_inst_dst_regs(inst->Opcode); - GLuint k; - - for (k = 0; k < nsrc; k++) { - if (inst->SrcReg[k].File == PROGRAM_TEMPORARY) - used[inst->SrcReg[k].Index] = GL_TRUE; + const struct rc_sub_instruction *inst = &rcinst->I; + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode); + unsigned int k; + + for (k = 0; k < opcode->NumSrcRegs; k++) { + if (inst->SrcReg[k].File == RC_FILE_TEMPORARY) + used[inst->SrcReg[k].Index] = 1; } - if (ndst) { - if (inst->DstReg.File == PROGRAM_TEMPORARY) - used[inst->DstReg.Index] = GL_TRUE; + if (opcode->HasDstReg) { + if (inst->DstReg.File == RC_FILE_TEMPORARY) + used[inst->DstReg.Index] = 1; } } - for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + for (i = 0; i < RC_REGISTER_MAX_INDEX; i++) { if (!used[i]) return i; } - return -1; + rc_error(c, "Ran out of temporary registers\n"); + return 0; } @@ -107,10 +127,13 @@ struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c) { struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction)); - inst->Prev = 0; - inst->Next = 0; + memset(inst, 0, sizeof(struct rc_instruction)); - _mesa_init_instructions(&inst->I, 1); + inst->I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; + inst->I.DstReg.WriteMask = RC_MASK_XYZW; + inst->I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst->I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW; + inst->I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW; return inst; } @@ -135,14 +158,142 @@ void rc_remove_instruction(struct rc_instruction * inst) inst->Next->Prev = inst->Prev; } +static const char * textarget_to_string(rc_texture_target target) +{ + switch(target) { + case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY"; + case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY"; + case RC_TEXTURE_CUBE: return "CUBE"; + case RC_TEXTURE_3D: return "3D"; + case RC_TEXTURE_RECT: return "RECT"; + case RC_TEXTURE_2D: return "2D"; + case RC_TEXTURE_1D: return "1D"; + default: return "BAD_TEXTURE_TARGET"; + } +} + +static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr) +{ + if (file == RC_FILE_NONE) { + fprintf(f, "none"); + } else { + const char * filename; + switch(file) { + case RC_FILE_TEMPORARY: filename = "temp"; break; + case RC_FILE_INPUT: filename = "input"; break; + case RC_FILE_OUTPUT: filename = "output"; break; + case RC_FILE_ADDRESS: filename = "addr"; break; + case RC_FILE_CONSTANT: filename = "const"; break; + default: filename = "BAD FILE"; break; + } + fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : ""); + } +} + +static void rc_print_mask(FILE * f, unsigned int mask) +{ + if (mask & RC_MASK_X) fprintf(f, "x"); + if (mask & RC_MASK_Y) fprintf(f, "y"); + if (mask & RC_MASK_Z) fprintf(f, "z"); + if (mask & RC_MASK_W) fprintf(f, "w"); +} + +static void rc_print_dst_register(FILE * f, struct rc_dst_register dst) +{ + rc_print_register(f, dst.File, dst.Index, dst.RelAddr); + if (dst.WriteMask != RC_MASK_XYZW) { + fprintf(f, "."); + rc_print_mask(f, dst.WriteMask); + } +} + +static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate) +{ + unsigned int comp; + for(comp = 0; comp < 4; ++comp) { + rc_swizzle swz = GET_SWZ(swizzle, comp); + if (GET_BIT(negate, comp)) + fprintf(f, "-"); + switch(swz) { + case RC_SWIZZLE_X: fprintf(f, "x"); break; + case RC_SWIZZLE_Y: fprintf(f, "y"); break; + case RC_SWIZZLE_Z: fprintf(f, "z"); break; + case RC_SWIZZLE_W: fprintf(f, "w"); break; + case RC_SWIZZLE_ZERO: fprintf(f, "0"); break; + case RC_SWIZZLE_ONE: fprintf(f, "1"); break; + case RC_SWIZZLE_HALF: fprintf(f, "H"); break; + case RC_SWIZZLE_UNUSED: fprintf(f, "_"); break; + } + } +} + +static void rc_print_src_register(FILE * f, struct rc_src_register src) +{ + int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW); + + if (src.Negate == RC_MASK_XYZW) + fprintf(f, "-"); + if (src.Abs) + fprintf(f, "|"); + + rc_print_register(f, src.File, src.Index, src.RelAddr); + + if (src.Abs && !trivial_negate) + fprintf(f, "|"); + + if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) { + fprintf(f, "."); + rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate); + } + + if (src.Abs && trivial_negate) + fprintf(f, "|"); +} + +static void rc_print_instruction(FILE * f, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + unsigned int reg; + + fprintf(f, "%s", opcode->Name); + + switch(inst->I.SaturateMode) { + case RC_SATURATE_NONE: break; + case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break; + case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break; + default: fprintf(f, "_BAD_SAT"); break; + } + + if (opcode->HasDstReg) { + fprintf(f, " "); + rc_print_dst_register(f, inst->I.DstReg); + if (opcode->NumSrcRegs) + fprintf(f, ","); + } + + for(reg = 0; reg < opcode->NumSrcRegs; ++reg) { + if (reg > 0) + fprintf(f, ","); + fprintf(f, " "); + rc_print_src_register(f, inst->I.SrcReg[reg]); + } + + if (opcode->HasTexture) { + fprintf(f, ", %s%s[%u]", + textarget_to_string(inst->I.TexSrcTarget), + inst->I.TexShadow ? "SHADOW" : "", + inst->I.TexSrcUnit); + } + + fprintf(f, ";\n"); +} /** * Print program to stderr, default options. */ void rc_print_program(const struct rc_program *prog) { - GLuint indent = 0; - GLuint linenum = 1; + unsigned int linenum = 0; struct rc_instruction *inst; fprintf(stderr, "# Radeon Compiler Program\n"); @@ -150,11 +301,7 @@ void rc_print_program(const struct rc_program *prog) for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) { fprintf(stderr, "%3d: ", linenum); - /* Massive hack: We rely on the fact that the printers do not actually - * use the gl_program argument (last argument) in debug mode */ - indent = _mesa_fprint_instruction_opt( - stderr, &inst->I, - indent, PROG_PRINT_DEBUG, 0); + rc_print_instruction(stderr, inst); linenum++; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index 561958608c..a2ab757fec 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -28,37 +28,211 @@ #ifndef __RADEON_PROGRAM_H_ #define __RADEON_PROGRAM_H_ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" -#include "shader/prog_instruction.h" +#include +#include + +#include "radeon_opcodes.h" +#include "radeon_code.h" struct radeon_compiler; -struct rc_instruction; -struct rc_program; -enum { - PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */ +typedef enum { + RC_SATURATE_NONE = 0, + RC_SATURATE_ZERO_ONE, + RC_SATURATE_MINUS_PLUS_ONE +} rc_saturate_mode; + +typedef enum { + RC_TEXTURE_2D_ARRAY, + RC_TEXTURE_1D_ARRAY, + RC_TEXTURE_CUBE, + RC_TEXTURE_3D, + RC_TEXTURE_RECT, + RC_TEXTURE_2D, + RC_TEXTURE_1D +} rc_texture_target; + +typedef enum { + /** + * Used to indicate unused register descriptions and + * source register that use a constant swizzle. + */ + RC_FILE_NONE = 0, + RC_FILE_TEMPORARY, + + /** + * Input register. + * + * \note The compiler attaches no implicit semantics to input registers. + * Fragment/vertex program specific semantics must be defined explicitly + * using the appropriate compiler interfaces. + */ + RC_FILE_INPUT, + + /** + * Output register. + * + * \note The compiler attaches no implicit semantics to input registers. + * Fragment/vertex program specific semantics must be defined explicitly + * using the appropriate compiler interfaces. + */ + RC_FILE_OUTPUT, + RC_FILE_ADDRESS, + + /** + * Indicates a constant from the \ref rc_constant_list . + */ + RC_FILE_CONSTANT +} rc_register_file; + +#define RC_REGISTER_INDEX_BITS 10 +#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS) + +typedef enum { + RC_SWIZZLE_X = 0, + RC_SWIZZLE_Y, + RC_SWIZZLE_Z, + RC_SWIZZLE_W, + RC_SWIZZLE_ZERO, + RC_SWIZZLE_ONE, + RC_SWIZZLE_HALF, + RC_SWIZZLE_UNUSED +} rc_swizzle; + +#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9)) +#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a)) +#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7) +#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1) + +#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W) +#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X) +#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y) +#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z) +#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W) +#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO) +#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE) + +/** + * \name Bitmasks for components of vectors. + * + * Used for write masks, negation masks, etc. + */ +/*@{*/ +#define RC_MASK_NONE 0 +#define RC_MASK_X 1 +#define RC_MASK_Y 2 +#define RC_MASK_Z 4 +#define RC_MASK_W 8 +#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y) +#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z) +#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W) +#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W) +/*@}*/ + +struct rc_src_register { + rc_register_file File:3; + + /** Negative values may be used for relative addressing. */ + signed int Index:(RC_REGISTER_INDEX_BITS+1); + unsigned int RelAddr:1; + + unsigned int Swizzle:12; + + /** Take the component-wise absolute value */ + unsigned int Abs:1; + + /** Post-Abs negation. */ + unsigned int Negate:4; +}; + +struct rc_dst_register { + rc_register_file File:3; + + /** Negative values may be used for relative addressing. */ + signed int Index:(RC_REGISTER_INDEX_BITS+1); + unsigned int RelAddr:1; + + unsigned int WriteMask:4; +}; + +/** + * Instructions are maintained by the compiler in a doubly linked list + * of these structures. + * + * This instruction format is intended to be expanded for hardware-specific + * trickery. At different stages of compilation, a different set of + * instruction types may be valid. + */ +struct rc_sub_instruction { + struct rc_src_register SrcReg[3]; + struct rc_dst_register DstReg; + + /** + * Opcode of this instruction, according to \ref rc_opcode enums. + */ + rc_opcode Opcode:8; + + /** + * Saturate each value of the result to the range [0,1] or [-1,1], + * according to \ref rc_saturate_mode enums. + */ + rc_saturate_mode SaturateMode:2; + + /** + * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions. + */ + /*@{*/ + /** Source texture unit. */ + unsigned int TexSrcUnit:5; + + /** Source texture target, one of the \ref rc_texture_target enums */ + rc_texture_target TexSrcTarget:3; + + /** True if tex instruction should do shadow comparison */ + unsigned int TexShadow:1; + /*@}*/ +}; + +struct rc_instruction { + struct rc_instruction * Prev; + struct rc_instruction * Next; + + struct rc_sub_instruction I; +}; + +struct rc_program { + /** + * Instructions.Next points to the first instruction, + * Instructions.Prev points to the last instruction. + */ + struct rc_instruction Instructions; + + /* Long term, we should probably remove InputsRead & OutputsWritten, + * since updating dependent state can be fragile, and they aren't + * actually used very often. */ + uint32_t InputsRead; + uint32_t OutputsWritten; + uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */ + + struct rc_constant_list Constants; }; enum { - OPCODE_REPL_ALPHA = MAX_OPCODE /**< used in paired instructions */ + OPCODE_REPL_ALPHA = MAX_RC_OPCODE /**< used in paired instructions */ }; -#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO) -#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE) -static inline GLuint get_swz(GLuint swz, GLuint idx) +static inline rc_swizzle get_swz(unsigned int swz, rc_swizzle idx) { if (idx & 0x4) return idx; return GET_SWZ(swz, idx); } -static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, GLuint swz_z, GLuint swz_w) +static inline unsigned int combine_swizzles4(unsigned int src, + rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w) { - GLuint ret = 0; + unsigned int ret = 0; ret |= get_swz(src, swz_x); ret |= get_swz(src, swz_y) << 3; @@ -68,22 +242,24 @@ static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, G return ret; } -static inline GLuint combine_swizzles(GLuint src, GLuint swz) +static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz) { - GLuint ret = 0; + unsigned int ret = 0; - ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_X)); - ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Y)) << 3; - ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Z)) << 6; - ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_W)) << 9; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X)); + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9; return ret; } -static INLINE void reset_srcreg(struct prog_src_register* reg) +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); + +static inline void reset_srcreg(struct rc_src_register* reg) { - _mesa_bzero(reg, sizeof(*reg)); - reg->Swizzle = SWIZZLE_NOOP; + memset(reg, 0, sizeof(reg)); + reg->Swizzle = RC_SWIZZLE_XYZW; } @@ -92,13 +268,13 @@ static INLINE void reset_srcreg(struct prog_src_register* reg) * * The function will be called once for each instruction. * It has to either emit the appropriate transformed code for the instruction - * and return GL_TRUE, or return GL_FALSE if it doesn't understand the + * and return true, or return false if it doesn't understand the * instruction. * * The function gets passed the userData as last parameter. */ struct radeon_program_transformation { - GLboolean (*function)( + int (*function)( struct radeon_compiler*, struct rc_instruction*, void*); @@ -110,7 +286,7 @@ void radeonLocalTransform( int num_transformations, struct radeon_program_transformation* transformations); -GLint rc_find_free_temporary(struct radeon_compiler * c); +unsigned int rc_find_free_temporary(struct radeon_compiler * c); struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c); struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 8071899eaa..041e3cab82 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -40,8 +40,8 @@ static struct rc_instruction *emit1( struct radeon_compiler * c, struct rc_instruction * after, - gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, - struct prog_src_register SrcReg) + rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + struct rc_src_register SrcReg) { struct rc_instruction *fpi = rc_insert_new_instruction(c, after); @@ -54,8 +54,8 @@ static struct rc_instruction *emit1( static struct rc_instruction *emit2( struct radeon_compiler * c, struct rc_instruction * after, - gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, - struct prog_src_register SrcReg0, struct prog_src_register SrcReg1) + rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + struct rc_src_register SrcReg0, struct rc_src_register SrcReg1) { struct rc_instruction *fpi = rc_insert_new_instruction(c, after); @@ -69,9 +69,9 @@ static struct rc_instruction *emit2( static struct rc_instruction *emit3( struct radeon_compiler * c, struct rc_instruction * after, - gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, - struct prog_src_register SrcReg0, struct prog_src_register SrcReg1, - struct prog_src_register SrcReg2) + rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + struct rc_src_register SrcReg0, struct rc_src_register SrcReg1, + struct rc_src_register SrcReg2) { struct rc_instruction *fpi = rc_insert_new_instruction(c, after); @@ -84,131 +84,120 @@ static struct rc_instruction *emit3( return fpi; } -static struct prog_dst_register dstreg(int file, int index) +static struct rc_dst_register dstreg(int file, int index) { - struct prog_dst_register dst; + struct rc_dst_register dst; dst.File = file; dst.Index = index; - dst.WriteMask = WRITEMASK_XYZW; - dst.CondMask = COND_TR; + dst.WriteMask = RC_MASK_XYZW; dst.RelAddr = 0; - dst.CondSwizzle = SWIZZLE_NOOP; - dst.CondSrc = 0; - dst.pad = 0; return dst; } -static struct prog_dst_register dstregtmpmask(int index, int mask) +static struct rc_dst_register dstregtmpmask(int index, int mask) { - struct prog_dst_register dst = {0}; - dst.File = PROGRAM_TEMPORARY; + struct rc_dst_register dst = {0}; + dst.File = RC_FILE_TEMPORARY; dst.Index = index; dst.WriteMask = mask; dst.RelAddr = 0; - dst.CondMask = COND_TR; - dst.CondSwizzle = SWIZZLE_NOOP; - dst.CondSrc = 0; - dst.pad = 0; return dst; } -static const struct prog_src_register builtin_zero = { - .File = PROGRAM_BUILTIN, +static const struct rc_src_register builtin_zero = { + .File = RC_FILE_NONE, .Index = 0, - .Swizzle = SWIZZLE_0000 + .Swizzle = RC_SWIZZLE_0000 }; -static const struct prog_src_register builtin_one = { - .File = PROGRAM_BUILTIN, +static const struct rc_src_register builtin_one = { + .File = RC_FILE_NONE, .Index = 0, - .Swizzle = SWIZZLE_1111 + .Swizzle = RC_SWIZZLE_1111 }; -static const struct prog_src_register srcreg_undefined = { - .File = PROGRAM_UNDEFINED, +static const struct rc_src_register srcreg_undefined = { + .File = RC_FILE_NONE, .Index = 0, - .Swizzle = SWIZZLE_NOOP + .Swizzle = RC_SWIZZLE_XYZW }; -static struct prog_src_register srcreg(int file, int index) +static struct rc_src_register srcreg(int file, int index) { - struct prog_src_register src = srcreg_undefined; + struct rc_src_register src = srcreg_undefined; src.File = file; src.Index = index; return src; } -static struct prog_src_register srcregswz(int file, int index, int swz) +static struct rc_src_register srcregswz(int file, int index, int swz) { - struct prog_src_register src = srcreg_undefined; + struct rc_src_register src = srcreg_undefined; src.File = file; src.Index = index; src.Swizzle = swz; return src; } -static struct prog_src_register absolute(struct prog_src_register reg) +static struct rc_src_register absolute(struct rc_src_register reg) { - struct prog_src_register newreg = reg; + struct rc_src_register newreg = reg; newreg.Abs = 1; - newreg.Negate = NEGATE_NONE; + newreg.Negate = RC_MASK_NONE; return newreg; } -static struct prog_src_register negate(struct prog_src_register reg) +static struct rc_src_register negate(struct rc_src_register reg) { - struct prog_src_register newreg = reg; - newreg.Negate = newreg.Negate ^ NEGATE_XYZW; + struct rc_src_register newreg = reg; + newreg.Negate = newreg.Negate ^ RC_MASK_XYZW; return newreg; } -static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, GLuint y, GLuint z, GLuint w) +static struct rc_src_register swizzle(struct rc_src_register reg, + rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w) { - struct prog_src_register swizzled = reg; - swizzled.Swizzle = MAKE_SWIZZLE4( - x >= 4 ? x : GET_SWZ(reg.Swizzle, x), - y >= 4 ? y : GET_SWZ(reg.Swizzle, y), - z >= 4 ? z : GET_SWZ(reg.Swizzle, z), - w >= 4 ? w : GET_SWZ(reg.Swizzle, w)); + struct rc_src_register swizzled = reg; + swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w); return swizzled; } -static struct prog_src_register scalar(struct prog_src_register reg) +static struct rc_src_register scalar(struct rc_src_register reg) { - return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); + return swizzle(reg, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X); } static void transform_ABS(struct radeon_compiler* c, struct rc_instruction* inst) { - struct prog_src_register src = inst->I.SrcReg[0]; + struct rc_src_register src = inst->I.SrcReg[0]; src.Abs = 1; - src.Negate = NEGATE_NONE; - emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode, inst->I.DstReg, src); + src.Negate = RC_MASK_NONE; + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->I.SaturateMode, inst->I.DstReg, src); rc_remove_instruction(inst); } static void transform_DP3(struct radeon_compiler* c, struct rc_instruction* inst) { - struct prog_src_register src0 = inst->I.SrcReg[0]; - struct prog_src_register src1 = inst->I.SrcReg[1]; - src0.Negate &= ~NEGATE_W; + struct rc_src_register src0 = inst->I.SrcReg[0]; + struct rc_src_register src1 = inst->I.SrcReg[1]; + src0.Negate &= ~RC_MASK_W; src0.Swizzle &= ~(7 << (3 * 3)); - src0.Swizzle |= SWIZZLE_ZERO << (3 * 3); - src1.Negate &= ~NEGATE_W; + src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + src1.Negate &= ~RC_MASK_W; src1.Swizzle &= ~(7 << (3 * 3)); - src1.Swizzle |= SWIZZLE_ZERO << (3 * 3); - emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, src1); + src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + emit2(c, inst->Prev, RC_OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, src1); rc_remove_instruction(inst); } static void transform_DPH(struct radeon_compiler* c, struct rc_instruction* inst) { - struct prog_src_register src0 = inst->I.SrcReg[0]; - src0.Negate &= ~NEGATE_W; + struct rc_src_register src0 = inst->I.SrcReg[0]; + src0.Negate &= ~RC_MASK_W; src0.Swizzle &= ~(7 << (3 * 3)); - src0.Swizzle |= SWIZZLE_ONE << (3 * 3); - emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, inst->I.SrcReg[1]); + src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3); + emit2(c, inst->Prev, RC_OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, inst->I.SrcReg[1]); rc_remove_instruction(inst); } @@ -219,9 +208,9 @@ static void transform_DPH(struct radeon_compiler* c, static void transform_DST(struct radeon_compiler* c, struct rc_instruction* inst) { - emit2(c, inst->Prev, OPCODE_MUL, inst->I.SaturateMode, inst->I.DstReg, - swizzle(inst->I.SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE), - swizzle(inst->I.SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W)); + emit2(c, inst->Prev, RC_OPCODE_MUL, inst->I.SaturateMode, inst->I.DstReg, + swizzle(inst->I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE), + swizzle(inst->I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W)); rc_remove_instruction(inst); } @@ -229,9 +218,9 @@ static void transform_FLR(struct radeon_compiler* c, struct rc_instruction* inst) { int tempreg = rc_find_free_temporary(c); - emit1(c, inst->Prev, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0]); - emit2(c, inst->Prev, OPCODE_ADD, inst->I.SaturateMode, inst->I.DstReg, - inst->I.SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->I.SrcReg[0]); + emit2(c, inst->Prev, RC_OPCODE_ADD, inst->I.SaturateMode, inst->I.DstReg, + inst->I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, tempreg))); rc_remove_instruction(inst); } @@ -256,64 +245,64 @@ static void transform_FLR(struct radeon_compiler* c, static void transform_LIT(struct radeon_compiler* c, struct rc_instruction* inst) { - GLuint constant; - GLuint constant_swizzle; - GLuint temp; - struct prog_src_register srctemp; + unsigned int constant; + unsigned int constant_swizzle; + unsigned int temp; + struct rc_src_register srctemp; constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle); - if (inst->I.DstReg.WriteMask != WRITEMASK_XYZW || inst->I.DstReg.File != PROGRAM_TEMPORARY) { + if (inst->I.DstReg.WriteMask != RC_MASK_XYZW || inst->I.DstReg.File != RC_FILE_TEMPORARY) { struct rc_instruction * inst_mov; inst_mov = emit1(c, inst, - OPCODE_MOV, 0, inst->I.DstReg, - srcreg(PROGRAM_TEMPORARY, rc_find_free_temporary(c))); + RC_OPCODE_MOV, 0, inst->I.DstReg, + srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c))); - inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.File = RC_FILE_TEMPORARY; inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; - inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + inst->I.DstReg.WriteMask = RC_MASK_XYZW; } temp = inst->I.DstReg.Index; - srctemp = srcreg(PROGRAM_TEMPORARY, temp); + srctemp = srcreg(RC_FILE_TEMPORARY, temp); // tmp.x = max(0.0, Src.x); // tmp.y = max(0.0, Src.y); // tmp.w = clamp(Src.z, -128+eps, 128-eps); - emit2(c, inst->Prev, OPCODE_MAX, 0, - dstregtmpmask(temp, WRITEMASK_XYW), + emit2(c, inst->Prev, RC_OPCODE_MAX, 0, + dstregtmpmask(temp, RC_MASK_XYW), inst->I.SrcReg[0], - swizzle(srcreg(PROGRAM_CONSTANT, constant), - SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3)); - emit2(c, inst->Prev, OPCODE_MIN, 0, - dstregtmpmask(temp, WRITEMASK_Z), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle))); + swizzle(srcreg(RC_FILE_CONSTANT, constant), + RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3)); + emit2(c, inst->Prev, RC_OPCODE_MIN, 0, + dstregtmpmask(temp, RC_MASK_Z), + swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle))); // tmp.w = Pow(tmp.y, tmp.w) - emit1(c, inst->Prev, OPCODE_LG2, 0, - dstregtmpmask(temp, WRITEMASK_W), - swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); - emit2(c, inst->Prev, OPCODE_MUL, 0, - dstregtmpmask(temp, WRITEMASK_W), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)); - emit1(c, inst->Prev, OPCODE_EX2, 0, - dstregtmpmask(temp, WRITEMASK_W), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + emit1(c, inst->Prev, RC_OPCODE_LG2, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle(srctemp, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y)); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + swizzle(srctemp, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z)); + emit1(c, inst->Prev, RC_OPCODE_EX2, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W)); // tmp.z = (tmp.x > 0) ? tmp.w : 0.0 - emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, - dstregtmpmask(temp, WRITEMASK_Z), - negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->I.SaturateMode, + dstregtmpmask(temp, RC_MASK_Z), + negate(swizzle(srctemp, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)), + swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), builtin_zero); // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 - emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode, - dstregtmpmask(temp, WRITEMASK_XYW), - swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE)); + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->I.SaturateMode, + dstregtmpmask(temp, RC_MASK_XYW), + swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE)); rc_remove_instruction(inst); } @@ -323,12 +312,12 @@ static void transform_LRP(struct radeon_compiler* c, { int tempreg = rc_find_free_temporary(c); - emit2(c, inst->Prev, OPCODE_ADD, 0, - dstreg(PROGRAM_TEMPORARY, tempreg), + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + dstreg(RC_FILE_TEMPORARY, tempreg), inst->I.SrcReg[1], negate(inst->I.SrcReg[2])); - emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode, + emit3(c, inst->Prev, RC_OPCODE_MAD, inst->I.SaturateMode, inst->I.DstReg, - inst->I.SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[2]); + inst->I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg), inst->I.SrcReg[2]); rc_remove_instruction(inst); } @@ -337,14 +326,14 @@ static void transform_POW(struct radeon_compiler* c, struct rc_instruction* inst) { int tempreg = rc_find_free_temporary(c); - struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg); - struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg); - tempdst.WriteMask = WRITEMASK_W; - tempsrc.Swizzle = SWIZZLE_WWWW; + struct rc_dst_register tempdst = dstreg(RC_FILE_TEMPORARY, tempreg); + struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempreg); + tempdst.WriteMask = RC_MASK_W; + tempsrc.Swizzle = RC_SWIZZLE_WWWW; - emit1(c, inst->Prev, OPCODE_LG2, 0, tempdst, scalar(inst->I.SrcReg[0])); - emit2(c, inst->Prev, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->I.SrcReg[1])); - emit1(c, inst->Prev, OPCODE_EX2, inst->I.SaturateMode, inst->I.DstReg, tempsrc); + emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, scalar(inst->I.SrcReg[0])); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->I.SrcReg[1])); + emit1(c, inst->Prev, RC_OPCODE_EX2, inst->I.SaturateMode, inst->I.DstReg, tempsrc); rc_remove_instruction(inst); } @@ -360,9 +349,9 @@ static void transform_SGE(struct radeon_compiler* c, { int tempreg = rc_find_free_temporary(c); - emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1])); - emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg, - srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one); rc_remove_instruction(inst); } @@ -372,9 +361,9 @@ static void transform_SLT(struct radeon_compiler* c, { int tempreg = rc_find_free_temporary(c); - emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1])); - emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg, - srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero); rc_remove_instruction(inst); } @@ -382,14 +371,14 @@ static void transform_SLT(struct radeon_compiler* c, static void transform_SUB(struct radeon_compiler* c, struct rc_instruction* inst) { - inst->I.Opcode = OPCODE_ADD; + inst->I.Opcode = RC_OPCODE_ADD; inst->I.SrcReg[1] = negate(inst->I.SrcReg[1]); } static void transform_SWZ(struct radeon_compiler* c, struct rc_instruction* inst) { - inst->I.Opcode = OPCODE_MOV; + inst->I.Opcode = RC_OPCODE_MOV; } static void transform_XPD(struct radeon_compiler* c, @@ -397,13 +386,13 @@ static void transform_XPD(struct radeon_compiler* c, { int tempreg = rc_find_free_temporary(c); - emit2(c, inst->Prev, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg), - swizzle(inst->I.SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), - swizzle(inst->I.SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W)); - emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode, inst->I.DstReg, - swizzle(inst->I.SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W), - swizzle(inst->I.SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), - negate(srcreg(PROGRAM_TEMPORARY, tempreg))); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstreg(RC_FILE_TEMPORARY, tempreg), + swizzle(inst->I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), + swizzle(inst->I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); + emit3(c, inst->Prev, RC_OPCODE_MAD, inst->I.SaturateMode, inst->I.DstReg, + swizzle(inst->I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), + swizzle(inst->I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), + negate(srcreg(RC_FILE_TEMPORARY, tempreg))); rc_remove_instruction(inst); } @@ -423,27 +412,27 @@ static void transform_XPD(struct radeon_compiler* c, * * @note should be applicable to R300 and R500 fragment programs. */ -GLboolean radeonTransformALU( +int radeonTransformALU( struct radeon_compiler * c, struct rc_instruction* inst, void* unused) { switch(inst->I.Opcode) { - case OPCODE_ABS: transform_ABS(c, inst); return GL_TRUE; - case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE; - case OPCODE_DST: transform_DST(c, inst); return GL_TRUE; - case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE; - case OPCODE_LIT: transform_LIT(c, inst); return GL_TRUE; - case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE; - case OPCODE_POW: transform_POW(c, inst); return GL_TRUE; - case OPCODE_RSQ: transform_RSQ(c, inst); return GL_TRUE; - case OPCODE_SGE: transform_SGE(c, inst); return GL_TRUE; - case OPCODE_SLT: transform_SLT(c, inst); return GL_TRUE; - case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE; - case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE; - case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE; + case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; + case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; + case RC_OPCODE_DST: transform_DST(c, inst); return 1; + case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; + case RC_OPCODE_LIT: transform_LIT(c, inst); return 1; + case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; + case RC_OPCODE_POW: transform_POW(c, inst); return 1; + case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1; + case RC_OPCODE_SGE: transform_SGE(c, inst); return 1; + case RC_OPCODE_SLT: transform_SLT(c, inst); return 1; + case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; + case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; + case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; default: - return GL_FALSE; + return 0; } } @@ -452,37 +441,37 @@ static void transform_r300_vertex_ABS(struct radeon_compiler* c, struct rc_instruction* inst) { /* Note: r500 can take absolute values, but r300 cannot. */ - inst->I.Opcode = OPCODE_MAX; + inst->I.Opcode = RC_OPCODE_MAX; inst->I.SrcReg[1] = inst->I.SrcReg[0]; - inst->I.SrcReg[1].Negate ^= NEGATE_XYZW; + inst->I.SrcReg[1].Negate ^= RC_MASK_XYZW; } /** * For use with radeonLocalTransform, this transforms non-native ALU * instructions of the r300 up to r500 vertex engine. */ -GLboolean r300_transform_vertex_alu( +int r300_transform_vertex_alu( struct radeon_compiler * c, struct rc_instruction* inst, void* unused) { switch(inst->I.Opcode) { - case OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return GL_TRUE; - case OPCODE_DP3: transform_DP3(c, inst); return GL_TRUE; - case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE; - case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE; - case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE; - case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE; - case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE; - case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE; + case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; + case RC_OPCODE_DP3: transform_DP3(c, inst); return 1; + case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; + case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; + case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; + case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; + case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; + case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; default: - return GL_FALSE; + return 0; } } -static void sincos_constants(struct radeon_compiler* c, GLuint *constants) +static void sincos_constants(struct radeon_compiler* c, unsigned int *constants) { - static const GLfloat SinCosConsts[2][4] = { + static const float SinCosConsts[2][4] = { { 1.273239545, // 4/PI -0.405284735, // -4/(PI*PI) @@ -512,25 +501,25 @@ static void sincos_constants(struct radeon_compiler* c, GLuint *constants) */ static void sin_approx( struct radeon_compiler* c, struct rc_instruction * after, - struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants) -{ - GLuint tempreg = rc_find_free_temporary(c); - - emit2(c, after->Prev, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - srcreg(PROGRAM_CONSTANT, constants[0])); - emit3(c, after->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), - absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); - emit3(c, after->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), - negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X))); - emit3(c, after->Prev, OPCODE_MAD, 0, dst, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), - swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); + struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants) +{ + unsigned int tempreg = rc_find_free_temporary(c); + + emit2(c, after->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY), + swizzle(src, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + srcreg(RC_FILE_CONSTANT, constants[0])); + emit3(c, after->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y), + absolute(swizzle(src, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)); + emit3(c, after->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + absolute(swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)), + negate(swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X))); + emit3(c, after->Prev, RC_OPCODE_MAD, 0, dst, + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y), + swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)); } /** @@ -538,81 +527,81 @@ static void sin_approx( * using only the basic instructions * MOV, ADD, MUL, MAD, FRC */ -GLboolean radeonTransformTrigSimple(struct radeon_compiler* c, +int radeonTransformTrigSimple(struct radeon_compiler* c, struct rc_instruction* inst, void* unused) { - if (inst->I.Opcode != OPCODE_COS && - inst->I.Opcode != OPCODE_SIN && - inst->I.Opcode != OPCODE_SCS) - return GL_FALSE; + if (inst->I.Opcode != RC_OPCODE_COS && + inst->I.Opcode != RC_OPCODE_SIN && + inst->I.Opcode != RC_OPCODE_SCS) + return 0; - GLuint constants[2]; - GLuint tempreg = rc_find_free_temporary(c); + unsigned int constants[2]; + unsigned int tempreg = rc_find_free_temporary(c); sincos_constants(c, constants); - if (inst->I.Opcode == OPCODE_COS) { + if (inst->I.Opcode == RC_OPCODE_COS) { // MAD tmp.x, src, 1/(2*PI), 0.75 // FRC tmp.x, tmp.x // MAD tmp.z, tmp.x, 2*PI, -PI - emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); - emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); - emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle(inst->I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W)); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + negate(swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z))); sin_approx(c, inst->Prev, inst->I.DstReg, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), constants); - } else if (inst->I.Opcode == OPCODE_SIN) { - emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); - emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); - emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); + } else if (inst->I.Opcode == RC_OPCODE_SIN) { + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle(inst->I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W)); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + negate(swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z))); sin_approx(c, inst->Prev, inst->I.DstReg, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), constants); } else { - emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W)); - emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - srcreg(PROGRAM_TEMPORARY, tempreg)); - emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - srcreg(PROGRAM_TEMPORARY, tempreg), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - - struct prog_dst_register dst = inst->I.DstReg; - - dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_X; + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), + swizzle(inst->I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY), + srcreg(RC_FILE_TEMPORARY, tempreg)); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), + srcreg(RC_FILE_TEMPORARY, tempreg), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + negate(swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z))); + + struct rc_dst_register dst = inst->I.DstReg; + + dst.WriteMask = inst->I.DstReg.WriteMask & RC_MASK_X; sin_approx(c, inst->Prev, dst, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), constants); - dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_Y; + dst.WriteMask = inst->I.DstReg.WriteMask & RC_MASK_Y; sin_approx(c, inst->Prev, dst, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y), constants); } rc_remove_instruction(inst); - return GL_TRUE; + return 1; } @@ -624,53 +613,53 @@ GLboolean radeonTransformTrigSimple(struct radeon_compiler* c, * * @warning This transformation implicitly changes the semantics of SIN and COS! */ -GLboolean radeonTransformTrigScale(struct radeon_compiler* c, +int radeonTransformTrigScale(struct radeon_compiler* c, struct rc_instruction* inst, void* unused) { - if (inst->I.Opcode != OPCODE_COS && - inst->I.Opcode != OPCODE_SIN && - inst->I.Opcode != OPCODE_SCS) - return GL_FALSE; + if (inst->I.Opcode != RC_OPCODE_COS && + inst->I.Opcode != RC_OPCODE_SIN && + inst->I.Opcode != RC_OPCODE_SCS) + return 0; - static const GLfloat RCP_2PI = 0.15915494309189535; - GLuint temp; - GLuint constant; - GLuint constant_swizzle; + static const float RCP_2PI = 0.15915494309189535; + unsigned int temp; + unsigned int constant; + unsigned int constant_swizzle; temp = rc_find_free_temporary(c); constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle); - emit2(c, inst->Prev, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W), - swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)); - emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W), - srcreg(PROGRAM_TEMPORARY, temp)); - - if (inst->I.Opcode == OPCODE_COS) { - emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, inst->I.DstReg, - srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } else if (inst->I.Opcode == OPCODE_SIN) { - emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode, - inst->I.DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } else if (inst->I.Opcode == OPCODE_SCS) { - struct prog_dst_register moddst = inst->I.DstReg; - - if (inst->I.DstReg.WriteMask & WRITEMASK_X) { - moddst.WriteMask = WRITEMASK_X; - emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, moddst, - srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W), + swizzle(inst->I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), + srcreg(RC_FILE_TEMPORARY, temp)); + + if (inst->I.Opcode == RC_OPCODE_COS) { + emit1(c, inst->Prev, RC_OPCODE_COS, inst->I.SaturateMode, inst->I.DstReg, + srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW)); + } else if (inst->I.Opcode == RC_OPCODE_SIN) { + emit1(c, inst->Prev, RC_OPCODE_SIN, inst->I.SaturateMode, + inst->I.DstReg, srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW)); + } else if (inst->I.Opcode == RC_OPCODE_SCS) { + struct rc_dst_register moddst = inst->I.DstReg; + + if (inst->I.DstReg.WriteMask & RC_MASK_X) { + moddst.WriteMask = RC_MASK_X; + emit1(c, inst->Prev, RC_OPCODE_COS, inst->I.SaturateMode, moddst, + srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW)); } - if (inst->I.DstReg.WriteMask & WRITEMASK_Y) { - moddst.WriteMask = WRITEMASK_Y; - emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode, moddst, - srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + if (inst->I.DstReg.WriteMask & RC_MASK_Y) { + moddst.WriteMask = RC_MASK_Y; + emit1(c, inst->Prev, RC_OPCODE_SIN, inst->I.SaturateMode, moddst, + srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW)); } } rc_remove_instruction(inst); - return GL_TRUE; + return 1; } /** @@ -681,15 +670,15 @@ GLboolean radeonTransformTrigScale(struct radeon_compiler* c, * @warning This explicitly changes the form of DDX and DDY! */ -GLboolean radeonTransformDeriv(struct radeon_compiler* c, +int radeonTransformDeriv(struct radeon_compiler* c, struct rc_instruction* inst, void* unused) { - if (inst->I.Opcode != OPCODE_DDX && inst->I.Opcode != OPCODE_DDY) - return GL_FALSE; + if (inst->I.Opcode != RC_OPCODE_DDX && inst->I.Opcode != RC_OPCODE_DDY) + return 0; - inst->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE); - inst->I.SrcReg[1].Negate = NEGATE_XYZW; + inst->I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_ONE, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE); + inst->I.SrcReg[1].Negate = RC_MASK_XYZW; - return GL_TRUE; + return 1; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h index 147efec6fc..7cb5f84b7f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h @@ -30,27 +30,27 @@ #include "radeon_program.h" -GLboolean radeonTransformALU( +int radeonTransformALU( struct radeon_compiler * c, struct rc_instruction * inst, void*); -GLboolean r300_transform_vertex_alu( +int r300_transform_vertex_alu( struct radeon_compiler * c, struct rc_instruction * inst, void*); -GLboolean radeonTransformTrigSimple( +int radeonTransformTrigSimple( struct radeon_compiler * c, struct rc_instruction * inst, void*); -GLboolean radeonTransformTrigScale( +int radeonTransformTrigScale( struct radeon_compiler * c, struct rc_instruction * inst, void*); -GLboolean radeonTransformDeriv( +int radeonTransformDeriv( struct radeon_compiler * c, struct rc_instruction * inst, void*); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c index 4c26db5d24..6cda208600 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -35,9 +35,10 @@ #include "radeon_program_pair.h" +#include + #include "memory_pool.h" #include "radeon_compiler.h" -#include "shader/prog_print.h" #define error(fmt, args...) do { \ rc_error(&s->Compiler->Base, "%s::%s(): " fmt "\n", \ @@ -45,19 +46,19 @@ } while(0) struct pair_state_instruction { - struct prog_instruction Instruction; - GLuint IP; /**< Position of this instruction in original program */ + struct rc_sub_instruction Instruction; + unsigned int IP; /**< Position of this instruction in original program */ - GLuint IsTex:1; /**< Is a texture instruction */ - GLuint NeedRGB:1; /**< Needs the RGB ALU */ - GLuint NeedAlpha:1; /**< Needs the Alpha ALU */ - GLuint IsTranscendent:1; /**< Is a special transcendent instruction */ + unsigned int IsTex:1; /**< Is a texture instruction */ + unsigned int NeedRGB:1; /**< Needs the RGB ALU */ + unsigned int NeedAlpha:1; /**< Needs the Alpha ALU */ + unsigned int IsTranscendent:1; /**< Is a special transcendent instruction */ /** * Number of (read and write) dependencies that must be resolved before * this instruction can be scheduled. */ - GLuint NumDependencies:5; + unsigned int NumDependencies:5; /** * Next instruction in the linked list of ready instructions. @@ -81,11 +82,11 @@ struct reg_value_reader { /** * Used to keep track which values are stored in each component of a - * PROGRAM_TEMPORARY. + * RC_FILE_TEMPORARY. */ struct reg_value { struct pair_state_instruction *Writer; - struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */ + struct reg_value *Next; /**< Pointer to the next value to be written to the same RC_FILE_TEMPORARY component */ /** * Unordered linked list of instructions that read from this value. @@ -98,21 +99,21 @@ struct reg_value { * When this count reaches zero, the instruction that writes the @ref Next value * can be scheduled. */ - GLuint NumReaders; + unsigned int NumReaders; }; /** - * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register + * Used to translate a RC_FILE_INPUT or RC_FILE_TEMPORARY Mesa register * to the proper hardware temporary. */ struct pair_register_translation { - GLuint Allocated:1; - GLuint HwIndex:8; - GLuint RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */ + unsigned int Allocated:1; + unsigned int HwIndex:8; + unsigned int RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */ /** * Notes the value that is currently contained in each component - * (only used for PROGRAM_TEMPORARY registers). + * (only used for RC_FILE_TEMPORARY registers). */ struct reg_value *Value[4]; }; @@ -120,17 +121,17 @@ struct pair_register_translation { struct pair_state { struct r300_fragment_program_compiler * Compiler; const struct radeon_pair_handler *Handler; - GLboolean Verbose; + unsigned int Verbose; void *UserData; /** * Translate Mesa registers to hardware registers */ - struct pair_register_translation Inputs[FRAG_ATTRIB_MAX]; - struct pair_register_translation Temps[MAX_PROGRAM_TEMPS]; + struct pair_register_translation Inputs[RC_REGISTER_MAX_INDEX]; + struct pair_register_translation Temps[RC_REGISTER_MAX_INDEX]; struct { - GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */ + unsigned int RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */ } HwTemps[128]; /** @@ -144,28 +145,28 @@ struct pair_state { }; -static struct pair_register_translation *get_register(struct pair_state *s, GLuint file, GLuint index) +static struct pair_register_translation *get_register(struct pair_state *s, rc_register_file file, unsigned int index) { switch(file) { - case PROGRAM_TEMPORARY: return &s->Temps[index]; - case PROGRAM_INPUT: return &s->Inputs[index]; + case RC_FILE_TEMPORARY: return &s->Temps[index]; + case RC_FILE_INPUT: return &s->Inputs[index]; default: return 0; } } -static void alloc_hw_reg(struct pair_state *s, GLuint file, GLuint index, GLuint hwindex) +static void alloc_hw_reg(struct pair_state *s, rc_register_file file, unsigned int index, unsigned int hwindex) { struct pair_register_translation *t = get_register(s, file, index); - ASSERT(!s->HwTemps[hwindex].RefCount); - ASSERT(!t->Allocated); + assert(!s->HwTemps[hwindex].RefCount); + assert(!t->Allocated); s->HwTemps[hwindex].RefCount = t->RefCount; t->Allocated = 1; t->HwIndex = hwindex; } -static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index) +static unsigned int get_hw_reg(struct pair_state *s, rc_register_file file, unsigned int index) { - GLuint hwindex; + unsigned int hwindex; struct pair_register_translation *t = get_register(s, file, index); if (!t) { @@ -190,7 +191,7 @@ static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index) } -static void deref_hw_reg(struct pair_state *s, GLuint hwindex) +static void deref_hw_reg(struct pair_state *s, unsigned int hwindex) { if (!s->HwTemps[hwindex].RefCount) { error("Hwindex %i refcount error", hwindex); @@ -213,7 +214,7 @@ static void add_pairinst_to_list(struct pair_state_instruction **list, struct pa static void instruction_ready(struct pair_state *s, struct pair_state_instruction *pairinst) { if (s->Verbose) - _mesa_printf("instruction_ready(%i)\n", pairinst->IP); + fprintf(stderr, "instruction_ready(%i)\n", pairinst->IP); if (pairinst->IsTex) add_pairinst_to_list(&s->ReadyTEX, pairinst); @@ -230,24 +231,24 @@ static void instruction_ready(struct pair_state *s, struct pair_state_instructio * Finally rewrite ADD, MOV, MUL as the appropriate native instruction * and reverse the order of arguments for CMP. */ -static void final_rewrite(struct pair_state *s, struct prog_instruction *inst) +static void final_rewrite(struct pair_state *s, struct rc_sub_instruction *inst) { - struct prog_src_register tmp; + struct rc_src_register tmp; switch(inst->Opcode) { - case OPCODE_ADD: + case RC_OPCODE_ADD: inst->SrcReg[2] = inst->SrcReg[1]; - inst->SrcReg[1].File = PROGRAM_BUILTIN; - inst->SrcReg[1].Swizzle = SWIZZLE_1111; - inst->SrcReg[1].Negate = NEGATE_NONE; - inst->Opcode = OPCODE_MAD; + inst->SrcReg[1].File = RC_FILE_NONE; + inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; + inst->SrcReg[1].Negate = RC_MASK_NONE; + inst->Opcode = RC_OPCODE_MAD; break; - case OPCODE_CMP: + case RC_OPCODE_CMP: tmp = inst->SrcReg[2]; inst->SrcReg[2] = inst->SrcReg[0]; inst->SrcReg[0] = tmp; break; - case OPCODE_MOV: + case RC_OPCODE_MOV: /* AMD say we should use CMP. * However, when we transform * KIL -r0; @@ -258,16 +259,16 @@ static void final_rewrite(struct pair_state *s, struct prog_instruction *inst) * It appears that the R500 KIL hardware treats -0.0 as less * than zero. */ - inst->SrcReg[1].File = PROGRAM_BUILTIN; - inst->SrcReg[1].Swizzle = SWIZZLE_1111; - inst->SrcReg[2].File = PROGRAM_BUILTIN; - inst->SrcReg[2].Swizzle = SWIZZLE_0000; - inst->Opcode = OPCODE_MAD; + inst->SrcReg[1].File = RC_FILE_NONE; + inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; + inst->SrcReg[2].File = RC_FILE_NONE; + inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; + inst->Opcode = RC_OPCODE_MAD; break; - case OPCODE_MUL: - inst->SrcReg[2].File = PROGRAM_BUILTIN; - inst->SrcReg[2].Swizzle = SWIZZLE_0000; - inst->Opcode = OPCODE_MAD; + case RC_OPCODE_MUL: + inst->SrcReg[2].File = RC_FILE_NONE; + inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; + inst->Opcode = RC_OPCODE_MAD; break; default: /* nothing to do */ @@ -282,41 +283,40 @@ static void final_rewrite(struct pair_state *s, struct prog_instruction *inst) static void classify_instruction(struct pair_state *s, struct pair_state_instruction *psi) { - psi->NeedRGB = (psi->Instruction.DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0; - psi->NeedAlpha = (psi->Instruction.DstReg.WriteMask & WRITEMASK_W) ? 1 : 0; + psi->NeedRGB = (psi->Instruction.DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0; + psi->NeedAlpha = (psi->Instruction.DstReg.WriteMask & RC_MASK_W) ? 1 : 0; switch(psi->Instruction.Opcode) { - case OPCODE_ADD: - case OPCODE_CMP: - case OPCODE_DDX: - case OPCODE_DDY: - case OPCODE_FRC: - case OPCODE_MAD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MOV: - case OPCODE_MUL: + case RC_OPCODE_ADD: + case RC_OPCODE_CMP: + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + case RC_OPCODE_FRC: + case RC_OPCODE_MAD: + case RC_OPCODE_MAX: + case RC_OPCODE_MIN: + case RC_OPCODE_MOV: + case RC_OPCODE_MUL: break; - case OPCODE_COS: - case OPCODE_EX2: - case OPCODE_LG2: - case OPCODE_RCP: - case OPCODE_RSQ: - case OPCODE_SIN: + case RC_OPCODE_COS: + case RC_OPCODE_EX2: + case RC_OPCODE_LG2: + case RC_OPCODE_RCP: + case RC_OPCODE_RSQ: + case RC_OPCODE_SIN: psi->IsTranscendent = 1; psi->NeedAlpha = 1; break; - case OPCODE_DP4: + case RC_OPCODE_DP4: psi->NeedAlpha = 1; /* fall through */ - case OPCODE_DP3: + case RC_OPCODE_DP3: psi->NeedRGB = 1; break; - case OPCODE_KIL: - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXP: - case OPCODE_END: + case RC_OPCODE_KIL: + case RC_OPCODE_TEX: + case RC_OPCODE_TXB: + case RC_OPCODE_TXP: psi->IsTex = 1; break; default: @@ -333,7 +333,7 @@ static void classify_instruction(struct pair_state *s, static void scan_instructions(struct pair_state *s) { struct rc_instruction *source; - GLuint ip; + unsigned int ip; for(source = s->Compiler->Base.Program.Instructions.Next, ip = 0; source != &s->Compiler->Base.Program.Instructions; @@ -346,9 +346,9 @@ static void scan_instructions(struct pair_state *s) final_rewrite(s, &pairinst->Instruction); classify_instruction(s, pairinst); - int nsrc = _mesa_num_inst_src_regs(pairinst->Instruction.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(pairinst->Instruction.Opcode); int j; - for(j = 0; j < nsrc; j++) { + for(j = 0; j < opcode->NumSrcRegs; j++) { struct pair_register_translation *t = get_register(s, pairinst->Instruction.SrcReg[j].File, pairinst->Instruction.SrcReg[j].Index); if (!t) @@ -356,10 +356,10 @@ static void scan_instructions(struct pair_state *s) t->RefCount++; - if (pairinst->Instruction.SrcReg[j].File == PROGRAM_TEMPORARY) { + if (pairinst->Instruction.SrcReg[j].File == RC_FILE_TEMPORARY) { int i; for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(pairinst->Instruction.SrcReg[j].Swizzle, i); + unsigned int swz = GET_SWZ(pairinst->Instruction.SrcReg[j].Swizzle, i); if (swz >= 4) continue; /* constant or NIL swizzle */ if (!t->Value[swz]) @@ -369,7 +369,7 @@ static void scan_instructions(struct pair_state *s) * also rewrites the value. The code below adds * a dependency for the DstReg, which is a superset * of the SrcReg dependency. */ - if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY && + if (pairinst->Instruction.DstReg.File == RC_FILE_TEMPORARY && pairinst->Instruction.DstReg.Index == pairinst->Instruction.SrcReg[j].Index && GET_BIT(pairinst->Instruction.DstReg.WriteMask, swz)) continue; @@ -384,14 +384,13 @@ static void scan_instructions(struct pair_state *s) } } - int ndst = _mesa_num_inst_dst_regs(pairinst->Instruction.Opcode); - if (ndst) { + if (opcode->HasDstReg) { struct pair_register_translation *t = get_register(s, pairinst->Instruction.DstReg.File, pairinst->Instruction.DstReg.Index); if (t) { t->RefCount++; - if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY) { + if (pairinst->Instruction.DstReg.File == RC_FILE_TEMPORARY) { int j; for(j = 0; j < 4; ++j) { if (!GET_BIT(pairinst->Instruction.DstReg.WriteMask, j)) @@ -412,15 +411,15 @@ static void scan_instructions(struct pair_state *s) } if (s->Verbose) - _mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies); + fprintf(stderr, "scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies); if (!pairinst->NumDependencies) instruction_ready(s, pairinst); } - /* Clear the PROGRAM_TEMPORARY state */ + /* Clear the RC_FILE_TEMPORARY state */ int i, j; - for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) { + for(i = 0; i < RC_REGISTER_MAX_INDEX; ++i) { for(j = 0; j < 4; ++j) s->Temps[i].Value[j] = 0; } @@ -429,7 +428,7 @@ static void scan_instructions(struct pair_state *s) static void decrement_dependencies(struct pair_state *s, struct pair_state_instruction *pairinst) { - ASSERT(pairinst->NumDependencies > 0); + assert(pairinst->NumDependencies > 0); if (!--pairinst->NumDependencies) instruction_ready(s, pairinst); } @@ -440,12 +439,12 @@ static void decrement_dependencies(struct pair_state *s, struct pair_state_instr */ static void commit_instruction(struct pair_state *s, struct pair_state_instruction *pairinst) { - struct prog_instruction *inst = &pairinst->Instruction; + struct rc_sub_instruction *inst = &pairinst->Instruction; if (s->Verbose) - _mesa_printf("commit_instruction(%i)\n", pairinst->IP); + fprintf(stderr, "commit_instruction(%i)\n", pairinst->IP); - if (inst->DstReg.File == PROGRAM_TEMPORARY) { + if (inst->DstReg.File == RC_FILE_TEMPORARY) { struct pair_register_translation *t = &s->Temps[inst->DstReg.Index]; deref_hw_reg(s, t->HwIndex); @@ -467,21 +466,21 @@ static void commit_instruction(struct pair_state *s, struct pair_state_instructi } } - int nsrc = _mesa_num_inst_src_regs(inst->Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); int i; - for(i = 0; i < nsrc; i++) { + for(i = 0; i < opcode->NumSrcRegs; i++) { struct pair_register_translation *t = get_register(s, inst->SrcReg[i].File, inst->SrcReg[i].Index); if (!t) continue; deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[i].File, inst->SrcReg[i].Index)); - if (inst->SrcReg[i].File != PROGRAM_TEMPORARY) + if (inst->SrcReg[i].File != RC_FILE_TEMPORARY) continue; int j; for(j = 0; j < 4; ++j) { - GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); + unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); if (swz >= 4) continue; if (!t->Value[swz]) @@ -489,7 +488,7 @@ static void commit_instruction(struct pair_state *s, struct pair_state_instructi /* Do not free a dependency if this instruction * also rewrites the value. See scan_instructions. */ - if (inst->DstReg.File == PROGRAM_TEMPORARY && + if (inst->DstReg.File == RC_FILE_TEMPORARY && inst->DstReg.Index == inst->SrcReg[i].Index && GET_BIT(inst->DstReg.WriteMask, swz)) continue; @@ -519,7 +518,7 @@ static void emit_all_tex(struct pair_state *s) struct pair_state_instruction *readytex; struct pair_state_instruction *pairinst; - ASSERT(s->ReadyTEX); + assert(s->ReadyTEX); // Don't let the ready list change under us! readytex = s->ReadyTEX; @@ -527,39 +526,37 @@ static void emit_all_tex(struct pair_state *s) // Allocate destination hardware registers in one block to avoid conflicts. for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { - struct prog_instruction *inst = &pairinst->Instruction; - if (inst->Opcode != OPCODE_KIL) + struct rc_sub_instruction *inst = &pairinst->Instruction; + if (inst->Opcode != RC_OPCODE_KIL) get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); } if (s->Compiler->Base.Debug) - _mesa_printf(" BEGIN_TEX\n"); + fprintf(stderr, " BEGIN_TEX\n"); if (s->Handler->BeginTexBlock) s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->BeginTexBlock(s->UserData); for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { - struct prog_instruction *inst = &pairinst->Instruction; + struct rc_sub_instruction *inst = &pairinst->Instruction; commit_instruction(s, pairinst); - if (inst->Opcode != OPCODE_KIL) + if (inst->Opcode != RC_OPCODE_KIL) inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index); if (s->Compiler->Base.Debug) { - _mesa_printf(" "); - _mesa_print_instruction(inst); - fflush(stderr); + /* Should print the TEX instruction here */ } struct radeon_pair_texture_instruction rpti; switch(inst->Opcode) { - case OPCODE_TEX: rpti.Opcode = RADEON_OPCODE_TEX; break; - case OPCODE_TXB: rpti.Opcode = RADEON_OPCODE_TXB; break; - case OPCODE_TXP: rpti.Opcode = RADEON_OPCODE_TXP; break; + case RC_OPCODE_TEX: rpti.Opcode = RADEON_OPCODE_TEX; break; + case RC_OPCODE_TXB: rpti.Opcode = RADEON_OPCODE_TXB; break; + case RC_OPCODE_TXP: rpti.Opcode = RADEON_OPCODE_TXP; break; default: - case OPCODE_KIL: rpti.Opcode = RADEON_OPCODE_KIL; break; + case RC_OPCODE_KIL: rpti.Opcode = RADEON_OPCODE_KIL; break; } rpti.DestIndex = inst->DstReg.Index; @@ -573,12 +570,12 @@ static void emit_all_tex(struct pair_state *s) } if (s->Compiler->Base.Debug) - _mesa_printf(" END_TEX\n"); + fprintf(stderr, " END_TEX\n"); } static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instruction *pair, - struct prog_src_register src, GLboolean rgb, GLboolean alpha) + struct rc_src_register src, unsigned int rgb, unsigned int alpha) { int candidate = -1; int candidate_quality = -1; @@ -587,10 +584,10 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio if (!rgb && !alpha) return 0; - GLuint constant; - GLuint index; + unsigned int constant; + unsigned int index; - if (src.File == PROGRAM_TEMPORARY || src.File == PROGRAM_INPUT) { + if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { constant = 0; index = get_hw_reg(s, src.File, src.Index); } else { @@ -642,37 +639,38 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio * Fill the given ALU instruction's opcodes and source operands into the given pair, * if possible. */ -static GLboolean fill_instruction_into_pair( +static int fill_instruction_into_pair( struct pair_state *s, struct radeon_pair_instruction *pair, struct pair_state_instruction *pairinst) { - struct prog_instruction *inst = &pairinst->Instruction; + struct rc_sub_instruction *inst = &pairinst->Instruction; - ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP); - ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP); + assert(!pairinst->NeedRGB || pair->RGB.Opcode == RC_OPCODE_NOP); + assert(!pairinst->NeedAlpha || pair->Alpha.Opcode == RC_OPCODE_NOP); if (pairinst->NeedRGB) { if (pairinst->IsTranscendent) - pair->RGB.Opcode = OPCODE_REPL_ALPHA; + pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA; else pair->RGB.Opcode = inst->Opcode; - if (inst->SaturateMode == SATURATE_ZERO_ONE) + if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) pair->RGB.Saturate = 1; } if (pairinst->NeedAlpha) { pair->Alpha.Opcode = inst->Opcode; - if (inst->SaturateMode == SATURATE_ZERO_ONE) + if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) pair->Alpha.Saturate = 1; } - int nargs = _mesa_num_inst_src_regs(inst->Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + int nargs = opcode->NumSrcRegs; int i; /* Special case for DDX/DDY (MDH/MDV). */ - if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) { + if (inst->Opcode == RC_OPCODE_DDX || inst->Opcode == RC_OPCODE_DDY) { if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used) - return GL_FALSE; + return 0; else nargs++; } @@ -680,43 +678,43 @@ static GLboolean fill_instruction_into_pair( for(i = 0; i < nargs; ++i) { int source; if (pairinst->NeedRGB && !pairinst->IsTranscendent) { - GLboolean srcrgb = GL_FALSE; - GLboolean srcalpha = GL_FALSE; + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; int j; for(j = 0; j < 3; ++j) { - GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); + unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); if (swz < 3) - srcrgb = GL_TRUE; + srcrgb = 1; else if (swz < 4) - srcalpha = GL_TRUE; + srcalpha = 1; } source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); if (source < 0) - return GL_FALSE; + return 0; pair->RGB.Arg[i].Source = source; pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (NEGATE_X | NEGATE_Y | NEGATE_Z)); + pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z)); } if (pairinst->NeedAlpha) { - GLboolean srcrgb = GL_FALSE; - GLboolean srcalpha = GL_FALSE; - GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3); + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3); if (swz < 3) - srcrgb = GL_TRUE; + srcrgb = 1; else if (swz < 4) - srcalpha = GL_TRUE; + srcalpha = 1; source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); if (source < 0) - return GL_FALSE; + return 0; pair->Alpha.Arg[i].Source = source; pair->Alpha.Arg[i].Swizzle = swz; pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & NEGATE_W); + pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W); } } - return GL_TRUE; + return 1; } @@ -733,20 +731,20 @@ static void fill_dest_into_pair( struct radeon_pair_instruction *pair, struct pair_state_instruction *pairinst) { - struct prog_instruction *inst = &pairinst->Instruction; + struct rc_sub_instruction *inst = &pairinst->Instruction; - if (inst->DstReg.File == PROGRAM_OUTPUT) { + if (inst->DstReg.File == RC_FILE_OUTPUT) { if (inst->DstReg.Index == s->Compiler->OutputColor) { - pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; + pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); } else if (inst->DstReg.Index == s->Compiler->OutputDepth) { pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); } } else { - GLuint hwindex = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); + unsigned int hwindex = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); if (pairinst->NeedRGB) { pair->RGB.DestIndex = hwindex; - pair->RGB.WriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; + pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; } if (pairinst->NeedAlpha) { pair->Alpha.DestIndex = hwindex; @@ -780,7 +778,7 @@ static void emit_alu(struct pair_state *s) s->ReadyAlpha = s->ReadyAlpha->NextReady; } - _mesa_bzero(&pair, sizeof(pair)); + memset(&pair, 0, sizeof(pair)); fill_instruction_into_pair(s, &pair, psi); fill_dest_into_pair(s, &pair, psi); commit_instruction(s, psi); @@ -794,7 +792,7 @@ static void emit_alu(struct pair_state *s) for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { struct pair_state_instruction * psirgb = *prgb; struct pair_state_instruction * psialpha = *palpha; - _mesa_bzero(&pair, sizeof(pair)); + memset(&pair, 0, sizeof(pair)); fill_instruction_into_pair(s, &pair, psirgb); if (!fill_instruction_into_pair(s, &pair, psialpha)) continue; @@ -812,7 +810,7 @@ static void emit_alu(struct pair_state *s) psi = s->ReadyRGB; s->ReadyRGB = s->ReadyRGB->NextReady; - _mesa_bzero(&pair, sizeof(pair)); + memset(&pair, 0, sizeof(pair)); fill_instruction_into_pair(s, &pair, psi); fill_dest_into_pair(s, &pair, psi); commit_instruction(s, psi); @@ -829,7 +827,7 @@ static void emit_alu(struct pair_state *s) static void alloc_helper(void * data, unsigned input, unsigned hwreg) { struct pair_state * s = data; - alloc_hw_reg(s, PROGRAM_INPUT, input, hwreg); + alloc_hw_reg(s, RC_FILE_INPUT, input, hwreg); } void radeonPairProgram( @@ -838,14 +836,14 @@ void radeonPairProgram( { struct pair_state s; - _mesa_bzero(&s, sizeof(s)); + memset(&s, 0, sizeof(s)); s.Compiler = compiler; s.Handler = handler; s.UserData = userdata; - s.Verbose = GL_FALSE && s.Compiler->Base.Debug; + s.Verbose = 0 && s.Compiler->Base.Debug; if (s.Compiler->Base.Debug) - _mesa_printf("Emit paired program\n"); + fprintf(stderr, "Emit paired program\n"); scan_instructions(&s); s.Compiler->AllocateHwInputs(s.Compiler, &alloc_helper, &s); @@ -860,41 +858,36 @@ void radeonPairProgram( } if (s.Compiler->Base.Debug) - _mesa_printf(" END\n"); + fprintf(stderr, " END\n"); } static void print_pair_src(int i, struct radeon_pair_instruction_source* src) { - _mesa_printf(" Src%i = %s[%i]", i, src->Constant ? "CNST" : "TEMP", src->Index); + fprintf(stderr, " Src%i = %s[%i]", i, src->Constant ? "CNST" : "TEMP", src->Index); } -static const char* opcode_string(GLuint opcode) +static const char* opcode_string(rc_opcode opcode) { - if (opcode == OPCODE_REPL_ALPHA) - return "SOP"; - else - return _mesa_opcode_string(opcode); + return rc_get_opcode_info(opcode)->Name; } -static int num_pairinst_args(GLuint opcode) +static int num_pairinst_args(rc_opcode opcode) { - if (opcode == OPCODE_REPL_ALPHA) - return 0; - else - return _mesa_num_inst_src_regs(opcode); + return rc_get_opcode_info(opcode)->NumSrcRegs; } -static char swizzle_char(GLuint swz) +static char swizzle_char(rc_swizzle swz) { switch(swz) { - case SWIZZLE_X: return 'x'; - case SWIZZLE_Y: return 'y'; - case SWIZZLE_Z: return 'z'; - case SWIZZLE_W: return 'w'; - case SWIZZLE_ZERO: return '0'; - case SWIZZLE_ONE: return '1'; - case SWIZZLE_NIL: return '_'; + case RC_SWIZZLE_X: return 'x'; + case RC_SWIZZLE_Y: return 'y'; + case RC_SWIZZLE_Z: return 'z'; + case RC_SWIZZLE_W: return 'w'; + case RC_SWIZZLE_ZERO: return '0'; + case RC_SWIZZLE_ONE: return '1'; + case RC_SWIZZLE_HALF: return 'H'; + case RC_SWIZZLE_UNUSED: return '_'; default: return '?'; } } @@ -904,27 +897,27 @@ void radeonPrintPairInstruction(struct radeon_pair_instruction *inst) int nargs; int i; - _mesa_printf(" RGB: "); + fprintf(stderr, " RGB: "); for(i = 0; i < 3; ++i) { if (inst->RGB.Src[i].Used) print_pair_src(i, inst->RGB.Src + i); } - _mesa_printf("\n"); - _mesa_printf(" Alpha:"); + fprintf(stderr, "\n"); + fprintf(stderr, " Alpha:"); for(i = 0; i < 3; ++i) { if (inst->Alpha.Src[i].Used) print_pair_src(i, inst->Alpha.Src + i); } - _mesa_printf("\n"); + fprintf(stderr, "\n"); - _mesa_printf(" %s%s", opcode_string(inst->RGB.Opcode), inst->RGB.Saturate ? "_SAT" : ""); + fprintf(stderr, " %s%s", opcode_string(inst->RGB.Opcode), inst->RGB.Saturate ? "_SAT" : ""); if (inst->RGB.WriteMask) - _mesa_printf(" TEMP[%i].%s%s%s", inst->RGB.DestIndex, + fprintf(stderr, " TEMP[%i].%s%s%s", inst->RGB.DestIndex, (inst->RGB.WriteMask & 1) ? "x" : "", (inst->RGB.WriteMask & 2) ? "y" : "", (inst->RGB.WriteMask & 4) ? "z" : ""); if (inst->RGB.OutputWriteMask) - _mesa_printf(" COLOR.%s%s%s", + fprintf(stderr, " COLOR.%s%s%s", (inst->RGB.OutputWriteMask & 1) ? "x" : "", (inst->RGB.OutputWriteMask & 2) ? "y" : "", (inst->RGB.OutputWriteMask & 4) ? "z" : ""); @@ -932,27 +925,27 @@ void radeonPrintPairInstruction(struct radeon_pair_instruction *inst) for(i = 0; i < nargs; ++i) { const char* abs = inst->RGB.Arg[i].Abs ? "|" : ""; const char* neg = inst->RGB.Arg[i].Negate ? "-" : ""; - _mesa_printf(", %s%sSrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[i].Source, + fprintf(stderr, ", %s%sSrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[i].Source, swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 0)), swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 1)), swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 2)), abs); } - _mesa_printf("\n"); + fprintf(stderr, "\n"); - _mesa_printf(" %s%s", opcode_string(inst->Alpha.Opcode), inst->Alpha.Saturate ? "_SAT" : ""); + fprintf(stderr, " %s%s", opcode_string(inst->Alpha.Opcode), inst->Alpha.Saturate ? "_SAT" : ""); if (inst->Alpha.WriteMask) - _mesa_printf(" TEMP[%i].w", inst->Alpha.DestIndex); + fprintf(stderr, " TEMP[%i].w", inst->Alpha.DestIndex); if (inst->Alpha.OutputWriteMask) - _mesa_printf(" COLOR.w"); + fprintf(stderr, " COLOR.w"); if (inst->Alpha.DepthWriteMask) - _mesa_printf(" DEPTH.w"); + fprintf(stderr, " DEPTH.w"); nargs = num_pairinst_args(inst->Alpha.Opcode); for(i = 0; i < nargs; ++i) { const char* abs = inst->Alpha.Arg[i].Abs ? "|" : ""; const char* neg = inst->Alpha.Arg[i].Negate ? "-" : ""; - _mesa_printf(", %s%sSrc%i.%c%s", neg, abs, inst->Alpha.Arg[i].Source, + fprintf(stderr, ", %s%sSrc%i.%c%s", neg, abs, inst->Alpha.Arg[i].Source, swizzle_char(inst->Alpha.Arg[i].Swizzle), abs); } - _mesa_printf("\n"); + fprintf(stderr, "\n"); } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index ff76178551..da2bcc5d89 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -38,43 +38,43 @@ struct r300_fragment_program_compiler; * fragment programs. */ struct radeon_pair_instruction_source { - GLuint Index:8; - GLuint Constant:1; - GLuint Used:1; + unsigned int Index:8; + unsigned int Constant:1; + unsigned int Used:1; }; struct radeon_pair_instruction_rgb { - GLuint Opcode:8; - GLuint DestIndex:8; - GLuint WriteMask:3; - GLuint OutputWriteMask:3; - GLuint Saturate:1; + unsigned int Opcode:8; + unsigned int DestIndex:8; + unsigned int WriteMask:3; + unsigned int OutputWriteMask:3; + unsigned int Saturate:1; struct radeon_pair_instruction_source Src[3]; struct { - GLuint Source:2; - GLuint Swizzle:9; - GLuint Abs:1; - GLuint Negate:1; + unsigned int Source:2; + unsigned int Swizzle:9; + unsigned int Abs:1; + unsigned int Negate:1; } Arg[3]; }; struct radeon_pair_instruction_alpha { - GLuint Opcode:8; - GLuint DestIndex:8; - GLuint WriteMask:1; - GLuint OutputWriteMask:1; - GLuint DepthWriteMask:1; - GLuint Saturate:1; + unsigned int Opcode:8; + unsigned int DestIndex:8; + unsigned int WriteMask:1; + unsigned int OutputWriteMask:1; + unsigned int DepthWriteMask:1; + unsigned int Saturate:1; struct radeon_pair_instruction_source Src[3]; struct { - GLuint Source:2; - GLuint Swizzle:3; - GLuint Abs:1; - GLuint Negate:1; + unsigned int Source:2; + unsigned int Swizzle:3; + unsigned int Abs:1; + unsigned int Negate:1; } Arg[3]; }; @@ -92,16 +92,16 @@ enum { }; struct radeon_pair_texture_instruction { - GLuint Opcode:2; /**< one of RADEON_OPCODE_xxx */ + unsigned int Opcode:2; /**< one of RADEON_OPCODE_xxx */ - GLuint DestIndex:8; - GLuint WriteMask:4; + unsigned int DestIndex:8; + unsigned int WriteMask:4; - GLuint TexSrcUnit:5; - GLuint TexSrcTarget:3; + unsigned int TexSrcUnit:5; + unsigned int TexSrcTarget:3; - GLuint SrcIndex:8; - GLuint SrcSwizzle:12; + unsigned int SrcIndex:8; + unsigned int SrcSwizzle:12; }; @@ -112,24 +112,24 @@ struct radeon_pair_handler { /** * Write a paired instruction to the hardware. * - * @return GL_FALSE on error. + * @return 0 on error. */ - GLboolean (*EmitPaired)(void*, struct radeon_pair_instruction*); + int (*EmitPaired)(void*, struct radeon_pair_instruction*); /** * Write a texture instruction to the hardware. * Register indices have already been rewritten to the allocated * hardware register numbers. * - * @return GL_FALSE on error. + * @return 0 on error. */ - GLboolean (*EmitTex)(void*, struct radeon_pair_texture_instruction*); + int (*EmitTex)(void*, struct radeon_pair_texture_instruction*); /** * Called before a block of contiguous, independent texture * instructions is emitted. */ - GLboolean (*BeginTexBlock)(void*); + int (*BeginTexBlock)(void*); unsigned MaxHwTemps; }; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index ea6ff03e61..588b9c0825 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -125,7 +125,7 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler, */ static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp) { - struct prog_src_register src; + struct rc_src_register src; int i; if (!(compiler->Base.Program.InputsRead & FRAG_BIT_FOGC)) { @@ -142,7 +142,7 @@ static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r } memset(&src, 0, sizeof(src)); - src.File = PROGRAM_INPUT; + src.File = RC_FILE_INPUT; src.Index = fp->fog_attr; src.Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE); rc_move_input(&compiler->Base, FRAG_ATTRIB_FOGC, src); diff --git a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c index 13b9d3541f..1e8fc3de4e 100644 --- a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c +++ b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c @@ -28,12 +28,153 @@ #include "radeon_mesa_to_rc.h" #include "main/mtypes.h" +#include "shader/prog_instruction.h" #include "shader/prog_parameter.h" #include "compiler/radeon_compiler.h" #include "compiler/radeon_program.h" +static rc_opcode translate_opcode(gl_inst_opcode opcode) +{ + switch(opcode) { + case OPCODE_NOP: return RC_OPCODE_NOP; + case OPCODE_ABS: return RC_OPCODE_ABS; + case OPCODE_ADD: return RC_OPCODE_ADD; + case OPCODE_ARL: return RC_OPCODE_ARL; + case OPCODE_CMP: return RC_OPCODE_CMP; + case OPCODE_COS: return RC_OPCODE_COS; + case OPCODE_DDX: return RC_OPCODE_DDX; + case OPCODE_DDY: return RC_OPCODE_DDY; + case OPCODE_DP3: return RC_OPCODE_DP3; + case OPCODE_DP4: return RC_OPCODE_DP4; + case OPCODE_DPH: return RC_OPCODE_DPH; + case OPCODE_DST: return RC_OPCODE_DST; + case OPCODE_EX2: return RC_OPCODE_EX2; + case OPCODE_EXP: return RC_OPCODE_EXP; + case OPCODE_FLR: return RC_OPCODE_FLR; + case OPCODE_FRC: return RC_OPCODE_FRC; + case OPCODE_KIL: return RC_OPCODE_KIL; + case OPCODE_LG2: return RC_OPCODE_LG2; + case OPCODE_LIT: return RC_OPCODE_LIT; + case OPCODE_LOG: return RC_OPCODE_LOG; + case OPCODE_LRP: return RC_OPCODE_LRP; + case OPCODE_MAD: return RC_OPCODE_MAD; + case OPCODE_MAX: return RC_OPCODE_MAX; + case OPCODE_MIN: return RC_OPCODE_MIN; + case OPCODE_MOV: return RC_OPCODE_MOV; + case OPCODE_MUL: return RC_OPCODE_MUL; + case OPCODE_POW: return RC_OPCODE_POW; + case OPCODE_RCP: return RC_OPCODE_RCP; + case OPCODE_RSQ: return RC_OPCODE_RSQ; + case OPCODE_SCS: return RC_OPCODE_SCS; + case OPCODE_SEQ: return RC_OPCODE_SEQ; + case OPCODE_SFL: return RC_OPCODE_SFL; + case OPCODE_SGE: return RC_OPCODE_SGE; + case OPCODE_SGT: return RC_OPCODE_SGT; + case OPCODE_SIN: return RC_OPCODE_SIN; + case OPCODE_SLE: return RC_OPCODE_SLE; + case OPCODE_SLT: return RC_OPCODE_SLT; + case OPCODE_SNE: return RC_OPCODE_SNE; + case OPCODE_SUB: return RC_OPCODE_SUB; + case OPCODE_SWZ: return RC_OPCODE_SWZ; + case OPCODE_TEX: return RC_OPCODE_TEX; + case OPCODE_TXB: return RC_OPCODE_TXB; + case OPCODE_TXD: return RC_OPCODE_TXD; + case OPCODE_TXL: return RC_OPCODE_TXL; + case OPCODE_TXP: return RC_OPCODE_TXP; + case OPCODE_XPD: return RC_OPCODE_XPD; + default: return RC_OPCODE_ILLEGAL_OPCODE; + } +} + +static rc_saturate_mode translate_saturate(unsigned int saturate) +{ + switch(saturate) { + default: + case SATURATE_OFF: return RC_SATURATE_NONE; + case SATURATE_ZERO_ONE: return RC_SATURATE_ZERO_ONE; + case SATURATE_PLUS_MINUS_ONE: return RC_SATURATE_MINUS_PLUS_ONE; + } +} + +static rc_register_file translate_register_file(unsigned int file) +{ + switch(file) { + case PROGRAM_TEMPORARY: return RC_FILE_TEMPORARY; + case PROGRAM_INPUT: return RC_FILE_INPUT; + case PROGRAM_OUTPUT: return RC_FILE_OUTPUT; + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: return RC_FILE_CONSTANT; + case PROGRAM_ADDRESS: return RC_FILE_ADDRESS; + default: return RC_FILE_NONE; + } +} + +static void translate_srcreg(struct rc_src_register * dest, struct prog_src_register * src) +{ + dest->File = translate_register_file(src->File); + dest->Index = src->Index; + dest->RelAddr = src->RelAddr; + dest->Swizzle = src->Swizzle; + dest->Abs = src->Abs; + dest->Negate = src->Negate; +} + +static void translate_dstreg(struct rc_dst_register * dest, struct prog_dst_register * src) +{ + dest->File = translate_register_file(src->File); + dest->Index = src->Index; + dest->RelAddr = src->RelAddr; + dest->WriteMask = src->WriteMask; +} + +static rc_texture_target translate_tex_target(gl_texture_index target) +{ + switch(target) { + case TEXTURE_2D_ARRAY_INDEX: return RC_TEXTURE_2D_ARRAY; + case TEXTURE_1D_ARRAY_INDEX: return RC_TEXTURE_1D_ARRAY; + case TEXTURE_CUBE_INDEX: return RC_TEXTURE_CUBE; + case TEXTURE_3D_INDEX: return RC_TEXTURE_3D; + case TEXTURE_RECT_INDEX: return RC_TEXTURE_RECT; + default: + case TEXTURE_2D_INDEX: return RC_TEXTURE_2D; + case TEXTURE_1D_INDEX: return RC_TEXTURE_1D; + } +} + +static void translate_instruction(struct radeon_compiler * c, + struct rc_instruction * dest, struct prog_instruction * src) +{ + const struct rc_opcode_info * opcode; + unsigned int i; + + dest->I.Opcode = translate_opcode(src->Opcode); + if (dest->I.Opcode == RC_OPCODE_ILLEGAL_OPCODE) { + rc_error(c, "Unsupported opcode %i\n", src->Opcode); + return; + } + dest->I.SaturateMode = translate_saturate(src->SaturateMode); + + opcode = rc_get_opcode_info(dest->I.Opcode); + + for(i = 0; i < opcode->NumSrcRegs; ++i) + translate_srcreg(&dest->I.SrcReg[i], &src->SrcReg[i]); + + if (opcode->HasDstReg) + translate_dstreg(&dest->I.DstReg, &src->DstReg); + + if (opcode->HasTexture) { + dest->I.TexSrcUnit = src->TexSrcUnit; + dest->I.TexSrcTarget = translate_tex_target(src->TexSrcTarget); + dest->I.TexShadow = src->TexShadow; + } +} + void radeon_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program) { struct prog_instruction *source; @@ -41,7 +182,7 @@ void radeon_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * p for(source = program->Instructions; source->Opcode != OPCODE_END; ++source) { struct rc_instruction * dest = rc_insert_new_instruction(c, c->Program.Instructions.Prev); - dest->I = *source; + translate_instruction(c, dest, source); } c->Program.ShadowSamplers = program->ShadowSamplers; -- cgit v1.2.3 From fb2c7b6743ba6e89f24843890fb7fcd6a09c3dbb Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Thu, 17 Sep 2009 16:05:08 +0100 Subject: softpipe: Respect input interpolators for the shader. --- src/gallium/drivers/softpipe/sp_state_derived.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 75551000c9..9258f64109 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -91,6 +91,23 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) vinfo->num_attribs = 0; for (i = 0; i < spfs->info.num_inputs; i++) { int src; + enum interp_mode interp; + + switch (spfs->info.input_interpolate[i]) { + case TGSI_INTERPOLATE_CONSTANT: + interp = INTERP_CONSTANT; + break; + case TGSI_INTERPOLATE_LINEAR: + interp = INTERP_LINEAR; + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + interp = INTERP_PERSPECTIVE; + break; + default: + assert(0); + interp = INTERP_LINEAR; + } + switch (spfs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: src = draw_find_vs_output(softpipe->draw, @@ -106,7 +123,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) case TGSI_SEMANTIC_FOG: src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_FOG, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); break; case TGSI_SEMANTIC_GENERIC: @@ -114,7 +131,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) /* this includes texcoords and varying vars */ src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_GENERIC, spfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); break; default: -- cgit v1.2.3 From de685b37a91bc95dd4093a44a49b7b47385b1f7c Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 18 Sep 2009 14:36:59 +0100 Subject: softpipe: Fix cube face selection. If arx and ary are equal, we still want to choose from one of them, and not arz. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index a1d3bade27..f99a30277d 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -464,7 +464,7 @@ choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) unsigned face; float sc, tc, ma; - if (arx > ary && arx > arz) { + if (arx >= ary && arx >= arz) { if (rx >= 0.0F) { face = PIPE_TEX_FACE_POS_X; sc = -rz; @@ -478,7 +478,7 @@ choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) ma = arx; } } - else if (ary > arx && ary > arz) { + else if (ary >= arx && ary >= arz) { if (ry >= 0.0F) { face = PIPE_TEX_FACE_POS_Y; sc = rx; -- cgit v1.2.3 From 18d0f9a7a38674367eca25e87f67ddf423d8c4f7 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Thu, 17 Sep 2009 16:05:08 +0100 Subject: llvmpipe: Respect input interpolators for the shader. Cherry-picked from fb2c7b6743ba6e89f24843890fb7fcd6a09c3dbb --- src/gallium/drivers/llvmpipe/lp_state_derived.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index e87976b9f3..30fb41ea65 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -93,6 +93,23 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) vinfo->num_attribs = 0; for (i = 0; i < lpfs->info.num_inputs; i++) { int src; + enum interp_mode interp; + + switch (lpfs->info.input_interpolate[i]) { + case TGSI_INTERPOLATE_CONSTANT: + interp = INTERP_CONSTANT; + break; + case TGSI_INTERPOLATE_LINEAR: + interp = INTERP_LINEAR; + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + interp = INTERP_PERSPECTIVE; + break; + default: + assert(0); + interp = INTERP_LINEAR; + } + switch (lpfs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: src = draw_find_vs_output(llvmpipe->draw, @@ -108,7 +125,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) case TGSI_SEMANTIC_FOG: src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_FOG, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); break; case TGSI_SEMANTIC_GENERIC: @@ -116,7 +133,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) /* this includes texcoords and varying vars */ src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_GENERIC, lpfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); break; default: -- cgit v1.2.3 From 76c2e34b22836c3a71a096be5620ded97a2ae636 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 20 Sep 2009 12:28:07 +0100 Subject: llvmpipe: Update tile status on flush. --- src/gallium/drivers/llvmpipe/lp_tile_cache.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c index 143afec3d3..ddda5650a9 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c @@ -225,11 +225,14 @@ lp_flush_tile_cache(struct llvmpipe_tile_cache *tc) tc->clear_val); screen->transfer_unmap(screen, pt); + + tile->status = LP_TILE_STATUS_UNDEFINED; break; } case LP_TILE_STATUS_DEFINED: lp_put_tile_rgba_soa(pt, x, y, tile->color); + tile->status = LP_TILE_STATUS_UNDEFINED; break; } } -- cgit v1.2.3 From 911a7a82cd44e89dd7c24a256a0a172f01eadde3 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 20 Sep 2009 18:04:00 +0100 Subject: llvmpipe: Fix lp_get_cached_tile. Align coordinates to tile boundaries. --- src/gallium/drivers/llvmpipe/lp_tile_cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c index ddda5650a9..2e576e6039 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c @@ -260,7 +260,7 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc, case LP_TILE_STATUS_UNDEFINED: /* get new tile data from transfer */ - lp_get_tile_rgba_soa(pt, x, y, tile->color); + lp_get_tile_rgba_soa(pt, x & ~(TILE_SIZE - 1), y & ~(TILE_SIZE - 1), tile->color); tile->status = LP_TILE_STATUS_DEFINED; break; -- cgit v1.2.3 From 5a0b29050f22b4475426a6f05a0338a7cdf546a0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 21 Sep 2009 08:34:00 -0600 Subject: softpipe: Fix cube face selection. If arx and ary are equal, we still want to choose from one of them, and not arz. (cherry picked from commit de685b37a91bc95dd4093a44a49b7b47385b1f7c) --- src/gallium/drivers/softpipe/sp_tex_sample.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 5de358dae9..81793ef736 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -464,7 +464,7 @@ choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) unsigned face; float sc, tc, ma; - if (arx > ary && arx > arz) { + if (arx >= ary && arx >= arz) { if (rx >= 0.0F) { face = PIPE_TEX_FACE_POS_X; sc = -rz; @@ -478,7 +478,7 @@ choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) ma = arx; } } - else if (ary > arx && ary > arz) { + else if (ary >= arx && ary >= arz) { if (ry >= 0.0F) { face = PIPE_TEX_FACE_POS_Y; sc = rx; -- cgit v1.2.3 From 9ca94f91a3b48350b02a8fec5ecf60a819a24de5 Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Mon, 21 Sep 2009 17:35:10 +0200 Subject: r300g: Fix bad formatting parameters in calls to debug_printf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolai Hähnle --- src/gallium/drivers/r300/r300_texture.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 6e8c368320..7c041d17f7 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -48,7 +48,7 @@ static void r300_setup_texture_state(struct r300_texture* tex, state->format2 |= R500_TXHEIGHT_BIT11; } - debug_printf("r300: Set texture state (%dx%d, pitch %d, %d levels)\n", + debug_printf("r300: Set texture state (%dx%d, %d levels)\n", width, height, levels); } @@ -62,7 +62,7 @@ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) return tex->stride_override; if (level > tex->tex.last_level) { - debug_printf("%s: level (%u) > last_level (%u)\n", level, tex->tex.last_level); + debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__, level, tex->tex.last_level); return 0; } -- cgit v1.2.3 From e369294f760efd89754f4f66a1080bcf384ba4c6 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Tue, 22 Sep 2009 10:55:41 -0700 Subject: i915g: Do propper references of surfaces in context --- src/gallium/drivers/i915simple/i915_context.c | 7 +++++++ src/gallium/drivers/i915simple/i915_state.c | 10 +++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index b43f735245..e745f3342d 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -175,12 +175,19 @@ i915_is_buffer_referenced(struct pipe_context *pipe, static void i915_destroy(struct pipe_context *pipe) { struct i915_context *i915 = i915_context(pipe); + int i; draw_destroy(i915->draw); if(i915->batch) i915->iws->batchbuffer_destroy(i915->batch); + /* unbind framebuffer */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + pipe_surface_reference(&i915->framebuffer.cbufs[i], NULL); + } + pipe_surface_reference(&i915->framebuffer.zsbuf, NULL); + FREE(i915); } diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index 0087dfa410..7d48e6e84d 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -588,9 +588,17 @@ static void i915_set_framebuffer_state(struct pipe_context *pipe, const struct pipe_framebuffer_state *fb) { struct i915_context *i915 = i915_context(pipe); + int i; + draw_flush(i915->draw); - i915->framebuffer = *fb; /* struct copy */ + i915->framebuffer.width = fb->width; + i915->framebuffer.height = fb->height; + i915->framebuffer.nr_cbufs = fb->nr_cbufs; + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + pipe_surface_reference(&i915->framebuffer.cbufs[i], fb->cbufs[i]); + } + pipe_surface_reference(&i915->framebuffer.zsbuf, fb->zsbuf); i915->dirty |= I915_NEW_FRAMEBUFFER; } -- cgit v1.2.3 From b626176f0613852df908b4b0552b9b67d5830b4e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 22 Sep 2009 19:26:08 +0100 Subject: softpipe: fix occlusion counting --- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 9cffea2c9e..ce1bab9341 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -631,7 +631,7 @@ alpha_test_quads(struct quad_stage *qs, } } -static unsigned mask_count[0x8] = +static unsigned mask_count[16] = { 0, /* 0x0 */ 1, /* 0x1 */ @@ -641,6 +641,14 @@ static unsigned mask_count[0x8] = 2, /* 0x5 */ 2, /* 0x6 */ 3, /* 0x7 */ + 1, /* 0x8 */ + 2, /* 0x9 */ + 2, /* 0xa */ + 3, /* 0xb */ + 2, /* 0xc */ + 3, /* 0xd */ + 3, /* 0xe */ + 4, /* 0xf */ }; @@ -693,13 +701,17 @@ depth_test_quads_fallback(struct quad_stage *qs, qs->softpipe->depth_stencil->depth.writemask) write_depth_stencil_values(&data, quads[i]); - qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask]; quads[pass++] = quads[i]; } nr = pass; } + if (qs->softpipe->active_query_count) { + for (i = 0; i < nr; i++) + qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask]; + } + if (nr) qs->next->run(qs->next, quads, nr); } @@ -883,6 +895,8 @@ choose_depth_test(struct quad_stage *qs, boolean depthwrite = qs->softpipe->depth_stencil->depth.writemask; + boolean occlusion = qs->softpipe->active_query_count; + if (!alpha && !depth && @@ -893,6 +907,7 @@ choose_depth_test(struct quad_stage *qs, interp_depth && depth && depthwrite && + !occlusion && !stencil) { switch (depthfunc) { -- cgit v1.2.3 From b1139e9ad827d86886772a9c9d83dbb0071c702c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 22 Sep 2009 19:38:34 +0100 Subject: softpipe: fix polygon stipple --- src/gallium/drivers/softpipe/sp_context.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 6b75ee6002..c4b8b33c6a 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -262,10 +262,8 @@ softpipe_create( struct pipe_screen *screen ) draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe); -#if USE_DRAW_STAGE_PSTIPPLE /* Do polygon stipple w/ texture map + frag prog? */ draw_install_pstipple_stage(softpipe->draw, &softpipe->pipe); -#endif sp_init_surface_functions(softpipe); -- cgit v1.2.3 From 207764894b6d565568bc46722e4c239d839a62fc Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 22 Sep 2009 20:47:07 +0100 Subject: softpipe: set quad->facing value --- src/gallium/drivers/softpipe/sp_setup.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index bc8366b0e6..ade125662a 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -240,6 +240,7 @@ static void flush_spans( struct setup_context *setup ) if (quadmask) { setup->quad[q].input.x0 = lx; setup->quad[q].input.y0 = setup->span.y; + setup->quad[q].input.facing = setup->facing; setup->quad[q].inout.mask = quadmask; setup->quad_ptrs[q] = &setup->quad[q]; q++; -- cgit v1.2.3 From fe9ca0f718cbc467e5cee99a2c20a5f257ed2fe1 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 22 Sep 2009 20:47:37 +0100 Subject: softpipe: need to write depth/stencil values even when stencil fails --- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index ce1bab9341..0ca86c4e1c 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -498,7 +498,7 @@ depth_test_quad(struct quad_stage *qs, * Do stencil (and depth) testing. Stenciling depends on the outcome of * depth testing. */ -static boolean +static void depth_stencil_test_quad(struct quad_stage *qs, struct depth_data *data, struct quad_header *quad) @@ -545,13 +545,13 @@ depth_stencil_test_quad(struct quad_stage *qs, /* update stencil buffer values according to z pass/fail result */ if (zFailOp != PIPE_STENCIL_OP_KEEP) { - const unsigned failMask = origMask & ~quad->inout.mask; - apply_stencil_op(data, failMask, zFailOp, ref, wrtMask); + const unsigned zFailMask = origMask & ~quad->inout.mask; + apply_stencil_op(data, zFailMask, zFailOp, ref, wrtMask); } if (zPassOp != PIPE_STENCIL_OP_KEEP) { - const unsigned passMask = origMask & quad->inout.mask; - apply_stencil_op(data, passMask, zPassOp, ref, wrtMask); + const unsigned zPassMask = origMask & quad->inout.mask; + apply_stencil_op(data, zPassMask, zPassOp, ref, wrtMask); } } else { @@ -559,8 +559,6 @@ depth_stencil_test_quad(struct quad_stage *qs, apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask); } } - - return quad->inout.mask != 0; } @@ -689,17 +687,17 @@ depth_test_quads_fallback(struct quad_stage *qs, } if (qs->softpipe->depth_stencil->stencil[0].enabled) { - if (!depth_stencil_test_quad(qs, &data, quads[i])) - continue; + depth_stencil_test_quad(qs, &data, quads[i]); + write_depth_stencil_values(&data, quads[i]); } else { if (!depth_test_quad(qs, &data, quads[i])) continue; + + if (qs->softpipe->depth_stencil->depth.writemask) + write_depth_stencil_values(&data, quads[i]); } - if (qs->softpipe->depth_stencil->stencil[0].enabled || - qs->softpipe->depth_stencil->depth.writemask) - write_depth_stencil_values(&data, quads[i]); quads[pass++] = quads[i]; } -- cgit v1.2.3 From 0670df5cb20c0b6630ab29511d9b2cbe18b47f65 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 22 Sep 2009 16:42:15 -0600 Subject: softpipe: disable a _debug_printf() --- src/gallium/drivers/softpipe/sp_state_derived.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 2a40589e84..856c9ce176 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -203,7 +203,9 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) struct softpipe_texture *spt = softpipe_texture(tc->texture); if (spt->timestamp != tc->timestamp) { sp_tex_tile_cache_validate_texture( tc ); + /* _debug_printf("INV %d %d\n", tc->timestamp, spt->timestamp); + */ tc->timestamp = spt->timestamp; } } -- cgit v1.2.3 From 5dbedf3d7e99efe35fad308d382670e44cd60e25 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 22 Sep 2009 16:59:28 -0600 Subject: softpipe: additional assertions --- src/gallium/drivers/softpipe/sp_tex_sample.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 50460df7cd..be210d5671 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -938,6 +938,7 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, height = texture->height[level0]; assert(width > 0); + assert(height > 0); addr.value = 0; addr.bits.level = samp->level; @@ -983,6 +984,7 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler, height = texture->height[level0]; assert(width > 0); + assert(height > 0); addr.value = 0; addr.bits.level = samp->level; @@ -1104,6 +1106,7 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, height = texture->height[level0]; assert(width > 0); + assert(height > 0); addr.value = 0; addr.bits.level = samp->level; @@ -1151,6 +1154,7 @@ img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler, height = texture->height[level0]; assert(width > 0); + assert(height > 0); addr.value = 0; addr.bits.level = samp->level; -- cgit v1.2.3 From 75276ea316610a5737f2115326482024aa09d02a Mon Sep 17 00:00:00 2001 From: root Date: Tue, 22 Sep 2009 20:14:05 -0600 Subject: softpipe: fix bugs in POT texture sampling when texture is not square Before, if level was greater than the logbase2(base size) we were doing a negative bit shift and winding up with garbage values. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 34 +++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index be210d5671..ba9b91a378 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -732,6 +732,20 @@ get_texel_3d(const struct sp_sampler_varient *samp, } +/** + * Given the logbase2 of a mipmap's base level size and a mipmap level, + * return the size (in texels) of that mipmap level. + * For example, if level[0].width = 256 then base_pot will be 8. + * If level = 2, then we'll return 64 (the width at level=2). + * Return 1 if level > base_pot. + */ +static INLINE unsigned +pot_level_size(unsigned base_pot, unsigned level) +{ + return (base_pot >= level) ? (1 << (base_pot - level)) : 1; +} + + /* Some image-filter fastpaths: */ static INLINE void @@ -745,8 +759,8 @@ img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); unsigned j; unsigned level = samp->level; - unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); + unsigned xpot = pot_level_size(samp->xpot, level); + unsigned ypot = pot_level_size(samp->ypot, level); unsigned xmax = (xpot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, xpot) - 1; */ unsigned ymax = (ypot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, ypot) - 1; */ union tex_tile_address addr; @@ -807,8 +821,8 @@ img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); unsigned j; unsigned level = samp->level; - unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); + unsigned xpot = pot_level_size(samp->xpot, level); + unsigned ypot = pot_level_size(samp->ypot, level); union tex_tile_address addr; addr.value = 0; @@ -846,8 +860,8 @@ img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); unsigned j; unsigned level = samp->level; - unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); + unsigned xpot = pot_level_size(samp->xpot, level); + unsigned ypot = pot_level_size(samp->ypot, level); union tex_tile_address addr; addr.value = 0; @@ -1311,6 +1325,14 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler, samp->level = MIN2(samp->level, (int)texture->last_level); samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba ); } + +#if 0 + printf("RGBA %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n", + rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0], + rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1], + rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2], + rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]); +#endif } -- cgit v1.2.3 From 21a949365d1de2f1fea6cb87c6f389e30156566f Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 22 Sep 2009 17:16:35 +0100 Subject: gallium: Update vendor string. --- src/gallium/drivers/cell/ppu/cell_screen.c | 2 +- src/gallium/drivers/i915simple/i915_screen.c | 2 +- src/gallium/drivers/i965simple/brw_screen.c | 2 +- src/gallium/drivers/softpipe/sp_screen.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index 9161747fdb..d185c6b849 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -41,7 +41,7 @@ static const char * cell_get_vendor(struct pipe_screen *screen) { - return "Tungsten Graphics, Inc."; + return "VMware, Inc."; } diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index a1dd43c1bc..c66558c320 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -46,7 +46,7 @@ static const char * i915_get_vendor(struct pipe_screen *screen) { - return "Tungsten Graphics, Inc."; + return "VMware, Inc."; } static const char * diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c index fb68fd624b..4a84c4db23 100644 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -39,7 +39,7 @@ static const char * brw_get_vendor( struct pipe_screen *screen ) { - return "Tungsten Graphics, Inc."; + return "VMware, Inc."; } diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index ce77018415..6cf45cded2 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -40,7 +40,7 @@ static const char * softpipe_get_vendor(struct pipe_screen *screen) { - return "Tungsten Graphics, Inc."; + return "VMware, Inc."; } -- cgit v1.2.3 From 84b956c29be7eb547130974df9ceb3d2f3354526 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 22 Sep 2009 15:35:05 -0600 Subject: softpipe: increase MAX_WIDTH/HEIGTH 4096 to match max texture size --- src/gallium/drivers/softpipe/sp_tile_cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 1f9b8f1f4f..a524275a71 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -44,8 +44,8 @@ /** XXX move these */ -#define MAX_WIDTH 2048 -#define MAX_HEIGHT 2048 +#define MAX_WIDTH 4096 +#define MAX_HEIGHT 4096 struct softpipe_tile_cache -- cgit v1.2.3 From e41707becaffd604fedc885719e5b061a4a5b363 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 23 Sep 2009 10:50:38 -0600 Subject: softpipe: added max texture/surface size sanity check --- src/gallium/drivers/softpipe/sp_tile_cache.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index a524275a71..461cbb9f95 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -116,6 +116,12 @@ sp_create_tile_cache( struct pipe_screen *screen ) { struct softpipe_tile_cache *tc; uint pos; + int maxLevels, maxTexSize; + + /* sanity checking: max sure MAX_WIDTH/HEIGHT >= largest texture image */ + maxLevels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS); + maxTexSize = 1 << (maxLevels - 1); + assert(MAX_WIDTH >= maxTexSize); tc = CALLOC_STRUCT( softpipe_tile_cache ); if (tc) { -- cgit v1.2.3 From b26f1df920a712da66c72f801e3292bf44ea9a83 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 23 Sep 2009 11:04:07 -0600 Subject: llvmpipe: increase MAX_WIDTH/HEIGHT to match max texture size --- src/gallium/drivers/llvmpipe/lp_tile_cache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.h b/src/gallium/drivers/llvmpipe/lp_tile_cache.h index 6d8ba5ece7..936fc8f0fa 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.h @@ -51,8 +51,8 @@ struct llvmpipe_cached_tile /** XXX move these */ -#define MAX_WIDTH 2048 -#define MAX_HEIGHT 2048 +#define MAX_WIDTH 4096 +#define MAX_HEIGHT 4096 struct llvmpipe_tile_cache -- cgit v1.2.3 From 5244ce786a3e115fac1675450c3df8ee11e20030 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 23 Sep 2009 11:04:57 -0600 Subject: llvmpipe: added max texture/surface size sanity check Carried over from softpipe driver. --- src/gallium/drivers/llvmpipe/lp_tile_cache.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c index 2e576e6039..73460106f3 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c @@ -48,6 +48,12 @@ struct llvmpipe_tile_cache * lp_create_tile_cache( struct pipe_screen *screen ) { struct llvmpipe_tile_cache *tc; + int maxLevels, maxTexSize; + + /* sanity checking: max sure MAX_WIDTH/HEIGHT >= largest texture image */ + maxLevels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS); + maxTexSize = 1 << (maxLevels - 1); + assert(MAX_WIDTH >= maxTexSize); tc = CALLOC_STRUCT( llvmpipe_tile_cache ); if(!tc) -- cgit v1.2.3 From e2329f2795d48d11131e9ac105e7aa3fd2c229c1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 23 Sep 2009 11:35:33 -0600 Subject: softpipe: white-space/formatting fixes and updated comments --- src/gallium/drivers/softpipe/sp_tex_sample.c | 173 ++++++++++++++------------- 1 file changed, 87 insertions(+), 86 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 21031c11b8..f74b86b3c2 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -115,11 +115,9 @@ lerp_3d(float a, float b, float c, * \return integer texture index */ static void -wrap_nearest_repeat(const float s[4], unsigned size, - int icoord[4]) +wrap_nearest_repeat(const float s[4], unsigned size, int icoord[4]) { uint ch; - /* s limited to [0,1) */ /* i limited to [0,size-1] */ for (ch = 0; ch < 4; ch++) { @@ -130,8 +128,7 @@ wrap_nearest_repeat(const float s[4], unsigned size, static void -wrap_nearest_clamp(const float s[4], unsigned size, - int icoord[4]) +wrap_nearest_clamp(const float s[4], unsigned size, int icoord[4]) { uint ch; /* s limited to [0,1] */ @@ -148,8 +145,7 @@ wrap_nearest_clamp(const float s[4], unsigned size, static void -wrap_nearest_clamp_to_edge(const float s[4], unsigned size, - int icoord[4]) +wrap_nearest_clamp_to_edge(const float s[4], unsigned size, int icoord[4]) { uint ch; /* s limited to [min,max] */ @@ -168,8 +164,7 @@ wrap_nearest_clamp_to_edge(const float s[4], unsigned size, static void -wrap_nearest_clamp_to_border(const float s[4], unsigned size, - int icoord[4]) +wrap_nearest_clamp_to_border(const float s[4], unsigned size, int icoord[4]) { uint ch; /* s limited to [min,max] */ @@ -186,9 +181,9 @@ wrap_nearest_clamp_to_border(const float s[4], unsigned size, } } + static void -wrap_nearest_mirror_repeat(const float s[4], unsigned size, - int icoord[4]) +wrap_nearest_mirror_repeat(const float s[4], unsigned size, int icoord[4]) { uint ch; const float min = 1.0F / (2.0F * size); @@ -209,9 +204,9 @@ wrap_nearest_mirror_repeat(const float s[4], unsigned size, } } + static void -wrap_nearest_mirror_clamp(const float s[4], unsigned size, - int icoord[4]) +wrap_nearest_mirror_clamp(const float s[4], unsigned size, int icoord[4]) { uint ch; for (ch = 0; ch < 4; ch++) { @@ -227,9 +222,10 @@ wrap_nearest_mirror_clamp(const float s[4], unsigned size, } } + static void wrap_nearest_mirror_clamp_to_edge(const float s[4], unsigned size, - int icoord[4]) + int icoord[4]) { uint ch; /* s limited to [min,max] */ @@ -284,7 +280,6 @@ wrap_linear_repeat(const float s[4], unsigned size, int icoord0[4], int icoord1[4], float w[4]) { uint ch; - for (ch = 0; ch < 4; ch++) { float u = s[ch] * size - 0.5F; icoord0[ch] = REMAINDER(util_ifloor(u), size); @@ -293,6 +288,7 @@ wrap_linear_repeat(const float s[4], unsigned size, } } + static void wrap_linear_clamp(const float s[4], unsigned size, int icoord0[4], int icoord1[4], float w[4]) @@ -307,6 +303,7 @@ wrap_linear_clamp(const float s[4], unsigned size, } } + static void wrap_linear_clamp_to_edge(const float s[4], unsigned size, int icoord0[4], int icoord1[4], float w[4]) @@ -325,6 +322,7 @@ wrap_linear_clamp_to_edge(const float s[4], unsigned size, } } + static void wrap_linear_clamp_to_border(const float s[4], unsigned size, int icoord0[4], int icoord1[4], float w[4]) @@ -365,6 +363,7 @@ wrap_linear_mirror_repeat(const float s[4], unsigned size, } } + static void wrap_linear_mirror_clamp(const float s[4], unsigned size, int icoord0[4], int icoord1[4], float w[4]) @@ -383,6 +382,7 @@ wrap_linear_mirror_clamp(const float s[4], unsigned size, } } + static void wrap_linear_mirror_clamp_to_edge(const float s[4], unsigned size, int icoord0[4], int icoord1[4], float w[4]) @@ -405,6 +405,7 @@ wrap_linear_mirror_clamp_to_edge(const float s[4], unsigned size, } } + static void wrap_linear_mirror_clamp_to_border(const float s[4], unsigned size, int icoord0[4], int icoord1[4], float w[4]) @@ -433,8 +434,7 @@ wrap_linear_mirror_clamp_to_border(const float s[4], unsigned size, * Only a subset of wrap modes supported. */ static void -wrap_nearest_unorm_clamp(const float s[4], unsigned size, - int icoord[4]) +wrap_nearest_unorm_clamp(const float s[4], unsigned size, int icoord[4]) { uint ch; for (ch = 0; ch < 4; ch++) { @@ -443,11 +443,13 @@ wrap_nearest_unorm_clamp(const float s[4], unsigned size, } } -/* Handles clamp_to_edge and clamp_to_border: + +/** + * Handles clamp_to_edge and clamp_to_border: */ static void wrap_nearest_unorm_clamp_to_border(const float s[4], unsigned size, - int icoord[4]) + int icoord[4]) { uint ch; for (ch = 0; ch < 4; ch++) { @@ -462,7 +464,7 @@ wrap_nearest_unorm_clamp_to_border(const float s[4], unsigned size, */ static void wrap_linear_unorm_clamp(const float s[4], unsigned size, - int icoord0[4], int icoord1[4], float w[4]) + int icoord0[4], int icoord1[4], float w[4]) { uint ch; for (ch = 0; ch < 4; ch++) { @@ -474,9 +476,10 @@ wrap_linear_unorm_clamp(const float s[4], unsigned size, } } + static void -wrap_linear_unorm_clamp_to_border( const float s[4], unsigned size, - int icoord0[4], int icoord1[4], float w[4]) +wrap_linear_unorm_clamp_to_border(const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) { uint ch; for (ch = 0; ch < 4; ch++) { @@ -492,8 +495,6 @@ wrap_linear_unorm_clamp_to_border( const float s[4], unsigned size, - - /** * Examine the quad's texture coordinates to compute the partial * derivatives w.r.t X and Y, then compute lambda (level of detail). @@ -519,6 +520,7 @@ compute_lambda_1d(const struct sp_sampler_varient *samp, return lambda; } + static float compute_lambda_2d(const struct sp_sampler_varient *samp, const float s[QUAD_SIZE], @@ -576,7 +578,10 @@ compute_lambda_3d(const struct sp_sampler_varient *samp, } - +/** + * Compute lambda for a vertex texture sampler. + * Since there aren't derivatives to use, just return the LOD bias. + */ static float compute_lambda_vert(const struct sp_sampler_varient *samp, const float s[QUAD_SIZE], @@ -698,7 +703,7 @@ get_texel_quad_2d(const struct sp_sampler_varient *samp, */ static INLINE const float * get_texel_3d_no_border(const struct sp_sampler_varient *samp, - union tex_tile_address addr, int x, int y, int z) + union tex_tile_address addr, int x, int y, int z) { const struct softpipe_tex_cached_tile *tile; @@ -716,7 +721,7 @@ get_texel_3d_no_border(const struct sp_sampler_varient *samp, static INLINE const float * get_texel_3d(const struct sp_sampler_varient *samp, - union tex_tile_address addr, int x, int y, int z ) + union tex_tile_address addr, int x, int y, int z) { const struct pipe_texture *texture = samp->texture; unsigned level = addr.bits.level; @@ -750,11 +755,11 @@ pot_level_size(unsigned base_pot, unsigned level) */ static INLINE void img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); unsigned j; @@ -768,7 +773,6 @@ img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, addr.value = 0; addr.bits.level = samp->level; - for (j = 0; j < QUAD_SIZE; j++) { int c; @@ -788,18 +792,15 @@ img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, /* Can we fetch all four at once: */ - if (x0 < xmax && y0 < ymax) - { + if (x0 < xmax && y0 < ymax) { get_texel_quad_2d_no_border_single_tile(samp, addr, x0, y0, tx); } - else - { + else { unsigned x1 = (x0 + 1) & (xpot - 1); unsigned y1 = (y0 + 1) & (ypot - 1); get_texel_quad_2d_no_border(samp, addr, x0, y0, x1, y1, tx); } - /* interpolate R, G, B, A */ for (c = 0; c < 4; c++) { rgba[c][j] = lerp_2d(xw, yw, @@ -896,6 +897,7 @@ img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, } } + static void img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], @@ -969,20 +971,22 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, } } -static inline union tex_tile_address face( union tex_tile_address addr, - unsigned face ) + +static inline union tex_tile_address +face(union tex_tile_address addr, unsigned face ) { addr.bits.face = face; return addr; } + static void img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_texture *texture = samp->texture; @@ -992,7 +996,6 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler, int x[4], y[4]; union tex_tile_address addr; - level0 = samp->level; width = texture->width[level0]; height = texture->height[level0]; @@ -1073,7 +1076,6 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler, float xw[4]; /* weights */ union tex_tile_address addr; - level0 = samp->level; width = texture->width[level0]; @@ -1084,7 +1086,6 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler, samp->linear_texcoord_s(s, width, x0, x1, xw); - for (j = 0; j < QUAD_SIZE; j++) { const float *tx0 = get_texel_2d(samp, addr, x0[j], 0); const float *tx1 = get_texel_2d(samp, addr, x1[j], 0); @@ -1114,7 +1115,6 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, float xw[4], yw[4]; /* weights */ union tex_tile_address addr; - level0 = samp->level; width = texture->width[level0]; height = texture->height[level0]; @@ -1147,11 +1147,11 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, static void img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_texture *texture = samp->texture; @@ -1162,7 +1162,6 @@ img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler, float xw[4], yw[4]; /* weights */ union tex_tile_address addr; - level0 = samp->level; width = texture->width[level0]; height = texture->height[level0]; @@ -1251,18 +1250,13 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler, } - - - - - static void mip_filter_linear(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_texture *texture = samp->texture; @@ -1301,7 +1295,6 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler, } - static void mip_filter_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], @@ -1357,7 +1350,8 @@ mip_filter_none(struct tgsi_sampler *tgsi_sampler, -/* Specialized version of mip_filter_linear with hard-wired calls to +/** + * Specialized version of mip_filter_linear with hard-wired calls to * 2d lambda calculation and 2d_linear_repeat_POT img filters. */ static void @@ -1409,7 +1403,8 @@ mip_filter_linear_2d_linear_repeat_POT( -/* Compare stage in the little sampling pipeline. +/** + * Do shadow/depth comparisons. */ static void sample_compare(struct tgsi_sampler *tgsi_sampler, @@ -1426,7 +1421,6 @@ sample_compare(struct tgsi_sampler *tgsi_sampler, samp->mip_filter( tgsi_sampler, s, t, p, lodbias, rgba ); - /** * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' * When we sampled the depth texture, the depth value was put into all @@ -1493,7 +1487,10 @@ sample_compare(struct tgsi_sampler *tgsi_sampler, } } -/* Calculate cube faces. + +/** + * Compute which cube face is referenced by each texcoord and put that + * info into the sampler faces[] array. Then sample the cube faces */ static void sample_cube(struct tgsi_sampler *tgsi_sampler, @@ -1586,8 +1583,8 @@ sample_cube(struct tgsi_sampler *tgsi_sampler, - -static wrap_nearest_func get_nearest_unorm_wrap( unsigned mode ) +static wrap_nearest_func +get_nearest_unorm_wrap(unsigned mode) { switch (mode) { case PIPE_TEX_WRAP_CLAMP: @@ -1602,7 +1599,8 @@ static wrap_nearest_func get_nearest_unorm_wrap( unsigned mode ) } -static wrap_nearest_func get_nearest_wrap( unsigned mode ) +static wrap_nearest_func +get_nearest_wrap(unsigned mode) { switch (mode) { case PIPE_TEX_WRAP_REPEAT: @@ -1627,7 +1625,9 @@ static wrap_nearest_func get_nearest_wrap( unsigned mode ) } } -static wrap_linear_func get_linear_unorm_wrap( unsigned mode ) + +static wrap_linear_func +get_linear_unorm_wrap(unsigned mode) { switch (mode) { case PIPE_TEX_WRAP_CLAMP: @@ -1641,7 +1641,9 @@ static wrap_linear_func get_linear_unorm_wrap( unsigned mode ) } } -static wrap_linear_func get_linear_wrap( unsigned mode ) + +static wrap_linear_func +get_linear_wrap(unsigned mode) { switch (mode) { case PIPE_TEX_WRAP_REPEAT: @@ -1666,7 +1668,9 @@ static wrap_linear_func get_linear_wrap( unsigned mode ) } } -static compute_lambda_func get_lambda_func( const union sp_sampler_key key ) + +static compute_lambda_func +get_lambda_func(const union sp_sampler_key key) { if (key.bits.processor == TGSI_PROCESSOR_VERTEX) return compute_lambda_vert; @@ -1685,9 +1689,11 @@ static compute_lambda_func get_lambda_func( const union sp_sampler_key key ) } } -static filter_func get_img_filter( const union sp_sampler_key key, - unsigned filter, - const struct pipe_sampler_state *sampler ) + +static filter_func +get_img_filter(const union sp_sampler_key key, + unsigned filter, + const struct pipe_sampler_state *sampler) { switch (key.bits.target) { case PIPE_TEXTURE_1D: @@ -1774,7 +1780,8 @@ sp_sampler_varient_destroy( struct sp_sampler_varient *samp ) } -/* Create a sampler varient for a given set of non-orthogonal state. Currently the +/** + * Create a sampler varient for a given set of non-orthogonal state. */ struct sp_sampler_varient * sp_create_sampler_varient( const struct pipe_sampler_state *sampler, @@ -1871,9 +1878,3 @@ sp_create_sampler_varient( const struct pipe_sampler_state *sampler, return samp; } - - - - - - -- cgit v1.2.3 From b4a40d10524a4be6a59805589ee4209ebdb1de4f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 23 Sep 2009 11:51:52 -0600 Subject: softpipe: replace macros with inline functions And update comments. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 53 +++++++++++++++++----------- 1 file changed, 32 insertions(+), 21 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index f74b86b3c2..2092a69740 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -46,13 +46,18 @@ /* - * Note, the FRAC macro has to work perfectly. Otherwise you'll sometimes - * see 1-pixel bands of improperly weighted linear-filtered textures. + * Return fractional part of 'f'. Used for computing interpolation weights. + * Need to be careful with negative values. + * Note, if this function isn't perfect you'll sometimes see 1-pixel bands + * of improperly weighted linear-filtered textures. * The tests/texwrap.c demo is a good test. - * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0. - * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x). */ -#define FRAC(f) ((f) - util_ifloor(f)) +static INLINE float +frac(float f) +{ + return f - util_ifloor(f); +} + /** @@ -99,10 +104,16 @@ lerp_3d(float a, float b, float c, /** - * If A is a signed integer, A % B doesn't give the right value for A < 0 - * (in terms of texture repeat). Just casting to unsigned fixes that. + * Compute coord % size for repeat wrap modes. + * Note that if coord is a signed integer, coord % size doesn't give + * the right value for coord < 0 (in terms of texture repeat). Just + * casting to unsigned fixes that. */ -#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B)) +static INLINE int +repeat(int coord, unsigned size) +{ + return (int) ((unsigned) coord % size); +} /** @@ -122,7 +133,7 @@ wrap_nearest_repeat(const float s[4], unsigned size, int icoord[4]) /* i limited to [0,size-1] */ for (ch = 0; ch < 4; ch++) { int i = util_ifloor(s[ch] * size); - icoord[ch] = REMAINDER(i, size); + icoord[ch] = repeat(i, size); } } @@ -282,9 +293,9 @@ wrap_linear_repeat(const float s[4], unsigned size, uint ch; for (ch = 0; ch < 4; ch++) { float u = s[ch] * size - 0.5F; - icoord0[ch] = REMAINDER(util_ifloor(u), size); - icoord1[ch] = REMAINDER(icoord0[ch] + 1, size); - w[ch] = FRAC(u); + icoord0[ch] = repeat(util_ifloor(u), size); + icoord1[ch] = repeat(icoord0[ch] + 1, size); + w[ch] = frac(u); } } @@ -299,7 +310,7 @@ wrap_linear_clamp(const float s[4], unsigned size, u = u * size - 0.5f; icoord0[ch] = util_ifloor(u); icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); + w[ch] = frac(u); } } @@ -318,7 +329,7 @@ wrap_linear_clamp_to_edge(const float s[4], unsigned size, icoord0[ch] = 0; if (icoord1[ch] >= (int) size) icoord1[ch] = size - 1; - w[ch] = FRAC(u); + w[ch] = frac(u); } } @@ -335,7 +346,7 @@ wrap_linear_clamp_to_border(const float s[4], unsigned size, u = u * size - 0.5f; icoord0[ch] = util_ifloor(u); icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); + w[ch] = frac(u); } } @@ -359,7 +370,7 @@ wrap_linear_mirror_repeat(const float s[4], unsigned size, icoord0[ch] = 0; if (icoord1[ch] >= (int) size) icoord1[ch] = size - 1; - w[ch] = FRAC(u); + w[ch] = frac(u); } } @@ -378,7 +389,7 @@ wrap_linear_mirror_clamp(const float s[4], unsigned size, u -= 0.5F; icoord0[ch] = util_ifloor(u); icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); + w[ch] = frac(u); } } @@ -401,7 +412,7 @@ wrap_linear_mirror_clamp_to_edge(const float s[4], unsigned size, icoord0[ch] = 0; if (icoord1[ch] >= (int) size) icoord1[ch] = size - 1; - w[ch] = FRAC(u); + w[ch] = frac(u); } } @@ -424,7 +435,7 @@ wrap_linear_mirror_clamp_to_border(const float s[4], unsigned size, u -= 0.5F; icoord0[ch] = util_ifloor(u); icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); + w[ch] = frac(u); } } @@ -472,7 +483,7 @@ wrap_linear_unorm_clamp(const float s[4], unsigned size, float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f); icoord0[ch] = util_ifloor(u); icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); + w[ch] = frac(u); } } @@ -489,7 +500,7 @@ wrap_linear_unorm_clamp_to_border(const float s[4], unsigned size, icoord1[ch] = icoord0[ch] + 1; if (icoord1[ch] > (int) size - 1) icoord1[ch] = size - 1; - w[ch] = FRAC(u); + w[ch] = frac(u); } } -- cgit v1.2.3 From 35af3f94a36d1850c8fbab3d1d0a23a904466429 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 23 Sep 2009 11:08:12 -0600 Subject: llvmpipe: move tile cache datatypes into .c file since they're private --- src/gallium/drivers/llvmpipe/lp_tile_cache.c | 37 ++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_tile_cache.h | 37 +--------------------------- 2 files changed, 38 insertions(+), 36 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c index 73460106f3..0c06b659a1 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c @@ -44,6 +44,43 @@ #include "lp_tile_cache.h" +#define MAX_WIDTH 4096 +#define MAX_HEIGHT 4096 + + +enum llvmpipe_tile_status +{ + LP_TILE_STATUS_UNDEFINED = 0, + LP_TILE_STATUS_CLEAR = 1, + LP_TILE_STATUS_DEFINED = 2 +}; + + +struct llvmpipe_cached_tile +{ + enum llvmpipe_tile_status status; + + /** color in SOA format */ + uint8_t *color; +}; + + +struct llvmpipe_tile_cache +{ + struct pipe_screen *screen; + struct pipe_surface *surface; /**< the surface we're caching */ + struct pipe_transfer *transfer; + void *transfer_map; + + struct llvmpipe_cached_tile entries[MAX_WIDTH/TILE_SIZE][MAX_HEIGHT/TILE_SIZE]; + + uint8_t clear_color[4]; /**< for color bufs */ + uint clear_val; /**< for z+stencil, or packed color clear value */ + + struct llvmpipe_cached_tile *last_tile; /**< most recently retrieved tile */ +}; + + struct llvmpipe_tile_cache * lp_create_tile_cache( struct pipe_screen *screen ) { diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.h b/src/gallium/drivers/llvmpipe/lp_tile_cache.h index 936fc8f0fa..161bab3799 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.h @@ -33,42 +33,7 @@ #include "lp_tile_soa.h" -enum llvmpipe_tile_status -{ - LP_TILE_STATUS_UNDEFINED = 0, - LP_TILE_STATUS_CLEAR = 1, - LP_TILE_STATUS_DEFINED = 2 -}; - - -struct llvmpipe_cached_tile -{ - enum llvmpipe_tile_status status; - - /** color in SOA format */ - uint8_t *color; -}; - - -/** XXX move these */ -#define MAX_WIDTH 4096 -#define MAX_HEIGHT 4096 - - -struct llvmpipe_tile_cache -{ - struct pipe_screen *screen; - struct pipe_surface *surface; /**< the surface we're caching */ - struct pipe_transfer *transfer; - void *transfer_map; - - struct llvmpipe_cached_tile entries[MAX_WIDTH/TILE_SIZE][MAX_HEIGHT/TILE_SIZE]; - - uint8_t clear_color[4]; /**< for color bufs */ - uint clear_val; /**< for z+stencil, or packed color clear value */ - - struct llvmpipe_cached_tile *last_tile; /**< most recently retrieved tile */ -}; +struct llvmpipe_tile_cache; /* opaque */ extern struct llvmpipe_tile_cache * -- cgit v1.2.3 From e8e6d8853df19f7a32fb0e4f670259ee65e88b29 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 24 Sep 2009 15:27:03 +0100 Subject: softpipe: Update SConscript. --- src/gallium/drivers/softpipe/SConscript | 6 ------ 1 file changed, 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index 153fe44546..950c3d9955 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -7,22 +7,16 @@ softpipe = env.ConvenienceLibrary( source = [ 'sp_fs_exec.c', 'sp_fs_sse.c', - 'sp_fs_llvm.c', 'sp_clear.c', 'sp_context.c', 'sp_draw_arrays.c', 'sp_flush.c', 'sp_prim_vbuf.c', 'sp_setup.c', - 'sp_quad_alpha_test.c', 'sp_quad_blend.c', 'sp_quad_pipe.c', - 'sp_quad_coverage.c', 'sp_quad_depth_test.c', - 'sp_quad_earlyz.c', 'sp_quad_fs.c', - 'sp_quad_occlusion.c', - 'sp_quad_stencil.c', 'sp_quad_stipple.c', 'sp_query.c', 'sp_screen.c', -- cgit v1.2.3 From 9659aa6482291d1530c74450612bcd952f542e01 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 24 Sep 2009 15:27:19 +0100 Subject: softpipe: Use portable INLINE macro. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 2092a69740..c22ee86b66 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -983,7 +983,7 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, } -static inline union tex_tile_address +static INLINE union tex_tile_address face(union tex_tile_address addr, unsigned face ) { addr.bits.face = face; -- cgit v1.2.3 From d3beaf2f32044b36e2ffaf27679ddd1e5115df3f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 24 Sep 2009 16:49:27 +0100 Subject: softpipe: fix compiler warnings --- src/gallium/drivers/softpipe/sp_tex_tile_cache.h | 2 +- src/gallium/drivers/softpipe/sp_tile_cache.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h index 04e65ce220..ac6886a3df 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h @@ -116,7 +116,7 @@ extern const struct softpipe_tex_cached_tile * sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, union tex_tile_address addr ); -static INLINE const union tex_tile_address +static INLINE union tex_tile_address tex_tile_address( unsigned x, unsigned y, unsigned z, diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index f21c74cb9c..a12092702a 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -128,7 +128,7 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc, union tile_address addr ); -static INLINE const union tile_address +static INLINE union tile_address tile_address( unsigned x, unsigned y ) { -- cgit v1.2.3 From 90dcfb3b47c13044d671b8a1ab0c96ab2d21ea4d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 24 Sep 2009 16:49:40 +0100 Subject: trace: fix printf warnings --- src/gallium/drivers/trace/tr_context.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index ae0af4d055..4940ce62a6 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -125,11 +125,11 @@ trace_context_draw_block(struct trace_context *tr_ctx, int flag) } else if ((tr_ctx->draw_rule.blocker & flag) && (tr_ctx->draw_blocker & 4)) { boolean block = FALSE; - debug_printf("%s (%lu %lu) (%lu %lu) (%lu %u) (%lu %u)\n", __FUNCTION__, - tr_ctx->draw_rule.fs, tr_ctx->curr.fs, - tr_ctx->draw_rule.vs, tr_ctx->curr.vs, - tr_ctx->draw_rule.surf, 0, - tr_ctx->draw_rule.tex, 0); + debug_printf("%s (%p %p) (%p %p) (%p %u) (%p %u)\n", __FUNCTION__, + tr_ctx->draw_rule.fs, tr_ctx->curr.fs, + tr_ctx->draw_rule.vs, tr_ctx->curr.vs, + tr_ctx->draw_rule.surf, 0, + tr_ctx->draw_rule.tex, 0); if (tr_ctx->draw_rule.fs && tr_ctx->draw_rule.fs == tr_ctx->curr.fs) block = TRUE; -- cgit v1.2.3 From 6be2bc56af5c0d281d07e427863789e949904db1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 24 Sep 2009 10:47:42 -0600 Subject: gallium/trace: casts to silence warnings --- src/gallium/drivers/trace/tr_context.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 4940ce62a6..bf470b46ae 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -126,10 +126,10 @@ trace_context_draw_block(struct trace_context *tr_ctx, int flag) (tr_ctx->draw_blocker & 4)) { boolean block = FALSE; debug_printf("%s (%p %p) (%p %p) (%p %u) (%p %u)\n", __FUNCTION__, - tr_ctx->draw_rule.fs, tr_ctx->curr.fs, - tr_ctx->draw_rule.vs, tr_ctx->curr.vs, - tr_ctx->draw_rule.surf, 0, - tr_ctx->draw_rule.tex, 0); + (void *) tr_ctx->draw_rule.fs, (void *) tr_ctx->curr.fs, + (void *) tr_ctx->draw_rule.vs, (void *) tr_ctx->curr.vs, + (void *) tr_ctx->draw_rule.surf, 0, + (void *) tr_ctx->draw_rule.tex, 0); if (tr_ctx->draw_rule.fs && tr_ctx->draw_rule.fs == tr_ctx->curr.fs) block = TRUE; -- cgit v1.2.3 From 1730b8db1237485778abdc4ae37e122414b5423b Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Thu, 24 Sep 2009 15:59:57 -0600 Subject: softpipe: Increase GL_MAX_3D_TEXTURE_SIZE to 256. --- src/gallium/drivers/softpipe/sp_screen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index cc39d33ede..2e2668dfe4 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -84,7 +84,7 @@ softpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: return 12; /* max 2Kx2K */ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 8; /* max 128x128x128 */ + return 9; /* max 256x256x256 */ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 12; /* max 2Kx2K */ case PIPE_CAP_TGSI_CONT_SUPPORTED: -- cgit v1.2.3 From a665a3416e5a99dc84691c7f113a11190375f340 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 24 Sep 2009 16:07:45 -0600 Subject: llvmpipe: increase max 3D texture size to 256 --- src/gallium/drivers/llvmpipe/lp_screen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index ff7ef8658a..0518927458 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -82,7 +82,7 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: return 13; /* max 4Kx4K */ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 8; /* max 128x128x128 */ + return 9; /* max 256x256x256 */ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 13; /* max 4Kx4K */ case PIPE_CAP_TGSI_CONT_SUPPORTED: -- cgit v1.2.3 From 01249c6d5653a0e66027202f44de2457be5942a5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 24 Sep 2009 16:08:47 -0600 Subject: llvmpipe: add missing __FUNCTION__ parameter to debug_printf() calls --- src/gallium/drivers/llvmpipe/lp_bld_arit.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index 0b115fc9b0..31433318a7 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -918,7 +918,8 @@ lp_build_pow(struct lp_build_context *bld, { /* TODO: optimize the constant case */ if(LLVMIsConstant(x) && LLVMIsConstant(y)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n"); + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); return lp_build_exp2(bld, lp_build_mul(bld, lp_build_log2(bld, x), y)); } @@ -972,7 +973,8 @@ lp_build_polynomial(struct lp_build_context *bld, /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n"); + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); for (i = num_coeffs; i--; ) { LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]); @@ -1026,7 +1028,8 @@ lp_build_exp2_approx(struct lp_build_context *bld, if(p_exp2_int_part || p_frac_part || p_exp2) { /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n"); + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); assert(type.floating && type.width == 32); @@ -1125,7 +1128,8 @@ lp_build_log2_approx(struct lp_build_context *bld, if(p_exp || p_floor_log2 || p_log2) { /* TODO: optimize the constant case */ if(LLVMIsConstant(x)) - debug_printf("%s: inefficient/imprecise constant arithmetic\n"); + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); assert(type.floating && type.width == 32); -- cgit v1.2.3 From 16a6ca9b2bd4f91aad69d4a5d36402e70a46bd37 Mon Sep 17 00:00:00 2001 From: Cooper Yuan Date: Fri, 25 Sep 2009 15:15:20 +0800 Subject: r300g: add texture format for xvmc --- src/gallium/drivers/r300/r300_texture.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 3109af5bac..697669147d 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -72,6 +72,9 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) /* W24_FP */ case PIPE_FORMAT_Z24S8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, W24_FP); + /* Z5_Y6_X5 */ + case PIPE_FORMAT_R16_SNORM: + return R300_EASY_TX_FORMAT(X, X, X, X, Z5Y6X5); default: debug_printf("r300: Implementation error: " "Got unsupported texture format %s in %s\n", -- cgit v1.2.3 From 1196f9fbd68d9f3d1acd3d097711b382d7489f41 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 24 Sep 2009 16:39:56 +0200 Subject: nv50: implement IF, ELSE, ENDIF opcodes --- src/gallium/drivers/nv50/nv50_program.c | 188 +++++++++++++++++++++++++------- 1 file changed, 146 insertions(+), 42 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index eb90d5e66f..2ab2ac35c2 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -90,6 +90,9 @@ struct nv50_reg { int acc; /* instruction where this reg is last read (first insn == 1) */ }; +/* arbitrary limit */ +#define MAX_IF_DEPTH 4 + struct nv50_pc { struct nv50_program *p; @@ -121,6 +124,11 @@ struct nv50_pc { struct nv50_reg *iv_p; struct nv50_reg *iv_c; + struct nv50_program_exec *if_cond; + struct nv50_program_exec *if_insn[MAX_IF_DEPTH]; + struct nv50_program_exec *br_join[MAX_IF_DEPTH]; + int if_lvl; + /* current instruction and total number of insns */ unsigned insn_cur; unsigned insn_nr; @@ -890,6 +898,7 @@ emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp, set_src_1(pc, src1, e); emit(pc, e); + pc->if_cond = pc->p->exec_tail; /* record for OPCODE_IF */ /* cvt.f32.u32/s32 (?) if we didn't only write the predicate */ if (rdst) @@ -1148,6 +1157,38 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, #endif } +static void +emit_branch(struct nv50_pc *pc, int pred, unsigned cc, + struct nv50_program_exec **join) +{ + struct nv50_program_exec *e = exec(pc); + + if (join) { + set_long(pc, e); + e->inst[0] |= 0xa0000002; + emit(pc, e); + *join = e; + e = exec(pc); + } + + set_long(pc, e); + e->inst[0] |= 0x10000002; + if (pred >= 0) + set_pred(pc, cc, pred, e); + emit(pc, e); +} + +static void +emit_nop(struct nv50_pc *pc) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xf0000000; + set_long(pc, e); + e->inst[1] = 0xe0000000; + emit(pc, e); +} + static void convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e) { @@ -1560,6 +1601,24 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (mask & (1 << 0)) emit_mov_immdval(pc, dst[0], 1.0f); break; + case TGSI_OPCODE_ELSE: + emit_branch(pc, -1, 0, NULL); + pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size; + pc->if_insn[pc->if_lvl++] = pc->p->exec_tail; + break; + case TGSI_OPCODE_ENDIF: + pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size; + + if (pc->br_join[pc->if_lvl]) { + pc->br_join[pc->if_lvl]->param.index = pc->p->exec_size; + pc->br_join[pc->if_lvl] = NULL; + } + /* emit a NOP as join point, we could set it on the next + * one, but would have to make sure it is long and !immd + */ + emit_nop(pc); + pc->p->exec_tail->inst[1] |= 2; + break; case TGSI_OPCODE_EX2: emit_preex2(pc, temp, src[0][0]); emit_flop(pc, 6, brdc, temp); @@ -1580,6 +1639,13 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_sub(pc, dst[c], src[0][c], temp); } break; + case TGSI_OPCODE_IF: + /* emitting a join_at may not be necessary */ + assert(pc->if_lvl < MAX_IF_DEPTH); + set_pred_wr(pc, 1, 0, pc->if_cond); + emit_branch(pc, 0, 2, &pc->br_join[pc->if_lvl]); + pc->if_insn[pc->if_lvl++] = pc->p->exec_tail; + break; case TGSI_OPCODE_KIL: emit_kil(pc, src[0][0]); emit_kil(pc, src[0][1]); @@ -2237,6 +2303,8 @@ nv50_program_tx_prep(struct nv50_pc *pc) pc->result[i].rhw = rid++; if (p->info.writes_z) pc->result[2].rhw = rid; + + p->cfg.high_result = rid; } if (pc->immd_nr) { @@ -2362,12 +2430,75 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) return TRUE; } +static void +nv50_fp_move_results(struct nv50_pc *pc) +{ + struct nv50_reg reg; + unsigned i; + + ctor_reg(®, P_TEMP, -1, -1); + + for (i = 0; i < pc->result_nr * 4; ++i) { + if (pc->result[i].rhw < 0 || pc->result[i].hw < 0) + continue; + if (pc->result[i].rhw != pc->result[i].hw) { + reg.hw = pc->result[i].rhw; + emit_mov(pc, ®, &pc->result[i]); + } + } +} + +static void +nv50_program_fixup_insns(struct nv50_pc *pc) +{ + struct nv50_program_exec *e, *prev = NULL, **bra_list; + unsigned i, n, pos; + + bra_list = CALLOC(pc->p->exec_size, sizeof(struct nv50_program_exec *)); + + /* Collect branch instructions, we need to adjust their offsets + * when converting 32 bit instructions to 64 bit ones + */ + for (n = 0, e = pc->p->exec_head; e; e = e->next) + if (e->param.index >= 0 && !e->param.mask) + bra_list[n++] = e; + + /* Make sure we don't have any single 32 bit instructions. */ + for (e = pc->p->exec_head, pos = 0; e; e = e->next) { + pos += is_long(e) ? 2 : 1; + + if ((pos & 1) && (!e->next || is_long(e->next))) { + for (i = 0; i < n; ++i) + if (bra_list[i]->param.index >= pos) + bra_list[i]->param.index += 1; + convert_to_long(pc, e); + ++pos; + } + if (e->next) + prev = e; + } + + assert(!is_immd(pc->p->exec_head)); + assert(!is_immd(pc->p->exec_tail)); + + /* last instruction must be long so it can have the end bit set */ + if (!is_long(pc->p->exec_tail)) { + convert_to_long(pc, pc->p->exec_tail); + if (prev) + convert_to_long(pc, prev); + } + assert(!(pc->p->exec_tail->inst[1] & 2)); + /* set the end-bit */ + pc->p->exec_tail->inst[1] |= 1; + + FREE(bra_list); +} + static boolean nv50_program_tx(struct nv50_program *p) { struct tgsi_parse_context parse; struct nv50_pc *pc; - unsigned k; boolean ret; pc = CALLOC_STRUCT(nv50_pc); @@ -2405,48 +2536,10 @@ nv50_program_tx(struct nv50_program *p) } } - if (p->type == PIPE_SHADER_FRAGMENT) { - struct nv50_reg out; - ctor_reg(&out, P_TEMP, -1, -1); - - for (k = 0; k < pc->result_nr * 4; k++) { - if (pc->result[k].rhw == -1) - continue; - if (pc->result[k].hw != pc->result[k].rhw) { - out.hw = pc->result[k].rhw; - emit_mov(pc, &out, &pc->result[k]); - } - if (pc->p->cfg.high_result < (pc->result[k].rhw + 1)) - pc->p->cfg.high_result = pc->result[k].rhw + 1; - } - } - - /* look for single half instructions and make them long */ - struct nv50_program_exec *e, *e_prev; - - for (k = 0, e = pc->p->exec_head, e_prev = NULL; e; e = e->next) { - if (!is_long(e)) - k++; + if (pc->p->type == PIPE_SHADER_FRAGMENT) + nv50_fp_move_results(pc); - if (!e->next || is_long(e->next)) { - if (k & 1) - convert_to_long(pc, e); - k = 0; - } - - if (e->next) - e_prev = e; - } - - if (!is_long(pc->p->exec_tail)) { - /* this may occur if moving FP results */ - assert(e_prev && !is_long(e_prev)); - convert_to_long(pc, e_prev); - convert_to_long(pc, pc->p->exec_tail); - } - - assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head)); - pc->p->exec_tail->inst[1] |= 0x00000001; + nv50_program_fixup_insns(pc); p->param_nr = pc->param_nr * 4; p->immd_nr = pc->immd_nr * 4; @@ -2558,6 +2651,17 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) if (e->param.index < 0) continue; + + if (e->param.mask == 0) { + assert(!(e->param.index & 1)); + /* seem to be 8 byte steps */ + ei = (e->param.index >> 1) + 0 /* START_ID */; + + e->inst[0] &= 0xf0000fff; + e->inst[0] |= ei << 12; + continue; + } + bs = (e->inst[1] >> 22) & 0x07; assert(bs < 2); ei = e->param.shift >> 5; -- cgit v1.2.3 From e2b8dc3e38d1efddf2ded2e47a9e3092455d0f8a Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 25 Sep 2009 10:24:40 +0200 Subject: nv50: implement BGNLOOP, BRK, ENDLOOP There's a good chance a loop won't execute correctly though since our TEMP allocation assumes programs to be executed linearly. Will fix later. --- src/gallium/drivers/nv50/nv50_program.c | 77 ++++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 2ab2ac35c2..8e66fdca49 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -90,8 +90,9 @@ struct nv50_reg { int acc; /* instruction where this reg is last read (first insn == 1) */ }; -/* arbitrary limit */ +/* arbitrary limits */ #define MAX_IF_DEPTH 4 +#define MAX_LOOP_DEPTH 4 struct nv50_pc { struct nv50_program *p; @@ -127,7 +128,9 @@ struct nv50_pc { struct nv50_program_exec *if_cond; struct nv50_program_exec *if_insn[MAX_IF_DEPTH]; struct nv50_program_exec *br_join[MAX_IF_DEPTH]; - int if_lvl; + struct nv50_program_exec *br_loop[MAX_LOOP_DEPTH]; /* for BRK branch */ + int if_lvl, loop_lvl; + unsigned loop_pos[MAX_LOOP_DEPTH]; /* current instruction and total number of insns */ unsigned insn_cur; @@ -204,6 +207,10 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) assert(0); } +/* XXX: For shaders that aren't executed linearly (e.g. shaders that + * contain loops), we need to assign all hw regs to TGSI TEMPs early, + * lest we risk temp_temps overwriting regs alloc'd "later". + */ static struct nv50_reg * alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) { @@ -1485,6 +1492,55 @@ nv50_tgsi_dst_revdep(unsigned op, int s, int c) } } +static INLINE boolean +has_pred(struct nv50_program_exec *e, unsigned cc) +{ + if (!is_long(e) || is_immd(e)) + return FALSE; + return ((e->inst[1] & 0x780) == (cc << 7)); +} + +/* on ENDIF see if we can do "@p0.neu single_op" instead of: + * join_at ENDIF + * @p0.eq bra ENDIF + * single_op + * ENDIF: nop.join + */ +static boolean +nv50_kill_branch(struct nv50_pc *pc) +{ + int lvl = pc->if_lvl; + + if (pc->if_insn[lvl]->next != pc->p->exec_tail) + return FALSE; + + /* if ccode == 'true', the BRA is from an ELSE and the predicate + * reg may no longer be valid, since we currently always use $p0 + */ + if (has_pred(pc->if_insn[lvl], 0xf)) + return FALSE; + assert(pc->if_insn[lvl] && pc->br_join[lvl]); + + /* We'll use the exec allocated for JOIN_AT (as we can't easily + * update prev's next); if exec_tail is BRK, update the pointer. + */ + if (pc->loop_lvl && pc->br_loop[pc->loop_lvl - 1] == pc->p->exec_tail) + pc->br_loop[pc->loop_lvl - 1] = pc->br_join[lvl]; + + pc->p->exec_size -= 4; /* remove JOIN_AT and BRA */ + + *pc->br_join[lvl] = *pc->p->exec_tail; + + FREE(pc->if_insn[lvl]); + FREE(pc->p->exec_tail); + + pc->p->exec_tail = pc->br_join[lvl]; + pc->p->exec_tail->next = NULL; + set_pred(pc, 0xd, 0, pc->p->exec_tail); + + return TRUE; +} + static boolean nv50_program_tx_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *inst) @@ -1554,6 +1610,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_add(pc, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_BGNLOOP: + pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size; + break; + case TGSI_OPCODE_BRK: + emit_branch(pc, -1, 0, NULL); + assert(pc->loop_lvl > 0); + pc->br_loop[pc->loop_lvl - 1] = pc->p->exec_tail; + break; case TGSI_OPCODE_CEIL: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) @@ -1609,6 +1673,10 @@ nv50_program_tx_insn(struct nv50_pc *pc, case TGSI_OPCODE_ENDIF: pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size; + /* try to replace branch over 1 insn with a predicated insn */ + if (nv50_kill_branch(pc) == TRUE) + break; + if (pc->br_join[pc->if_lvl]) { pc->br_join[pc->if_lvl]->param.index = pc->p->exec_size; pc->br_join[pc->if_lvl] = NULL; @@ -1619,6 +1687,11 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_nop(pc); pc->p->exec_tail->inst[1] |= 2; break; + case TGSI_OPCODE_ENDLOOP: + emit_branch(pc, -1, 0, NULL); + pc->p->exec_tail->param.index = pc->loop_pos[--pc->loop_lvl]; + pc->br_loop[pc->loop_lvl]->param.index = pc->p->exec_size; + break; case TGSI_OPCODE_EX2: emit_preex2(pc, temp, src[0][0]); emit_flop(pc, 6, brdc, temp); -- cgit v1.2.3 From ef6805710d5c1b139695704051754f39654c8a2e Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 25 Sep 2009 10:33:02 +0200 Subject: nv50: fix CEIL and TRUNC Separated the integer rounding mode flag for cvt. --- src/gallium/drivers/nv50/nv50_program.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 8e66fdca49..2ad8cdf65c 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -825,7 +825,8 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) #define CVT_F32_U32 0x64 #define CVT_S32_F32 0x8c #define CVT_S32_S32 0x0c -#define CVT_F32_F32_ROP 0xcc +#define CVT_NEG 0x20 +#define CVT_RI 0x08 static void emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, @@ -933,7 +934,7 @@ map_tgsi_setop_cc(unsigned op) static INLINE void emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_F32_F32_ROP); + emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_F32_F32 | CVT_RI); } static void @@ -1623,7 +1624,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask & (1 << c))) continue; emit_cvt(pc, dst[c], src[0][c], -1, - CVTOP_CEIL, CVT_F32_F32); + CVTOP_CEIL, CVT_F32_F32 | CVT_RI); } break; case TGSI_OPCODE_COS: @@ -1843,7 +1844,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask & (1 << c))) continue; emit_cvt(pc, dst[c], src[0][c], -1, - CVTOP_TRUNC, CVT_F32_F32); + CVTOP_TRUNC, CVT_F32_F32 | CVT_RI); } break; case TGSI_OPCODE_XPD: -- cgit v1.2.3 From 001daf78c87b2d194b51bc650bf9f917d4224e31 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 24 Sep 2009 17:24:48 +0200 Subject: nv50: RCP and RSQ cannot load from VP inputs --- src/gallium/drivers/nv50/nv50_program.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 2ad8cdf65c..272fd8d90b 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -573,6 +573,22 @@ check_swap_src_0_1(struct nv50_pc *pc, return FALSE; } +static void +set_src_0_restricted(struct nv50_pc *pc, struct nv50_reg *src, + struct nv50_program_exec *e) +{ + struct nv50_reg *temp; + + if (src->type != P_TEMP) { + temp = temp_temp(pc); + emit_mov(pc, temp, src); + src = temp; + } + + alloc_reg(pc, src); + e->inst[0] |= (src->hw << 9); +} + static void set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) { @@ -775,7 +791,11 @@ emit_flop(struct nv50_pc *pc, unsigned sub, } set_dst(pc, dst, e); - set_src_0(pc, src, e); + + if (sub == 0 || sub == 2) + set_src_0_restricted(pc, src, e); + else + set_src_0(pc, src, e); emit(pc, e); } -- cgit v1.2.3 From 513cadf5afad18516f7299ade246f59d520753d0 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 24 Sep 2009 17:37:08 +0200 Subject: nv50: actually enable view volume clipping Until now, only primitives wholly outside the view volume were not drawn. This was only visibile when using a viewport smaller than the window size, naturally. --- src/gallium/drivers/nv50/nv50_state_validate.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 5a3559ed18..4ed76973c4 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -312,7 +312,7 @@ scissor_uptodate: goto viewport_uptodate; nv50->state.viewport_bypass = bypass; - so = so_new(12, 0); + so = so_new(14, 0); if (!bypass) { so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE(0), 3); so_data (so, fui(nv50->viewport.translate[0])); @@ -325,12 +325,21 @@ scissor_uptodate: so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1); so_data (so, 1); + /* 0x0000 = remove whole primitive only (xyz) + * 0x1018 = remove whole primitive only (xy), clamp z + * 0x1080 = clip primitive (xyz) + * 0x1098 = clip primitive (xy), clamp z + */ + so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1); + so_data (so, 0x1080); /* no idea what 0f90 does */ so_method(so, tesla, 0x0f90, 1); so_data (so, 0); } else { so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1); so_data (so, 0); + so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1); + so_data (so, 0x0000); so_method(so, tesla, 0x0f90, 1); so_data (so, 1); } -- cgit v1.2.3 From 5f4f7ad965c40327f16297606ed4f425598bfc2c Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 25 Sep 2009 10:53:01 +0200 Subject: nv50: fix TEX for WriteMask not equal 0xf If you e.g. only need alpha, it ends up in the first reg, not the last, as it would when reading rgb too. --- src/gallium/drivers/nv50/nv50_program.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 272fd8d90b..576d075318 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1166,10 +1166,11 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, emit(pc, e); #if 1 - if (mask & 1) emit_mov(pc, dst[0], t[0]); - if (mask & 2) emit_mov(pc, dst[1], t[1]); - if (mask & 4) emit_mov(pc, dst[2], t[2]); - if (mask & 8) emit_mov(pc, dst[3], t[3]); + c = 0; + if (mask & 1) emit_mov(pc, dst[0], t[c++]); + if (mask & 2) emit_mov(pc, dst[1], t[c++]); + if (mask & 4) emit_mov(pc, dst[2], t[c++]); + if (mask & 8) emit_mov(pc, dst[3], t[c]); free_temp4(pc, t); #else -- cgit v1.2.3 From a0fbc01ceaef08b33f97936d8840a6f48ec1654d Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 25 Sep 2009 10:48:19 +0200 Subject: softpipe: Do not advertise support for L16 and YCBCR formats. --- src/gallium/drivers/softpipe/sp_screen.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 3c61357eba..81fb7aa20c 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -135,6 +135,9 @@ softpipe_is_format_supported( struct pipe_screen *screen, target == PIPE_TEXTURE_CUBE); switch(format) { + case PIPE_FORMAT_L16_UNORM: + case PIPE_FORMAT_YCBCR_REV: + case PIPE_FORMAT_YCBCR: case PIPE_FORMAT_DXT1_RGB: case PIPE_FORMAT_DXT1_RGBA: case PIPE_FORMAT_DXT3_RGBA: -- cgit v1.2.3 From 69c7fc128c59bf72df461dbd583bf9794d9ed34d Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 25 Sep 2009 10:57:33 +0200 Subject: softpipe: Grab fs output z from the correct file. --- src/gallium/drivers/softpipe/sp_fs_exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index c469ac6340..4076114d39 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -151,7 +151,7 @@ exec_run( const struct sp_fragment_shader *base, { uint j; for (j = 0; j < 4; j++) { - quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j]; + quad->output.depth[j] = machine->Outputs[i].xyzw[2].f[j]; } } break; -- cgit v1.2.3 From 07183b73ebafe2d1083f1c572978317768725b99 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 26 Sep 2009 16:39:13 +1000 Subject: r300g: fix texture pitch to correct value. pitch is pixels - 1, not bytes. --- src/gallium/drivers/r300/r300_texture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 7c041d17f7..e8078ea9f1 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -37,7 +37,7 @@ static void r300_setup_texture_state(struct r300_texture* tex, /* XXX */ state->format1 = r300_translate_texformat(tex->tex.format); - state->format2 = r300_texture_get_stride(tex, 0); + state->format2 = (r300_texture_get_stride(tex, 0) / tex->tex.block.size) - 1; /* Assume (somewhat foolishly) that oversized textures will * not be permitted by the state tracker. */ -- cgit v1.2.3 From 20d3c85128192b2d3f75b68f47ab9aadc2719c5a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 26 Sep 2009 18:24:34 +1000 Subject: r300g: add z16 unorm texture format --- src/gallium/drivers/r300/r300_texture.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 697669147d..78ee0f1611 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -75,6 +75,8 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) /* Z5_Y6_X5 */ case PIPE_FORMAT_R16_SNORM: return R300_EASY_TX_FORMAT(X, X, X, X, Z5Y6X5); + case PIPE_FORMAT_Z16_UNORM: + return R300_EASY_TX_FORMAT(X, X, X, X, X16); default: debug_printf("r300: Implementation error: " "Got unsupported texture format %s in %s\n", -- cgit v1.2.3 From 28f531e3fe95c9fad2bf2f09aef0343ab079bff2 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 26 Sep 2009 18:25:00 +1000 Subject: r300g: report GL1.5, enable cap bits for OQ and shadow. Its not like it works well on 1.3 so may as well reach for greater heights. Signed-off-by: Dave Airlie --- src/gallium/drivers/r300/r300_screen.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 3b5b1bbd37..8296d56840 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -101,11 +101,9 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) case PIPE_CAP_MAX_RENDER_TARGETS: return 4; case PIPE_CAP_OCCLUSION_QUERY: - /* IN THEORY */ - return 0; + return 1; case PIPE_CAP_TEXTURE_SHADOW_MAP: - /* IN THEORY */ - return 0; + return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: if (r300screen->caps->is_r500) { /* 13 == 4096x4096 */ -- cgit v1.2.3 From 1df539ce87ab38ebae67d63a353b01f4cf5edc79 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 26 Sep 2009 09:33:32 +0100 Subject: llvmpipe: Allow building with LLVM 2.6 too. --- src/gallium/drivers/llvmpipe/lp_jit.c | 5 +++++ src/gallium/drivers/llvmpipe/lp_test_format.c | 5 +++++ src/gallium/drivers/llvmpipe/lp_test_main.c | 5 +++++ 3 files changed, 15 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index b4a22ff4a9..f7111c1e5c 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -147,6 +147,11 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) { char *error = NULL; +#ifdef LLVM_NATIVE_ARCH + LLVMLinkInJIT(); + LLVMInitializeNativeTarget(); +#endif + screen->module = LLVMModuleCreateWithName("llvmpipe"); screen->provider = LLVMCreateModuleProviderForExistingModule(screen->module); diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index d8455e5649..7d83f899e6 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -264,6 +264,11 @@ int main(int argc, char **argv) unsigned i; int ret; +#ifdef LLVM_NATIVE_ARCH + LLVMLinkInJIT(); + LLVMInitializeNativeTarget(); +#endif + for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i) if(!test_format(&test_cases[i])) ret = 1; diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index 4592dc0b2d..f07fa256f1 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -365,6 +365,11 @@ int main(int argc, char **argv) n = atoi(argv[i]); } +#ifdef LLVM_NATIVE_ARCH + LLVMLinkInJIT(); + LLVMInitializeNativeTarget(); +#endif + if(fp) { /* Warm up the caches */ test_some(0, NULL, 100); -- cgit v1.2.3 From ec9c02187e698c26d7df3e408c1173acca9ccdd0 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 26 Sep 2009 18:38:07 +1000 Subject: r300g: add missing break in OQ emit --- src/gallium/drivers/r300/r300_emit.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index a1b36ba2ed..77ce431cdc 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -381,6 +381,7 @@ void r300_emit_query_end(struct r300_context* r300, OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 0), 0, RADEON_GEM_DOMAIN_GTT, 0); + break; default: debug_printf("r300: Implementation error: Chipset reports %d" " pixel pipes!\n", caps->num_frag_pipes); -- cgit v1.2.3 From 9bf85f6b95cb684d16b6035381b1f8a9c44f473f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 26 Sep 2009 18:38:39 +1000 Subject: r300g: only pass complete texture state to hw setup function No point passing things twice here, also allows more state to be setup properly. --- src/gallium/drivers/r300/r300_texture.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index e8078ea9f1..2ec07b453d 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -22,34 +22,32 @@ #include "r300_texture.h" -static void r300_setup_texture_state(struct r300_texture* tex, - unsigned width, - unsigned height, - unsigned levels) +static void r300_setup_texture_state(struct r300_texture* tex) { struct r300_texture_state* state = &tex->state; + struct pipe_texture *pt = &tex->tex; - state->format0 = R300_TX_WIDTH((width - 1) & 0x7ff) | - R300_TX_HEIGHT((height - 1) & 0x7ff) | - R300_TX_NUM_LEVELS(levels) | + state->format0 = R300_TX_WIDTH((pt->width[0] - 1) & 0x7ff) | + R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff) | + R300_TX_NUM_LEVELS(pt->last_level) | R300_TX_PITCH_EN; /* XXX */ - state->format1 = r300_translate_texformat(tex->tex.format); + state->format1 = r300_translate_texformat(pt->format); - state->format2 = (r300_texture_get_stride(tex, 0) / tex->tex.block.size) - 1; + state->format2 = (r300_texture_get_stride(tex, 0) / pt->block.size) - 1; /* Assume (somewhat foolishly) that oversized textures will * not be permitted by the state tracker. */ - if (width > 2048) { + if (pt->width[0] > 2048) { state->format2 |= R500_TXWIDTH_BIT11; } - if (height > 2048) { + if (pt->height[0] > 2048) { state->format2 |= R500_TXHEIGHT_BIT11; } debug_printf("r300: Set texture state (%dx%d, %d levels)\n", - width, height, levels); + pt->width[0], pt->height[0], pt->last_level); } /** @@ -120,8 +118,7 @@ static struct pipe_texture* r300_setup_miptree(tex); - r300_setup_texture_state(tex, template->width[0], template->height[0], - template->last_level); + r300_setup_texture_state(tex); tex->buffer = screen->buffer_create(screen, 1024, PIPE_BUFFER_USAGE_PIXEL, @@ -204,7 +201,7 @@ static struct pipe_texture* tex->stride_override = *stride; /* XXX */ - r300_setup_texture_state(tex, tex->tex.width[0], tex->tex.height[0], 0); + r300_setup_texture_state(tex); pipe_buffer_reference(&tex->buffer, buffer); -- cgit v1.2.3 From eb5dd947fbed35478784e777fe2e59564fee051b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 26 Sep 2009 19:32:46 +1000 Subject: r300g: add tx depth support in register. also enable cube/3d bits in txformat reg --- src/gallium/drivers/r300/r300_reg.h | 1 + src/gallium/drivers/r300/r300_texture.c | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 03cd219cde..3abff5db62 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -1478,6 +1478,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_PITCH_EN (1 << 31) # define R300_TX_WIDTH(x) ((x) << 0) # define R300_TX_HEIGHT(x) ((x) << 11) +# define R300_TX_DEPTH(x) ((x) << 22) # define R300_TX_NUM_LEVELS(x) ((x) << 26) #define R300_TX_FORMAT1_0 0x44C0 diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 2ec07b453d..ce60ded7ca 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -29,11 +29,18 @@ static void r300_setup_texture_state(struct r300_texture* tex) state->format0 = R300_TX_WIDTH((pt->width[0] - 1) & 0x7ff) | R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff) | + R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) | R300_TX_NUM_LEVELS(pt->last_level) | R300_TX_PITCH_EN; /* XXX */ state->format1 = r300_translate_texformat(pt->format); + if (pt->target == PIPE_TEXTURE_CUBE) { + state->format1 |= R300_TX_FORMAT_CUBIC_MAP; + } + if (pt->target == PIPE_TEXTURE_3D) { + state->format1 |= R300_TX_FORMAT_3D; + } state->format2 = (r300_texture_get_stride(tex, 0) / pt->block.size) - 1; -- cgit v1.2.3 From a77226071f6814a53358a5d6caff685889d0e4ec Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sun, 27 Sep 2009 10:56:42 -0400 Subject: softpipe: Grab a ref when the fb is set. Nasty bug when the surface is freed and another is allocated right on top of it. The next time we set the fb state SP thinks it's the same surface and doesn't flush, and when the flush eventually happens the surface belongs to a completely different texture. --- src/gallium/drivers/softpipe/sp_context.c | 9 +++++++-- src/gallium/drivers/softpipe/sp_state_surface.c | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index e1e31ab047..94d000a5ac 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -94,12 +94,17 @@ softpipe_destroy( struct pipe_context *pipe ) softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); softpipe->quad.blend->destroy( softpipe->quad.blend ); - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { sp_destroy_tile_cache(softpipe->cbuf_cache[i]); + pipe_surface_reference(&softpipe->framebuffer.cbufs[i], NULL); + } sp_destroy_tile_cache(softpipe->zsbuf_cache); + pipe_surface_reference(&softpipe->framebuffer.zsbuf, NULL); - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { sp_destroy_tex_tile_cache(softpipe->tex_cache[i]); + pipe_texture_reference(&softpipe->texture[i], NULL); + } for (i = 0; i < Elements(softpipe->constants); i++) { if (softpipe->constants[i].buffer) { diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index c8f55c3cec..bc0e201130 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -56,7 +56,7 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, sp_flush_tile_cache(sp->cbuf_cache[i]); /* assign new */ - sp->framebuffer.cbufs[i] = fb->cbufs[i]; + pipe_surface_reference(&sp->framebuffer.cbufs[i], fb->cbufs[i]); /* update cache */ sp_tile_cache_set_surface(sp->cbuf_cache[i], fb->cbufs[i]); @@ -71,7 +71,7 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, sp_flush_tile_cache(sp->zsbuf_cache); /* assign new */ - sp->framebuffer.zsbuf = fb->zsbuf; + pipe_surface_reference(&sp->framebuffer.zsbuf, fb->zsbuf); /* update cache */ sp_tile_cache_set_surface(sp->zsbuf_cache, fb->zsbuf); -- cgit v1.2.3 From 60d72d9e45b08c14ea4195950302f93e52e03603 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Wed, 23 Sep 2009 11:53:50 -0700 Subject: i915g: Use boolean --- src/gallium/drivers/i915simple/i915_texture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 6a6c654271..1d0329817d 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -165,7 +165,7 @@ i915_scanout_layout(struct i915_texture *tex) struct pipe_texture *pt = &tex->base; if (pt->last_level > 0 || pt->block.size != 4) - return 0; + return FALSE; i915_miptree_set_level_info(tex, 0, 1, tex->base.width[0], -- cgit v1.2.3 From 973e9a774a176be3a8f0849892b568888d41e932 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Wed, 23 Sep 2009 11:57:18 -0700 Subject: i915g: Tile shared buffers as well --- src/gallium/drivers/i915simple/i915_texture.c | 47 +++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 1d0329817d..15ccc1fc73 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -191,6 +191,38 @@ i915_scanout_layout(struct i915_texture *tex) return TRUE; } +/** + * Special case to deal with shared textures. + */ +static boolean +i915_display_target_layout(struct i915_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + + if (pt->last_level > 0 || pt->block.size != 4) + return FALSE; + + /* fallback to normal textures for small textures */ + if (tex->base.width[0] < 240) + return FALSE; + + i915_miptree_set_level_info(tex, 0, 1, + tex->base.width[0], + tex->base.height[0], + 1); + i915_miptree_set_image_offset(tex, 0, 0, 0, 0); + + tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); + tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); + tex->hw_tiled = INTEL_TILE_X; + + debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, + tex->base.width[0], tex->base.height[0], pt->block.size, + tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy); + + return TRUE; +} + static void i915_miptree_layout_2d(struct i915_texture *tex) { @@ -201,6 +233,16 @@ i915_miptree_layout_2d(struct i915_texture *tex) unsigned nblocksx = pt->nblocksx[0]; unsigned nblocksy = pt->nblocksy[0]; + /* used for scanouts that need special layouts */ + if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) + if (i915_scanout_layout(tex)) + return; + + /* for shared buffers we use some very like scanout */ + if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) + if (i915_display_target_layout(tex)) + return; + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); tex->total_nblocksy = 0; @@ -351,6 +393,11 @@ i945_miptree_layout_2d(struct i915_texture *tex) if (i915_scanout_layout(tex)) return; + /* for shared buffers we use some very like scanout */ + if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) + if (i915_display_target_layout(tex)) + return; + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); /* May need to adjust pitch to accomodate the placement of -- cgit v1.2.3 From 2d71b541d7de818f4cb47a61d3a86c0ffbb6163c Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Sun, 27 Sep 2009 13:11:49 -0700 Subject: i915g: Fix warning --- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index b3a7774fd6..aee8819ed9 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -198,7 +198,7 @@ i915_vbuf_render_map_vertices(struct vbuf_render *render) struct intel_winsys *iws = i915->iws; if (i915->vbo_flushed) - debug_printf("%s bad vbo flush occured stalling on hw\n"); + debug_printf("%s bad vbo flush occured stalling on hw\n", __func__); i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE); -- cgit v1.2.3 From 48c45959ee106727fe9dd2d57bc0ca278710aab8 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Sun, 27 Sep 2009 13:12:11 -0700 Subject: i915g: Submit direct vertex buffers --- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 33 +++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index aee8819ed9..d50201642b 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -389,14 +389,43 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render, uint nr) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; if (i915_render->fallback) { draw_arrays_fallback(render, start, nr); return; } - /* JB: TODO submit direct cmds */ - draw_arrays_fallback(render, start, nr); + if (i915->dirty) + i915_update_derived(i915); + + if (i915->hardware_dirty) + i915_emit_hardware_state(i915); + + if (!BEGIN_BATCH(2, 0)) { + FLUSH_BATCH(NULL); + + /* Make sure state is re-emitted after a flush: + */ + i915_update_derived(i915); + i915_emit_hardware_state(i915); + i915->vbo_flushed = 1; + + if (!BEGIN_BATCH(2, 0)) { + assert(0); + goto out; + } + } + + OUT_BATCH(_3DPRIMITIVE | + PRIM_INDIRECT | + PRIM_INDIRECT_SEQUENTIAL | + i915_render->hwprim | + nr); + OUT_BATCH(start); /* Beginning vertex index */ + +out: + return; } /** -- cgit v1.2.3 From 225c3375fdfc4a3744c3a7a777664ef94923a2ce Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 27 Sep 2009 20:31:55 +1000 Subject: r300g: silence compiler warning --- src/gallium/drivers/r300/r300_state_derived.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 5f6b225d34..62da8e293a 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -164,7 +164,7 @@ static void r300_vs_tab_routes(struct r300_context* r300, vinfo->hwfmt[3] |= (4 << (3 * i)); } - for (i; i < texs; i++) { + for (; i < texs; i++) { draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i)); vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i); -- cgit v1.2.3 From b1252c7a342e24571ccf5fe94938bbabbdf9aa11 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 27 Sep 2009 20:34:13 +1000 Subject: r300g: rewrite RS state setup. Not 100% sure this is correct, but its more correct than what was here previous however it may require changes in the input routing for the frag shader. --- src/gallium/drivers/r300/r300_state_derived.c | 37 ++++++++++----------------- 1 file changed, 13 insertions(+), 24 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 62da8e293a..5493a098cb 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -334,48 +334,37 @@ static void r300_update_rs_block(struct r300_context* r300) struct r300_rs_block* rs = r300->rs_block; struct tgsi_shader_info* info = &r300->fs->info; int* tab = r300->vertex_info.fs_tab; - int col_count = 0, fp_offset = 0, i, memory_pos, tex_count = 0; - + int col_count = 0, fp_offset = 0, i, tex_count = 0; + int rs_tex_comp = 0; memset(rs, 0, sizeof(struct r300_rs_block)); if (r300_screen(r300->context.screen)->caps->is_r500) { for (i = 0; i < info->num_inputs; i++) { assert(tab[i] != -1); - memory_pos = tab[i] * 4; switch (info->input_semantic_name[i]) { case TGSI_SEMANTIC_COLOR: rs->ip[col_count] |= - R500_RS_COL_PTR(memory_pos) | + R500_RS_COL_PTR(col_count) | R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); col_count++; break; case TGSI_SEMANTIC_GENERIC: rs->ip[tex_count] |= - R500_RS_SEL_S(memory_pos) | - R500_RS_SEL_T(memory_pos + 1) | - R500_RS_SEL_R(memory_pos + 2) | - R500_RS_SEL_Q(memory_pos + 3); + R500_RS_SEL_S(rs_tex_comp) | + R500_RS_SEL_T(rs_tex_comp + 1) | + R500_RS_SEL_R(rs_tex_comp + 2) | + R500_RS_SEL_Q(rs_tex_comp + 3); tex_count++; + rs_tex_comp += 4; break; default: break; } } - if (col_count == 0) { - rs->ip[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001); - } - - if (tex_count == 0) { - rs->ip[0] |= - R500_RS_SEL_S(R500_RS_IP_PTR_K0) | - R500_RS_SEL_T(R500_RS_IP_PTR_K0) | - R500_RS_SEL_R(R500_RS_IP_PTR_K0) | - R500_RS_SEL_Q(R500_RS_IP_PTR_K1); - } - /* Rasterize at least one color, or bad things happen. */ if ((col_count == 0) && (tex_count == 0)) { + rs->ip[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001); col_count++; } @@ -393,22 +382,22 @@ static void r300_update_rs_block(struct r300_context* r300) } else { for (i = 0; i < info->num_inputs; i++) { assert(tab[i] != -1); - memory_pos = tab[i] * 4; switch (info->input_semantic_name[i]) { case TGSI_SEMANTIC_COLOR: rs->ip[col_count] |= - R300_RS_COL_PTR(memory_pos) | + R300_RS_COL_PTR(col_count) | R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); col_count++; break; case TGSI_SEMANTIC_GENERIC: rs->ip[tex_count] |= - R300_RS_TEX_PTR(memory_pos) | + R300_RS_TEX_PTR(rs_tex_count) | R300_RS_SEL_S(R300_RS_SEL_C0) | R300_RS_SEL_T(R300_RS_SEL_C1) | R300_RS_SEL_R(R300_RS_SEL_C2) | R300_RS_SEL_Q(R300_RS_SEL_C3); tex_count++; + rs_tex_count+=4; break; default: break; @@ -445,7 +434,7 @@ static void r300_update_rs_block(struct r300_context* r300) } } - rs->count = (tex_count * 4) | (col_count << R300_IC_COUNT_SHIFT) | + rs->count = (rs_tex_comp) | (col_count << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; rs->inst_count = MAX2(MAX2(col_count - 1, tex_count - 1), 0); -- cgit v1.2.3 From d85fe842b86aef522e0e749d9360d85052a6e8cc Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 28 Sep 2009 06:42:25 +1000 Subject: r300g: fix r300 rs path --- src/gallium/drivers/r300/r300_state_derived.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 5493a098cb..2bbbcdfd9c 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -391,13 +391,13 @@ static void r300_update_rs_block(struct r300_context* r300) break; case TGSI_SEMANTIC_GENERIC: rs->ip[tex_count] |= - R300_RS_TEX_PTR(rs_tex_count) | + R300_RS_TEX_PTR(rs_tex_comp) | R300_RS_SEL_S(R300_RS_SEL_C0) | R300_RS_SEL_T(R300_RS_SEL_C1) | R300_RS_SEL_R(R300_RS_SEL_C2) | R300_RS_SEL_Q(R300_RS_SEL_C3); tex_count++; - rs_tex_count+=4; + rs_tex_comp+=4; break; default: break; -- cgit v1.2.3 From a6eb593072298d60286f49a09e6d3a849b684dfb Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sun, 27 Sep 2009 22:15:15 +0200 Subject: r300g: add some debugging info --- src/gallium/drivers/r300/r300_cs.h | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 0a7e470363..883f0a02dc 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -68,11 +68,17 @@ } while (0) #define OUT_CS(value) do { \ + if (VERY_VERBOSE_CS || VERY_VERBOSE_REGISTERS) { \ + DBG(cs_context_copy, DBG_CS, "r300: writing %08x\n", value); \ + } \ cs_winsys->write_cs_dword(cs_winsys, (value)); \ cs_count--; \ } while (0) #define OUT_CS_32F(value) do { \ + if (VERY_VERBOSE_CS || VERY_VERBOSE_REGISTERS) { \ + DBG(cs_context_copy, DBG_CS, "r300: writing %f\n", value); \ + } \ cs_winsys->write_cs_dword(cs_winsys, fui(value)); \ cs_count--; \ } while (0) @@ -82,8 +88,9 @@ DBG(cs_context_copy, DBG_CS, "r300: writing 0x%08X to register 0x%04X\n", \ value, register); \ assert(register); \ - OUT_CS(CP_PACKET0(register, 0)); \ - OUT_CS(value); \ + cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0(register, 0)); \ + cs_winsys->write_cs_dword(cs_winsys, value); \ + cs_count -= 2; \ } while (0) /* Note: This expects count to be the number of registers, @@ -93,7 +100,8 @@ DBG(cs_context_copy, DBG_CS, "r300: writing register sequence of %d to 0x%04X\n", \ count, register); \ assert(register); \ - OUT_CS(CP_PACKET0(register, ((count) - 1))); \ + cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1))); \ + cs_count--; \ } while (0) #define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \ @@ -101,9 +109,9 @@ "domains (%d, %d, %d)\n", \ bo, offset, rd, wd, flags); \ assert(bo); \ - OUT_CS(offset); \ + cs_winsys->write_cs_dword(cs_winsys, offset); \ cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \ - cs_count -= 2; \ + cs_count -= 3; \ } while (0) #define END_CS do { \ @@ -131,24 +139,26 @@ DBG(cs_context_copy, DBG_CS, "r300: writing data sequence of %d to 0x%04X\n", \ count, register); \ assert(register); \ - OUT_CS(CP_PACKET0(register, ((count) - 1)) | RADEON_ONE_REG_WR); \ + cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1)) | RADEON_ONE_REG_WR); \ + cs_count--; \ } while (0) #define CP_PACKET3(op, count) \ (RADEON_CP_PACKET3 | (op) | ((count) << 16)) #define OUT_CS_PKT3(op, count) do { \ - OUT_CS(CP_PACKET3(op, count)); \ + cs_winsys->write_cs_dword(cs_winsys, CP_PACKET3(op, count)); \ + cs_count--; \ } while (0) #define OUT_CS_INDEX_RELOC(bo, offset, count, rd, wd, flags) do { \ DBG(cs_context_copy, DBG_CS, "r300: writing relocation for index buffer %p," \ "offset %d\n", bo, offset); \ assert(bo); \ - OUT_CS(offset); \ - OUT_CS(count); \ + cs_winsys->write_cs_dword(cs_winsys, offset); \ + cs_winsys->write_cs_dword(cs_winsys, count); \ cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \ - cs_count -= 2; \ + cs_count -= 4; \ } while (0) #endif /* R300_CS_H */ -- cgit v1.2.3 From 8c8b77a5f3ec1dac0bddc98da3ccbb64f58f22e0 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sun, 27 Sep 2009 22:18:49 +0200 Subject: r300g: plug memory leak --- src/gallium/drivers/r300/r300_render.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 737396d8d9..16f6404012 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -81,6 +81,7 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render, if (size + r300render->vbo_offset > r300render->vbo_size) { + pipe_buffer_reference(&r300->vbo, NULL); r300render->vbo = pipe_buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX, @@ -129,7 +130,6 @@ static void r300_render_release_vertices(struct vbuf_render* render) r300render->vbo_offset += r300render->vbo_max_used; r300render->vbo_max_used = 0; - r300->vbo = NULL; } static boolean r300_render_set_primitive(struct vbuf_render* render, -- cgit v1.2.3 From bedc6b7bdff40156b66cb2473c47512e5c95bdab Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sun, 27 Sep 2009 22:20:41 +0200 Subject: r300g: add some assertions Not sure why we are getting a shader with two inputs with position semantic, but we don't know how to handle it correctly so it's better to stop the app than lock the machine. --- src/gallium/drivers/r300/r300_state_derived.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 2bbbcdfd9c..083861a071 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -55,6 +55,7 @@ static void r300_vs_tab_routes(struct r300_context* r300, for (i = 0; i < info->num_inputs; i++) { switch (info->input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: + assert(pos == FALSE); pos = TRUE; tab[i] = 0; break; @@ -63,10 +64,12 @@ static void r300_vs_tab_routes(struct r300_context* r300, cols++; break; case TGSI_SEMANTIC_PSIZE: + assert(psize == FALSE); psize = TRUE; tab[i] = 15; break; case TGSI_SEMANTIC_FOG: + assert(fog == FALSE); fog = TRUE; /* Fall through */ case TGSI_SEMANTIC_GENERIC: -- cgit v1.2.3 From 98f6bea1685957fe9261e50f8a56f7dcb34f9b38 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sun, 27 Sep 2009 22:28:46 +0200 Subject: r300g: don't force vertex position for HW TCL path It could be generated by vertex shader. --- src/gallium/drivers/r300/r300_state_derived.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 083861a071..ed5dc1b9ff 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -128,7 +128,9 @@ static void r300_vs_tab_routes(struct r300_context* r300, vinfo->hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */ - if (!pos) { + /* We need to add vertex position attribute only for SW TCL case, + * for HW TCL case it could be generated by vertex shader */ + if (!pos && !r300screen->caps->has_tcl) { debug_printf("r300: Forcing vertex position attribute emit...\n"); /* Make room for the position attribute * at the beginning of the tab. */ -- cgit v1.2.3 From 540039887ac19b5fdd099ccaad6b44b5db973c25 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sun, 27 Sep 2009 22:30:45 +0200 Subject: r300g: fix erroneous condition --- src/gallium/drivers/r300/r300_state_derived.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index ed5dc1b9ff..5026afc830 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -50,7 +50,7 @@ static void r300_vs_tab_routes(struct r300_context* r300, assert(info->num_inputs <= 16); - if (!r300screen->caps->has_tcl || !r300->rs_state->enable_vte) + if (!r300screen->caps->has_tcl) { for (i = 0; i < info->num_inputs; i++) { switch (info->input_semantic_name[i]) { -- cgit v1.2.3 From f547472bfa0a797adacc2a7688b4c1ba65381a80 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sun, 27 Sep 2009 19:49:06 -0400 Subject: g3dvl: pipe_video_context interface, softpipe impl, auxiliary libs --- configs/default | 2 +- configure.ac | 2 +- src/gallium/SConscript | 1 + src/gallium/auxiliary/vl/Makefile | 12 + src/gallium/auxiliary/vl/SConscript | 12 + src/gallium/auxiliary/vl/vl_bitstream_parser.c | 144 ++ src/gallium/auxiliary/vl/vl_bitstream_parser.h | 36 + src/gallium/auxiliary/vl/vl_compositor.c | 590 ++++++++ src/gallium/auxiliary/vl/vl_compositor.h | 47 + src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 1662 ++++++++++++++++++++++ src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h | 93 ++ src/gallium/auxiliary/vl/vl_shader_build.c | 215 +++ src/gallium/auxiliary/vl/vl_shader_build.h | 61 + src/gallium/drivers/softpipe/Makefile | 3 +- src/gallium/drivers/softpipe/SConscript | 3 +- src/gallium/drivers/softpipe/sp_texture.c | 56 + src/gallium/drivers/softpipe/sp_texture.h | 16 + src/gallium/drivers/softpipe/sp_video_context.c | 273 ++++ src/gallium/drivers/softpipe/sp_video_context.h | 30 + src/gallium/include/pipe/p_defines.h | 24 + src/gallium/include/pipe/p_format.h | 18 + src/gallium/include/pipe/p_screen.h | 16 +- src/gallium/include/pipe/p_video_context.h | 92 ++ src/gallium/include/pipe/p_video_state.h | 158 ++ 24 files changed, 3561 insertions(+), 5 deletions(-) create mode 100644 src/gallium/auxiliary/vl/Makefile create mode 100644 src/gallium/auxiliary/vl/SConscript create mode 100644 src/gallium/auxiliary/vl/vl_bitstream_parser.c create mode 100644 src/gallium/auxiliary/vl/vl_bitstream_parser.h create mode 100644 src/gallium/auxiliary/vl/vl_compositor.c create mode 100644 src/gallium/auxiliary/vl/vl_compositor.h create mode 100644 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c create mode 100644 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h create mode 100644 src/gallium/auxiliary/vl/vl_shader_build.c create mode 100644 src/gallium/auxiliary/vl/vl_shader_build.h create mode 100644 src/gallium/drivers/softpipe/sp_video_context.c create mode 100644 src/gallium/drivers/softpipe/sp_video_context.h create mode 100644 src/gallium/include/pipe/p_video_context.h create mode 100644 src/gallium/include/pipe/p_video_state.h (limited to 'src/gallium/drivers') diff --git a/configs/default b/configs/default index cb3ca1046f..f1e2aa3ef5 100644 --- a/configs/default +++ b/configs/default @@ -94,7 +94,7 @@ EGL_DRIVERS_DIRS = demo # Gallium directories and GALLIUM_DIRS = auxiliary drivers state_trackers -GALLIUM_AUXILIARY_DIRS = rbug draw translate cso_cache pipebuffer tgsi sct rtasm util indices +GALLIUM_AUXILIARY_DIRS = rbug draw translate cso_cache pipebuffer tgsi sct rtasm util indices vl GALLIUM_AUXILIARIES = $(foreach DIR,$(GALLIUM_AUXILIARY_DIRS),$(TOP)/src/gallium/auxiliary/$(DIR)/lib$(DIR).a) GALLIUM_DRIVERS_DIRS = softpipe i915simple failover trace identity GALLIUM_DRIVERS = $(foreach DIR,$(GALLIUM_DRIVERS_DIRS),$(TOP)/src/gallium/drivers/$(DIR)/lib$(DIR).a) diff --git a/configure.ac b/configure.ac index 2881bb6bc2..143fd31a02 100644 --- a/configure.ac +++ b/configure.ac @@ -417,7 +417,7 @@ WINDOW_SYSTEM="" GALLIUM_DIRS="auxiliary drivers state_trackers" GALLIUM_WINSYS_DIRS="" GALLIUM_WINSYS_DRM_DIRS="" -GALLIUM_AUXILIARY_DIRS="rbug draw translate cso_cache pipebuffer tgsi sct rtasm util indices" +GALLIUM_AUXILIARY_DIRS="rbug draw translate cso_cache pipebuffer tgsi sct rtasm util indices vl" GALLIUM_DRIVERS_DIRS="softpipe failover trace identity" GALLIUM_STATE_TRACKERS_DIRS="" diff --git a/src/gallium/SConscript b/src/gallium/SConscript index 89c69d7205..8be84cddbe 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -23,6 +23,7 @@ SConscript([ 'auxiliary/pipebuffer/SConscript', 'auxiliary/indices/SConscript', 'auxiliary/rbug/SConscript', + 'auxiliary/vl/SConscript', ]) for driver in env['drivers']: diff --git a/src/gallium/auxiliary/vl/Makefile b/src/gallium/auxiliary/vl/Makefile new file mode 100644 index 0000000000..71bfb937ad --- /dev/null +++ b/src/gallium/auxiliary/vl/Makefile @@ -0,0 +1,12 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = vl + +C_SOURCES = \ + vl_bitstream_parser.c \ + vl_mpeg12_mc_renderer.c \ + vl_compositor.c \ + vl_shader_build.c + +include ../../Makefile.template diff --git a/src/gallium/auxiliary/vl/SConscript b/src/gallium/auxiliary/vl/SConscript new file mode 100644 index 0000000000..eb50940c35 --- /dev/null +++ b/src/gallium/auxiliary/vl/SConscript @@ -0,0 +1,12 @@ +Import('*') + +vl = env.ConvenienceLibrary( + target = 'vl', + source = [ + 'vl_bitstream_parser.c', + 'vl_mpeg12_mc_renderer.c', + 'vl_compositor.c', + 'vl_shader_build.c', + ]) + +auxiliaries.insert(0, vl) diff --git a/src/gallium/auxiliary/vl/vl_bitstream_parser.c b/src/gallium/auxiliary/vl/vl_bitstream_parser.c new file mode 100644 index 0000000000..356faa1348 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_bitstream_parser.c @@ -0,0 +1,144 @@ +#include "vl_bitstream_parser.h" +#include +#include +#include + +static unsigned +grab_bits(unsigned cursor, unsigned how_many_bits, unsigned bitstream_elt) +{ + unsigned excess_bits = sizeof(unsigned) * CHAR_BIT - how_many_bits - cursor; + + assert(cursor < sizeof(unsigned) * CHAR_BIT); + assert(how_many_bits > 0 && how_many_bits <= sizeof(unsigned) * CHAR_BIT); + assert(cursor + how_many_bits <= sizeof(unsigned) * CHAR_BIT); + + return (bitstream_elt << excess_bits) >> (excess_bits + cursor); +} + +static unsigned +show_bits(unsigned cursor, unsigned how_many_bits, const unsigned *bitstream) +{ + unsigned cur_int = cursor / (sizeof(unsigned) * CHAR_BIT); + unsigned cur_bit = cursor % (sizeof(unsigned) * CHAR_BIT); + + assert(bitstream); + + if (cur_bit + how_many_bits > sizeof(unsigned) * CHAR_BIT) + { + return grab_bits(cur_bit, sizeof(unsigned) * CHAR_BIT - cur_bit, + bitstream[cur_int]) | + grab_bits(0, cur_bit + how_many_bits - sizeof(unsigned) * CHAR_BIT, + bitstream[cur_int + 1]) << (sizeof(unsigned) * CHAR_BIT - cur_bit); + } + else + return grab_bits(cur_bit, how_many_bits, bitstream[cur_int]); +} + +bool vl_bitstream_parser_init(struct vl_bitstream_parser *parser, + unsigned num_bitstreams, + const void **bitstreams, + const unsigned *sizes) +{ + assert(parser); + assert(num_bitstreams); + assert(bitstreams); + assert(sizes); + + parser->num_bitstreams = num_bitstreams; + parser->bitstreams = (const unsigned**)bitstreams; + parser->sizes = sizes; + parser->cur_bitstream = 0; + parser->cursor = 0; + + return true; +} + +void vl_bitstream_parser_cleanup(struct vl_bitstream_parser *parser) +{ + assert(parser); +} + +unsigned +vl_bitstream_parser_get_bits(struct vl_bitstream_parser *parser, + unsigned how_many_bits) +{ + unsigned bits; + + assert(parser); + + bits = vl_bitstream_parser_show_bits(parser, how_many_bits); + + vl_bitstream_parser_forward(parser, how_many_bits); + + return bits; +} + +unsigned +vl_bitstream_parser_show_bits(struct vl_bitstream_parser *parser, + unsigned how_many_bits) +{ + unsigned bits = 0; + unsigned shift = 0; + unsigned cursor; + unsigned cur_bitstream; + + assert(parser); + + cursor = parser->cursor; + cur_bitstream = parser->cur_bitstream; + + while (1) + { + unsigned bits_left = parser->sizes[cur_bitstream] * CHAR_BIT - cursor; + unsigned bits_to_show = how_many_bits > bits_left ? bits_left : how_many_bits; + + bits |= show_bits(cursor, bits_to_show, + parser->bitstreams[cur_bitstream]) << shift; + + if (how_many_bits > bits_to_show) + { + how_many_bits -= bits_to_show; + cursor = 0; + ++cur_bitstream; + shift += bits_to_show; + } + else + break; + } + + return bits; +} + +void vl_bitstream_parser_forward(struct vl_bitstream_parser *parser, + unsigned how_many_bits) +{ + assert(parser); + assert(how_many_bits); + + parser->cursor += how_many_bits; + + while (parser->cursor > parser->sizes[parser->cur_bitstream] * CHAR_BIT) + { + parser->cursor -= parser->sizes[parser->cur_bitstream++] * CHAR_BIT; + assert(parser->cur_bitstream < parser->num_bitstreams); + } +} + +void vl_bitstream_parser_rewind(struct vl_bitstream_parser *parser, + unsigned how_many_bits) +{ + signed c; + + assert(parser); + assert(how_many_bits); + + c = parser->cursor - how_many_bits; + + while (c < 0) + { + c += parser->sizes[parser->cur_bitstream--] * CHAR_BIT; + assert(parser->cur_bitstream < parser->num_bitstreams); + } + + parser->cursor = (unsigned)c; +} diff --git a/src/gallium/auxiliary/vl/vl_bitstream_parser.h b/src/gallium/auxiliary/vl/vl_bitstream_parser.h new file mode 100644 index 0000000000..46bebf470f --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_bitstream_parser.h @@ -0,0 +1,36 @@ +#ifndef vl_bitstream_parser_h +#define vl_bitstream_parser_h + +#include + +struct vl_bitstream_parser +{ + unsigned num_bitstreams; + const unsigned **bitstreams; + const unsigned *sizes; + unsigned cur_bitstream; + unsigned cursor; +}; + +bool vl_bitstream_parser_init(struct vl_bitstream_parser *parser, + unsigned num_bitstreams, + const void **bitstreams, + const unsigned *sizes); + +void vl_bitstream_parser_cleanup(struct vl_bitstream_parser *parser); + +unsigned +vl_bitstream_parser_get_bits(struct vl_bitstream_parser *parser, + unsigned how_many_bits); + +unsigned +vl_bitstream_parser_show_bits(struct vl_bitstream_parser *parser, + unsigned how_many_bits); + +void vl_bitstream_parser_forward(struct vl_bitstream_parser *parser, + unsigned how_many_bits); + +void vl_bitstream_parser_rewind(struct vl_bitstream_parser *parser, + unsigned how_many_bits); + +#endif /* vl_bitstream_parser_h */ diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c new file mode 100644 index 0000000000..0894421c0b --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -0,0 +1,590 @@ +#include "vl_compositor.h" +#include +#include +#include +#include +#include +#include +#include "vl_shader_build.h" + +struct vertex2f +{ + float x, y; +}; + +struct vertex4f +{ + float x, y, z, w; +}; + +struct vertex_shader_consts +{ + struct vertex4f dst_scale; + struct vertex4f dst_trans; + struct vertex4f src_scale; + struct vertex4f src_trans; +}; + +struct fragment_shader_consts +{ + struct vertex4f bias; + float matrix[16]; +}; + +/* + * Represents 2 triangles in a strip in normalized coords. + * Used to render the surface onto the frame buffer. + */ +static const struct vertex2f surface_verts[4] = +{ + {0.0f, 0.0f}, + {0.0f, 1.0f}, + {1.0f, 0.0f}, + {1.0f, 1.0f} +}; + +/* + * Represents texcoords for the above. We can use the position values directly. + * TODO: Duplicate these in the shader, no need to create a buffer. + */ +static const struct vertex2f *surface_texcoords = surface_verts; + +/* + * Identity color conversion constants, for debugging + */ +static const struct fragment_shader_consts identity = +{ + { + 0.0f, 0.0f, 0.0f, 0.0f + }, + { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [16,235] + */ +static const struct fragment_shader_consts bt_601 = +{ + { + 0.0f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.0f, 0.0f, 1.371f, 0.0f, + 1.0f, -0.336f, -0.698f, 0.0f, + 1.0f, 1.732f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [0,255] + */ +static const struct fragment_shader_consts bt_601_full = +{ + { + 0.062745098f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.164f, 0.0f, 1.596f, 0.0f, + 1.164f, -0.391f, -0.813f, 0.0f, + 1.164f, 2.018f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [16,235] + */ +static const struct fragment_shader_consts bt_709 = +{ + { + 0.0f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.0f, 0.0f, 1.540f, 0.0f, + 1.0f, -0.183f, -0.459f, 0.0f, + 1.0f, 1.816f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [0,255] + */ +const struct fragment_shader_consts bt_709_full = +{ + { + 0.062745098f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.164f, 0.0f, 1.793f, 0.0f, + 1.164f, -0.213f, -0.534f, 0.0f, + 1.164f, 2.115f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +static void +create_vert_shader(struct vl_compositor *c) +{ + const unsigned max_tokens = 50; + + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned ti; + + assert(c); + + tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token)); + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Vertex texcoords + */ + for (unsigned i = 0; i < 2; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale vertex pos rect to destination size + * decl c1 ; Translation vector to move vertex pos rect into position + * decl c2 ; Scaling vector to scale texcoord rect to source size + * decl c3 ; Translation vector to move texcoord rect into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Vertex texcoords + */ + for (unsigned i = 0; i < 2; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * mad o0, i0, c0, c1 ; Scale and translate unit output rect to destination size and pos + * mad o1, i1, c2, c3 ; Scale and translate unit texcoord rect to source size and pos + */ + for (unsigned i = 0; i < 2; ++i) + { + inst = vl_inst4(TGSI_OPCODE_MAD, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i, TGSI_FILE_CONSTANT, i * 2, TGSI_FILE_CONSTANT, i * 2 + 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + assert(ti <= max_tokens); + + vs.tokens = tokens; + c->vertex_shader = c->pipe->create_vs_state(c->pipe, &vs); + FREE(tokens); +} + +static void +create_frag_shader(struct vl_compositor *c) +{ + const unsigned max_tokens = 50; + + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned ti; + + assert(c); + + tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token)); + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* decl i0 ; Texcoords for s0 */ + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl c0 ; Bias vector for CSC + * decl c1-c4 ; CSC matrix c1-c4 + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl s0 ; Sampler for tex containing picture to display */ + decl = vl_decl_samplers(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* tex2d t0, i0, s0 ; Read src pixel */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t0, t0, c0 ; Subtract bias vector from pixel */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix + * dp4 o0.y, t0, c2 + * dp4 o0.z, t0, c3 + */ + for (unsigned i = 0; i < 3; ++i) + { + inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1); + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + assert(ti <= max_tokens); + + fs.tokens = tokens; + c->fragment_shader = c->pipe->create_fs_state(c->pipe, &fs); + FREE(tokens); +} + +static bool +init_pipe_state(struct vl_compositor *c) +{ + struct pipe_sampler_state sampler; + + assert(c); + + c->fb_state.nr_cbufs = 1; + c->fb_state.zsbuf = NULL; + + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + /*sampler.prefilter = ;*/ + /*sampler.lod_bias = ;*/ + /*sampler.min_lod = ;*/ + /*sampler.max_lod = ;*/ + /*sampler.border_color[i] = ;*/ + /*sampler.max_anisotropy = ;*/ + c->sampler = c->pipe->create_sampler_state(c->pipe, &sampler); + + return true; +} + +static void cleanup_pipe_state(struct vl_compositor *c) +{ + assert(c); + + c->pipe->delete_sampler_state(c->pipe, c->sampler); +} + +static bool +init_shaders(struct vl_compositor *c) +{ + assert(c); + + create_vert_shader(c); + create_frag_shader(c); + + return true; +} + +static void cleanup_shaders(struct vl_compositor *c) +{ + assert(c); + + c->pipe->delete_vs_state(c->pipe, c->vertex_shader); + c->pipe->delete_fs_state(c->pipe, c->fragment_shader); +} + +static bool +init_buffers(struct vl_compositor *c) +{ + assert(c); + + /* + * Create our vertex buffer and vertex buffer element + * VB contains 4 vertices that render a quad covering the entire window + * to display a rendered surface + * Quad is rendered as a tri strip + */ + c->vertex_bufs[0].stride = sizeof(struct vertex2f); + c->vertex_bufs[0].max_index = 3; + c->vertex_bufs[0].buffer_offset = 0; + c->vertex_bufs[0].buffer = pipe_buffer_create + ( + c->pipe->screen, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vertex2f) * 4 + ); + + memcpy + ( + pipe_buffer_map(c->pipe->screen, c->vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + surface_verts, + sizeof(struct vertex2f) * 4 + ); + + pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[0].buffer); + + c->vertex_elems[0].src_offset = 0; + c->vertex_elems[0].vertex_buffer_index = 0; + c->vertex_elems[0].nr_components = 2; + c->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* + * Create our texcoord buffer and texcoord buffer element + * Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices + */ + c->vertex_bufs[1].stride = sizeof(struct vertex2f); + c->vertex_bufs[1].max_index = 3; + c->vertex_bufs[1].buffer_offset = 0; + c->vertex_bufs[1].buffer = pipe_buffer_create + ( + c->pipe->screen, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vertex2f) * 4 + ); + + memcpy + ( + pipe_buffer_map(c->pipe->screen, c->vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + surface_texcoords, + sizeof(struct vertex2f) * 4 + ); + + pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[1].buffer); + + c->vertex_elems[1].src_offset = 0; + c->vertex_elems[1].vertex_buffer_index = 1; + c->vertex_elems[1].nr_components = 2; + c->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* + * Create our vertex shader's constant buffer + * Const buffer contains scaling and translation vectors + */ + c->vs_const_buf.buffer = pipe_buffer_create + ( + c->pipe->screen, + 1, + PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD, + sizeof(struct vertex_shader_consts) + ); + + /* + * Create our fragment shader's constant buffer + * Const buffer contains the color conversion matrix and bias vectors + */ + c->fs_const_buf.buffer = pipe_buffer_create + ( + c->pipe->screen, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + sizeof(struct fragment_shader_consts) + ); + + /* + * TODO: Refactor this into a seperate function, + * allow changing the CSC matrix at runtime to switch between regular & full versions + */ + memcpy + ( + pipe_buffer_map(c->pipe->screen, c->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + &bt_601_full, + sizeof(struct fragment_shader_consts) + ); + + pipe_buffer_unmap(c->pipe->screen, c->fs_const_buf.buffer); + + return true; +} + +static void +cleanup_buffers(struct vl_compositor *c) +{ + assert(c); + + for (unsigned i = 0; i < 2; ++i) + pipe_buffer_reference(&c->vertex_bufs[i].buffer, NULL); + + pipe_buffer_reference(&c->vs_const_buf.buffer, NULL); + pipe_buffer_reference(&c->fs_const_buf.buffer, NULL); +} + +bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe) +{ + assert(compositor); + + memset(compositor, 0, sizeof(struct vl_compositor)); + + compositor->pipe = pipe; + + if (!init_pipe_state(compositor)) + return false; + if (!init_shaders(compositor)) + { + cleanup_pipe_state(compositor); + return false; + } + if (!init_buffers(compositor)) + { + cleanup_shaders(compositor); + cleanup_pipe_state(compositor); + return false; + } + + return true; +} + +void vl_compositor_cleanup(struct vl_compositor *compositor) +{ + assert(compositor); + + cleanup_buffers(compositor); + cleanup_shaders(compositor); + cleanup_pipe_state(compositor); +} + +void vl_compositor_render(struct vl_compositor *compositor, + /*struct pipe_texture *backround, + struct pipe_video_rect *backround_area,*/ + struct pipe_texture *src_surface, + enum pipe_mpeg12_picture_type picture_type, + /*unsigned num_past_surfaces, + struct pipe_texture *past_surfaces, + unsigned num_future_surfaces, + struct pipe_texture *future_surfaces,*/ + struct pipe_video_rect *src_area, + struct pipe_texture *dst_surface, + struct pipe_video_rect *dst_area, + /*unsigned num_layers, + struct pipe_texture *layers, + struct pipe_video_rect *layer_src_areas, + struct pipe_video_rect *layer_dst_areas*/ + struct pipe_fence_handle **fence) +{ + struct vertex_shader_consts *vs_consts; + + assert(compositor); + assert(src_surface); + assert(src_area); + assert(dst_surface); + assert(dst_area); + assert(picture_type == PIPE_MPEG12_PICTURE_TYPE_FRAME); + + compositor->fb_state.width = dst_surface->width[0]; + compositor->fb_state.height = dst_surface->height[0]; + compositor->fb_state.cbufs[0] = compositor->pipe->screen->get_tex_surface + ( + compositor->pipe->screen, + dst_surface, + 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE + ); + + compositor->viewport.scale[0] = compositor->fb_state.width; + compositor->viewport.scale[1] = compositor->fb_state.height; + compositor->viewport.scale[2] = 1; + compositor->viewport.scale[3] = 1; + compositor->viewport.translate[0] = 0; + compositor->viewport.translate[1] = 0; + compositor->viewport.translate[2] = 0; + compositor->viewport.translate[3] = 0; + + compositor->pipe->set_framebuffer_state(compositor->pipe, &compositor->fb_state); + compositor->pipe->set_viewport_state(compositor->pipe, &compositor->viewport); + compositor->pipe->bind_sampler_states(compositor->pipe, 1, &compositor->sampler); + compositor->pipe->set_sampler_textures(compositor->pipe, 1, &src_surface); + compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader); + compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader); + compositor->pipe->set_vertex_buffers(compositor->pipe, 2, compositor->vertex_bufs); + compositor->pipe->set_vertex_elements(compositor->pipe, 2, compositor->vertex_elems); + compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, &compositor->vs_const_buf); + compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, &compositor->fs_const_buf); + + vs_consts = pipe_buffer_map + ( + compositor->pipe->screen, + compositor->vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD + ); + + vs_consts->dst_scale.x = dst_area->w / (float)compositor->fb_state.cbufs[0]->width; + vs_consts->dst_scale.y = dst_area->h / (float)compositor->fb_state.cbufs[0]->height; + vs_consts->dst_scale.z = 1; + vs_consts->dst_scale.w = 1; + vs_consts->dst_trans.x = dst_area->x / (float)compositor->fb_state.cbufs[0]->width; + vs_consts->dst_trans.y = dst_area->y / (float)compositor->fb_state.cbufs[0]->height; + vs_consts->dst_trans.z = 0; + vs_consts->dst_trans.w = 0; + + vs_consts->src_scale.x = src_area->w / (float)src_surface->width[0]; + vs_consts->src_scale.y = src_area->h / (float)src_surface->height[0]; + vs_consts->src_scale.z = 1; + vs_consts->src_scale.w = 1; + vs_consts->src_trans.x = src_area->x / (float)src_surface->width[0]; + vs_consts->src_trans.y = src_area->y / (float)src_surface->height[0]; + vs_consts->src_trans.z = 0; + vs_consts->src_trans.w = 0; + + pipe_buffer_unmap(compositor->pipe->screen, compositor->vs_const_buf.buffer); + + compositor->pipe->draw_arrays(compositor->pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); + compositor->pipe->flush(compositor->pipe, PIPE_FLUSH_RENDER_CACHE, fence); + + pipe_surface_reference(&compositor->fb_state.cbufs[0], NULL); +} diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h new file mode 100644 index 0000000000..2af41e1981 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_compositor.h @@ -0,0 +1,47 @@ +#ifndef vl_compositor_h +#define vl_compositor_h + +#include +#include +#include + +struct pipe_context; +struct pipe_texture; + +struct vl_compositor +{ + struct pipe_context *pipe; + + struct pipe_framebuffer_state fb_state; + void *sampler; + void *vertex_shader; + void *fragment_shader; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_bufs[2]; + struct pipe_vertex_element vertex_elems[2]; + struct pipe_constant_buffer vs_const_buf, fs_const_buf; +}; + +bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe); + +void vl_compositor_cleanup(struct vl_compositor *compositor); + +void vl_compositor_render(struct vl_compositor *compositor, + /*struct pipe_texture *backround, + struct pipe_video_rect *backround_area,*/ + struct pipe_texture *src_surface, + enum pipe_mpeg12_picture_type picture_type, + /*unsigned num_past_surfaces, + struct pipe_texture *past_surfaces, + unsigned num_future_surfaces, + struct pipe_texture *future_surfaces,*/ + struct pipe_video_rect *src_area, + struct pipe_texture *dst_surface, + struct pipe_video_rect *dst_area, + /*unsigned num_layers, + struct pipe_texture *layers, + struct pipe_video_rect *layer_src_areas, + struct pipe_video_rect *layer_dst_areas,*/ + struct pipe_fence_handle **fence); + +#endif /* vl_compositor_h */ diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c new file mode 100644 index 0000000000..7e73c5ced9 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -0,0 +1,1662 @@ +#include "vl_mpeg12_mc_renderer.h" +#include +#include +#include +#include +#include +#include +#include +#include "vl_shader_build.h" + +#define DEFAULT_BUF_ALIGNMENT 1 +#define MACROBLOCK_WIDTH 16 +#define MACROBLOCK_HEIGHT 16 +#define BLOCK_WIDTH 8 +#define BLOCK_HEIGHT 8 +#define ZERO_BLOCK_NIL -1.0f +#define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f) + +struct vertex2f +{ + float x, y; +}; + +struct vertex4f +{ + float x, y, z, w; +}; + +struct vertex_shader_consts +{ + struct vertex4f denorm; +}; + +struct fragment_shader_consts +{ + struct vertex4f multiplier; + struct vertex4f div; +}; + +/* + * Muliplier renormalizes block samples from 16 bits to 12 bits. + * Divider is used when calculating Y % 2 for choosing top or bottom + * field for P or B macroblocks. + * TODO: Use immediates. + */ +static const struct fragment_shader_consts fs_consts = { + {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f}, + {0.5f, 2.0f, 0.0f, 0.0f} +}; + +struct vert_stream_0 +{ + struct vertex2f pos; + struct vertex2f luma_tc; + struct vertex2f cb_tc; + struct vertex2f cr_tc; +}; + +enum MACROBLOCK_TYPE +{ + MACROBLOCK_TYPE_INTRA, + MACROBLOCK_TYPE_FWD_FRAME_PRED, + MACROBLOCK_TYPE_FWD_FIELD_PRED, + MACROBLOCK_TYPE_BKWD_FRAME_PRED, + MACROBLOCK_TYPE_BKWD_FIELD_PRED, + MACROBLOCK_TYPE_BI_FRAME_PRED, + MACROBLOCK_TYPE_BI_FIELD_PRED, + + NUM_MACROBLOCK_TYPES +}; + +static void +create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r) +{ + const unsigned max_tokens = 50; + + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned ti; + + assert(r); + + tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma Cb texcoords + * decl i3 ; Chroma Cr texcoords + */ + for (unsigned i = 0; i < 4; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma Cb texcoords + * decl o3 ; Chroma Cr texcoords + */ + for (unsigned i = 0; i < 4; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma Cb texcoords to output + * mov o3, i3 ; Move input chroma Cr texcoords to output + */ + for (unsigned i = 0; i < 4; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + assert(ti <= max_tokens); + + vs.tokens = tokens; + r->i_vs = r->pipe->create_vs_state(r->pipe, &vs); + free(tokens); +} + +static void +create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r) +{ + const unsigned max_tokens = 100; + + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned ti; + + assert(r); + + tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Luma texcoords + * decl i1 ; Chroma Cb texcoords + * decl i2 ; Chroma Cr texcoords + */ + for (unsigned i = 0; i < 3; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + */ + for (unsigned i = 0; i < 3; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i2, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (unsigned i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul o0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + assert(ti <= max_tokens); + + fs.tokens = tokens; + r->i_fs = r->pipe->create_fs_state(r->pipe, &fs); + free(tokens); +} + +static void +create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) +{ + const unsigned max_tokens = 100; + + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned ti; + + assert(r); + + tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma Cb texcoords + * decl i3 ; Chroma Cr texcoords + * decl i4 ; Ref surface top field texcoords + * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream) + */ + for (unsigned i = 0; i < 6; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma Cb texcoords + * decl o3 ; Chroma Cr texcoords + * decl o4 ; Ref macroblock texcoords + */ + for (unsigned i = 0; i < 5; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma Cb texcoords to output + * mov o3, i3 ; Move input chroma Cr texcoords to output + */ + for (unsigned i = 0; i < 4; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + assert(ti <= max_tokens); + + vs.tokens = tokens; + r->p_vs[0] = r->pipe->create_vs_state(r->pipe, &vs); + free(tokens); +} + +static void +create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) +{ + assert(false); +} + +static void +create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) +{ + const unsigned max_tokens = 100; + + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned ti; + + assert(r); + + tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Luma texcoords + * decl i1 ; Chroma Cb texcoords + * decl i2 ; Chroma Cr texcoords + * decl i3 ; Ref macroblock texcoords + */ + for (unsigned i = 0; i < 4; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ + for (unsigned i = 0; i < 4; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i2, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (unsigned i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* tex2d t1, i3, s3 ; Read texel from ref macroblock */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + assert(ti <= max_tokens); + + fs.tokens = tokens; + r->p_fs[0] = r->pipe->create_fs_state(r->pipe, &fs); + free(tokens); +} + +static void +create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) +{ + assert(false); +} + +static void +create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) +{ + const unsigned max_tokens = 100; + + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned ti; + + assert(r); + + tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma Cb texcoords + * decl i3 ; Chroma Cr texcoords + * decl i4 ; First ref macroblock top field texcoords + * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream) + * decl i6 ; Second ref macroblock top field texcoords + * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream) + */ + for (unsigned i = 0; i < 8; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma Cb texcoords + * decl o3 ; Chroma Cr texcoords + * decl o4 ; First ref macroblock texcoords + * decl o5 ; Second ref macroblock texcoords + */ + for (unsigned i = 0; i < 6; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma Cb texcoords to output + * mov o3, i3 ; Move input chroma Cr texcoords to output + */ + for (unsigned i = 0; i < 4; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* + * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords + * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords + */ + for (unsigned i = 0; i < 2; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + assert(ti <= max_tokens); + + vs.tokens = tokens; + r->b_vs[0] = r->pipe->create_vs_state(r->pipe, &vs); + free(tokens); +} + +static void +create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) +{ + assert(false); +} + +static void +create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) +{ + const unsigned max_tokens = 100; + + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned ti; + + assert(r); + + tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Luma texcoords + * decl i1 ; Chroma Cb texcoords + * decl i2 ; Chroma Cr texcoords + * decl i3 ; First ref macroblock texcoords + * decl i4 ; Second ref macroblock texcoords + */ + for (unsigned i = 0; i < 5; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t2 */ + decl = vl_decl_temps(0, 2); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for first ref surface texture + * decl s4 ; Sampler for second ref surface texture + */ + for (unsigned i = 0; i < 5; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i2, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (unsigned i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i3, s3 ; Read texel from first ref macroblock + * tex2d t2, i4, s4 ; Read texel from second ref macroblock + */ + for (unsigned i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ + inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + assert(ti <= max_tokens); + + fs.tokens = tokens; + r->b_fs[0] = r->pipe->create_fs_state(r->pipe, &fs); + free(tokens); +} + +static void +create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) +{ + assert(false); +} + +static void +xfer_buffers_map(struct vl_mpeg12_mc_renderer *r) +{ + assert(r); + + for (unsigned i = 0; i < 3; ++i) + { + r->tex_transfer[i] = r->pipe->screen->get_tex_transfer + ( + r->pipe->screen, r->textures.all[i], + 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0, + r->textures.all[i]->width[0], r->textures.all[i]->height[0] + ); + + r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]); + } +} + +static void +xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r) +{ + assert(r); + + for (unsigned i = 0; i < 3; ++i) + { + r->pipe->screen->transfer_unmap(r->pipe->screen, r->tex_transfer[i]); + r->pipe->screen->tex_transfer_destroy(r->tex_transfer[i]); + } +} + +static bool +init_pipe_state(struct vl_mpeg12_mc_renderer *r) +{ + struct pipe_sampler_state sampler; + unsigned filters[5]; + + assert(r); + + r->viewport.scale[0] = r->pot_buffers ? + util_next_power_of_two(r->picture_width) : r->picture_width; + r->viewport.scale[1] = r->pot_buffers ? + util_next_power_of_two(r->picture_height) : r->picture_height; + r->viewport.scale[2] = 1; + r->viewport.scale[3] = 1; + r->viewport.translate[0] = 0; + r->viewport.translate[1] = 0; + r->viewport.translate[2] = 0; + r->viewport.translate[3] = 0; + + r->fb_state.width = r->pot_buffers ? + util_next_power_of_two(r->picture_width) : r->picture_width; + r->fb_state.height = r->pot_buffers ? + util_next_power_of_two(r->picture_height) : r->picture_height; + r->fb_state.nr_cbufs = 1; + r->fb_state.zsbuf = NULL; + + /* Luma filter */ + filters[0] = PIPE_TEX_FILTER_NEAREST; + /* Chroma filters */ + if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 || + r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) + { + filters[1] = PIPE_TEX_FILTER_NEAREST; + filters[2] = PIPE_TEX_FILTER_NEAREST; + } + else + { + filters[1] = PIPE_TEX_FILTER_LINEAR; + filters[2] = PIPE_TEX_FILTER_LINEAR; + } + /* Fwd, bkwd ref filters */ + filters[3] = PIPE_TEX_FILTER_LINEAR; + filters[4] = PIPE_TEX_FILTER_LINEAR; + + for (unsigned i = 0; i < 5; ++i) + { + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = filters[i]; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = filters[i]; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + /*sampler.prefilter = ; */ + /*sampler.shadow_ambient = ; */ + /*sampler.lod_bias = ; */ + sampler.min_lod = 0; + /*sampler.max_lod = ; */ + /*sampler.border_color[i] = ; */ + /*sampler.max_anisotropy = ; */ + r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler); + } + + return true; +} + +static void +cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r) +{ + assert(r); + + for (unsigned i = 0; i < 5; ++i) + r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]); +} + +static bool +init_shaders(struct vl_mpeg12_mc_renderer *r) +{ + assert(r); + + create_intra_vert_shader(r); + create_intra_frag_shader(r); + create_frame_pred_vert_shader(r); + create_frame_pred_frag_shader(r); + create_frame_bi_pred_vert_shader(r); + create_frame_bi_pred_frag_shader(r); + + return true; +} + +static void +cleanup_shaders(struct vl_mpeg12_mc_renderer *r) +{ + assert(r); + + r->pipe->delete_vs_state(r->pipe, r->i_vs); + r->pipe->delete_fs_state(r->pipe, r->i_fs); + r->pipe->delete_vs_state(r->pipe, r->p_vs[0]); + r->pipe->delete_fs_state(r->pipe, r->p_fs[0]); + r->pipe->delete_vs_state(r->pipe, r->b_vs[0]); + r->pipe->delete_fs_state(r->pipe, r->b_fs[0]); +} + +static bool +init_buffers(struct vl_mpeg12_mc_renderer *r) +{ + struct pipe_texture template; + + const unsigned mbw = + align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH; + const unsigned mbh = + align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT; + + assert(r); + + r->macroblocks_per_batch = + mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1); + r->num_macroblocks = 0; + r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock)); + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */ + template.format = PIPE_FORMAT_R16_SNORM; + template.last_level = 0; + template.width[0] = r->pot_buffers ? + util_next_power_of_two(r->picture_width) : r->picture_width; + template.height[0] = r->pot_buffers ? + util_next_power_of_two(r->picture_height) : r->picture_height; + template.depth[0] = 1; + pf_get_block(template.format, &template.block); + template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC; + + r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template); + + if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) + { + template.width[0] = r->pot_buffers ? + util_next_power_of_two(r->picture_width / 2) : + r->picture_width / 2; + template.height[0] = r->pot_buffers ? + util_next_power_of_two(r->picture_height / 2) : + r->picture_height / 2; + } + else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) + template.height[0] = r->pot_buffers ? + util_next_power_of_two(r->picture_height / 2) : + r->picture_height / 2; + + r->textures.individual.cb = + r->pipe->screen->texture_create(r->pipe->screen, &template); + r->textures.individual.cr = + r->pipe->screen->texture_create(r->pipe->screen, &template); + + r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4; + r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1; + r->vertex_bufs.individual.ycbcr.buffer_offset = 0; + r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create + ( + r->pipe->screen, + DEFAULT_BUF_ALIGNMENT, + PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD, + sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch + ); + + for (unsigned i = 1; i < 3; ++i) + { + r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2; + r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1; + r->vertex_bufs.all[i].buffer_offset = 0; + r->vertex_bufs.all[i].buffer = pipe_buffer_create + ( + r->pipe->screen, + DEFAULT_BUF_ALIGNMENT, + PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD, + sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch + ); + } + + /* Position element */ + r->vertex_elems[0].src_offset = 0; + r->vertex_elems[0].vertex_buffer_index = 0; + r->vertex_elems[0].nr_components = 2; + r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Luma, texcoord element */ + r->vertex_elems[1].src_offset = sizeof(struct vertex2f); + r->vertex_elems[1].vertex_buffer_index = 0; + r->vertex_elems[1].nr_components = 2; + r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Chroma Cr texcoord element */ + r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2; + r->vertex_elems[2].vertex_buffer_index = 0; + r->vertex_elems[2].nr_components = 2; + r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Chroma Cb texcoord element */ + r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3; + r->vertex_elems[3].vertex_buffer_index = 0; + r->vertex_elems[3].nr_components = 2; + r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* First ref surface top field texcoord element */ + r->vertex_elems[4].src_offset = 0; + r->vertex_elems[4].vertex_buffer_index = 1; + r->vertex_elems[4].nr_components = 2; + r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* First ref surface bottom field texcoord element */ + r->vertex_elems[5].src_offset = sizeof(struct vertex2f); + r->vertex_elems[5].vertex_buffer_index = 1; + r->vertex_elems[5].nr_components = 2; + r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Second ref surface top field texcoord element */ + r->vertex_elems[6].src_offset = 0; + r->vertex_elems[6].vertex_buffer_index = 2; + r->vertex_elems[6].nr_components = 2; + r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Second ref surface bottom field texcoord element */ + r->vertex_elems[7].src_offset = sizeof(struct vertex2f); + r->vertex_elems[7].vertex_buffer_index = 2; + r->vertex_elems[7].nr_components = 2; + r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT; + + r->vs_const_buf.buffer = pipe_buffer_create + ( + r->pipe->screen, + DEFAULT_BUF_ALIGNMENT, + PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD, + sizeof(struct vertex_shader_consts) + ); + + r->fs_const_buf.buffer = pipe_buffer_create + ( + r->pipe->screen, + DEFAULT_BUF_ALIGNMENT, + PIPE_BUFFER_USAGE_CONSTANT, sizeof(struct fragment_shader_consts) + ); + + memcpy + ( + pipe_buffer_map(r->pipe->screen, r->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + &fs_consts, sizeof(struct fragment_shader_consts) + ); + + pipe_buffer_unmap(r->pipe->screen, r->fs_const_buf.buffer); + + return true; +} + +static void +cleanup_buffers(struct vl_mpeg12_mc_renderer *r) +{ + assert(r); + + pipe_buffer_reference(&r->vs_const_buf.buffer, NULL); + pipe_buffer_reference(&r->fs_const_buf.buffer, NULL); + + for (unsigned i = 0; i < 3; ++i) + pipe_buffer_reference(&r->vertex_bufs.all[i].buffer, NULL); + + for (unsigned i = 0; i < 3; ++i) + pipe_texture_reference(&r->textures.all[i], NULL); + + FREE(r->macroblock_buf); +} + +static enum MACROBLOCK_TYPE +get_macroblock_type(struct pipe_mpeg12_macroblock *mb) +{ + assert(mb); + + switch (mb->mb_type) + { + case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA: + return MACROBLOCK_TYPE_INTRA; + case PIPE_MPEG12_MACROBLOCK_TYPE_FWD: + return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ? + MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED; + case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD: + return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ? + MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED; + case PIPE_MPEG12_MACROBLOCK_TYPE_BI: + return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ? + MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED; + default: + assert(0); + } + + /* Unreachable */ + return -1; +} + +/* XXX: One of these days this will have to be killed with fire */ +#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, use_zb, zb) \ + do { \ + (vb)[0].pos.x = (mbx) * (unitx) + (ofsx); (vb)[0].pos.y = (mby) * (unity) + (ofsy); \ + (vb)[1].pos.x = (mbx) * (unitx) + (ofsx); (vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].pos.y = (mby) * (unity) + (ofsy); \ + (vb)[3].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].pos.y = (mby) * (unity) + (ofsy); \ + (vb)[4].pos.x = (mbx) * (unitx) + (ofsx); (vb)[4].pos.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[5].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].pos.y = (mby) * (unity) + (ofsy) + (hy); \ + \ + if (!use_zb || (cbp) & (lm)) \ + { \ + (vb)[0].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].luma_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[1].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[2].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].luma_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[3].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].luma_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[4].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[5].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + } \ + else \ + { \ + (vb)[0].luma_tc.x = (zb)[0].x; (vb)[0].luma_tc.y = (zb)[0].y; \ + (vb)[1].luma_tc.x = (zb)[0].x; (vb)[1].luma_tc.y = (zb)[0].y + (hy); \ + (vb)[2].luma_tc.x = (zb)[0].x + (hx); (vb)[2].luma_tc.y = (zb)[0].y; \ + (vb)[3].luma_tc.x = (zb)[0].x + (hx); (vb)[3].luma_tc.y = (zb)[0].y; \ + (vb)[4].luma_tc.x = (zb)[0].x; (vb)[4].luma_tc.y = (zb)[0].y + (hy); \ + (vb)[5].luma_tc.x = (zb)[0].x + (hx); (vb)[5].luma_tc.y = (zb)[0].y + (hy); \ + } \ + \ + if (!use_zb || (cbp) & (cbm)) \ + { \ + (vb)[0].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cb_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[1].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[2].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cb_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[3].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cb_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[4].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[5].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + } \ + else \ + { \ + (vb)[0].cb_tc.x = (zb)[1].x; (vb)[0].cb_tc.y = (zb)[1].y; \ + (vb)[1].cb_tc.x = (zb)[1].x; (vb)[1].cb_tc.y = (zb)[1].y + (hy); \ + (vb)[2].cb_tc.x = (zb)[1].x + (hx); (vb)[2].cb_tc.y = (zb)[1].y; \ + (vb)[3].cb_tc.x = (zb)[1].x + (hx); (vb)[3].cb_tc.y = (zb)[1].y; \ + (vb)[4].cb_tc.x = (zb)[1].x; (vb)[4].cb_tc.y = (zb)[1].y + (hy); \ + (vb)[5].cb_tc.x = (zb)[1].x + (hx); (vb)[5].cb_tc.y = (zb)[1].y + (hy); \ + } \ + \ + if (!use_zb || (cbp) & (crm)) \ + { \ + (vb)[0].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cr_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[1].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[2].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cr_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[3].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cr_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[4].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[5].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + } \ + else \ + { \ + (vb)[0].cr_tc.x = (zb)[2].x; (vb)[0].cr_tc.y = (zb)[2].y; \ + (vb)[1].cr_tc.x = (zb)[2].x; (vb)[1].cr_tc.y = (zb)[2].y + (hy); \ + (vb)[2].cr_tc.x = (zb)[2].x + (hx); (vb)[2].cr_tc.y = (zb)[2].y; \ + (vb)[3].cr_tc.x = (zb)[2].x + (hx); (vb)[3].cr_tc.y = (zb)[2].y; \ + (vb)[4].cr_tc.x = (zb)[2].x; (vb)[4].cr_tc.y = (zb)[2].y + (hy); \ + (vb)[5].cr_tc.x = (zb)[2].x + (hx); (vb)[5].cr_tc.y = (zb)[2].y + (hy); \ + } \ + } while (0) + +static void +gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r, + struct pipe_mpeg12_macroblock *mb, unsigned pos, + struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb) +{ + struct vertex2f mo_vec[2]; + + assert(r); + assert(mb); + assert(ycbcr_vb); + assert(pos < r->macroblocks_per_batch); + + switch (mb->mb_type) + { + case PIPE_MPEG12_MACROBLOCK_TYPE_BI: + { + struct vertex2f *vb; + + assert(ref_vb && ref_vb[1]); + + vb = ref_vb[1] + pos * 2 * 24; + + mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x; + mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y; + + if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) + { + for (unsigned i = 0; i < 24 * 2; i += 2) + { + vb[i].x = mo_vec[0].x; + vb[i].y = mo_vec[0].y; + } + } + else + { + mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x; + mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y; + + for (unsigned i = 0; i < 24 * 2; i += 2) + { + vb[i].x = mo_vec[0].x; + vb[i].y = mo_vec[0].y; + vb[i + 1].x = mo_vec[1].x; + vb[i + 1].y = mo_vec[1].y; + } + } + + /* fall-through */ + } + case PIPE_MPEG12_MACROBLOCK_TYPE_FWD: + case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD: + { + struct vertex2f *vb; + + assert(ref_vb && ref_vb[0]); + + vb = ref_vb[0] + pos * 2 * 24; + + if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) + { + mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x; + mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y; + + if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) + { + mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x; + mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y; + } + } + else + { + mo_vec[0].x = mb->pmv[0][0][0] * 0.5f * r->surface_tex_inv_size.x; + mo_vec[0].y = mb->pmv[0][0][1] * 0.5f * r->surface_tex_inv_size.y; + + if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) + { + mo_vec[1].x = mb->pmv[1][0][0] * 0.5f * r->surface_tex_inv_size.x; + mo_vec[1].y = mb->pmv[1][0][1] * 0.5f * r->surface_tex_inv_size.y; + } + } + + if (mb->mb_type == PIPE_MPEG12_MOTION_TYPE_FRAME) + { + for (unsigned i = 0; i < 24 * 2; i += 2) + { + vb[i].x = mo_vec[0].x; + vb[i].y = mo_vec[0].y; + } + } + else + { + for (unsigned i = 0; i < 24 * 2; i += 2) + { + vb[i].x = mo_vec[0].x; + vb[i].y = mo_vec[0].y; + vb[i + 1].x = mo_vec[1].x; + vb[i + 1].y = mo_vec[1].y; + } + } + + /* fall-through */ + } + case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA: + { + const struct vertex2f unit = + { + r->surface_tex_inv_size.x * MACROBLOCK_WIDTH, + r->surface_tex_inv_size.y * MACROBLOCK_HEIGHT + }; + const struct vertex2f half = + { + r->surface_tex_inv_size.x * (MACROBLOCK_WIDTH / 2), + r->surface_tex_inv_size.y * (MACROBLOCK_HEIGHT / 2) + }; + const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE; + + struct vert_stream_0 *vb = ycbcr_vb + pos * 24; + + SET_BLOCK(vb, mb->cbp, mb->mbx, mb->mby, + unit.x, unit.y, 0, 0, half.x, half.y, + 32, 2, 1, use_zb, r->zero_block); + + SET_BLOCK(vb + 6, mb->cbp, mb->mbx, mb->mby, + unit.x, unit.y, half.x, 0, half.x, half.y, + 16, 2, 1, use_zb, r->zero_block); + + SET_BLOCK(vb + 12, mb->cbp, mb->mbx, mb->mby, + unit.x, unit.y, 0, half.y, half.x, half.y, + 8, 2, 1, use_zb, r->zero_block); + + SET_BLOCK(vb + 18, mb->cbp, mb->mbx, mb->mby, + unit.x, unit.y, half.x, half.y, half.x, half.y, + 4, 2, 1, use_zb, r->zero_block); + + break; + } + default: + assert(0); + } +} + +static void +gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r, + unsigned *num_macroblocks) +{ + unsigned offset[NUM_MACROBLOCK_TYPES]; + struct vert_stream_0 *ycbcr_vb; + struct vertex2f *ref_vb[2]; + + assert(r); + assert(num_macroblocks); + + for (unsigned i = 0; i < r->num_macroblocks; ++i) + { + enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]); + ++num_macroblocks[mb_type]; + } + + offset[0] = 0; + + for (unsigned i = 1; i < NUM_MACROBLOCK_TYPES; ++i) + offset[i] = offset[i - 1] + num_macroblocks[i - 1]; + + ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map + ( + r->pipe->screen, + r->vertex_bufs.individual.ycbcr.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD + ); + + for (unsigned i = 0; i < 2; ++i) + ref_vb[i] = (struct vertex2f *)pipe_buffer_map + ( + r->pipe->screen, + r->vertex_bufs.individual.ref[i].buffer, + PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD + ); + + for (unsigned i = 0; i < r->num_macroblocks; ++i) + { + enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]); + + gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type], + ycbcr_vb, ref_vb); + + ++offset[mb_type]; + } + + pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ycbcr.buffer); + for (unsigned i = 0; i < 2; ++i) + pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ref[i].buffer); +} + +static void +flush(struct vl_mpeg12_mc_renderer *r) +{ + unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 }; + unsigned vb_start = 0; + struct vertex_shader_consts *vs_consts; + + assert(r); + assert(r->num_macroblocks == r->macroblocks_per_batch); + + gen_macroblock_stream(r, num_macroblocks); + + r->fb_state.cbufs[0] = r->pipe->screen->get_tex_surface + ( + r->pipe->screen, r->surface, + 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE + ); + + r->pipe->set_framebuffer_state(r->pipe, &r->fb_state); + r->pipe->set_viewport_state(r->pipe, &r->viewport); + + vs_consts = pipe_buffer_map + ( + r->pipe->screen, r->vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD + ); + + vs_consts->denorm.x = r->surface->width[0]; + vs_consts->denorm.y = r->surface->height[0]; + + pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer); + + r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0, + &r->vs_const_buf); + r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_FRAGMENT, 0, + &r->fs_const_buf); + + if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) + { + r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all); + r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems); + r->pipe->set_sampler_textures(r->pipe, 3, r->textures.all); + r->pipe->bind_sampler_states(r->pipe, 3, r->samplers.all); + r->pipe->bind_vs_state(r->pipe, r->i_vs); + r->pipe->bind_fs_state(r->pipe, r->i_fs); + + r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, + num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24); + vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24; + } + + if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) + { + r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); + r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); + r->textures.individual.ref[0] = r->past; + r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all); + r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all); + r->pipe->bind_vs_state(r->pipe, r->p_vs[0]); + r->pipe->bind_fs_state(r->pipe, r->p_fs[0]); + + r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, + num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24); + vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24; + } + + if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) + { + r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); + r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); + r->textures.individual.ref[0] = r->past; + r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all); + r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all); + r->pipe->bind_vs_state(r->pipe, r->p_vs[1]); + r->pipe->bind_fs_state(r->pipe, r->p_fs[1]); + + r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, + num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24); + vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24; + } + + if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) + { + r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); + r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); + r->textures.individual.ref[0] = r->future; + r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all); + r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all); + r->pipe->bind_vs_state(r->pipe, r->p_vs[0]); + r->pipe->bind_fs_state(r->pipe, r->p_fs[0]); + + r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, + num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24); + vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24; + } + + if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) + { + r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); + r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); + r->textures.individual.ref[0] = r->future; + r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all); + r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all); + r->pipe->bind_vs_state(r->pipe, r->p_vs[1]); + r->pipe->bind_fs_state(r->pipe, r->p_fs[1]); + + r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, + num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24); + vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24; + } + + if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) + { + r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all); + r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems); + r->textures.individual.ref[0] = r->past; + r->textures.individual.ref[1] = r->future; + r->pipe->set_sampler_textures(r->pipe, 5, r->textures.all); + r->pipe->bind_sampler_states(r->pipe, 5, r->samplers.all); + r->pipe->bind_vs_state(r->pipe, r->b_vs[0]); + r->pipe->bind_fs_state(r->pipe, r->b_fs[0]); + + r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, + num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24); + vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24; + } + + if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) + { + r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all); + r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems); + r->textures.individual.ref[0] = r->past; + r->textures.individual.ref[1] = r->future; + r->pipe->set_sampler_textures(r->pipe, 5, r->textures.all); + r->pipe->bind_sampler_states(r->pipe, 5, r->samplers.all); + r->pipe->bind_vs_state(r->pipe, r->b_vs[1]); + r->pipe->bind_fs_state(r->pipe, r->b_fs[1]); + + r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, + num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24); + vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24; + } + + r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence); + pipe_surface_reference(&r->fb_state.cbufs[0], NULL); + + if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) + for (unsigned i = 0; i < 3; ++i) + r->zero_block[i].x = ZERO_BLOCK_NIL; + + r->num_macroblocks = 0; +} + +static void +grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch) +{ + assert(src); + assert(dst); + + for (unsigned y = 0; y < BLOCK_HEIGHT; ++y) + memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2); +} + +static void +grab_field_coded_block(short *src, short *dst, unsigned dst_pitch) +{ + assert(src); + assert(dst); + + for (unsigned y = 0; y < BLOCK_HEIGHT; ++y) + memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2); +} + +static void +fill_zero_block(short *dst, unsigned dst_pitch) +{ + assert(dst); + + for (unsigned y = 0; y < BLOCK_HEIGHT; ++y) + memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2); +} + +static void +grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby, + enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks) +{ + unsigned tex_pitch; + short *texels; + unsigned tb = 0, sb = 0; + unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT; + + assert(r); + assert(blocks); + + tex_pitch = r->tex_transfer[0]->stride / r->tex_transfer[0]->block.size; + texels = r->texels[0] + mbpy * tex_pitch + mbpx; + + for (unsigned y = 0; y < 2; ++y) + { + for (unsigned x = 0; x < 2; ++x, ++tb) + { + if ((cbp >> (5 - tb)) & 1) + { + if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) + { + grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, + texels + y * tex_pitch * BLOCK_WIDTH + + x * BLOCK_WIDTH, tex_pitch); + } + else + { + grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, + texels + y * tex_pitch + x * BLOCK_WIDTH, + tex_pitch); + } + + ++sb; + } + else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) + { + if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL || + ZERO_BLOCK_IS_NIL(r->zero_block[0])) + { + fill_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch); + if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) + { + r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x; + r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y; + } + } + } + } + } + + /* TODO: Implement 422, 444 */ + assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420); + + mbpx /= 2; + mbpy /= 2; + + for (tb = 0; tb < 2; ++tb) + { + tex_pitch = r->tex_transfer[tb + 1]->stride / r->tex_transfer[tb + 1]->block.size; + texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx; + + if ((cbp >> (1 - tb)) & 1) + { + grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch); + ++sb; + } + else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) + { + if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL || + ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) + { + fill_zero_block(texels, tex_pitch); + if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) + { + r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x; + r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y; + } + } + } + } +} + +static void +grab_macroblock(struct vl_mpeg12_mc_renderer *r, + struct pipe_mpeg12_macroblock *mb) +{ + assert(r); + assert(mb); + assert(r->num_macroblocks < r->macroblocks_per_batch); + + memcpy(&r->macroblock_buf[r->num_macroblocks], mb, + sizeof(struct pipe_mpeg12_macroblock)); + + grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks); + + ++r->num_macroblocks; +} + +bool +vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer, + struct pipe_context *pipe, + unsigned picture_width, + unsigned picture_height, + enum pipe_video_chroma_format chroma_format, + enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode, + enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling, + bool pot_buffers) +{ + assert(renderer); + assert(pipe); + /* TODO: Implement other policies */ + assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE); + /* TODO: Implement this */ + /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */ + assert(eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE); + /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */ + assert(pot_buffers); + + memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer)); + + renderer->pipe = pipe; + renderer->picture_width = picture_width; + renderer->picture_height = picture_height; + renderer->chroma_format = chroma_format; + renderer->bufmode = bufmode; + renderer->eb_handling = eb_handling; + renderer->pot_buffers = pot_buffers; + + if (!init_pipe_state(renderer)) + return false; + if (!init_shaders(renderer)) + { + cleanup_pipe_state(renderer); + return false; + } + if (!init_buffers(renderer)) + { + cleanup_shaders(renderer); + cleanup_pipe_state(renderer); + return false; + } + + renderer->surface = NULL; + renderer->past = NULL; + renderer->future = NULL; + for (unsigned i = 0; i < 3; ++i) + renderer->zero_block[i].x = ZERO_BLOCK_NIL; + renderer->num_macroblocks = 0; + + xfer_buffers_map(renderer); + + return true; +} + +void +vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer) +{ + assert(renderer); + + xfer_buffers_unmap(renderer); + + cleanup_pipe_state(renderer); + cleanup_shaders(renderer); + cleanup_buffers(renderer); +} + +void +vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer + *renderer, + struct pipe_texture *surface, + struct pipe_texture *past, + struct pipe_texture *future, + unsigned num_macroblocks, + struct pipe_mpeg12_macroblock + *mpeg12_macroblocks, + struct pipe_fence_handle **fence) +{ + bool new_surface = false; + + assert(renderer); + assert(surface); + assert(num_macroblocks); + assert(mpeg12_macroblocks); + + if (renderer->surface) + { + if (surface != renderer->surface) + { + if (renderer->num_macroblocks > 0) + { + xfer_buffers_unmap(renderer); + flush(renderer); + } + + new_surface = true; + } + + /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */ + assert(surface != renderer->surface || renderer->past == past); + assert(surface != renderer->surface || renderer->future == future); + } + else + new_surface = true; + + if (new_surface) + { + renderer->surface = surface; + renderer->past = past; + renderer->future = future; + renderer->fence = fence; + renderer->surface_tex_inv_size.x = 1.0f / surface->width[0]; + renderer->surface_tex_inv_size.y = 1.0f / surface->height[0]; + } + + while (num_macroblocks) + { + unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks; + unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch); + + for (unsigned i = 0; i < num_to_submit; ++i) + { + assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12); + grab_macroblock(renderer, &mpeg12_macroblocks[i]); + } + + num_macroblocks -= num_to_submit; + + if (renderer->num_macroblocks == renderer->macroblocks_per_batch) + { + xfer_buffers_unmap(renderer); + flush(renderer); + xfer_buffers_map(renderer); + /* Next time we get this surface it may have new ref frames */ + renderer->surface = NULL; + } + } +} diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h new file mode 100644 index 0000000000..dfe0f7a24b --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h @@ -0,0 +1,93 @@ +#ifndef vl_mpeg12_mc_renderer_h +#define vl_mpeg12_mc_renderer_h + +#include +#include +#include + +struct pipe_context; +struct pipe_video_surface; +struct pipe_macroblock; + +/* A slice is video-width (rounded up to a multiple of macroblock width) x macroblock height */ +enum VL_MPEG12_MC_RENDERER_BUFFER_MODE +{ + VL_MPEG12_MC_RENDERER_BUFFER_SLICE, /* Saves memory at the cost of smaller batches */ + VL_MPEG12_MC_RENDERER_BUFFER_PICTURE /* Larger batches, more memory */ +}; + +enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK +{ + VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL, /* Waste of memory bandwidth */ + VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE, /* Can only do point-filtering when interpolating subsampled chroma channels */ + VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE /* Needs conditional texel fetch! */ +}; + +struct vl_mpeg12_mc_renderer +{ + struct pipe_context *pipe; + unsigned picture_width; + unsigned picture_height; + enum pipe_video_chroma_format chroma_format; + enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode; + enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling; + bool pot_buffers; + unsigned macroblocks_per_batch; + + struct pipe_viewport_state viewport; + struct pipe_constant_buffer vs_const_buf; + struct pipe_constant_buffer fs_const_buf; + struct pipe_framebuffer_state fb_state; + struct pipe_vertex_element vertex_elems[8]; + + union + { + void *all[5]; + struct { void *y, *cb, *cr, *ref[2]; } individual; + } samplers; + + void *i_vs, *p_vs[2], *b_vs[2]; + void *i_fs, *p_fs[2], *b_fs[2]; + + union + { + struct pipe_texture *all[5]; + struct { struct pipe_texture *y, *cb, *cr, *ref[2]; } individual; + } textures; + + union + { + struct pipe_vertex_buffer all[3]; + struct { struct pipe_vertex_buffer ycbcr, ref[2]; } individual; + } vertex_bufs; + + struct pipe_texture *surface, *past, *future; + struct pipe_fence_handle **fence; + unsigned num_macroblocks; + struct pipe_mpeg12_macroblock *macroblock_buf; + struct pipe_transfer *tex_transfer[3]; + short *texels[3]; + struct { float x, y; } surface_tex_inv_size; + struct { float x, y; } zero_block[3]; +}; + +bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer, + struct pipe_context *pipe, + unsigned picture_width, + unsigned picture_height, + enum pipe_video_chroma_format chroma_format, + enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode, + enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling, + bool pot_buffers); + +void vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer); + +void vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer, + struct pipe_texture *surface, + struct pipe_texture *past, + struct pipe_texture *future, + unsigned num_macroblocks, + struct pipe_mpeg12_macroblock *mpeg12_macroblocks, + struct pipe_fence_handle **fence); + +#endif /* vl_mpeg12_mc_renderer_h */ diff --git a/src/gallium/auxiliary/vl/vl_shader_build.c b/src/gallium/auxiliary/vl/vl_shader_build.c new file mode 100644 index 0000000000..5a4a5ab72c --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_shader_build.c @@ -0,0 +1,215 @@ +#include "vl_shader_build.h" +#include +#include +#include + +struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = name; + decl.Semantic.SemanticIndex = index; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_interpolated_input +( + unsigned int name, + unsigned int index, + unsigned int first, + unsigned int last, + int interpolation +) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + assert + ( + interpolation == TGSI_INTERPOLATE_CONSTANT || + interpolation == TGSI_INTERPOLATE_LINEAR || + interpolation == TGSI_INTERPOLATE_PERSPECTIVE + ); + + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = name; + decl.Semantic.SemanticIndex = index; + decl.Declaration.Interpolate = interpolation;; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = name; + decl.Semantic.SemanticIndex = index; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = name; + decl.Semantic.SemanticIndex = index; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_instruction vl_inst2 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src_file, + unsigned int src_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = opcode; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = dst_file; + inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = src_file; + inst.FullSrcRegisters[0].SrcRegister.Index = src_index; + + return inst; +} + +struct tgsi_full_instruction vl_inst3 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = opcode; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = dst_file; + inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = src1_file; + inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; + inst.FullSrcRegisters[1].SrcRegister.File = src2_file; + inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; + + return inst; +} + +struct tgsi_full_instruction vl_tex +( + int tex, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = dst_file; + inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = tex; + inst.FullSrcRegisters[0].SrcRegister.File = src1_file; + inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; + inst.FullSrcRegisters[1].SrcRegister.File = src2_file; + inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; + + return inst; +} + +struct tgsi_full_instruction vl_inst4 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index, + enum tgsi_file_type src3_file, + unsigned int src3_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = opcode; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = dst_file; + inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Instruction.NumSrcRegs = 3; + inst.FullSrcRegisters[0].SrcRegister.File = src1_file; + inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; + inst.FullSrcRegisters[1].SrcRegister.File = src2_file; + inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; + inst.FullSrcRegisters[2].SrcRegister.File = src3_file; + inst.FullSrcRegisters[2].SrcRegister.Index = src3_index; + + return inst; +} + +struct tgsi_full_instruction vl_end(void) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + + return inst; +} diff --git a/src/gallium/auxiliary/vl/vl_shader_build.h b/src/gallium/auxiliary/vl/vl_shader_build.h new file mode 100644 index 0000000000..c6c60b5552 --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_shader_build.h @@ -0,0 +1,61 @@ +#ifndef vl_shader_build_h +#define vl_shader_build_h + +#include + +struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_interpolated_input +( + unsigned int name, + unsigned int index, + unsigned int first, + unsigned int last, + int interpolation +); +struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last); +struct tgsi_full_instruction vl_inst2 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src_file, + unsigned int src_index +); +struct tgsi_full_instruction vl_inst3 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +); +struct tgsi_full_instruction vl_tex +( + int tex, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +); +struct tgsi_full_instruction vl_inst4 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index, + enum tgsi_file_type src3_file, + unsigned int src3_index +); +struct tgsi_full_instruction vl_end(void); + +#endif diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 6ab3753762..bcb887a0b2 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -31,6 +31,7 @@ C_SOURCES = \ sp_tex_sample.c \ sp_tex_tile_cache.c \ sp_tile_cache.c \ - sp_surface.c + sp_surface.c \ + sp_video_context.c include ../../Makefile.template diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index 950c3d9955..aac9edf44e 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -33,6 +33,7 @@ softpipe = env.ConvenienceLibrary( 'sp_tex_tile_cache.c', 'sp_texture.c', 'sp_tile_cache.c', + 'sp_video_context.c', ]) -Export('softpipe') \ No newline at end of file +Export('softpipe') diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 49b51afda7..45289380d0 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -381,6 +381,59 @@ softpipe_transfer_unmap(struct pipe_screen *screen, } } +static struct pipe_video_surface* +softpipe_video_surface_create(struct pipe_screen *screen, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height) +{ + struct softpipe_video_surface *sp_vsfc; + struct pipe_texture template; + + assert(screen); + assert(width && height); + + sp_vsfc = CALLOC_STRUCT(softpipe_video_surface); + if (!sp_vsfc) + return NULL; + + pipe_reference_init(&sp_vsfc->base.reference, 1); + sp_vsfc->base.screen = screen; + sp_vsfc->base.chroma_format = chroma_format; + /*sp_vsfc->base.surface_format = PIPE_VIDEO_SURFACE_FORMAT_VUYA;*/ + sp_vsfc->base.width = width; + sp_vsfc->base.height = height; + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + template.format = PIPE_FORMAT_X8R8G8B8_UNORM; + template.last_level = 0; + /* vl_mpeg12_mc_renderer expects this when it's initialized with pot_buffers=true */ + template.width[0] = util_next_power_of_two(width); + template.height[0] = util_next_power_of_two(height); + template.depth[0] = 1; + pf_get_block(template.format, &template.block); + template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET; + + sp_vsfc->tex = screen->texture_create(screen, &template); + if (!sp_vsfc->tex) + { + FREE(sp_vsfc); + return NULL; + } + + return &sp_vsfc->base; +} + + +static void +softpipe_video_surface_destroy(struct pipe_video_surface *vsfc) +{ + struct softpipe_video_surface *sp_vsfc = softpipe_video_surface(vsfc); + + pipe_texture_reference(&sp_vsfc->tex, NULL); + FREE(sp_vsfc); +} + void softpipe_init_screen_texture_funcs(struct pipe_screen *screen) @@ -396,6 +449,9 @@ softpipe_init_screen_texture_funcs(struct pipe_screen *screen) screen->tex_transfer_destroy = softpipe_tex_transfer_destroy; screen->transfer_map = softpipe_transfer_map; screen->transfer_unmap = softpipe_transfer_unmap; + + screen->video_surface_create = softpipe_video_surface_create; + screen->video_surface_destroy = softpipe_video_surface_destroy; } diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index 2537ab6a40..2ef64e1e7c 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -30,6 +30,7 @@ #include "pipe/p_state.h" +#include "pipe/p_video_state.h" struct pipe_context; @@ -62,6 +63,15 @@ struct softpipe_transfer unsigned long offset; }; +struct softpipe_video_surface +{ + struct pipe_video_surface base; + + /* The data is held here: + */ + struct pipe_texture *tex; +}; + /** cast wrappers */ static INLINE struct softpipe_texture * @@ -76,6 +86,12 @@ softpipe_transfer(struct pipe_transfer *pt) return (struct softpipe_transfer *) pt; } +static INLINE struct softpipe_video_surface * +softpipe_video_surface(struct pipe_video_surface *pvs) +{ + return (struct softpipe_video_surface *) pvs; +} + extern void softpipe_init_screen_texture_funcs(struct pipe_screen *screen); diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c new file mode 100644 index 0000000000..1b47bbede2 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_video_context.c @@ -0,0 +1,273 @@ +#include "sp_video_context.h" +#include +#include +#include "softpipe/sp_winsys.h" +#include "softpipe/sp_texture.h" + +static void +sp_mpeg12_destroy(struct pipe_video_context *vpipe) +{ + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + + assert(vpipe); + + /* Asserted in softpipe_delete_fs_state() for some reason */ + ctx->pipe->bind_vs_state(ctx->pipe, NULL); + ctx->pipe->bind_fs_state(ctx->pipe, NULL); + + ctx->pipe->delete_blend_state(ctx->pipe, ctx->blend); + ctx->pipe->delete_rasterizer_state(ctx->pipe, ctx->rast); + ctx->pipe->delete_depth_stencil_alpha_state(ctx->pipe, ctx->dsa); + + pipe_video_surface_reference(&ctx->decode_target, NULL); + vl_compositor_cleanup(&ctx->compositor); + vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer); + ctx->pipe->destroy(ctx->pipe); + + FREE(ctx); +} + +static void +sp_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe, + struct pipe_video_surface *past, + struct pipe_video_surface *future, + unsigned num_macroblocks, + struct pipe_macroblock *macroblocks, + struct pipe_fence_handle **fence) +{ + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + struct pipe_mpeg12_macroblock *mpeg12_macroblocks = (struct pipe_mpeg12_macroblock*)macroblocks; + + assert(vpipe); + assert(num_macroblocks); + assert(macroblocks); + assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12); + assert(ctx->decode_target); + + vl_mpeg12_mc_renderer_render_macroblocks(&ctx->mc_renderer, + softpipe_video_surface(ctx->decode_target)->tex, + past ? softpipe_video_surface(past)->tex : NULL, + future ? softpipe_video_surface(future)->tex : NULL, + num_macroblocks, mpeg12_macroblocks, fence); +} + +static void +sp_mpeg12_clear_surface(struct pipe_video_context *vpipe, + unsigned x, unsigned y, + unsigned width, unsigned height, + unsigned value, + struct pipe_surface *surface) +{ + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + + assert(vpipe); + assert(surface); + + ctx->pipe->surface_fill(ctx->pipe, surface, x, y, width, height, value); +} + +static void +sp_mpeg12_render_picture(struct pipe_video_context *vpipe, + /*struct pipe_surface *backround, + struct pipe_video_rect *backround_area,*/ + struct pipe_video_surface *src_surface, + enum pipe_mpeg12_picture_type picture_type, + /*unsigned num_past_surfaces, + struct pipe_video_surface *past_surfaces, + unsigned num_future_surfaces, + struct pipe_video_surface *future_surfaces,*/ + struct pipe_video_rect *src_area, + struct pipe_surface *dst_surface, + struct pipe_video_rect *dst_area, + /*unsigned num_layers, + struct pipe_surface *layers, + struct pipe_video_rect *layer_src_areas, + struct pipe_video_rect *layer_dst_areas*/ + struct pipe_fence_handle **fence) +{ + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + + assert(vpipe); + assert(src_surface); + assert(src_area); + assert(dst_surface); + assert(dst_area); + + vl_compositor_render(&ctx->compositor, softpipe_video_surface(src_surface)->tex, + picture_type, src_area, dst_surface->texture, dst_area, fence); +} + +static void +sp_mpeg12_set_decode_target(struct pipe_video_context *vpipe, + struct pipe_video_surface *dt) +{ + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + + assert(vpipe); + assert(dt); + + pipe_video_surface_reference(&ctx->decode_target, dt); +} + +static bool +init_pipe_state(struct sp_mpeg12_context *ctx) +{ + struct pipe_rasterizer_state rast; + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state dsa; + + assert(ctx); + + rast.flatshade = 1; + rast.flatshade_first = 0; + rast.light_twoside = 0; + rast.front_winding = PIPE_WINDING_CCW; + rast.cull_mode = PIPE_WINDING_CW; + rast.fill_cw = PIPE_POLYGON_MODE_FILL; + rast.fill_ccw = PIPE_POLYGON_MODE_FILL; + rast.offset_cw = 0; + rast.offset_ccw = 0; + rast.scissor = 0; + rast.poly_smooth = 0; + rast.poly_stipple_enable = 0; + rast.point_sprite = 0; + rast.point_size_per_vertex = 0; + rast.multisample = 0; + rast.line_smooth = 0; + rast.line_stipple_enable = 0; + rast.line_stipple_factor = 0; + rast.line_stipple_pattern = 0; + rast.line_last_pixel = 0; + rast.bypass_vs_clip_and_viewport = 0; + rast.line_width = 1; + rast.point_smooth = 0; + rast.point_size = 1; + rast.offset_units = 1; + rast.offset_scale = 1; + /*rast.sprite_coord_mode[i] = ;*/ + ctx->rast = ctx->pipe->create_rasterizer_state(ctx->pipe, &rast); + ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rast); + + blend.blend_enable = 0; + blend.rgb_func = PIPE_BLEND_ADD; + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_func = PIPE_BLEND_ADD; + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.logicop_enable = 0; + blend.logicop_func = PIPE_LOGICOP_CLEAR; + /* Needed to allow color writes to FB, even if blending disabled */ + blend.colormask = PIPE_MASK_RGBA; + blend.dither = 0; + ctx->blend = ctx->pipe->create_blend_state(ctx->pipe, &blend); + ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend); + + dsa.depth.enabled = 0; + dsa.depth.writemask = 0; + dsa.depth.func = PIPE_FUNC_ALWAYS; + dsa.depth.occlusion_count = 0; + for (unsigned i = 0; i < 2; ++i) + { + dsa.stencil[i].enabled = 0; + dsa.stencil[i].func = PIPE_FUNC_ALWAYS; + dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].ref_value = 0; + dsa.stencil[i].valuemask = 0; + dsa.stencil[i].writemask = 0; + } + dsa.alpha.enabled = 0; + dsa.alpha.func = PIPE_FUNC_ALWAYS; + dsa.alpha.ref_value = 0; + ctx->dsa = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &dsa); + ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, ctx->dsa); + + return true; +} + +static struct pipe_video_context * +sp_mpeg12_create(struct pipe_screen *screen, enum pipe_video_profile profile, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height) +{ + struct sp_mpeg12_context *ctx; + + assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12); + + ctx = CALLOC_STRUCT(sp_mpeg12_context); + + if (!ctx) + return NULL; + + ctx->base.profile = profile; + ctx->base.chroma_format = chroma_format; + ctx->base.width = width; + ctx->base.height = height; + + ctx->base.screen = screen; + ctx->base.destroy = sp_mpeg12_destroy; + ctx->base.decode_macroblocks = sp_mpeg12_decode_macroblocks; + ctx->base.clear_surface = sp_mpeg12_clear_surface; + ctx->base.render_picture = sp_mpeg12_render_picture; + ctx->base.set_decode_target = sp_mpeg12_set_decode_target; + + ctx->pipe = softpipe_create(screen); + if (!ctx->pipe) + { + FREE(ctx); + return NULL; + } + + /* TODO: Use slice buffering for softpipe when implemented, no advantage to buffering an entire picture */ + if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe, + width, height, chroma_format, + VL_MPEG12_MC_RENDERER_BUFFER_PICTURE, + /* TODO: Use XFER_NONE when implemented */ + VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE, + true)) + { + ctx->pipe->destroy(ctx->pipe); + FREE(ctx); + return NULL; + } + + if (!vl_compositor_init(&ctx->compositor, ctx->pipe)) + { + vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer); + ctx->pipe->destroy(ctx->pipe); + FREE(ctx); + return NULL; + } + + if (!init_pipe_state(ctx)) + { + vl_compositor_cleanup(&ctx->compositor); + vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer); + ctx->pipe->destroy(ctx->pipe); + FREE(ctx); + return NULL; + } + + return &ctx->base; +} + +struct pipe_video_context * +sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height) +{ + assert(screen); + assert(width && height); + + switch (u_reduce_video_profile(profile)) + { + case PIPE_VIDEO_CODEC_MPEG12: + return sp_mpeg12_create(screen, profile, + chroma_format, + width, height); + default: + return NULL; + } +} diff --git a/src/gallium/drivers/softpipe/sp_video_context.h b/src/gallium/drivers/softpipe/sp_video_context.h new file mode 100644 index 0000000000..a70ce9f476 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_video_context.h @@ -0,0 +1,30 @@ +#ifndef SP_VIDEO_CONTEXT_H +#define SP_VIDEO_CONTEXT_H + +#include +#include +#include + +struct pipe_screen; +struct pipe_context; +struct pipe_video_surface; + +struct sp_mpeg12_context +{ + struct pipe_video_context base; + struct pipe_context *pipe; + struct pipe_video_surface *decode_target; + struct vl_mpeg12_mc_renderer mc_renderer; + struct vl_compositor compositor; + + void *rast; + void *dsa; + void *blend; +}; + +struct pipe_video_context * +sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height); + +#endif /* SP_VIDEO_CONTEXT_H */ diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index f252d6df00..1980831dd9 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -315,6 +315,30 @@ enum pipe_transfer_usage { #define PIPE_REFERENCED_FOR_READ (1 << 0) #define PIPE_REFERENCED_FOR_WRITE (1 << 1) + +enum pipe_video_codec +{ + PIPE_VIDEO_CODEC_MPEG12, /**< MPEG1, MPEG2 */ + PIPE_VIDEO_CODEC_MPEG4, /**< DIVX, XVID */ + PIPE_VIDEO_CODEC_VC1, /**< WMV */ + PIPE_VIDEO_CODEC_MPEG4_AVC /**< H.264 */ +}; + +enum pipe_video_profile +{ + PIPE_VIDEO_PROFILE_MPEG1, + PIPE_VIDEO_PROFILE_MPEG2_SIMPLE, + PIPE_VIDEO_PROFILE_MPEG2_MAIN, + PIPE_VIDEO_PROFILE_MPEG4_SIMPLE, + PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE, + PIPE_VIDEO_PROFILE_VC1_SIMPLE, + PIPE_VIDEO_PROFILE_VC1_MAIN, + PIPE_VIDEO_PROFILE_VC1_ADVANCED, + PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE, + PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN, + PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH +}; + #ifdef __cplusplus } #endif diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index c4469d4a9e..af23080920 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -613,6 +613,24 @@ pf_has_alpha( enum pipe_format format ) } } +enum pipe_video_chroma_format +{ + PIPE_VIDEO_CHROMA_FORMAT_420, + PIPE_VIDEO_CHROMA_FORMAT_422, + PIPE_VIDEO_CHROMA_FORMAT_444 +}; + +#if 0 +enum pipe_video_surface_format +{ + PIPE_VIDEO_SURFACE_FORMAT_NV12, /**< Planar; Y plane, UV plane */ + PIPE_VIDEO_SURFACE_FORMAT_YV12, /**< Planar; Y plane, U plane, V plane */ + PIPE_VIDEO_SURFACE_FORMAT_YUYV, /**< Interleaved; Y,U,Y,V,Y,U,Y,V */ + PIPE_VIDEO_SURFACE_FORMAT_UYVY, /**< Interleaved; U,Y,V,Y,U,Y,V,Y */ + PIPE_VIDEO_SURFACE_FORMAT_VUYA /**< Packed; A31-24|Y23-16|U15-8|V7-0 */ +}; +#endif + #ifdef __cplusplus } #endif diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index 3f30c52a16..f0a4de5df3 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -53,7 +53,10 @@ extern "C" { struct pipe_fence_handle; struct pipe_winsys; struct pipe_buffer; - +struct pipe_texture; +struct pipe_surface; +struct pipe_video_surface; +struct pipe_transfer; /** @@ -252,6 +255,17 @@ struct pipe_screen { void (*buffer_destroy)( struct pipe_buffer *buf ); + /** + * Create a video surface suitable for use as a decoding target by the + * driver's pipe_video_context. + */ + struct pipe_video_surface* + (*video_surface_create)( struct pipe_screen *screen, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height ); + + void (*video_surface_destroy)( struct pipe_video_surface *vsfc ); + /** * Do any special operations to ensure frontbuffer contents are diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h new file mode 100644 index 0000000000..937705ac50 --- /dev/null +++ b/src/gallium/include/pipe/p_video_context.h @@ -0,0 +1,92 @@ +#ifndef PIPE_VIDEO_CONTEXT_H +#define PIPE_VIDEO_CONTEXT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +struct pipe_screen; +struct pipe_buffer; +struct pipe_surface; +struct pipe_video_surface; +struct pipe_macroblock; +struct pipe_picture_desc; +struct pipe_fence_handle; + +/** + * Gallium video rendering context + */ +struct pipe_video_context +{ + struct pipe_screen *screen; + enum pipe_video_profile profile; + enum pipe_video_chroma_format chroma_format; + unsigned width; + unsigned height; + + void *priv; /**< context private data (for DRI for example) */ + + void (*destroy)(struct pipe_video_context *vpipe); + + /** + * Picture decoding and displaying + */ + /*@{*/ + void (*decode_bitstream)(struct pipe_video_context *vpipe, + unsigned num_bufs, + struct pipe_buffer **bitstream_buf); + + void (*decode_macroblocks)(struct pipe_video_context *vpipe, + struct pipe_video_surface *past, + struct pipe_video_surface *future, + unsigned num_macroblocks, + struct pipe_macroblock *macroblocks, + struct pipe_fence_handle **fence); + + void (*clear_surface)(struct pipe_video_context *vpipe, + unsigned x, unsigned y, + unsigned width, unsigned height, + unsigned value, + struct pipe_surface *surface); + + void (*render_picture)(struct pipe_video_context *vpipe, + /*struct pipe_surface *backround, + struct pipe_video_rect *backround_area,*/ + struct pipe_video_surface *src_surface, + enum pipe_mpeg12_picture_type picture_type, + /*unsigned num_past_surfaces, + struct pipe_video_surface *past_surfaces, + unsigned num_future_surfaces, + struct pipe_video_surface *future_surfaces,*/ + struct pipe_video_rect *src_area, + struct pipe_surface *dst_surface, + struct pipe_video_rect *dst_area, + /*unsigned num_layers, + struct pipe_texture *layers, + struct pipe_video_rect *layer_src_areas, + struct pipe_video_rect *layer_dst_areas,*/ + struct pipe_fence_handle **fence); + /*@}*/ + + /** + * Parameter-like states (or properties) + */ + /*@{*/ + void (*set_picture_desc)(struct pipe_video_context *vpipe, + const struct pipe_picture_desc *desc); + + void (*set_decode_target)(struct pipe_video_context *vpipe, + struct pipe_video_surface *dt); + + /* TODO: Interface for CSC matrix, scaling modes, post-processing, etc. */ + /*@}*/ +}; + + +#ifdef __cplusplus +} +#endif + +#endif /* PIPE_VIDEO_CONTEXT_H */ diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h new file mode 100644 index 0000000000..a0128fbd48 --- /dev/null +++ b/src/gallium/include/pipe/p_video_state.h @@ -0,0 +1,158 @@ +#ifndef PIPE_VIDEO_STATE_H +#define PIPE_VIDEO_STATE_H + +/* u_reduce_video_profile() needs these */ +#include +#include + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct pipe_video_surface +{ + struct pipe_reference reference; + struct pipe_screen *screen; + enum pipe_video_chroma_format chroma_format; + /*enum pipe_video_surface_format surface_format;*/ + unsigned width; + unsigned height; +}; + +static INLINE void +pipe_video_surface_reference(struct pipe_video_surface **ptr, struct pipe_video_surface *surf) +{ + struct pipe_video_surface *old_surf = *ptr; + + if (pipe_reference((struct pipe_reference **)ptr, &surf->reference)) + old_surf->screen->video_surface_destroy(old_surf); +} + +struct pipe_video_rect +{ + unsigned x, y, w, h; +}; + +static INLINE enum pipe_video_codec +u_reduce_video_profile(enum pipe_video_profile profile) +{ + switch (profile) + { + case PIPE_VIDEO_PROFILE_MPEG1: + case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE: + case PIPE_VIDEO_PROFILE_MPEG2_MAIN: + return PIPE_VIDEO_CODEC_MPEG12; + + case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE: + case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE: + return PIPE_VIDEO_CODEC_MPEG4; + + case PIPE_VIDEO_PROFILE_VC1_SIMPLE: + case PIPE_VIDEO_PROFILE_VC1_MAIN: + case PIPE_VIDEO_PROFILE_VC1_ADVANCED: + return PIPE_VIDEO_CODEC_VC1; + + case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: + case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: + return PIPE_VIDEO_CODEC_MPEG4_AVC; + + default: + assert(false); + } + + return -1; +} + +enum pipe_mpeg12_picture_type +{ + PIPE_MPEG12_PICTURE_TYPE_FIELD_TOP, + PIPE_MPEG12_PICTURE_TYPE_FIELD_BOTTOM, + PIPE_MPEG12_PICTURE_TYPE_FRAME +}; + +enum pipe_mpeg12_macroblock_type +{ + PIPE_MPEG12_MACROBLOCK_TYPE_INTRA, + PIPE_MPEG12_MACROBLOCK_TYPE_FWD, + PIPE_MPEG12_MACROBLOCK_TYPE_BKWD, + PIPE_MPEG12_MACROBLOCK_TYPE_BI, + + PIPE_MPEG12_MACROBLOCK_NUM_TYPES +}; + +enum pipe_mpeg12_motion_type +{ + PIPE_MPEG12_MOTION_TYPE_FIELD, + PIPE_MPEG12_MOTION_TYPE_FRAME, + PIPE_MPEG12_MOTION_TYPE_DUALPRIME, + PIPE_MPEG12_MOTION_TYPE_16x8 +}; + +enum pipe_mpeg12_dct_type +{ + PIPE_MPEG12_DCT_TYPE_FIELD, + PIPE_MPEG12_DCT_TYPE_FRAME +}; + +struct pipe_macroblock +{ + enum pipe_video_codec codec; +}; + +struct pipe_mpeg12_macroblock +{ + struct pipe_macroblock base; + + unsigned mbx; + unsigned mby; + enum pipe_mpeg12_macroblock_type mb_type; + enum pipe_mpeg12_motion_type mo_type; + enum pipe_mpeg12_dct_type dct_type; + signed pmv[2][2][2]; + unsigned cbp; + void *blocks; +}; + +#if 0 +struct pipe_picture_desc +{ + enum pipe_video_format format; +}; + +struct pipe_mpeg12_picture_desc +{ + struct pipe_picture_desc base; + + /* TODO: Use bitfields where possible? */ + struct pipe_surface *forward_reference; + struct pipe_surface *backward_reference; + unsigned picture_coding_type; + unsigned fcode; + unsigned intra_dc_precision; + unsigned picture_structure; + unsigned top_field_first; + unsigned frame_pred_frame_dct; + unsigned concealment_motion_vectors; + unsigned q_scale_type; + unsigned intra_vlc_format; + unsigned alternate_scan; + unsigned full_pel_forward_vector; + unsigned full_pel_backward_vector; + struct pipe_buffer *intra_quantizer_matrix; + struct pipe_buffer *non_intra_quantizer_matrix; + struct pipe_buffer *chroma_intra_quantizer_matrix; + struct pipe_buffer *chroma_non_intra_quantizer_matrix; +}; +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* PIPE_VIDEO_STATE_H */ -- cgit v1.2.3 From d52d51ab8ae1240f77b6c18c3e99be4bf4868037 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sun, 27 Sep 2009 23:14:52 -0400 Subject: g3dvl: Formatting and cleanups. --- src/gallium/auxiliary/vl/vl_bitstream_parser.c | 24 +- src/gallium/auxiliary/vl/vl_compositor.c | 62 ++--- src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 216 +++++---------- src/gallium/auxiliary/vl/vl_shader_build.c | 6 +- src/gallium/drivers/softpipe/sp_texture.c | 3 +- src/gallium/drivers/softpipe/sp_video_context.c | 332 +++++++++++------------ src/gallium/drivers/softpipe/sp_video_context.h | 16 +- src/gallium/state_trackers/xorg/xvmc/context.c | 26 +- src/gallium/state_trackers/xorg/xvmc/surface.c | 85 ++---- 9 files changed, 313 insertions(+), 457 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/vl/vl_bitstream_parser.c b/src/gallium/auxiliary/vl/vl_bitstream_parser.c index 356faa1348..7883b95bbe 100644 --- a/src/gallium/auxiliary/vl/vl_bitstream_parser.c +++ b/src/gallium/auxiliary/vl/vl_bitstream_parser.c @@ -23,12 +23,12 @@ show_bits(unsigned cursor, unsigned how_many_bits, const unsigned *bitstream) assert(bitstream); - if (cur_bit + how_many_bits > sizeof(unsigned) * CHAR_BIT) - { - return grab_bits(cur_bit, sizeof(unsigned) * CHAR_BIT - cur_bit, - bitstream[cur_int]) | - grab_bits(0, cur_bit + how_many_bits - sizeof(unsigned) * CHAR_BIT, - bitstream[cur_int + 1]) << (sizeof(unsigned) * CHAR_BIT - cur_bit); + if (cur_bit + how_many_bits > sizeof(unsigned) * CHAR_BIT) { + unsigned lower = grab_bits(cur_bit, sizeof(unsigned) * CHAR_BIT - cur_bit, + bitstream[cur_int]); + unsigned upper = grab_bits(0, cur_bit + how_many_bits - sizeof(unsigned) * CHAR_BIT, + bitstream[cur_int + 1]) + return lower | upper << (sizeof(unsigned) * CHAR_BIT - cur_bit); } else return grab_bits(cur_bit, how_many_bits, bitstream[cur_int]); @@ -87,16 +87,14 @@ vl_bitstream_parser_show_bits(struct vl_bitstream_parser *parser, cursor = parser->cursor; cur_bitstream = parser->cur_bitstream; - while (1) - { + while (1) { unsigned bits_left = parser->sizes[cur_bitstream] * CHAR_BIT - cursor; unsigned bits_to_show = how_many_bits > bits_left ? bits_left : how_many_bits; bits |= show_bits(cursor, bits_to_show, parser->bitstreams[cur_bitstream]) << shift; - if (how_many_bits > bits_to_show) - { + if (how_many_bits > bits_to_show) { how_many_bits -= bits_to_show; cursor = 0; ++cur_bitstream; @@ -117,8 +115,7 @@ void vl_bitstream_parser_forward(struct vl_bitstream_parser *parser, parser->cursor += how_many_bits; - while (parser->cursor > parser->sizes[parser->cur_bitstream] * CHAR_BIT) - { + while (parser->cursor > parser->sizes[parser->cur_bitstream] * CHAR_BIT) { parser->cursor -= parser->sizes[parser->cur_bitstream++] * CHAR_BIT; assert(parser->cur_bitstream < parser->num_bitstreams); } @@ -134,8 +131,7 @@ void vl_bitstream_parser_rewind(struct vl_bitstream_parser *parser, c = parser->cursor - how_many_bits; - while (c < 0) - { + while (c < 0) { c += parser->sizes[parser->cur_bitstream--] * CHAR_BIT; assert(parser->cur_bitstream < parser->num_bitstreams); } diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index 0894421c0b..bca03cd401 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -162,30 +162,28 @@ create_vert_shader(struct vl_compositor *c) ti = 3; /* - * decl i0 ; Vertex pos - * decl i1 ; Vertex texcoords + * decl i0 ; Vertex pos + * decl i1 ; Vertex texcoords */ - for (unsigned i = 0; i < 2; i++) - { + for (unsigned i = 0; i < 2; i++) { decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } /* - * decl c0 ; Scaling vector to scale vertex pos rect to destination size - * decl c1 ; Translation vector to move vertex pos rect into position - * decl c2 ; Scaling vector to scale texcoord rect to source size - * decl c3 ; Translation vector to move texcoord rect into position + * decl c0 ; Scaling vector to scale vertex pos rect to destination size + * decl c1 ; Translation vector to move vertex pos rect into position + * decl c2 ; Scaling vector to scale texcoord rect to source size + * decl c3 ; Translation vector to move texcoord rect into position */ decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); /* - * decl o0 ; Vertex pos - * decl o1 ; Vertex texcoords + * decl o0 ; Vertex pos + * decl o1 ; Vertex texcoords */ - for (unsigned i = 0; i < 2; i++) - { + for (unsigned i = 0; i < 2; i++) { decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } @@ -195,11 +193,10 @@ create_vert_shader(struct vl_compositor *c) ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); /* - * mad o0, i0, c0, c1 ; Scale and translate unit output rect to destination size and pos - * mad o1, i1, c2, c3 ; Scale and translate unit texcoord rect to source size and pos + * mad o0, i0, c0, c1 ; Scale and translate unit output rect to destination size and pos + * mad o1, i1, c2, c3 ; Scale and translate unit texcoord rect to source size and pos */ - for (unsigned i = 0; i < 2; ++i) - { + for (unsigned i = 0; i < 2; ++i) { inst = vl_inst4(TGSI_OPCODE_MAD, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i, TGSI_FILE_CONSTANT, i * 2, TGSI_FILE_CONSTANT, i * 2 + 1); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -239,18 +236,18 @@ create_frag_shader(struct vl_compositor *c) ti = 3; - /* decl i0 ; Texcoords for s0 */ + /* decl i0 ; Texcoords for s0 */ decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); /* - * decl c0 ; Bias vector for CSC - * decl c1-c4 ; CSC matrix c1-c4 + * decl c0 ; Bias vector for CSC + * decl c1-c4 ; CSC matrix c1-c4 */ decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* decl o0 ; Fragment color */ + /* decl o0 ; Fragment color */ decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -258,25 +255,24 @@ create_frag_shader(struct vl_compositor *c) decl = vl_decl_temps(0, 0); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* decl s0 ; Sampler for tex containing picture to display */ + /* decl s0 ; Sampler for tex containing picture to display */ decl = vl_decl_samplers(0, 0); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* tex2d t0, i0, s0 ; Read src pixel */ + /* tex2d t0, i0, s0 ; Read src pixel */ inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* sub t0, t0, c0 ; Subtract bias vector from pixel */ + /* sub t0, t0, c0 ; Subtract bias vector from pixel */ inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* - * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix + * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix * dp4 o0.y, t0, c2 * dp4 o0.z, t0, c3 */ - for (unsigned i = 0; i < 3; ++i) - { + for (unsigned i = 0; i < 3; ++i) { inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1); inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); @@ -365,10 +361,10 @@ init_buffers(struct vl_compositor *c) c->vertex_bufs[0].buffer_offset = 0; c->vertex_bufs[0].buffer = pipe_buffer_create ( - c->pipe->screen, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct vertex2f) * 4 + c->pipe->screen, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vertex2f) * 4 ); memcpy @@ -476,13 +472,11 @@ bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *p if (!init_pipe_state(compositor)) return false; - if (!init_shaders(compositor)) - { + if (!init_shaders(compositor)) { cleanup_pipe_state(compositor); return false; } - if (!init_buffers(compositor)) - { + if (!init_buffers(compositor)) { cleanup_shaders(compositor); cleanup_pipe_state(compositor); return false; diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index 7e73c5ced9..b728067d79 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -99,8 +99,7 @@ create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r) * decl i2 ; Chroma Cb texcoords * decl i3 ; Chroma Cr texcoords */ - for (unsigned i = 0; i < 4; i++) - { + for (unsigned i = 0; i < 4; i++) { decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } @@ -111,8 +110,7 @@ create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r) * decl o2 ; Chroma Cb texcoords * decl o3 ; Chroma Cr texcoords */ - for (unsigned i = 0; i < 4; i++) - { + for (unsigned i = 0; i < 4; i++) { decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } @@ -123,8 +121,7 @@ create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r) * mov o2, i2 ; Move input chroma Cb texcoords to output * mov o3, i3 ; Move input chroma Cr texcoords to output */ - for (unsigned i = 0; i < 4; ++i) - { + for (unsigned i = 0; i < 4; ++i) { inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -169,8 +166,7 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r) * decl i1 ; Chroma Cb texcoords * decl i2 ; Chroma Cr texcoords */ - for (unsigned i = 0; i < 3; ++i) - { + for (unsigned i = 0; i < 3; ++i) { decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } @@ -192,8 +188,7 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r) * decl s1 ; Sampler for chroma Cb texture * decl s2 ; Sampler for chroma Cr texture */ - for (unsigned i = 0; i < 3; ++i) - { + for (unsigned i = 0; i < 3; ++i) { decl = vl_decl_samplers(i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } @@ -206,8 +201,7 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r) * tex2d t1, i2, s2 ; Read texel from chroma Cr texture * mov t0.z, t1.x ; Move Cr sample into .z component */ - for (unsigned i = 0; i < 3; ++i) - { + for (unsigned i = 0; i < 3; ++i) { inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); @@ -266,8 +260,7 @@ create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) * decl i4 ; Ref surface top field texcoords * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream) */ - for (unsigned i = 0; i < 6; i++) - { + for (unsigned i = 0; i < 6; i++) { decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } @@ -279,8 +272,7 @@ create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) * decl o3 ; Chroma Cr texcoords * decl o4 ; Ref macroblock texcoords */ - for (unsigned i = 0; i < 5; i++) - { + for (unsigned i = 0; i < 5; i++) { decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } @@ -291,8 +283,7 @@ create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) * mov o2, i2 ; Move input chroma Cb texcoords to output * mov o3, i3 ; Move input chroma Cr texcoords to output */ - for (unsigned i = 0; i < 4; ++i) - { + for (unsigned i = 0; i < 4; ++i) { inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -348,8 +339,7 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) * decl i2 ; Chroma Cr texcoords * decl i3 ; Ref macroblock texcoords */ - for (unsigned i = 0; i < 4; ++i) - { + for (unsigned i = 0; i < 4; ++i) { decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } @@ -372,8 +362,7 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) * decl s2 ; Sampler for chroma Cr texture * decl s3 ; Sampler for ref surface texture */ - for (unsigned i = 0; i < 4; ++i) - { + for (unsigned i = 0; i < 4; ++i) { decl = vl_decl_samplers(i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } @@ -386,8 +375,7 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) * tex2d t1, i2, s2 ; Read texel from chroma Cr texture * mov t0.z, t1.x ; Move Cr sample into .z component */ - for (unsigned i = 0; i < 3; ++i) - { + for (unsigned i = 0; i < 3; ++i) { inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); @@ -462,8 +450,7 @@ create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) * decl i6 ; Second ref macroblock top field texcoords * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream) */ - for (unsigned i = 0; i < 8; i++) - { + for (unsigned i = 0; i < 8; i++) { decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } @@ -476,8 +463,7 @@ create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) * decl o4 ; First ref macroblock texcoords * decl o5 ; Second ref macroblock texcoords */ - for (unsigned i = 0; i < 6; i++) - { + for (unsigned i = 0; i < 6; i++) { decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } @@ -488,8 +474,7 @@ create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) * mov o2, i2 ; Move input chroma Cb texcoords to output * mov o3, i3 ; Move input chroma Cr texcoords to output */ - for (unsigned i = 0; i < 4; ++i) - { + for (unsigned i = 0; i < 4; ++i) { inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -498,8 +483,7 @@ create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords */ - for (unsigned i = 0; i < 2; ++i) - { + for (unsigned i = 0; i < 2; ++i) { inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -552,8 +536,7 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) * decl i3 ; First ref macroblock texcoords * decl i4 ; Second ref macroblock texcoords */ - for (unsigned i = 0; i < 5; ++i) - { + for (unsigned i = 0; i < 5; ++i) { decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } @@ -580,8 +563,7 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) * decl s3 ; Sampler for first ref surface texture * decl s4 ; Sampler for second ref surface texture */ - for (unsigned i = 0; i < 5; ++i) - { + for (unsigned i = 0; i < 5; ++i) { decl = vl_decl_samplers(i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } @@ -594,8 +576,7 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) * tex2d t1, i2, s2 ; Read texel from chroma Cr texture * mov t0.z, t1.x ; Move Cr sample into .z component */ - for (unsigned i = 0; i < 3; ++i) - { + for (unsigned i = 0; i < 3; ++i) { inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); @@ -615,8 +596,7 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) * tex2d t1, i3, s3 ; Read texel from first ref macroblock * tex2d t2, i4, s4 ; Read texel from second ref macroblock */ - for (unsigned i = 0; i < 2; ++i) - { + for (unsigned i = 0; i < 2; ++i) { inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -655,8 +635,7 @@ xfer_buffers_map(struct vl_mpeg12_mc_renderer *r) { assert(r); - for (unsigned i = 0; i < 3; ++i) - { + for (unsigned i = 0; i < 3; ++i) { r->tex_transfer[i] = r->pipe->screen->get_tex_transfer ( r->pipe->screen, r->textures.all[i], @@ -673,8 +652,7 @@ xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r) { assert(r); - for (unsigned i = 0; i < 3; ++i) - { + for (unsigned i = 0; i < 3; ++i) { r->pipe->screen->transfer_unmap(r->pipe->screen, r->tex_transfer[i]); r->pipe->screen->tex_transfer_destroy(r->tex_transfer[i]); } @@ -710,13 +688,11 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r) filters[0] = PIPE_TEX_FILTER_NEAREST; /* Chroma filters */ if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 || - r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) - { + r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) { filters[1] = PIPE_TEX_FILTER_NEAREST; filters[2] = PIPE_TEX_FILTER_NEAREST; } - else - { + else { filters[1] = PIPE_TEX_FILTER_LINEAR; filters[2] = PIPE_TEX_FILTER_LINEAR; } @@ -724,8 +700,7 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r) filters[3] = PIPE_TEX_FILTER_LINEAR; filters[4] = PIPE_TEX_FILTER_LINEAR; - for (unsigned i = 0; i < 5; ++i) - { + for (unsigned i = 0; i < 5; ++i) { sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; @@ -817,8 +792,7 @@ init_buffers(struct vl_mpeg12_mc_renderer *r) r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template); - if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) - { + if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) { template.width[0] = r->pot_buffers ? util_next_power_of_two(r->picture_width / 2) : r->picture_width / 2; @@ -847,8 +821,7 @@ init_buffers(struct vl_mpeg12_mc_renderer *r) sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch ); - for (unsigned i = 1; i < 3; ++i) - { + for (unsigned i = 1; i < 3; ++i) { r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2; r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1; r->vertex_bufs.all[i].buffer_offset = 0; @@ -957,8 +930,7 @@ get_macroblock_type(struct pipe_mpeg12_macroblock *mb) { assert(mb); - switch (mb->mb_type) - { + switch (mb->mb_type) { case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA: return MACROBLOCK_TYPE_INTRA; case PIPE_MPEG12_MACROBLOCK_TYPE_FWD: @@ -1058,8 +1030,7 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r, assert(ycbcr_vb); assert(pos < r->macroblocks_per_batch); - switch (mb->mb_type) - { + switch (mb->mb_type) { case PIPE_MPEG12_MACROBLOCK_TYPE_BI: { struct vertex2f *vb; @@ -1071,21 +1042,17 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r, mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x; mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y; - if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) - { - for (unsigned i = 0; i < 24 * 2; i += 2) - { + if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) { + for (unsigned i = 0; i < 24 * 2; i += 2) { vb[i].x = mo_vec[0].x; vb[i].y = mo_vec[0].y; } } - else - { + else { mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x; mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y; - for (unsigned i = 0; i < 24 * 2; i += 2) - { + for (unsigned i = 0; i < 24 * 2; i += 2) { vb[i].x = mo_vec[0].x; vb[i].y = mo_vec[0].y; vb[i + 1].x = mo_vec[1].x; @@ -1104,41 +1071,33 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r, vb = ref_vb[0] + pos * 2 * 24; - if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) - { + if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) { mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x; mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y; - if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) - { + if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) { mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x; mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y; } } - else - { + else { mo_vec[0].x = mb->pmv[0][0][0] * 0.5f * r->surface_tex_inv_size.x; mo_vec[0].y = mb->pmv[0][0][1] * 0.5f * r->surface_tex_inv_size.y; - if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) - { + if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) { mo_vec[1].x = mb->pmv[1][0][0] * 0.5f * r->surface_tex_inv_size.x; mo_vec[1].y = mb->pmv[1][0][1] * 0.5f * r->surface_tex_inv_size.y; } } - if (mb->mb_type == PIPE_MPEG12_MOTION_TYPE_FRAME) - { - for (unsigned i = 0; i < 24 * 2; i += 2) - { + if (mb->mb_type == PIPE_MPEG12_MOTION_TYPE_FRAME) { + for (unsigned i = 0; i < 24 * 2; i += 2) { vb[i].x = mo_vec[0].x; vb[i].y = mo_vec[0].y; } } - else - { - for (unsigned i = 0; i < 24 * 2; i += 2) - { + else { + for (unsigned i = 0; i < 24 * 2; i += 2) { vb[i].x = mo_vec[0].x; vb[i].y = mo_vec[0].y; vb[i + 1].x = mo_vec[1].x; @@ -1198,8 +1157,7 @@ gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r, assert(r); assert(num_macroblocks); - for (unsigned i = 0; i < r->num_macroblocks; ++i) - { + for (unsigned i = 0; i < r->num_macroblocks; ++i) { enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]); ++num_macroblocks[mb_type]; } @@ -1224,8 +1182,7 @@ gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r, PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD ); - for (unsigned i = 0; i < r->num_macroblocks; ++i) - { + for (unsigned i = 0; i < r->num_macroblocks; ++i) { enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]); gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type], @@ -1276,8 +1233,7 @@ flush(struct vl_mpeg12_mc_renderer *r) r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_FRAGMENT, 0, &r->fs_const_buf); - if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) - { + if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) { r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all); r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems); r->pipe->set_sampler_textures(r->pipe, 3, r->textures.all); @@ -1290,8 +1246,7 @@ flush(struct vl_mpeg12_mc_renderer *r) vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24; } - if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) - { + if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) { r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); r->textures.individual.ref[0] = r->past; @@ -1305,8 +1260,7 @@ flush(struct vl_mpeg12_mc_renderer *r) vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24; } - if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) - { + if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) { r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); r->textures.individual.ref[0] = r->past; @@ -1320,8 +1274,7 @@ flush(struct vl_mpeg12_mc_renderer *r) vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24; } - if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) - { + if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) { r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); r->textures.individual.ref[0] = r->future; @@ -1335,8 +1288,7 @@ flush(struct vl_mpeg12_mc_renderer *r) vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24; } - if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) - { + if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) { r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); r->textures.individual.ref[0] = r->future; @@ -1350,8 +1302,7 @@ flush(struct vl_mpeg12_mc_renderer *r) vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24; } - if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) - { + if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) { r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all); r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems); r->textures.individual.ref[0] = r->past; @@ -1366,8 +1317,7 @@ flush(struct vl_mpeg12_mc_renderer *r) vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24; } - if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) - { + if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) { r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all); r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems); r->textures.individual.ref[0] = r->past; @@ -1436,20 +1386,15 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby, tex_pitch = r->tex_transfer[0]->stride / r->tex_transfer[0]->block.size; texels = r->texels[0] + mbpy * tex_pitch + mbpx; - for (unsigned y = 0; y < 2; ++y) - { - for (unsigned x = 0; x < 2; ++x, ++tb) - { - if ((cbp >> (5 - tb)) & 1) - { - if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) - { + for (unsigned y = 0; y < 2; ++y) { + for (unsigned x = 0; x < 2; ++x, ++tb) { + if ((cbp >> (5 - tb)) & 1) { + if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) { grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch); } - else - { + else { grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels + y * tex_pitch + x * BLOCK_WIDTH, tex_pitch); @@ -1457,14 +1402,11 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby, ++sb; } - else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) - { + else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) { if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL || - ZERO_BLOCK_IS_NIL(r->zero_block[0])) - { + ZERO_BLOCK_IS_NIL(r->zero_block[0])) { fill_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch); - if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) - { + if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) { r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x; r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y; } @@ -1479,24 +1421,19 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby, mbpx /= 2; mbpy /= 2; - for (tb = 0; tb < 2; ++tb) - { + for (tb = 0; tb < 2; ++tb) { tex_pitch = r->tex_transfer[tb + 1]->stride / r->tex_transfer[tb + 1]->block.size; texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx; - if ((cbp >> (1 - tb)) & 1) - { + if ((cbp >> (1 - tb)) & 1) { grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch); ++sb; } - else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) - { + else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) { if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL || - ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) - { + ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) { fill_zero_block(texels, tex_pitch); - if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) - { + if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) { r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x; r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y; } @@ -1553,13 +1490,11 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer, if (!init_pipe_state(renderer)) return false; - if (!init_shaders(renderer)) - { + if (!init_shaders(renderer)) { cleanup_pipe_state(renderer); return false; } - if (!init_buffers(renderer)) - { + if (!init_buffers(renderer)) { cleanup_shaders(renderer); cleanup_pipe_state(renderer); return false; @@ -1607,12 +1542,9 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer assert(num_macroblocks); assert(mpeg12_macroblocks); - if (renderer->surface) - { - if (surface != renderer->surface) - { - if (renderer->num_macroblocks > 0) - { + if (renderer->surface) { + if (surface != renderer->surface) { + if (renderer->num_macroblocks > 0) { xfer_buffers_unmap(renderer); flush(renderer); } @@ -1627,8 +1559,7 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer else new_surface = true; - if (new_surface) - { + if (new_surface) { renderer->surface = surface; renderer->past = past; renderer->future = future; @@ -1637,21 +1568,18 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer renderer->surface_tex_inv_size.y = 1.0f / surface->height[0]; } - while (num_macroblocks) - { + while (num_macroblocks) { unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks; unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch); - for (unsigned i = 0; i < num_to_submit; ++i) - { + for (unsigned i = 0; i < num_to_submit; ++i) { assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12); grab_macroblock(renderer, &mpeg12_macroblocks[i]); } num_macroblocks -= num_to_submit; - if (renderer->num_macroblocks == renderer->macroblocks_per_batch) - { + if (renderer->num_macroblocks == renderer->macroblocks_per_batch) { xfer_buffers_unmap(renderer); flush(renderer); xfer_buffers_map(renderer); diff --git a/src/gallium/auxiliary/vl/vl_shader_build.c b/src/gallium/auxiliary/vl/vl_shader_build.c index 5a4a5ab72c..9ad1e052c6 100644 --- a/src/gallium/auxiliary/vl/vl_shader_build.c +++ b/src/gallium/auxiliary/vl/vl_shader_build.c @@ -30,9 +30,9 @@ struct tgsi_full_declaration vl_decl_interpolated_input assert ( - interpolation == TGSI_INTERPOLATE_CONSTANT || - interpolation == TGSI_INTERPOLATE_LINEAR || - interpolation == TGSI_INTERPOLATE_PERSPECTIVE + interpolation == TGSI_INTERPOLATE_CONSTANT || + interpolation == TGSI_INTERPOLATE_LINEAR || + interpolation == TGSI_INTERPOLATE_PERSPECTIVE ); decl.Declaration.File = TGSI_FILE_INPUT; diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 45289380d0..1c64d58372 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -415,8 +415,7 @@ softpipe_video_surface_create(struct pipe_screen *screen, template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET; sp_vsfc->tex = screen->texture_create(screen, &template); - if (!sp_vsfc->tex) - { + if (!sp_vsfc->tex) { FREE(sp_vsfc); return NULL; } diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c index 1b47bbede2..3be33fbbdf 100644 --- a/src/gallium/drivers/softpipe/sp_video_context.c +++ b/src/gallium/drivers/softpipe/sp_video_context.c @@ -7,24 +7,24 @@ static void sp_mpeg12_destroy(struct pipe_video_context *vpipe) { - struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; - assert(vpipe); + assert(vpipe); - /* Asserted in softpipe_delete_fs_state() for some reason */ - ctx->pipe->bind_vs_state(ctx->pipe, NULL); - ctx->pipe->bind_fs_state(ctx->pipe, NULL); + /* Asserted in softpipe_delete_fs_state() for some reason */ + ctx->pipe->bind_vs_state(ctx->pipe, NULL); + ctx->pipe->bind_fs_state(ctx->pipe, NULL); - ctx->pipe->delete_blend_state(ctx->pipe, ctx->blend); - ctx->pipe->delete_rasterizer_state(ctx->pipe, ctx->rast); - ctx->pipe->delete_depth_stencil_alpha_state(ctx->pipe, ctx->dsa); + ctx->pipe->delete_blend_state(ctx->pipe, ctx->blend); + ctx->pipe->delete_rasterizer_state(ctx->pipe, ctx->rast); + ctx->pipe->delete_depth_stencil_alpha_state(ctx->pipe, ctx->dsa); - pipe_video_surface_reference(&ctx->decode_target, NULL); - vl_compositor_cleanup(&ctx->compositor); - vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer); - ctx->pipe->destroy(ctx->pipe); + pipe_video_surface_reference(&ctx->decode_target, NULL); + vl_compositor_cleanup(&ctx->compositor); + vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer); + ctx->pipe->destroy(ctx->pipe); - FREE(ctx); + FREE(ctx); } static void @@ -35,20 +35,20 @@ sp_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe, struct pipe_macroblock *macroblocks, struct pipe_fence_handle **fence) { - struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; - struct pipe_mpeg12_macroblock *mpeg12_macroblocks = (struct pipe_mpeg12_macroblock*)macroblocks; + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + struct pipe_mpeg12_macroblock *mpeg12_macroblocks = (struct pipe_mpeg12_macroblock*)macroblocks; - assert(vpipe); - assert(num_macroblocks); - assert(macroblocks); - assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12); - assert(ctx->decode_target); + assert(vpipe); + assert(num_macroblocks); + assert(macroblocks); + assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12); + assert(ctx->decode_target); - vl_mpeg12_mc_renderer_render_macroblocks(&ctx->mc_renderer, - softpipe_video_surface(ctx->decode_target)->tex, - past ? softpipe_video_surface(past)->tex : NULL, - future ? softpipe_video_surface(future)->tex : NULL, - num_macroblocks, mpeg12_macroblocks, fence); + vl_mpeg12_mc_renderer_render_macroblocks(&ctx->mc_renderer, + softpipe_video_surface(ctx->decode_target)->tex, + past ? softpipe_video_surface(past)->tex : NULL, + future ? softpipe_video_surface(future)->tex : NULL, + num_macroblocks, mpeg12_macroblocks, fence); } static void @@ -58,12 +58,12 @@ sp_mpeg12_clear_surface(struct pipe_video_context *vpipe, unsigned value, struct pipe_surface *surface) { - struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; - assert(vpipe); - assert(surface); + assert(vpipe); + assert(surface); - ctx->pipe->surface_fill(ctx->pipe, surface, x, y, width, height, value); + ctx->pipe->surface_fill(ctx->pipe, surface, x, y, width, height, value); } static void @@ -85,106 +85,105 @@ sp_mpeg12_render_picture(struct pipe_video_context *vpipe, struct pipe_video_rect *layer_dst_areas*/ struct pipe_fence_handle **fence) { - struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; - assert(vpipe); - assert(src_surface); - assert(src_area); - assert(dst_surface); - assert(dst_area); + assert(vpipe); + assert(src_surface); + assert(src_area); + assert(dst_surface); + assert(dst_area); - vl_compositor_render(&ctx->compositor, softpipe_video_surface(src_surface)->tex, - picture_type, src_area, dst_surface->texture, dst_area, fence); + vl_compositor_render(&ctx->compositor, softpipe_video_surface(src_surface)->tex, + picture_type, src_area, dst_surface->texture, dst_area, fence); } static void sp_mpeg12_set_decode_target(struct pipe_video_context *vpipe, struct pipe_video_surface *dt) { - struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; - assert(vpipe); - assert(dt); + assert(vpipe); + assert(dt); - pipe_video_surface_reference(&ctx->decode_target, dt); + pipe_video_surface_reference(&ctx->decode_target, dt); } static bool init_pipe_state(struct sp_mpeg12_context *ctx) { - struct pipe_rasterizer_state rast; - struct pipe_blend_state blend; - struct pipe_depth_stencil_alpha_state dsa; + struct pipe_rasterizer_state rast; + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state dsa; - assert(ctx); + assert(ctx); - rast.flatshade = 1; - rast.flatshade_first = 0; - rast.light_twoside = 0; - rast.front_winding = PIPE_WINDING_CCW; - rast.cull_mode = PIPE_WINDING_CW; - rast.fill_cw = PIPE_POLYGON_MODE_FILL; - rast.fill_ccw = PIPE_POLYGON_MODE_FILL; - rast.offset_cw = 0; - rast.offset_ccw = 0; - rast.scissor = 0; - rast.poly_smooth = 0; - rast.poly_stipple_enable = 0; - rast.point_sprite = 0; - rast.point_size_per_vertex = 0; - rast.multisample = 0; - rast.line_smooth = 0; - rast.line_stipple_enable = 0; - rast.line_stipple_factor = 0; - rast.line_stipple_pattern = 0; - rast.line_last_pixel = 0; - rast.bypass_vs_clip_and_viewport = 0; - rast.line_width = 1; - rast.point_smooth = 0; - rast.point_size = 1; - rast.offset_units = 1; - rast.offset_scale = 1; - /*rast.sprite_coord_mode[i] = ;*/ - ctx->rast = ctx->pipe->create_rasterizer_state(ctx->pipe, &rast); - ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rast); + rast.flatshade = 1; + rast.flatshade_first = 0; + rast.light_twoside = 0; + rast.front_winding = PIPE_WINDING_CCW; + rast.cull_mode = PIPE_WINDING_CW; + rast.fill_cw = PIPE_POLYGON_MODE_FILL; + rast.fill_ccw = PIPE_POLYGON_MODE_FILL; + rast.offset_cw = 0; + rast.offset_ccw = 0; + rast.scissor = 0; + rast.poly_smooth = 0; + rast.poly_stipple_enable = 0; + rast.point_sprite = 0; + rast.point_size_per_vertex = 0; + rast.multisample = 0; + rast.line_smooth = 0; + rast.line_stipple_enable = 0; + rast.line_stipple_factor = 0; + rast.line_stipple_pattern = 0; + rast.line_last_pixel = 0; + rast.bypass_vs_clip_and_viewport = 0; + rast.line_width = 1; + rast.point_smooth = 0; + rast.point_size = 1; + rast.offset_units = 1; + rast.offset_scale = 1; + /*rast.sprite_coord_mode[i] = ;*/ + ctx->rast = ctx->pipe->create_rasterizer_state(ctx->pipe, &rast); + ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rast); - blend.blend_enable = 0; - blend.rgb_func = PIPE_BLEND_ADD; - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ONE; - blend.alpha_func = PIPE_BLEND_ADD; - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ONE; - blend.logicop_enable = 0; - blend.logicop_func = PIPE_LOGICOP_CLEAR; - /* Needed to allow color writes to FB, even if blending disabled */ - blend.colormask = PIPE_MASK_RGBA; - blend.dither = 0; - ctx->blend = ctx->pipe->create_blend_state(ctx->pipe, &blend); - ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend); + blend.blend_enable = 0; + blend.rgb_func = PIPE_BLEND_ADD; + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_func = PIPE_BLEND_ADD; + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.logicop_enable = 0; + blend.logicop_func = PIPE_LOGICOP_CLEAR; + /* Needed to allow color writes to FB, even if blending disabled */ + blend.colormask = PIPE_MASK_RGBA; + blend.dither = 0; + ctx->blend = ctx->pipe->create_blend_state(ctx->pipe, &blend); + ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend); - dsa.depth.enabled = 0; - dsa.depth.writemask = 0; - dsa.depth.func = PIPE_FUNC_ALWAYS; - dsa.depth.occlusion_count = 0; - for (unsigned i = 0; i < 2; ++i) - { - dsa.stencil[i].enabled = 0; - dsa.stencil[i].func = PIPE_FUNC_ALWAYS; - dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP; - dsa.stencil[i].ref_value = 0; - dsa.stencil[i].valuemask = 0; - dsa.stencil[i].writemask = 0; - } - dsa.alpha.enabled = 0; - dsa.alpha.func = PIPE_FUNC_ALWAYS; - dsa.alpha.ref_value = 0; - ctx->dsa = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &dsa); - ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, ctx->dsa); + dsa.depth.enabled = 0; + dsa.depth.writemask = 0; + dsa.depth.func = PIPE_FUNC_ALWAYS; + dsa.depth.occlusion_count = 0; + for (unsigned i = 0; i < 2; ++i) { + dsa.stencil[i].enabled = 0; + dsa.stencil[i].func = PIPE_FUNC_ALWAYS; + dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].ref_value = 0; + dsa.stencil[i].valuemask = 0; + dsa.stencil[i].writemask = 0; + } + dsa.alpha.enabled = 0; + dsa.alpha.func = PIPE_FUNC_ALWAYS; + dsa.alpha.ref_value = 0; + ctx->dsa = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &dsa); + ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, ctx->dsa); - return true; + return true; } static struct pipe_video_context * @@ -192,65 +191,61 @@ sp_mpeg12_create(struct pipe_screen *screen, enum pipe_video_profile profile, enum pipe_video_chroma_format chroma_format, unsigned width, unsigned height) { - struct sp_mpeg12_context *ctx; + struct sp_mpeg12_context *ctx; - assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12); + assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12); - ctx = CALLOC_STRUCT(sp_mpeg12_context); + ctx = CALLOC_STRUCT(sp_mpeg12_context); - if (!ctx) - return NULL; + if (!ctx) + return NULL; - ctx->base.profile = profile; - ctx->base.chroma_format = chroma_format; - ctx->base.width = width; - ctx->base.height = height; + ctx->base.profile = profile; + ctx->base.chroma_format = chroma_format; + ctx->base.width = width; + ctx->base.height = height; - ctx->base.screen = screen; - ctx->base.destroy = sp_mpeg12_destroy; - ctx->base.decode_macroblocks = sp_mpeg12_decode_macroblocks; - ctx->base.clear_surface = sp_mpeg12_clear_surface; - ctx->base.render_picture = sp_mpeg12_render_picture; - ctx->base.set_decode_target = sp_mpeg12_set_decode_target; + ctx->base.screen = screen; + ctx->base.destroy = sp_mpeg12_destroy; + ctx->base.decode_macroblocks = sp_mpeg12_decode_macroblocks; + ctx->base.clear_surface = sp_mpeg12_clear_surface; + ctx->base.render_picture = sp_mpeg12_render_picture; + ctx->base.set_decode_target = sp_mpeg12_set_decode_target; - ctx->pipe = softpipe_create(screen); - if (!ctx->pipe) - { - FREE(ctx); - return NULL; - } + ctx->pipe = softpipe_create(screen); + if (!ctx->pipe) { + FREE(ctx); + return NULL; + } - /* TODO: Use slice buffering for softpipe when implemented, no advantage to buffering an entire picture */ - if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe, - width, height, chroma_format, - VL_MPEG12_MC_RENDERER_BUFFER_PICTURE, - /* TODO: Use XFER_NONE when implemented */ - VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE, - true)) - { - ctx->pipe->destroy(ctx->pipe); - FREE(ctx); - return NULL; - } + /* TODO: Use slice buffering for softpipe when implemented, no advantage to buffering an entire picture */ + if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe, + width, height, chroma_format, + VL_MPEG12_MC_RENDERER_BUFFER_PICTURE, + /* TODO: Use XFER_NONE when implemented */ + VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE, + true)) { + ctx->pipe->destroy(ctx->pipe); + FREE(ctx); + return NULL; + } - if (!vl_compositor_init(&ctx->compositor, ctx->pipe)) - { - vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer); - ctx->pipe->destroy(ctx->pipe); - FREE(ctx); - return NULL; - } + if (!vl_compositor_init(&ctx->compositor, ctx->pipe)) { + vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer); + ctx->pipe->destroy(ctx->pipe); + FREE(ctx); + return NULL; + } - if (!init_pipe_state(ctx)) - { - vl_compositor_cleanup(&ctx->compositor); - vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer); - ctx->pipe->destroy(ctx->pipe); - FREE(ctx); - return NULL; - } + if (!init_pipe_state(ctx)) { + vl_compositor_cleanup(&ctx->compositor); + vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer); + ctx->pipe->destroy(ctx->pipe); + FREE(ctx); + return NULL; + } - return &ctx->base; + return &ctx->base; } struct pipe_video_context * @@ -258,16 +253,15 @@ sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile, enum pipe_video_chroma_format chroma_format, unsigned width, unsigned height) { - assert(screen); - assert(width && height); + assert(screen); + assert(width && height); - switch (u_reduce_video_profile(profile)) - { - case PIPE_VIDEO_CODEC_MPEG12: - return sp_mpeg12_create(screen, profile, - chroma_format, - width, height); - default: - return NULL; - } + switch (u_reduce_video_profile(profile)) { + case PIPE_VIDEO_CODEC_MPEG12: + return sp_mpeg12_create(screen, profile, + chroma_format, + width, height); + default: + return NULL; + } } diff --git a/src/gallium/drivers/softpipe/sp_video_context.h b/src/gallium/drivers/softpipe/sp_video_context.h index a70ce9f476..2c7691c7cb 100644 --- a/src/gallium/drivers/softpipe/sp_video_context.h +++ b/src/gallium/drivers/softpipe/sp_video_context.h @@ -11,15 +11,15 @@ struct pipe_video_surface; struct sp_mpeg12_context { - struct pipe_video_context base; - struct pipe_context *pipe; - struct pipe_video_surface *decode_target; - struct vl_mpeg12_mc_renderer mc_renderer; - struct vl_compositor compositor; + struct pipe_video_context base; + struct pipe_context *pipe; + struct pipe_video_surface *decode_target; + struct vl_mpeg12_mc_renderer mc_renderer; + struct vl_compositor compositor; - void *rast; - void *dsa; - void *blend; + void *rast; + void *dsa; + void *blend; }; struct pipe_video_context * diff --git a/src/gallium/state_trackers/xorg/xvmc/context.c b/src/gallium/state_trackers/xorg/xvmc/context.c index 33f47838f5..ff2bd295ec 100644 --- a/src/gallium/state_trackers/xorg/xvmc/context.c +++ b/src/gallium/state_trackers/xorg/xvmc/context.c @@ -28,16 +28,13 @@ static Status Validate(Display *dpy, XvPortID port, int surface_type_id, *found_port = false; - for (unsigned int i = 0; i < XScreenCount(dpy); ++i) - { + for (unsigned int i = 0; i < XScreenCount(dpy); ++i) { ret = XvQueryAdaptors(dpy, XRootWindow(dpy, i), &num_adaptors, &adaptor_info); if (ret != Success) return ret; - for (unsigned int j = 0; j < num_adaptors && !*found_port; ++j) - { - for (unsigned int k = 0; k < adaptor_info[j].num_ports && !*found_port; ++k) - { + for (unsigned int j = 0; j < num_adaptors && !*found_port; ++j) { + for (unsigned int k = 0; k < adaptor_info[j].num_ports && !*found_port; ++k) { XvMCSurfaceInfo *surface_info; if (adaptor_info[j].base_id + k != port) @@ -46,14 +43,12 @@ static Status Validate(Display *dpy, XvPortID port, int surface_type_id, *found_port = true; surface_info = XvMCListSurfaceTypes(dpy, adaptor_info[j].base_id, &num_types); - if (!surface_info) - { + if (!surface_info) { XvFreeAdaptorInfo(adaptor_info); return BadAlloc; } - for (unsigned int l = 0; l < num_types && !found_surface; ++l) - { + for (unsigned int l = 0; l < num_types && !found_surface; ++l) { if (surface_info[l].surface_type_id != surface_type_id) continue; @@ -65,7 +60,7 @@ static Status Validate(Display *dpy, XvPortID port, int surface_type_id, *screen = i; } - XFree(surface_info); + XFree(surface_info); } } @@ -102,8 +97,7 @@ static enum pipe_video_profile ProfileToPipe(int xvmc_profile) static enum pipe_video_chroma_format FormatToPipe(int xvmc_format) { - switch (xvmc_format) - { + switch (xvmc_format) { case XVMC_CHROMA_FORMAT_420: return PIPE_VIDEO_CHROMA_FORMAT_420; case XVMC_CHROMA_FORMAT_422: @@ -148,8 +142,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id, /* TODO: Reuse screen if process creates another context */ screen = vl_screen_create(dpy, scrn); - if (!screen) - { + if (!screen) { FREE(context_priv); return BadAlloc; } @@ -157,8 +150,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id, vpipe = vl_video_create(screen, ProfileToPipe(mc_type), FormatToPipe(chroma_format), width, height); - if (!vpipe) - { + if (!vpipe) { screen->destroy(screen); FREE(context_priv); return BadAlloc; diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c index 0467c4d07d..6b7dbf11dc 100644 --- a/src/gallium/state_trackers/xorg/xvmc/surface.c +++ b/src/gallium/state_trackers/xorg/xvmc/surface.c @@ -24,8 +24,7 @@ static enum pipe_mpeg12_macroblock_type TypeToPipe(int xvmc_mb_type) static enum pipe_mpeg12_picture_type PictureToPipe(int xvmc_pic) { - switch (xvmc_pic) - { + switch (xvmc_pic) { case XVMC_TOP_FIELD: return PIPE_MPEG12_PICTURE_TYPE_FIELD_TOP; case XVMC_BOTTOM_FIELD: @@ -41,8 +40,7 @@ static enum pipe_mpeg12_picture_type PictureToPipe(int xvmc_pic) static enum pipe_mpeg12_motion_type MotionToPipe(int xvmc_motion_type, int xvmc_dct_type) { - switch (xvmc_motion_type) - { + switch (xvmc_motion_type) { case XVMC_PREDICTION_FRAME: return xvmc_dct_type == XVMC_DCT_TYPE_FIELD ? PIPE_MPEG12_MOTION_TYPE_16x8 : PIPE_MPEG12_MOTION_TYPE_FRAME; @@ -66,8 +64,7 @@ CreateOrResizeBackBuffer(struct pipe_video_context *vpipe, unsigned int width, u assert(vpipe); - if (*backbuffer) - { + if (*backbuffer) { if ((*backbuffer)->width != width || (*backbuffer)->height != height) pipe_surface_reference(backbuffer, NULL); else @@ -121,8 +118,7 @@ MacroBlocksToPipe(const XvMCMacroBlockArray *xvmc_macroblocks, xvmc_mb = xvmc_macroblocks->macro_blocks + first_macroblock; - for (i = 0; i < num_macroblocks; ++i) - { + for (i = 0; i < num_macroblocks; ++i) { pipe_macroblocks->base.codec = PIPE_VIDEO_CODEC_MPEG12; pipe_macroblocks->mbx = xvmc_mb->x; pipe_macroblocks->mby = xvmc_mb->y; @@ -171,8 +167,7 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac vsfc = vpipe->screen->video_surface_create(vpipe->screen, vpipe->chroma_format, vpipe->width, vpipe->height); - if (!vsfc) - { + if (!vsfc) { FREE(surface_priv); return BadAlloc; } @@ -262,35 +257,21 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur Status XvMCFlushSurface(Display *dpy, XvMCSurface *surface) { -#if 0 - struct vlSurface *vl_sfc; - - assert(dpy); - - if (!surface) - return XvMCBadSurface; + assert(dpy); - vl_sfc = surface->privData; + if (!surface) + return XvMCBadSurface; - vlSurfaceFlush(vl_sfc); -#endif return Success; } Status XvMCSyncSurface(Display *dpy, XvMCSurface *surface) { -#if 0 - struct vlSurface *vl_sfc; - - assert(dpy); - - if (!surface) - return XvMCBadSurface; + assert(dpy); - vl_sfc = surface->privData; + if (!surface) + return XvMCBadSurface; - vlSurfaceSync(vl_sfc); -#endif return Success; } @@ -359,43 +340,15 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable, Status XvMCGetSurfaceStatus(Display *dpy, XvMCSurface *surface, int *status) { -#if 0 - struct vlSurface *vl_sfc; - enum vlResourceStatus res_status; - - assert(dpy); - - if (!surface) - return XvMCBadSurface; - - assert(status); - - vl_sfc = surface->privData; - - vlSurfaceGetStatus(vl_sfc, &res_status); - - switch (res_status) - { - case vlResourceStatusFree: - { - *status = 0; - break; - } - case vlResourceStatusRendering: - { - *status = XVMC_RENDERING; - break; - } - case vlResourceStatusDisplaying: - { - *status = XVMC_DISPLAYING; - break; - } - default: - assert(0); - } -#endif + assert(dpy); + + if (!surface) + return XvMCBadSurface; + + assert(status); + *status = 0; + return Success; } -- cgit v1.2.3 From 99e1745af9a6a1fe1ebc65b17afb5f1a975348d2 Mon Sep 17 00:00:00 2001 From: Cooper Yuan Date: Mon, 28 Sep 2009 17:55:38 +0800 Subject: r300g: fix r300g cause GPU hang issue. why there are two input position semantic tags is that ureg doesn't set vs input semantic due to commit: 6d8dbd3d1ec888 so use vs input index instead of semantic name. --- src/gallium/drivers/r300/r300_state_derived.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 5026afc830..02b7ab9107 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -50,12 +50,11 @@ static void r300_vs_tab_routes(struct r300_context* r300, assert(info->num_inputs <= 16); - if (!r300screen->caps->has_tcl) + if (!r300screen->caps->has_tcl || !r300->rs_state->enable_vte) { for (i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { + switch (r300->vs->code.inputs[i]) { case TGSI_SEMANTIC_POSITION: - assert(pos == FALSE); pos = TRUE; tab[i] = 0; break; -- cgit v1.2.3 From 56870534803982a73019ddd77dab300d146f77c6 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 28 Sep 2009 13:03:03 +0100 Subject: softpipe: Fix MSVC build. --- src/gallium/drivers/softpipe/sp_video_context.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c index 3be33fbbdf..ccb29726b6 100644 --- a/src/gallium/drivers/softpipe/sp_video_context.c +++ b/src/gallium/drivers/softpipe/sp_video_context.c @@ -115,6 +115,7 @@ init_pipe_state(struct sp_mpeg12_context *ctx) struct pipe_rasterizer_state rast; struct pipe_blend_state blend; struct pipe_depth_stencil_alpha_state dsa; + unsigned i; assert(ctx); @@ -167,7 +168,7 @@ init_pipe_state(struct sp_mpeg12_context *ctx) dsa.depth.writemask = 0; dsa.depth.func = PIPE_FUNC_ALWAYS; dsa.depth.occlusion_count = 0; - for (unsigned i = 0; i < 2; ++i) { + for (i = 0; i < 2; ++i) { dsa.stencil[i].enabled = 0; dsa.stencil[i].func = PIPE_FUNC_ALWAYS; dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP; -- cgit v1.2.3 From 9871521b302117682afbefa7316a41a1a00485b2 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sun, 27 Sep 2009 10:56:42 -0400 Subject: llvmpipe: Grab a ref when the fb is set. Based on softpipe commit a77226071f6814a53358a5d6caff685889d0e4ec. --- src/gallium/drivers/llvmpipe/lp_context.c | 9 +++++++-- src/gallium/drivers/llvmpipe/lp_state_surface.c | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index a4b2bd8c2a..202cb8ef43 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -107,11 +107,16 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) if (llvmpipe->draw) draw_destroy( llvmpipe->draw ); - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { lp_destroy_tile_cache(llvmpipe->cbuf_cache[i]); + pipe_surface_reference(&llvmpipe->framebuffer.cbufs[i], NULL); + } + pipe_surface_reference(&llvmpipe->framebuffer.zsbuf, NULL); - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { lp_destroy_tex_tile_cache(llvmpipe->tex_cache[i]); + pipe_texture_reference(&llvmpipe->texture[i], NULL); + } for (i = 0; i < Elements(llvmpipe->constants); i++) { if (llvmpipe->constants[i].buffer) { diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index 177a26b7b1..2c29144c03 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -56,7 +56,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, lp_flush_tile_cache(lp->cbuf_cache[i]); /* assign new */ - lp->framebuffer.cbufs[i] = fb->cbufs[i]; + pipe_surface_reference(&lp->framebuffer.cbufs[i], fb->cbufs[i]); /* update cache */ lp_tile_cache_set_surface(lp->cbuf_cache[i], fb->cbufs[i]); @@ -81,7 +81,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, } /* assign new */ - lp->framebuffer.zsbuf = fb->zsbuf; + pipe_surface_reference(&lp->framebuffer.zsbuf, fb->zsbuf); /* Tell draw module how deep the Z/depth buffer is */ if (lp->framebuffer.zsbuf) { -- cgit v1.2.3 From 60f3f22a52422b11cc71149a28e24a14a9251205 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Tue, 29 Sep 2009 10:38:47 +0100 Subject: i915: Fix MSVC build. --- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index d50201642b..8a3e466c84 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -198,7 +198,7 @@ i915_vbuf_render_map_vertices(struct vbuf_render *render) struct intel_winsys *iws = i915->iws; if (i915->vbo_flushed) - debug_printf("%s bad vbo flush occured stalling on hw\n", __func__); + debug_printf("%s bad vbo flush occured stalling on hw\n", __FUNCTION__); i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE); -- cgit v1.2.3 From 7cda8ea44c2b65265cefa79bd29a4990ac81cee6 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 29 Sep 2009 13:58:58 +0100 Subject: llvmpipe: Emit SSE intrinsics based on runtime cpu capability check. Note that llvmpipe still doesn't run on any processor yet: if you don't have a recent processor with SSE4.1 you will still likely end up hitting a code path for which a generic non-sse4 version is not implemented yet. --- src/gallium/drivers/llvmpipe/lp_bld_arit.c | 72 +++++++++++------------------ src/gallium/drivers/llvmpipe/lp_bld_conv.c | 7 ++- src/gallium/drivers/llvmpipe/lp_bld_logic.c | 6 ++- src/gallium/drivers/llvmpipe/lp_jit.c | 3 ++ 4 files changed, 37 insertions(+), 51 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index 31433318a7..e8c5fa3c2a 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -48,6 +48,7 @@ #include "util/u_memory.h" #include "util/u_debug.h" #include "util/u_string.h" +#include "util/u_cpu_detect.h" #include "lp_bld_type.h" #include "lp_bld_const.h" @@ -119,30 +120,28 @@ lp_build_max_simple(struct lp_build_context *bld, /* TODO: optimize the constant case */ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) if(type.width * type.length == 128) { if(type.floating) { - if(type.width == 32) + if(type.width == 32 && util_cpu_caps.has_sse) intrinsic = "llvm.x86.sse.max.ps"; - if(type.width == 64) + if(type.width == 64 && util_cpu_caps.has_sse2) intrinsic = "llvm.x86.sse2.max.pd"; } else { - if(type.width == 8 && !type.sign) + if(type.width == 8 && !type.sign && util_cpu_caps.has_sse2) intrinsic = "llvm.x86.sse2.pmaxu.b"; - if(type.width == 8 && type.sign) + if(type.width == 8 && type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pmaxsb"; - if(type.width == 16 && !type.sign) + if(type.width == 16 && !type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pmaxuw"; - if(type.width == 16 && type.sign) + if(type.width == 16 && type.sign && util_cpu_caps.has_sse2) intrinsic = "llvm.x86.sse2.pmaxs.w"; - if(type.width == 32 && !type.sign) + if(type.width == 32 && !type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pmaxud"; - if(type.width == 32 && type.sign) + if(type.width == 32 && type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pmaxsd"; } } -#endif if(intrinsic) return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); @@ -204,15 +203,14 @@ lp_build_add(struct lp_build_context *bld, if(a == bld->one || b == bld->one) return bld->one; -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width * type.length == 128 && + if(util_cpu_caps.has_sse2 && + type.width * type.length == 128 && !type.floating && !type.fixed) { if(type.width == 8) intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b"; if(type.width == 16) intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w"; } -#endif if(intrinsic) return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); @@ -257,15 +255,14 @@ lp_build_sub(struct lp_build_context *bld, if(b == bld->one) return bld->zero; -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width * type.length == 128 && + if(util_cpu_caps.has_sse2 && + type.width * type.length == 128 && !type.floating && !type.fixed) { if(type.width == 8) intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b"; if(type.width == 16) intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w"; } -#endif if(intrinsic) return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); @@ -419,8 +416,7 @@ lp_build_mul(struct lp_build_context *bld, return bld->undef; if(!type.floating && !type.fixed && type.norm) { -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width == 8 && type.length == 16) { + if(util_cpu_caps.has_sse2 && type.width == 8 && type.length == 16) { LLVMTypeRef i16x8 = LLVMVectorType(LLVMInt16Type(), 8); LLVMTypeRef i8x16 = LLVMVectorType(LLVMInt8Type(), 16); static LLVMValueRef ml = NULL; @@ -456,7 +452,6 @@ lp_build_mul(struct lp_build_context *bld, return ab; } -#endif /* FIXME */ assert(0); @@ -493,10 +488,8 @@ lp_build_div(struct lp_build_context *bld, if(LLVMIsConstant(a) && LLVMIsConstant(b)) return LLVMConstFDiv(a, b); -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width == 32 && type.length == 4) + if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) return lp_build_mul(bld, a, lp_build_rcp(bld, b)); -#endif return LLVMBuildFDiv(bld->builder, a, b, ""); } @@ -606,8 +599,7 @@ lp_build_abs(struct lp_build_context *bld, return a; } -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width*type.length == 128) { + if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) { switch(type.width) { case 8: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a); @@ -617,7 +609,6 @@ lp_build_abs(struct lp_build_context *bld, return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a); } } -#endif return lp_build_max(bld, a, LLVMBuildNeg(bld->builder, a, "")); } @@ -710,9 +701,8 @@ lp_build_round(struct lp_build_context *bld, assert(type.floating); -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); -#endif + if(util_cpu_caps.has_sse4_1) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); /* FIXME */ assert(0); @@ -728,9 +718,8 @@ lp_build_floor(struct lp_build_context *bld, assert(type.floating); -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); -#endif + if(util_cpu_caps.has_sse4_1) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); /* FIXME */ assert(0); @@ -746,9 +735,8 @@ lp_build_ceil(struct lp_build_context *bld, assert(type.floating); -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); -#endif + if(util_cpu_caps.has_sse4_1) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); /* FIXME */ assert(0); @@ -764,9 +752,8 @@ lp_build_trunc(struct lp_build_context *bld, assert(type.floating); -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE); -#endif + if(util_cpu_caps.has_sse4_1) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE); /* FIXME */ assert(0); @@ -837,11 +824,9 @@ lp_build_rcp(struct lp_build_context *bld, if(LLVMIsConstant(a)) return LLVMConstFDiv(bld->one, a); - /* XXX: is this really necessary? */ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width == 32 && type.length == 4) + if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) + /* FIXME: improve precision */ return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); -#endif return LLVMBuildFDiv(bld->builder, bld->one, a, ""); } @@ -858,11 +843,8 @@ lp_build_rsqrt(struct lp_build_context *bld, assert(type.floating); - /* XXX: is this really necessary? */ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width == 32 && type.length == 4) + if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a); -#endif return lp_build_rcp(bld, lp_build_sqrt(bld, a)); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c index 186cac70f6..20c8710214 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c @@ -63,6 +63,7 @@ #include "util/u_debug.h" #include "util/u_math.h" +#include "util/u_cpu_detect.h" #include "lp_bld_type.h" #include "lp_bld_const.h" @@ -334,8 +335,7 @@ lp_build_pack2(LLVMBuilderRef builder, assert(!src_type.floating); assert(!dst_type.floating); -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(src_type.width * src_type.length == 128) { + if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) { /* All X86 non-interleaved pack instructions all take signed inputs and * saturate them, so saturate beforehand. */ if(!src_type.sign && !clamped) { @@ -349,7 +349,7 @@ lp_build_pack2(LLVMBuilderRef builder, switch(src_type.width) { case 32: - if(dst_type.sign) + if(dst_type.sign || !util_cpu_caps.has_sse4_1) res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi); else /* PACKUSDW is the only instrinsic with a consistent signature */ @@ -372,7 +372,6 @@ lp_build_pack2(LLVMBuilderRef builder, res = LLVMBuildBitCast(builder, res, dst_vec_type, ""); return res; } -#endif lo = LLVMBuildBitCast(builder, lo, dst_vec_type, ""); hi = LLVMBuildBitCast(builder, hi, dst_vec_type, ""); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c index 6b6f820769..db22a8028a 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c @@ -33,6 +33,8 @@ */ +#include "util/u_cpu_detect.h" + #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_intr.h" @@ -65,7 +67,7 @@ lp_build_cmp(struct lp_build_context *bld, #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) if(type.width * type.length == 128) { - if(type.floating) { + if(type.floating && util_cpu_caps.has_sse) { LLVMValueRef args[3]; unsigned cc; boolean swap; @@ -114,7 +116,7 @@ lp_build_cmp(struct lp_build_context *bld, res = LLVMBuildBitCast(bld->builder, res, int_vec_type, ""); return res; } - else { + else if(util_cpu_caps.has_sse2) { static const struct { unsigned swap:1; unsigned eq:1; diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index f7111c1e5c..5d2cf01e5b 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -36,6 +36,7 @@ #include #include "util/u_memory.h" +#include "util/u_cpu_detect.h" #include "lp_screen.h" #include "lp_bld_intr.h" #include "lp_jit.h" @@ -147,6 +148,8 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) { char *error = NULL; + util_cpu_detect(); + #ifdef LLVM_NATIVE_ARCH LLVMLinkInJIT(); LLVMInitializeNativeTarget(); -- cgit v1.2.3 From fbddc75aa2f6542117783b8024f9ebd2f0309e1f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 29 Sep 2009 08:21:54 -0600 Subject: softpipe: Grab a ref when the fb is set. Nasty bug when the surface is freed and another is allocated right on top of it. The next time we set the fb state SP thinks it's the same surface and doesn't flush, and when the flush eventually happens the surface belongs to a completely different texture. (cherry picked from commit a77226071f6814a53358a5d6caff685889d0e4ec) Conflicts: src/gallium/drivers/softpipe/sp_context.c --- src/gallium/drivers/softpipe/sp_context.c | 9 +++++++-- src/gallium/drivers/softpipe/sp_state_surface.c | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 86df320ea8..b4650c0dc5 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -105,12 +105,17 @@ static void softpipe_destroy( struct pipe_context *pipe ) softpipe->quad[i].output->destroy( softpipe->quad[i].output ); } - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { sp_destroy_tile_cache(softpipe->cbuf_cache[i]); + pipe_surface_reference(&softpipe->framebuffer.cbufs[i], NULL); + } sp_destroy_tile_cache(softpipe->zsbuf_cache); + pipe_surface_reference(&softpipe->framebuffer.zsbuf, NULL); - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { sp_destroy_tile_cache(softpipe->tex_cache[i]); + pipe_texture_reference(&softpipe->texture[i], NULL); + } for (i = 0; i < Elements(softpipe->constants); i++) { if (softpipe->constants[i].buffer) { diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index 7c06d864a7..181bff8f75 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -56,7 +56,7 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, sp_flush_tile_cache(sp, sp->cbuf_cache[i]); /* assign new */ - sp->framebuffer.cbufs[i] = fb->cbufs[i]; + pipe_surface_reference(&sp->framebuffer.cbufs[i], fb->cbufs[i]); /* update cache */ sp_tile_cache_set_surface(sp->cbuf_cache[i], fb->cbufs[i]); @@ -71,7 +71,7 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, sp_flush_tile_cache(sp, sp->zsbuf_cache); /* assign new */ - sp->framebuffer.zsbuf = fb->zsbuf; + pipe_surface_reference(&sp->framebuffer.zsbuf, fb->zsbuf); /* update cache */ sp_tile_cache_set_surface(sp->zsbuf_cache, fb->zsbuf); -- cgit v1.2.3 From 564df9dc5f6335eb8dc68f3c69cf054d2142663c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 29 Sep 2009 08:50:56 -0600 Subject: softpipe: initialize the clear_flags bitvector in sp_create_tile_cache() This silences tons of valgrind warnings in programs that don't call glClear(), such as progs/demos/gamma. --- src/gallium/drivers/softpipe/sp_tile_cache.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 461cbb9f95..5f7864e671 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -130,6 +130,11 @@ sp_create_tile_cache( struct pipe_screen *screen ) tc->entries[pos].x = tc->entries[pos].y = -1; } + +#if TILE_CLEAR_OPTIMIZATION + /* set flags to indicate all the tiles are cleared */ + memset(tc->clear_flags, 255, sizeof(tc->clear_flags)); +#endif } return tc; } -- cgit v1.2.3 From 741c40a232637c933c9273bbdef905397e54bc94 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 29 Sep 2009 16:59:13 +0100 Subject: llvmpipe: Implement non SSE4.1 versions of floor and round. --- src/gallium/drivers/llvmpipe/lp_bld_arit.c | 177 +++++++++++++++++++---- src/gallium/drivers/llvmpipe/lp_bld_arit.h | 13 +- src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 4 +- 3 files changed, 159 insertions(+), 35 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index e8c5fa3c2a..f878706ad1 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -675,6 +675,8 @@ lp_build_round_sse41(struct lp_build_context *bld, assert(type.floating); assert(type.width*type.length == 128); + assert(lp_check_value(type, a)); + assert(util_cpu_caps.has_sse4_1); switch(type.width) { case 32: @@ -693,6 +695,28 @@ lp_build_round_sse41(struct lp_build_context *bld, } +LLVMValueRef +lp_build_trunc(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + assert(lp_check_value(type, a)); + + if(util_cpu_caps.has_sse4_1) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE); + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef res; + res = LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); + res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); + return res; + } +} + + LLVMValueRef lp_build_round(struct lp_build_context *bld, LLVMValueRef a) @@ -700,13 +724,17 @@ lp_build_round(struct lp_build_context *bld, const struct lp_type type = bld->type; assert(type.floating); + assert(lp_check_value(type, a)); if(util_cpu_caps.has_sse4_1) return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); - - /* FIXME */ - assert(0); - return bld->undef; + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef res; + res = lp_build_iround(bld, a); + res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); + return res; + } } @@ -720,10 +748,13 @@ lp_build_floor(struct lp_build_context *bld, if(util_cpu_caps.has_sse4_1) return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); - - /* FIXME */ - assert(0); - return bld->undef; + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef res; + res = lp_build_ifloor(bld, a); + res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); + return res; + } } @@ -734,47 +765,74 @@ lp_build_ceil(struct lp_build_context *bld, const struct lp_type type = bld->type; assert(type.floating); + assert(lp_check_value(type, a)); if(util_cpu_caps.has_sse4_1) return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); - - /* FIXME */ - assert(0); - return bld->undef; + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef res; + res = lp_build_iceil(bld, a); + res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); + return res; + } } +/** + * Convert to integer, through whichever rounding method that's fastest, + * typically truncating to zero. + */ LLVMValueRef -lp_build_trunc(struct lp_build_context *bld, - LLVMValueRef a) +lp_build_itrunc(struct lp_build_context *bld, + LLVMValueRef a) { const struct lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); assert(type.floating); + assert(lp_check_value(type, a)); - if(util_cpu_caps.has_sse4_1) - return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE); - - /* FIXME */ - assert(0); - return bld->undef; + return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); } -/** - * Convert to integer, through whichever rounding method that's fastest, - * typically truncating to zero. - */ LLVMValueRef -lp_build_int(struct lp_build_context *bld, - LLVMValueRef a) +lp_build_iround(struct lp_build_context *bld, + LLVMValueRef a) { const struct lp_type type = bld->type; LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef res; assert(type.floating); + assert(lp_check_value(type, a)); - return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); + if(util_cpu_caps.has_sse4_1) { + res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); + } + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + LLVMValueRef half; + + /* get sign bit */ + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + + /* sign * 0.5 */ + half = lp_build_const_scalar(type, 0.5); + half = LLVMBuildBitCast(bld->builder, half, int_vec_type, ""); + half = LLVMBuildOr(bld->builder, sign, half, ""); + half = LLVMBuildBitCast(bld->builder, half, vec_type, ""); + + res = LLVMBuildAdd(bld->builder, a, half, ""); + } + + res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); + + return res; } @@ -782,9 +840,68 @@ LLVMValueRef lp_build_ifloor(struct lp_build_context *bld, LLVMValueRef a) { - a = lp_build_floor(bld, a); - a = lp_build_int(bld, a); - return a; + const struct lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef res; + + assert(type.floating); + assert(lp_check_value(type, a)); + + if(util_cpu_caps.has_sse4_1) { + res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); + } + else { + /* Take the sign bit and add it to 1 constant */ + LLVMTypeRef vec_type = lp_build_vec_type(type); + unsigned mantissa = lp_mantissa(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + LLVMValueRef offset; + + /* sign = a < 0 ? ~0 : 0 */ + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + sign = LLVMBuildAShr(bld->builder, sign, lp_build_int_const_scalar(type, type.width - 1), ""); + + /* offset = -0.99999(9)f */ + offset = lp_build_const_scalar(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa)); + offset = LLVMConstBitCast(offset, int_vec_type); + + /* offset = a < 0 ? -0.99999(9)f : 0.0f */ + offset = LLVMBuildAnd(bld->builder, offset, sign, ""); + offset = LLVMBuildBitCast(bld->builder, offset, vec_type, ""); + + res = LLVMBuildAdd(bld->builder, a, offset, ""); + } + + res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); + + return res; +} + + +LLVMValueRef +lp_build_iceil(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef res; + + assert(type.floating); + assert(lp_check_value(type, a)); + + if(util_cpu_caps.has_sse4_1) { + res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); + } + else { + assert(0); + res = bld->undef; + } + + res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); + + return res; } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h index d68a97c4b8..095a8e1cab 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h @@ -126,11 +126,18 @@ lp_build_trunc(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef -lp_build_int(struct lp_build_context *bld, - LLVMValueRef a); +lp_build_ifloor(struct lp_build_context *bld, + LLVMValueRef a); +LLVMValueRef +lp_build_iceil(struct lp_build_context *bld, + LLVMValueRef a); LLVMValueRef -lp_build_ifloor(struct lp_build_context *bld, +lp_build_iround(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_itrunc(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 8ca1be6f1b..1a47ca32d2 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -274,8 +274,8 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart); t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart); - x0 = lp_build_int(&bld->coord_bld, s_ipart); - y0 = lp_build_int(&bld->coord_bld, t_ipart); + x0 = lp_build_itrunc(&bld->coord_bld, s_ipart); + y0 = lp_build_itrunc(&bld->coord_bld, t_ipart); x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s); y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t); -- cgit v1.2.3 From 754f48871c3be671031d9a495fc96a42b71da349 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 29 Sep 2009 17:21:34 +0100 Subject: llvmpipe: Runtime cpu checks for lp_build_min_simple too. --- src/gallium/drivers/llvmpipe/lp_bld_arit.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index f878706ad1..d27ef0de04 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -54,6 +54,7 @@ #include "lp_bld_const.h" #include "lp_bld_intr.h" #include "lp_bld_logic.h" +#include "lp_bld_debug.h" #include "lp_bld_arit.h" @@ -72,30 +73,28 @@ lp_build_min_simple(struct lp_build_context *bld, /* TODO: optimize the constant case */ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) if(type.width * type.length == 128) { if(type.floating) { - if(type.width == 32) + if(type.width == 32 && util_cpu_caps.has_sse) intrinsic = "llvm.x86.sse.min.ps"; - if(type.width == 64) + if(type.width == 64 && util_cpu_caps.has_sse2) intrinsic = "llvm.x86.sse2.min.pd"; } else { - if(type.width == 8 && !type.sign) + if(type.width == 8 && !type.sign && util_cpu_caps.has_sse2) intrinsic = "llvm.x86.sse2.pminu.b"; - if(type.width == 8 && type.sign) + if(type.width == 8 && type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pminsb"; - if(type.width == 16 && !type.sign) + if(type.width == 16 && !type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pminuw"; - if(type.width == 16 && type.sign) + if(type.width == 16 && type.sign && util_cpu_caps.has_sse2) intrinsic = "llvm.x86.sse2.pmins.w"; - if(type.width == 32 && !type.sign) + if(type.width == 32 && !type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pminud"; - if(type.width == 32 && type.sign) + if(type.width == 32 && type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pminsd"; } } -#endif if(intrinsic) return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); -- cgit v1.2.3 From a02ecdf8c2fc5783a4bc82e8cd9d36f0dec7ccec Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 29 Sep 2009 17:22:39 +0100 Subject: llvmpipe: First verify LLVM IR, only then run optimizing passes. --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 9faed5a0b1..d5ce6993c5 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -582,6 +582,11 @@ generate_fragment(struct llvmpipe_context *lp, * Translate the LLVM IR into machine code. */ + if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { + LLVMDumpValue(variant->function); + abort(); + } + LLVMRunFunctionPassManager(screen->pass, variant->function); #ifdef DEBUG @@ -589,11 +594,6 @@ generate_fragment(struct llvmpipe_context *lp, debug_printf("\n"); #endif - if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { - LLVMDumpValue(variant->function); - abort(); - } - variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function); #ifdef DEBUG -- cgit v1.2.3 From baddcbc5225e12052b3bc8c07a8b65243d76574d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 29 Sep 2009 17:26:20 +0100 Subject: llvmpipe: Workaround for bug in llvm 2.5. The combination of fptosi and sitofp (necessary for trunc/floor/ceil/round implementation) somehow becomes invalid code. Skip the instruction combining pass when SSE4.1 is not available. --- src/gallium/drivers/llvmpipe/lp_jit.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 5d2cf01e5b..1126bf90b9 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -150,6 +150,12 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) util_cpu_detect(); +#if 0 + /* For simulating less capable machines */ + util_cpu_caps.has_sse3 = 0; + util_cpu_caps.has_sse4_1 = 0; +#endif + #ifdef LLVM_NATIVE_ARCH LLVMLinkInJIT(); LLVMInitializeNativeTarget(); @@ -171,8 +177,15 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) LLVMAddTargetData(screen->target, screen->pass); /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. */ + /* TODO: Add more passes */ LLVMAddConstantPropagationPass(screen->pass); - LLVMAddInstructionCombiningPass(screen->pass); + if(util_cpu_caps.has_sse4_1) { + /* FIXME: There is a bug in this pass, whereby the combination of fptosi + * and sitofp (necessary for trunc/floor/ceil/round implementation) + * somehow becomes invalid code. + */ + LLVMAddInstructionCombiningPass(screen->pass); + } LLVMAddPromoteMemoryToRegisterPass(screen->pass); LLVMAddGVNPass(screen->pass); LLVMAddCFGSimplificationPass(screen->pass); -- cgit v1.2.3 From 4456006ba626890172289111403e469f49106e18 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 Oct 2009 14:34:23 +0100 Subject: gallium: remove depth.occlusion_count flag This was redundant as drivers can just keep track of whether they are inside a begin/end query pair. We want to add more query types later and also support nested queries, none of which map well onto a flag like this. No driver appeared to be using the flag. --- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 3 --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 1 - src/gallium/drivers/softpipe/sp_video_context.c | 1 - src/gallium/include/pipe/p_state.h | 1 - src/gallium/state_trackers/g3dvl/vl_context.c | 1 - src/mesa/state_tracker/st_atom_depth.c | 4 ---- 6 files changed, 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 21c665c4d4..98ec1cb1b9 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -210,7 +210,4 @@ lp_build_depth_test(LLVMBuilderRef builder, dst = lp_build_select(&bld, z_bitmask, src, dst); LLVMBuildStore(builder, dst, dst_ptr); } - - /* FIXME */ - assert(!state->occlusion_count); } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index d5ce6993c5..b00be0cc32 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -401,7 +401,6 @@ generate_fragment(struct llvmpipe_context *lp, if(key->depth.enabled) { debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); debug_printf("depth.writemask = %u\n", key->depth.writemask); - debug_printf("depth.occlusion_count = %u\n", key->depth.occlusion_count); } if(key->alpha.enabled) { debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE)); diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c index ccb29726b6..7e9136d8e0 100644 --- a/src/gallium/drivers/softpipe/sp_video_context.c +++ b/src/gallium/drivers/softpipe/sp_video_context.c @@ -167,7 +167,6 @@ init_pipe_state(struct sp_mpeg12_context *ctx) dsa.depth.enabled = 0; dsa.depth.writemask = 0; dsa.depth.func = PIPE_FUNC_ALWAYS; - dsa.depth.occlusion_count = 0; for (i = 0; i < 2; ++i) { dsa.stencil[i].enabled = 0; dsa.stencil[i].func = PIPE_FUNC_ALWAYS; diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index b59d6b7ae3..287b424e4a 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -197,7 +197,6 @@ struct pipe_depth_state unsigned enabled:1; /**< depth test enabled? */ unsigned writemask:1; /**< allow depth buffer writes? */ unsigned func:3; /**< depth test func (PIPE_FUNC_x) */ - unsigned occlusion_count:1; /**< do occlusion counting? */ }; diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 5cfd233c4c..cfbf618d74 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -69,7 +69,6 @@ static int vlInitCommon(struct vlContext *context) dsa.depth.enabled = 0; dsa.depth.writemask = 0; dsa.depth.func = PIPE_FUNC_ALWAYS; - dsa.depth.occlusion_count = 0; for (i = 0; i < 2; ++i) { dsa.stencil[i].enabled = 0; diff --git a/src/mesa/state_tracker/st_atom_depth.c b/src/mesa/state_tracker/st_atom_depth.c index 0aa128f947..88b80a07fc 100644 --- a/src/mesa/state_tracker/st_atom_depth.c +++ b/src/mesa/state_tracker/st_atom_depth.c @@ -104,10 +104,6 @@ update_depth_stencil_alpha(struct st_context *st) dsa->depth.func = st_compare_func_to_pipe(ctx->Depth.Func); } - if (ctx->Query.CurrentOcclusionObject && - ctx->Query.CurrentOcclusionObject->Active) - dsa->depth.occlusion_count = 1; - if (ctx->Stencil.Enabled && ctx->DrawBuffer->Visual.stencilBits > 0) { dsa->stencil[0].enabled = 1; dsa->stencil[0].func = st_compare_func_to_pipe(ctx->Stencil.Function[0]); -- cgit v1.2.3 From 0b466c8705c9000c347760b5daafdf31c291736d Mon Sep 17 00:00:00 2001 From: Robert Noland Date: Wed, 30 Sep 2009 10:14:38 -0700 Subject: util: Enable sockets on BSD I think this should be safe for all of the BSDs. Signed-off-by: Robert Noland Signed-off-by: Brian Paul --- src/gallium/auxiliary/util/u_network.c | 6 +++--- src/gallium/auxiliary/util/u_network.h | 2 +- src/gallium/drivers/trace/tr_rbug.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_network.c b/src/gallium/auxiliary/util/u_network.c index bc4b758406..6269c72e12 100644 --- a/src/gallium/auxiliary/util/u_network.c +++ b/src/gallium/auxiliary/util/u_network.c @@ -6,7 +6,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # include # include -#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_BSD) # include # include # include @@ -54,7 +54,7 @@ u_socket_close(int s) if (s < 0) return; -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_BSD) shutdown(s, SHUT_RDWR); close(s); #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) @@ -169,7 +169,7 @@ u_socket_listen_on_port(uint16_t portnum) void u_socket_block(int s, boolean block) { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_BSD) int old = fcntl(s, F_GETFL, 0); if (old == -1) return; diff --git a/src/gallium/auxiliary/util/u_network.h b/src/gallium/auxiliary/util/u_network.h index 8c778f492c..0aa898b967 100644 --- a/src/gallium/auxiliary/util/u_network.h +++ b/src/gallium/auxiliary/util/u_network.h @@ -6,7 +6,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # define PIPE_HAVE_SOCKETS -#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_BSD) # define PIPE_HAVE_SOCKETS #endif diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c index e85ac15edc..81e0a6f3b0 100644 --- a/src/gallium/drivers/trace/tr_rbug.c +++ b/src/gallium/drivers/trace/tr_rbug.c @@ -44,7 +44,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # define sleep Sleep -#elif defined(PIPE_OS_LINUX) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) void usleep(int); # define sleep usleep #else -- cgit v1.2.3 From e00da1476fcdf8e5877fc1e62118080f5c4193f0 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Thu, 1 Oct 2009 21:53:17 -0400 Subject: g3dvl: Color space conv interface & vl impl. Interface is pipe_video_context::set_csc_matrix(). vl_csc.h defines some helpers to generate CSC matrices based on one of the color standard and a user defined ProcAmp (brightness, contrast, saturation, hue). --- src/gallium/auxiliary/vl/Makefile | 1 + src/gallium/auxiliary/vl/SConscript | 1 + src/gallium/auxiliary/vl/vl_compositor.c | 139 ++++-------------- src/gallium/auxiliary/vl/vl_compositor.h | 2 + src/gallium/auxiliary/vl/vl_csc.c | 179 ++++++++++++++++++++++++ src/gallium/auxiliary/vl/vl_csc.h | 26 ++++ src/gallium/drivers/softpipe/sp_video_context.c | 10 ++ src/gallium/include/pipe/p_video_context.h | 4 +- 8 files changed, 249 insertions(+), 113 deletions(-) create mode 100644 src/gallium/auxiliary/vl/vl_csc.c create mode 100644 src/gallium/auxiliary/vl/vl_csc.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/vl/Makefile b/src/gallium/auxiliary/vl/Makefile index 71bfb937ad..4314c1e8d6 100644 --- a/src/gallium/auxiliary/vl/Makefile +++ b/src/gallium/auxiliary/vl/Makefile @@ -7,6 +7,7 @@ C_SOURCES = \ vl_bitstream_parser.c \ vl_mpeg12_mc_renderer.c \ vl_compositor.c \ + vl_csc.c \ vl_shader_build.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/vl/SConscript b/src/gallium/auxiliary/vl/SConscript index eb50940c35..aed69f5efe 100644 --- a/src/gallium/auxiliary/vl/SConscript +++ b/src/gallium/auxiliary/vl/SConscript @@ -6,6 +6,7 @@ vl = env.ConvenienceLibrary( 'vl_bitstream_parser.c', 'vl_mpeg12_mc_renderer.c', 'vl_compositor.c', + 'vl_csc.c', 'vl_shader_build.c', ]) diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index 6431da6611..5d3458afd2 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -5,6 +5,7 @@ #include #include #include +#include "vl_csc.h" #include "vl_shader_build.h" struct vertex2f @@ -27,7 +28,6 @@ struct vertex_shader_consts struct fragment_shader_consts { - struct vertex4f bias; float matrix[16]; }; @@ -49,94 +49,6 @@ static const struct vertex2f surface_verts[4] = */ static const struct vertex2f *surface_texcoords = surface_verts; -/* - * Identity color conversion constants, for debugging - */ -static const struct fragment_shader_consts identity = -{ - { - 0.0f, 0.0f, 0.0f, 0.0f - }, - { - 1.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 1.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f - } -}; - -/* - * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: - * Y is in [16,235], Cb and Cr are in [16,240] - * R, G, and B are in [16,235] - */ -static const struct fragment_shader_consts bt_601 = -{ - { - 0.0f, 0.501960784f, 0.501960784f, 0.0f - }, - { - 1.0f, 0.0f, 1.371f, 0.0f, - 1.0f, -0.336f, -0.698f, 0.0f, - 1.0f, 1.732f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f - } -}; - -/* - * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: - * Y is in [16,235], Cb and Cr are in [16,240] - * R, G, and B are in [0,255] - */ -static const struct fragment_shader_consts bt_601_full = -{ - { - 0.062745098f, 0.501960784f, 0.501960784f, 0.0f - }, - { - 1.164f, 0.0f, 1.596f, 0.0f, - 1.164f, -0.391f, -0.813f, 0.0f, - 1.164f, 2.018f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f - } -}; - -/* - * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: - * Y is in [16,235], Cb and Cr are in [16,240] - * R, G, and B are in [16,235] - */ -static const struct fragment_shader_consts bt_709 = -{ - { - 0.0f, 0.501960784f, 0.501960784f, 0.0f - }, - { - 1.0f, 0.0f, 1.540f, 0.0f, - 1.0f, -0.183f, -0.459f, 0.0f, - 1.0f, 1.816f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f - } -}; - -/* - * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: - * Y is in [16,235], Cb and Cr are in [16,240] - * R, G, and B are in [0,255] - */ -const struct fragment_shader_consts bt_709_full = -{ - { - 0.062745098f, 0.501960784f, 0.501960784f, 0.0f - }, - { - 1.164f, 0.0f, 1.793f, 0.0f, - 1.164f, -0.213f, -0.534f, 0.0f, - 1.164f, 2.115f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f - } -}; - static void create_vert_shader(struct vl_compositor *c) { @@ -245,10 +157,9 @@ create_frag_shader(struct vl_compositor *c) ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); /* - * decl c0 ; Bias vector for CSC - * decl c1-c4 ; CSC matrix c1-c4 + * decl c0-c3 ; CSC matrix c0-c3 */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); /* decl o0 ; Fragment color */ @@ -267,17 +178,14 @@ create_frag_shader(struct vl_compositor *c) inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* sub t0, t0, c0 ; Subtract bias vector from pixel */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* - * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix - * dp4 o0.y, t0, c2 - * dp4 o0.z, t0, c3 + * dp4 o0.x, t0, c0 ; Multiply pixel by the color conversion matrix + * dp4 o0.y, t0, c1 + * dp4 o0.z, t0, c2 + * dp4 o0.w, t0, c3 */ - for (i = 0; i < 3; ++i) { - inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1); + for (i = 0; i < 4; ++i) { + inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i); inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -352,6 +260,8 @@ static void cleanup_shaders(struct vl_compositor *c) static bool init_buffers(struct vl_compositor *c) { + struct fragment_shader_consts fsc; + assert(c); /* @@ -438,18 +348,9 @@ init_buffers(struct vl_compositor *c) sizeof(struct fragment_shader_consts) ); - /* - * TODO: Refactor this into a seperate function, - * allow changing the CSC matrix at runtime to switch between regular & full versions - */ - memcpy - ( - pipe_buffer_map(c->pipe->screen, c->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - &bt_601_full, - sizeof(struct fragment_shader_consts) - ); + vl_csc_get_matrix(VL_CSC_COLOR_STANDARD_IDENTITY, NULL, true, fsc.matrix); - pipe_buffer_unmap(c->pipe->screen, c->fs_const_buf.buffer); + vl_compositor_set_csc_matrix(c, fsc.matrix); return true; } @@ -588,3 +489,17 @@ void vl_compositor_render(struct vl_compositor *compositor, pipe_surface_reference(&compositor->fb_state.cbufs[0], NULL); } + +void vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float *mat) +{ + assert(compositor); + + memcpy + ( + pipe_buffer_map(compositor->pipe->screen, compositor->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + mat, + sizeof(struct fragment_shader_consts) + ); + + pipe_buffer_unmap(compositor->pipe->screen, compositor->fs_const_buf.buffer); +} diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h index 19ad66d9c6..975ea00bde 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.h +++ b/src/gallium/auxiliary/vl/vl_compositor.h @@ -44,4 +44,6 @@ void vl_compositor_render(struct vl_compositor *compositor, struct pipe_video_rect *layer_dst_areas,*/ struct pipe_fence_handle **fence); +void vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float *mat); + #endif /* vl_compositor_h */ diff --git a/src/gallium/auxiliary/vl/vl_csc.c b/src/gallium/auxiliary/vl/vl_csc.c new file mode 100644 index 0000000000..828cebe4ed --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_csc.c @@ -0,0 +1,179 @@ +#include "vl_csc.h" +#include +#include + +/* + * Color space conversion formulas + * + * To convert YCbCr to RGB, + * vec4 ycbcr, rgb + * mat44 csc + * rgb = csc * ycbcr + * + * To calculate the color space conversion matrix csc with ProcAmp adjustments, + * mat44 csc, cstd, procamp, bias + * csc = cstd * (procamp * bias) + * + * Where cstd is a matrix corresponding to one of the color standards (BT.601, BT.709, etc) + * adjusted for the kind of YCbCr -> RGB mapping wanted (1:1, full), + * bias is a matrix corresponding to the kind of YCbCr -> RGB mapping wanted (1:1, full) + * + * To calculate procamp, + * mat44 procamp, hue, saturation, brightness, contrast + * procamp = brightness * (saturation * (contrast * hue)) + * Alternatively, + * procamp = saturation * (brightness * (contrast * hue)) + * + * contrast + * [ c, 0, 0, 0] + * [ 0, c, 0, 0] + * [ 0, 0, c, 0] + * [ 0, 0, 0, 1] + * + * brightness + * [ 1, 0, 0, b] + * [ 0, 1, 0, 0] + * [ 0, 0, 1, 0] + * [ 0, 0, 0, 1] + * + * saturation + * [ 1, 0, 0, 0] + * [ 0, s, 0, 0] + * [ 0, 0, s, 0] + * [ 0, 0, 0, 1] + * + * hue + * [ 1, 0, 0, 0] + * [ 0, cos(h), sin(h), 0] + * [ 0, -sin(h), cos(h), 0] + * [ 0, 0, 0, 1] + * + * procamp + * [ c, 0, 0, b] + * [ 0, c*s*cos(h), c*s*sin(h), 0] + * [ 0, -c*s*sin(h), c*s*cos(h), 0] + * [ 0, 0, 0, 1] + * + * bias + * [ 1, 0, 0, ybias] + * [ 0, 1, 0, cbbias] + * [ 0, 0, 1, crbias] + * [ 0, 0, 0, 1] + * + * csc + * [ c*cstd[ 0], c*cstd[ 1]*s*cos(h) - c*cstd[ 2]*s*sin(h), c*cstd[ 2]*s*cos(h) + c*cstd[ 1]*s*sin(h), cstd[ 3] + cstd[ 0]*(b + c*ybias) + cstd[ 1]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[ 2]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h))] + * [ c*cstd[ 4], c*cstd[ 5]*s*cos(h) - c*cstd[ 6]*s*sin(h), c*cstd[ 6]*s*cos(h) + c*cstd[ 5]*s*sin(h), cstd[ 7] + cstd[ 4]*(b + c*ybias) + cstd[ 5]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[ 6]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h))] + * [ c*cstd[ 8], c*cstd[ 9]*s*cos(h) - c*cstd[10]*s*sin(h), c*cstd[10]*s*cos(h) + c*cstd[ 9]*s*sin(h), cstd[11] + cstd[ 8]*(b + c*ybias) + cstd[ 9]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[10]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h))] + * [ c*cstd[12], c*cstd[13]*s*cos(h) - c*cstd[14]*s*sin(h), c*cstd[14]*s*cos(h) + c*cstd[13]*s*sin(h), cstd[15] + cstd[12]*(b + c*ybias) + cstd[13]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[14]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h))] + */ + +/* + * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [16,235] + */ +static const float bt_601[16] = +{ + 1.0f, 0.0f, 1.371f, 0.0f, + 1.0f, -0.336f, -0.698f, 0.0f, + 1.0f, 1.732f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f +}; + +/* + * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [0,255] + */ +static const float bt_601_full[16] = +{ + 1.164f, 0.0f, 1.596f, 0.0f, + 1.164f, -0.391f, -0.813f, 0.0f, + 1.164f, 2.018f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f +}; + +/* + * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [16,235] + */ +static const float bt_709[16] = +{ + 1.0f, 0.0f, 1.540f, 0.0f, + 1.0f, -0.183f, -0.459f, 0.0f, + 1.0f, 1.816f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f +}; + +/* + * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [0,255] + */ +static const float bt_709_full[16] = +{ + 1.164f, 0.0f, 1.793f, 0.0f, + 1.164f, -0.213f, -0.534f, 0.0f, + 1.164f, 2.115f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f +}; + +static const float identity[16] = +{ + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f +}; + +void vl_csc_get_matrix(enum VL_CSC_COLOR_STANDARD cs, + struct vl_procamp *procamp, + bool full_range, + float *matrix) +{ + float ybias = full_range ? -16.0f/255.0f : 0.0f; + float cbbias = -128.0f/255.0f; + float crbias = -128.0f/255.0f; + float c = procamp ? procamp->contrast : 1.0f; + float s = procamp ? procamp->saturation : 1.0f; + float b = procamp ? procamp->brightness : 0.0f; + float h = procamp ? procamp->hue : 0.0f; + const float *cstd; + + assert(matrix); + + switch (cs) { + case VL_CSC_COLOR_STANDARD_BT_601: + cstd = full_range ? &bt_601_full[0] : &bt_601[0]; + break; + case VL_CSC_COLOR_STANDARD_BT_709: + cstd = full_range ? &bt_709_full[0] : &bt_709[0]; + break; + case VL_CSC_COLOR_STANDARD_IDENTITY: + default: + assert(cs == VL_CSC_COLOR_STANDARD_IDENTITY); + memcpy(matrix, &identity[0], sizeof(float) * 16); + return; + } + + matrix[ 0] = c*cstd[ 0]; + matrix[ 1] = c*cstd[ 1]*s*cosf(h) - c*cstd[ 2]*s*sinf(h); + matrix[ 2] = c*cstd[ 2]*s*cosf(h) + c*cstd[ 1]*s*sinf(h); + matrix[ 3] = cstd[ 3] + cstd[ 0]*(b + c*ybias) + cstd[ 1]*(c*cbbias*s*cosf(h) + c*crbias*s*sinf(h)) + cstd[ 2]*(c*crbias*s*cosf(h) - c*cbbias*s*sinf(h)); + + matrix[ 4] = c*cstd[ 4]; + matrix[ 5] = c*cstd[ 5]*s*cosf(h) - c*cstd[ 6]*s*sinf(h); + matrix[ 6] = c*cstd[ 6]*s*cosf(h) + c*cstd[ 5]*s*sinf(h); + matrix[ 7] = cstd[ 7] + cstd[ 4]*(b + c*ybias) + cstd[ 5]*(c*cbbias*s*cosf(h) + c*crbias*s*sinf(h)) + cstd[ 6]*(c*crbias*s*cosf(h) - c*cbbias*s*sinf(h)); + + matrix[ 8] = c*cstd[ 8]; + matrix[ 9] = c*cstd[ 9]*s*cosf(h) - c*cstd[10]*s*sinf(h); + matrix[10] = c*cstd[10]*s*cosf(h) + c*cstd[ 9]*s*sinf(h); + matrix[11] = cstd[11] + cstd[ 8]*(b + c*ybias) + cstd[ 9]*(c*cbbias*s*cosf(h) + c*crbias*s*sinf(h)) + cstd[10]*(c*crbias*s*cosf(h) - c*cbbias*s*sinf(h)); + + matrix[12] = c*cstd[12]; + matrix[13] = c*cstd[13]*s*cos(h) - c*cstd[14]*s*sin(h); + matrix[14] = c*cstd[14]*s*cos(h) + c*cstd[13]*s*sin(h); + matrix[15] = cstd[15] + cstd[12]*(b + c*ybias) + cstd[13]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[14]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h)); +} diff --git a/src/gallium/auxiliary/vl/vl_csc.h b/src/gallium/auxiliary/vl/vl_csc.h new file mode 100644 index 0000000000..c3b87d279c --- /dev/null +++ b/src/gallium/auxiliary/vl/vl_csc.h @@ -0,0 +1,26 @@ +#ifndef vl_csc_h +#define vl_csc_h + +#include + +struct vl_procamp +{ + float brightness; + float contrast; + float saturation; + float hue; +}; + +enum VL_CSC_COLOR_STANDARD +{ + VL_CSC_COLOR_STANDARD_IDENTITY, + VL_CSC_COLOR_STANDARD_BT_601, + VL_CSC_COLOR_STANDARD_BT_709 +}; + +void vl_csc_get_matrix(enum VL_CSC_COLOR_STANDARD cs, + struct vl_procamp *procamp, + bool full_range, + float *matrix); + +#endif /* vl_csc_h */ diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c index 7e9136d8e0..00b4b7d560 100644 --- a/src/gallium/drivers/softpipe/sp_video_context.c +++ b/src/gallium/drivers/softpipe/sp_video_context.c @@ -109,6 +109,15 @@ sp_mpeg12_set_decode_target(struct pipe_video_context *vpipe, pipe_video_surface_reference(&ctx->decode_target, dt); } +static void sp_mpeg12_set_csc_matrix(struct pipe_video_context *vpipe, const float *mat) +{ + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + + assert(vpipe); + + vl_compositor_set_csc_matrix(&ctx->compositor, mat); +} + static bool init_pipe_state(struct sp_mpeg12_context *ctx) { @@ -211,6 +220,7 @@ sp_mpeg12_create(struct pipe_screen *screen, enum pipe_video_profile profile, ctx->base.clear_surface = sp_mpeg12_clear_surface; ctx->base.render_picture = sp_mpeg12_render_picture; ctx->base.set_decode_target = sp_mpeg12_set_decode_target; + ctx->base.set_csc_matrix = sp_mpeg12_set_csc_matrix; ctx->pipe = softpipe_create(screen); if (!ctx->pipe) { diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h index 937705ac50..4d125fa4d5 100644 --- a/src/gallium/include/pipe/p_video_context.h +++ b/src/gallium/include/pipe/p_video_context.h @@ -80,7 +80,9 @@ struct pipe_video_context void (*set_decode_target)(struct pipe_video_context *vpipe, struct pipe_video_surface *dt); - /* TODO: Interface for CSC matrix, scaling modes, post-processing, etc. */ + void (*set_csc_matrix)(struct pipe_video_context *vpipe, const float *mat); + + /* TODO: Interface for scaling modes, post-processing, etc. */ /*@}*/ }; -- cgit v1.2.3 From fcb595c04f9ee275eae49b7bb7c61246671f5ce2 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Thu, 1 Oct 2009 22:16:10 -0400 Subject: g3dvl: Copyright blocks. --- src/gallium/auxiliary/vl/vl_bitstream_parser.c | 27 ++++++++++++++++++++++ src/gallium/auxiliary/vl/vl_bitstream_parser.h | 27 ++++++++++++++++++++++ src/gallium/auxiliary/vl/vl_compositor.c | 27 ++++++++++++++++++++++ src/gallium/auxiliary/vl/vl_compositor.h | 27 ++++++++++++++++++++++ src/gallium/auxiliary/vl/vl_csc.c | 27 ++++++++++++++++++++++ src/gallium/auxiliary/vl/vl_csc.h | 27 ++++++++++++++++++++++ src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 27 ++++++++++++++++++++++ src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h | 27 ++++++++++++++++++++++ src/gallium/auxiliary/vl/vl_shader_build.c | 27 ++++++++++++++++++++++ src/gallium/auxiliary/vl/vl_shader_build.h | 27 ++++++++++++++++++++++ src/gallium/drivers/softpipe/sp_video_context.c | 27 ++++++++++++++++++++++ src/gallium/drivers/softpipe/sp_video_context.h | 27 ++++++++++++++++++++++ src/gallium/include/pipe/p_video_context.h | 27 ++++++++++++++++++++++ src/gallium/include/pipe/p_video_state.h | 27 ++++++++++++++++++++++ src/gallium/state_trackers/xorg/xvmc/attributes.c | 27 ++++++++++++++++++++++ src/gallium/state_trackers/xorg/xvmc/block.c | 27 ++++++++++++++++++++++ src/gallium/state_trackers/xorg/xvmc/context.c | 27 ++++++++++++++++++++++ src/gallium/state_trackers/xorg/xvmc/subpicture.c | 27 ++++++++++++++++++++++ src/gallium/state_trackers/xorg/xvmc/surface.c | 27 ++++++++++++++++++++++ .../state_trackers/xorg/xvmc/tests/test_blocks.c | 27 ++++++++++++++++++++++ .../state_trackers/xorg/xvmc/tests/test_context.c | 27 ++++++++++++++++++++++ .../xorg/xvmc/tests/test_rendering.c | 27 ++++++++++++++++++++++ .../state_trackers/xorg/xvmc/tests/test_surface.c | 27 ++++++++++++++++++++++ .../state_trackers/xorg/xvmc/tests/testlib.c | 27 ++++++++++++++++++++++ .../state_trackers/xorg/xvmc/tests/testlib.h | 27 ++++++++++++++++++++++ .../state_trackers/xorg/xvmc/tests/xvmc_bench.c | 27 ++++++++++++++++++++++ .../state_trackers/xorg/xvmc/xvmc_private.h | 27 ++++++++++++++++++++++ src/gallium/winsys/g3dvl/vl_winsys.h | 27 ++++++++++++++++++++++ src/gallium/winsys/g3dvl/xlib/xsp_winsys.c | 27 ++++++++++++++++++++++ 29 files changed, 783 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/vl/vl_bitstream_parser.c b/src/gallium/auxiliary/vl/vl_bitstream_parser.c index 45826bad45..3193ea5f41 100644 --- a/src/gallium/auxiliary/vl/vl_bitstream_parser.c +++ b/src/gallium/auxiliary/vl/vl_bitstream_parser.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include "vl_bitstream_parser.h" #include #include diff --git a/src/gallium/auxiliary/vl/vl_bitstream_parser.h b/src/gallium/auxiliary/vl/vl_bitstream_parser.h index 91ebaab45b..30ec743fa7 100644 --- a/src/gallium/auxiliary/vl/vl_bitstream_parser.h +++ b/src/gallium/auxiliary/vl/vl_bitstream_parser.h @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #ifndef vl_bitstream_parser_h #define vl_bitstream_parser_h diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index 5d3458afd2..b36dbeb208 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include "vl_compositor.h" #include #include diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h index 975ea00bde..17e2afd353 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.h +++ b/src/gallium/auxiliary/vl/vl_compositor.h @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #ifndef vl_compositor_h #define vl_compositor_h diff --git a/src/gallium/auxiliary/vl/vl_csc.c b/src/gallium/auxiliary/vl/vl_csc.c index 828cebe4ed..5ecc43a5fa 100644 --- a/src/gallium/auxiliary/vl/vl_csc.c +++ b/src/gallium/auxiliary/vl/vl_csc.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include "vl_csc.h" #include #include diff --git a/src/gallium/auxiliary/vl/vl_csc.h b/src/gallium/auxiliary/vl/vl_csc.h index c3b87d279c..722ca35f33 100644 --- a/src/gallium/auxiliary/vl/vl_csc.h +++ b/src/gallium/auxiliary/vl/vl_csc.h @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #ifndef vl_csc_h #define vl_csc_h diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index 9b69f2956c..6b3614821c 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include "vl_mpeg12_mc_renderer.h" #include #include diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h index 0c2f679664..5d2c1273ee 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #ifndef vl_mpeg12_mc_renderer_h #define vl_mpeg12_mc_renderer_h diff --git a/src/gallium/auxiliary/vl/vl_shader_build.c b/src/gallium/auxiliary/vl/vl_shader_build.c index 9ad1e052c6..faa20a903c 100644 --- a/src/gallium/auxiliary/vl/vl_shader_build.c +++ b/src/gallium/auxiliary/vl/vl_shader_build.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include "vl_shader_build.h" #include #include diff --git a/src/gallium/auxiliary/vl/vl_shader_build.h b/src/gallium/auxiliary/vl/vl_shader_build.h index c6c60b5552..5da71f8e13 100644 --- a/src/gallium/auxiliary/vl/vl_shader_build.h +++ b/src/gallium/auxiliary/vl/vl_shader_build.h @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #ifndef vl_shader_build_h #define vl_shader_build_h diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c index 00b4b7d560..cae2d3efc5 100644 --- a/src/gallium/drivers/softpipe/sp_video_context.c +++ b/src/gallium/drivers/softpipe/sp_video_context.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include "sp_video_context.h" #include #include diff --git a/src/gallium/drivers/softpipe/sp_video_context.h b/src/gallium/drivers/softpipe/sp_video_context.h index 2c7691c7cb..ccbd1ffe4c 100644 --- a/src/gallium/drivers/softpipe/sp_video_context.h +++ b/src/gallium/drivers/softpipe/sp_video_context.h @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #ifndef SP_VIDEO_CONTEXT_H #define SP_VIDEO_CONTEXT_H diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h index 4d125fa4d5..6ae31418fa 100644 --- a/src/gallium/include/pipe/p_video_context.h +++ b/src/gallium/include/pipe/p_video_context.h @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #ifndef PIPE_VIDEO_CONTEXT_H #define PIPE_VIDEO_CONTEXT_H diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h index 2a7422bf04..4da26d608c 100644 --- a/src/gallium/include/pipe/p_video_state.h +++ b/src/gallium/include/pipe/p_video_state.h @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #ifndef PIPE_VIDEO_STATE_H #define PIPE_VIDEO_STATE_H diff --git a/src/gallium/state_trackers/xorg/xvmc/attributes.c b/src/gallium/state_trackers/xorg/xvmc/attributes.c index 638da0b577..79a67838e6 100644 --- a/src/gallium/state_trackers/xorg/xvmc/attributes.c +++ b/src/gallium/state_trackers/xorg/xvmc/attributes.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include #include #include diff --git a/src/gallium/state_trackers/xorg/xvmc/block.c b/src/gallium/state_trackers/xorg/xvmc/block.c index 78fddfb79e..5102375fcf 100644 --- a/src/gallium/state_trackers/xorg/xvmc/block.c +++ b/src/gallium/state_trackers/xorg/xvmc/block.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include #include #include diff --git a/src/gallium/state_trackers/xorg/xvmc/context.c b/src/gallium/state_trackers/xorg/xvmc/context.c index a002a7c844..942692d1bb 100644 --- a/src/gallium/state_trackers/xorg/xvmc/context.c +++ b/src/gallium/state_trackers/xorg/xvmc/context.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include #include #include diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c index 78ba618f5a..69898d5fcd 100644 --- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c +++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include #include #include diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c index 6b7dbf11dc..bf9038f356 100644 --- a/src/gallium/state_trackers/xorg/xvmc/surface.c +++ b/src/gallium/state_trackers/xorg/xvmc/surface.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include #include #include diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/test_blocks.c b/src/gallium/state_trackers/xorg/xvmc/tests/test_blocks.c index dc80adfa65..994e3ca4d1 100644 --- a/src/gallium/state_trackers/xorg/xvmc/tests/test_blocks.c +++ b/src/gallium/state_trackers/xorg/xvmc/tests/test_blocks.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include #include #include "testlib.h" diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/test_context.c b/src/gallium/state_trackers/xorg/xvmc/tests/test_context.c index 53f7449cd0..3da957c933 100644 --- a/src/gallium/state_trackers/xorg/xvmc/tests/test_context.c +++ b/src/gallium/state_trackers/xorg/xvmc/tests/test_context.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include #include #include "testlib.h" diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/test_rendering.c b/src/gallium/state_trackers/xorg/xvmc/tests/test_rendering.c index 6d720dfcdc..6058783a79 100644 --- a/src/gallium/state_trackers/xorg/xvmc/tests/test_rendering.c +++ b/src/gallium/state_trackers/xorg/xvmc/tests/test_rendering.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include #include #include diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/test_surface.c b/src/gallium/state_trackers/xorg/xvmc/tests/test_surface.c index 06948201ac..b65eb265c0 100644 --- a/src/gallium/state_trackers/xorg/xvmc/tests/test_surface.c +++ b/src/gallium/state_trackers/xorg/xvmc/tests/test_surface.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include #include #include "testlib.h" diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/testlib.c b/src/gallium/state_trackers/xorg/xvmc/tests/testlib.c index 59a03ca813..142c09bb59 100644 --- a/src/gallium/state_trackers/xorg/xvmc/tests/testlib.c +++ b/src/gallium/state_trackers/xorg/xvmc/tests/testlib.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include "testlib.h" #include diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/testlib.h b/src/gallium/state_trackers/xorg/xvmc/tests/testlib.h index af71ad74e1..0438e52928 100644 --- a/src/gallium/state_trackers/xorg/xvmc/tests/testlib.h +++ b/src/gallium/state_trackers/xorg/xvmc/tests/testlib.h @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #ifndef testlib_h #define testlib_h diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/xvmc_bench.c b/src/gallium/state_trackers/xorg/xvmc/tests/xvmc_bench.c index 97adcfc58a..bf94d85623 100644 --- a/src/gallium/state_trackers/xorg/xvmc/tests/xvmc_bench.c +++ b/src/gallium/state_trackers/xorg/xvmc/tests/xvmc_bench.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include #include #include diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h index 1e3dd561c6..42337631ca 100644 --- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h +++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #ifndef xvmc_private_h #define xvmc_private_h diff --git a/src/gallium/winsys/g3dvl/vl_winsys.h b/src/gallium/winsys/g3dvl/vl_winsys.h index 4f7a243361..22119f9559 100644 --- a/src/gallium/winsys/g3dvl/vl_winsys.h +++ b/src/gallium/winsys/g3dvl/vl_winsys.h @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #ifndef vl_winsys_h #define vl_winsys_h diff --git a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c index 37eee79c5d..0faad544d1 100644 --- a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c +++ b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include #include #include -- cgit v1.2.3 From 389021220d27c376b81a6221a31d0ee33c24e67f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 2 Oct 2009 07:31:42 -0600 Subject: gallium: replace // comments with /* */ --- src/gallium/auxiliary/cso_cache/cso_context.c | 2 +- src/gallium/auxiliary/draw/draw_pt_post_vs.c | 2 +- src/gallium/auxiliary/draw/draw_vs_aos.c | 6 +++--- src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c | 2 +- src/gallium/auxiliary/translate/translate_generic.c | 2 +- src/gallium/auxiliary/util/u_cpu_detect.c | 2 +- src/gallium/auxiliary/util/u_debug_profile.c | 2 +- src/gallium/auxiliary/util/u_debug_symbol.c | 2 +- src/gallium/drivers/softpipe/sp_fs_sse.c | 2 +- 9 files changed, 11 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 36c882acb7..4f13b3e2ba 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -268,7 +268,7 @@ void cso_release_all( struct cso_context *ctx ) void cso_destroy_context( struct cso_context *ctx ) { if (ctx) { - //cso_release_all( ctx ); + /*cso_release_all( ctx );*/ FREE( ctx ); } } diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index 00d7197b13..78953bccfc 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -210,7 +210,7 @@ void draw_pt_post_vs_prepare( struct pt_post_vs *pvs, pvs->run = post_vs_viewport; } else { - //if (opengl) + /* if (opengl) */ pvs->run = post_vs_cliptest_viewport_gl; } } diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 62e04a65f3..645d7cccba 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -891,7 +891,7 @@ static void x87_emit_ex2( struct aos_compilation *cp ) struct x86_reg st1 = x86_make_reg(file_x87, 1); int stack = cp->func->x87_stack; -// set_fpu_round_neg_inf( cp ); + /* set_fpu_round_neg_inf( cp ); */ x87_fld(cp->func, st0); /* a a */ x87_fprndint( cp->func ); /* int(a) a*/ @@ -1759,14 +1759,14 @@ emit_instruction( struct aos_compilation *cp, return emit_SUB(cp, inst); case TGSI_OPCODE_LRP: -// return emit_LERP(cp, inst); + /*return emit_LERP(cp, inst);*/ return FALSE; case TGSI_OPCODE_FRC: return emit_FRC(cp, inst); case TGSI_OPCODE_CLAMP: -// return emit_CLAMP(cp, inst); + /*return emit_CLAMP(cp, inst);*/ return FALSE; case TGSI_OPCODE_FLR: diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 109ac7c9d6..0d30363484 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -584,7 +584,7 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) } #ifdef DEBUG - //assert(!fenced_list->numUnfenced); + /*assert(!fenced_list->numUnfenced);*/ #endif pipe_mutex_unlock(fenced_list->mutex); diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 8d39b64c6c..266e7ee81e 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -217,7 +217,7 @@ ATTRIB( R8G8_SNORM, 2, char, FROM_8_SNORM, TO_8_SNORM ) ATTRIB( R8_SNORM, 1, char, FROM_8_SNORM, TO_8_SNORM ) ATTRIB( A8R8G8B8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM ) -//ATTRIB( R8G8B8A8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM ) +/*ATTRIB( R8G8B8A8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM )*/ ATTRIB( R32G32B32A32_FIXED, 4, int, FROM_32_FIXED, TO_32_FIXED ) ATTRIB( R32G32B32_FIXED, 3, int, FROM_32_FIXED, TO_32_FIXED ) diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index d9f2f8fc28..f78706f447 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -268,7 +268,7 @@ static void check_os_katmai_support(void) * and therefore to be safe I'm going to leave this test in here. */ if (__cpu_detect_caps.hasSSE) { - // test_os_katmai_exception_support(); + /* test_os_katmai_exception_support(); */ } /* Restore the original signal handlers. diff --git a/src/gallium/auxiliary/util/u_debug_profile.c b/src/gallium/auxiliary/util/u_debug_profile.c index 6d8b244c3a..d765b50144 100644 --- a/src/gallium/auxiliary/util/u_debug_profile.c +++ b/src/gallium/auxiliary/util/u_debug_profile.c @@ -254,7 +254,7 @@ debug_profile_start(void) { WCHAR *p; - // increment starting from the less significant digit + /* increment starting from the less significant digit */ p = &wFileName[14]; while(1) { if(*p == '9') { diff --git a/src/gallium/auxiliary/util/u_debug_symbol.c b/src/gallium/auxiliary/util/u_debug_symbol.c index 811931f81b..417d0cf04c 100644 --- a/src/gallium/auxiliary/util/u_debug_symbol.c +++ b/src/gallium/auxiliary/util/u_debug_symbol.c @@ -214,7 +214,7 @@ debug_symbol_print_imagehlp(const void *addr) HANDLE hProcess; BYTE symbolBuffer[1024]; PIMAGEHLP_SYMBOL pSymbol = (PIMAGEHLP_SYMBOL) symbolBuffer; - DWORD dwDisplacement = 0; // Displacement of the input address, relative to the start of the symbol + DWORD dwDisplacement = 0; /* Displacement of the input address, relative to the start of the symbol */ hProcess = GetCurrentProcess(); diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index f4fa0905d7..31ccc3bda9 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -101,7 +101,7 @@ fs_sse_run( const struct sp_fragment_shader *base, machine->Consts, (const float (*)[4])shader->immediates, machine->InterpCoefs - // , &machine->QuadPos + /*, &machine->QuadPos*/ ); return ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]); -- cgit v1.2.3 From 47e41b024e325f69ed514e551a6824afa58f1db6 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Fri, 2 Oct 2009 18:13:26 +0200 Subject: gallium: Preparations for adding more PIPE_TRANSFER_* usage flags. Always test for PIPE_TRANSFER_READ/WRITE using the bit-wise and operator, and add a pipe_transfer_buffer_flags() helper for getting the buffer usage flags corresponding to them. --- src/gallium/auxiliary/util/u_tile.c | 4 ++-- src/gallium/drivers/cell/ppu/cell_texture.c | 18 ++++----------- src/gallium/drivers/i915simple/i915_texture.c | 2 +- src/gallium/drivers/llvmpipe/lp_texture.c | 14 +++--------- src/gallium/drivers/nv04/nv04_transfer.c | 26 +++++----------------- src/gallium/drivers/nv10/nv10_transfer.c | 26 +++++----------------- src/gallium/drivers/nv20/nv20_transfer.c | 26 +++++----------------- src/gallium/drivers/nv30/nv30_transfer.c | 26 +++++----------------- src/gallium/drivers/nv40/nv40_transfer.c | 26 +++++----------------- src/gallium/drivers/nv50/nv50_transfer.c | 4 ++-- src/gallium/drivers/r300/r300_screen.c | 11 ++------- src/gallium/drivers/softpipe/sp_texture.c | 15 +++---------- src/gallium/drivers/trace/tr_screen.c | 2 +- src/gallium/include/pipe/p_inlines.h | 16 +++++++++++++ .../state_trackers/python/retrace/interpreter.py | 2 +- src/gallium/state_trackers/vega/st_inlines.h | 3 +-- src/mesa/state_tracker/st_cb_accum.c | 2 +- src/mesa/state_tracker/st_texture.c | 3 +-- 18 files changed, 63 insertions(+), 163 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 0d6489c26e..8a22f584be 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -1452,7 +1452,7 @@ pipe_put_tile_z(struct pipe_transfer *pt, case PIPE_FORMAT_S8Z24_UNORM: { uint *pDest = (uint *) (map + y * pt->stride + x*4); - assert(pt->usage == PIPE_TRANSFER_READ_WRITE); + assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 24-bit Z, preserve stencil */ @@ -1479,7 +1479,7 @@ pipe_put_tile_z(struct pipe_transfer *pt, case PIPE_FORMAT_Z24S8_UNORM: { uint *pDest = (uint *) (map + y * pt->stride + x*4); - assert(pt->usage == PIPE_TRANSFER_READ_WRITE); + assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 24-bit Z, preserve stencil */ diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 6a63a0e6ce..ae4c61efb3 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -389,22 +389,14 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer) const uint texWidth = pt->width[level]; const uint texHeight = pt->height[level]; const uint stride = ct->stride[level]; - unsigned flags = 0x0; unsigned size; assert(transfer->texture); - if (transfer->usage != PIPE_TRANSFER_READ) { - flags |= PIPE_BUFFER_USAGE_CPU_WRITE; - } - - if (transfer->usage != PIPE_TRANSFER_WRITE) { - flags |= PIPE_BUFFER_USAGE_CPU_READ; - } - if (!ct->mapped) { /* map now */ - ct->mapped = pipe_buffer_map(screen, ct->buffer, flags); + ct->mapped = pipe_buffer_map(screen, ct->buffer, + pipe_transfer_buffer_flags(transfer)); } /* @@ -417,8 +409,7 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer) if (!ctrans->map) return NULL; /* out of memory */ - if (transfer->usage == PIPE_TRANSFER_READ || - transfer->usage == PIPE_TRANSFER_READ_WRITE) { + if (transfer->usage & PIPE_TRANSFER_READ) { /* need to untwiddle the texture to make a linear version */ const uint bpp = pf_get_size(ct->base.format); if (bpp == 4) { @@ -459,8 +450,7 @@ cell_transfer_unmap(struct pipe_screen *screen, PIPE_BUFFER_USAGE_CPU_READ); } - if (transfer->usage == PIPE_TRANSFER_WRITE || - transfer->usage == PIPE_TRANSFER_READ_WRITE) { + if (transfer->usage & PIPE_TRANSFER_WRITE) { /* The user wrote new texture data into the mapped buffer. * We need to convert the new linear data into the twiddled/tiled format. */ diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 15ccc1fc73..286c9ace8e 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -859,7 +859,7 @@ i915_transfer_map(struct pipe_screen *screen, char *map; boolean write = FALSE; - if (transfer->usage != PIPE_TRANSFER_READ) + if (transfer->usage & PIPE_TRANSFER_WRITE) write = TRUE; map = iws->buffer_map(iws, tex->buffer, write); diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 724d437833..08f0950d47 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -353,17 +353,9 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, if(lpt->dt) { struct llvmpipe_winsys *winsys = screen->winsys; - unsigned flags = 0; - if (transfer->usage != PIPE_TRANSFER_READ) { - flags |= PIPE_BUFFER_USAGE_CPU_WRITE; - } - - if (transfer->usage != PIPE_TRANSFER_WRITE) { - flags |= PIPE_BUFFER_USAGE_CPU_READ; - } - - map = winsys->displaytarget_map(winsys, lpt->dt, flags); + map = winsys->displaytarget_map(winsys, lpt->dt, + pipe_transfer_buffer_flags(transfer)); if (map == NULL) return NULL; } @@ -373,7 +365,7 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, /* May want to different things here depending on read/write nature * of the map: */ - if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ) + if (transfer->texture && (transfer->usage & PIPE_TRANSFER_WRITE)) { /* Do something to notify sharing contexts of a texture change. * In llvmpipe, that would mean flushing the texture cache. diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c index 854b855d64..6618660743 100644 --- a/src/gallium/drivers/nv04/nv04_transfer.c +++ b/src/gallium/drivers/nv04/nv04_transfer.c @@ -13,22 +13,6 @@ struct nv04_transfer { bool direct; }; -static unsigned nv04_usage_tx_to_buf(unsigned tx_usage) -{ - switch (tx_usage) { - case PIPE_TRANSFER_READ: - return PIPE_BUFFER_USAGE_CPU_READ; - case PIPE_TRANSFER_WRITE: - return PIPE_BUFFER_USAGE_CPU_WRITE; - case PIPE_TRANSFER_READ_WRITE: - return PIPE_BUFFER_USAGE_CPU_READ_WRITE; - default: - assert(0); - } - - return -1; -} - static void nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, struct pipe_texture *template) @@ -86,7 +70,7 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = true; tx->surface = pscreen->get_tex_surface(pscreen, pt, 0, 0, 0, - nv04_usage_tx_to_buf(usage)); + pipe_transfer_buffer_flags(&tx->base)); return &tx->base; } @@ -103,7 +87,7 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, face, level, zslice, - nv04_usage_tx_to_buf(usage)); + pipe_transfer_buffer_flags(&tx->base)); pipe_texture_reference(&tx_tex, NULL); @@ -114,7 +98,7 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } - if (usage != PIPE_TRANSFER_WRITE) { + if (usage & PIPE_TRANSFER_READ) { struct nv04_screen *nvscreen = nv04_screen(pscreen); struct pipe_surface *src; @@ -140,7 +124,7 @@ nv04_transfer_del(struct pipe_transfer *ptx) { struct nv04_transfer *tx = (struct nv04_transfer *)ptx; - if (!tx->direct && ptx->usage != PIPE_TRANSFER_READ) { + if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) { struct pipe_screen *pscreen = ptx->texture->screen; struct nv04_screen *nvscreen = nv04_screen(pscreen); struct pipe_surface *dst; @@ -170,7 +154,7 @@ nv04_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) struct nv04_surface *ns = (struct nv04_surface *)tx->surface; struct nv04_miptree *mt = (struct nv04_miptree *)tx->surface->texture; void *map = pipe_buffer_map(pscreen, mt->buffer, - nv04_usage_tx_to_buf(ptx->usage)); + pipe_transfer_buffer_flags(ptx)); return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * ptx->block.size; diff --git a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c index c06b8d34c7..8feb85e4bd 100644 --- a/src/gallium/drivers/nv10/nv10_transfer.c +++ b/src/gallium/drivers/nv10/nv10_transfer.c @@ -13,22 +13,6 @@ struct nv10_transfer { bool direct; }; -static unsigned nv10_usage_tx_to_buf(unsigned tx_usage) -{ - switch (tx_usage) { - case PIPE_TRANSFER_READ: - return PIPE_BUFFER_USAGE_CPU_READ; - case PIPE_TRANSFER_WRITE: - return PIPE_BUFFER_USAGE_CPU_WRITE; - case PIPE_TRANSFER_READ_WRITE: - return PIPE_BUFFER_USAGE_CPU_READ_WRITE; - default: - assert(0); - } - - return -1; -} - static void nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, struct pipe_texture *template) @@ -86,7 +70,7 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = true; tx->surface = pscreen->get_tex_surface(pscreen, pt, 0, 0, 0, - nv10_usage_tx_to_buf(usage)); + pipe_transfer_buffer_flags(&tx->base)); return &tx->base; } @@ -103,7 +87,7 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, face, level, zslice, - nv10_usage_tx_to_buf(usage)); + pipe_transfer_buffer_flags(&tx->base)); pipe_texture_reference(&tx_tex, NULL); @@ -114,7 +98,7 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } - if (usage != PIPE_TRANSFER_WRITE) { + if (usage & PIPE_TRANSFER_READ) { struct nv10_screen *nvscreen = nv10_screen(pscreen); struct pipe_surface *src; @@ -140,7 +124,7 @@ nv10_transfer_del(struct pipe_transfer *ptx) { struct nv10_transfer *tx = (struct nv10_transfer *)ptx; - if (!tx->direct && ptx->usage != PIPE_TRANSFER_READ) { + if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) { struct pipe_screen *pscreen = ptx->texture->screen; struct nv10_screen *nvscreen = nv10_screen(pscreen); struct pipe_surface *dst; @@ -170,7 +154,7 @@ nv10_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) struct nv04_surface *ns = (struct nv04_surface *)tx->surface; struct nv10_miptree *mt = (struct nv10_miptree *)tx->surface->texture; void *map = pipe_buffer_map(pscreen, mt->buffer, - nv10_usage_tx_to_buf(ptx->usage)); + pipe_transfer_buffer_flags(ptx)); return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * ptx->block.size; diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c index 5018995596..81b4f1a917 100644 --- a/src/gallium/drivers/nv20/nv20_transfer.c +++ b/src/gallium/drivers/nv20/nv20_transfer.c @@ -13,22 +13,6 @@ struct nv20_transfer { bool direct; }; -static unsigned nv20_usage_tx_to_buf(unsigned tx_usage) -{ - switch (tx_usage) { - case PIPE_TRANSFER_READ: - return PIPE_BUFFER_USAGE_CPU_READ; - case PIPE_TRANSFER_WRITE: - return PIPE_BUFFER_USAGE_CPU_WRITE; - case PIPE_TRANSFER_READ_WRITE: - return PIPE_BUFFER_USAGE_CPU_READ_WRITE; - default: - assert(0); - } - - return -1; -} - static void nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, struct pipe_texture *template) @@ -86,7 +70,7 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = true; tx->surface = pscreen->get_tex_surface(pscreen, pt, 0, 0, 0, - nv20_usage_tx_to_buf(usage)); + pipe_transfer_buffer_flags(&tx->base)); return &tx->base; } @@ -103,7 +87,7 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, face, level, zslice, - nv20_usage_tx_to_buf(usage)); + pipe_transfer_buffer_flags(&tx->base)); pipe_texture_reference(&tx_tex, NULL); @@ -114,7 +98,7 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } - if (usage != PIPE_TRANSFER_WRITE) { + if (usage & PIPE_TRANSFER_READ) { struct nv20_screen *nvscreen = nv20_screen(pscreen); struct pipe_surface *src; @@ -140,7 +124,7 @@ nv20_transfer_del(struct pipe_transfer *ptx) { struct nv20_transfer *tx = (struct nv20_transfer *)ptx; - if (!tx->direct && ptx->usage != PIPE_TRANSFER_READ) { + if (!tx->direct && (ptx->usage = PIPE_TRANSFER_WRITE)) { struct pipe_screen *pscreen = ptx->texture->screen; struct nv20_screen *nvscreen = nv20_screen(pscreen); struct pipe_surface *dst; @@ -170,7 +154,7 @@ nv20_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) struct nv04_surface *ns = (struct nv04_surface *)tx->surface; struct nv20_miptree *mt = (struct nv20_miptree *)tx->surface->texture; void *map = pipe_buffer_map(pscreen, mt->buffer, - nv20_usage_tx_to_buf(ptx->usage)); + pipe_transfer_buffer_flags(ptx)); return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * ptx->block.size; diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c index 2367571878..98011decf7 100644 --- a/src/gallium/drivers/nv30/nv30_transfer.c +++ b/src/gallium/drivers/nv30/nv30_transfer.c @@ -13,22 +13,6 @@ struct nv30_transfer { bool direct; }; -static unsigned nv30_usage_tx_to_buf(unsigned tx_usage) -{ - switch (tx_usage) { - case PIPE_TRANSFER_READ: - return PIPE_BUFFER_USAGE_CPU_READ; - case PIPE_TRANSFER_WRITE: - return PIPE_BUFFER_USAGE_CPU_WRITE; - case PIPE_TRANSFER_READ_WRITE: - return PIPE_BUFFER_USAGE_CPU_READ_WRITE; - default: - assert(0); - } - - return -1; -} - static void nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, struct pipe_texture *template) @@ -86,7 +70,7 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = true; tx->surface = pscreen->get_tex_surface(pscreen, pt, face, level, zslice, - nv30_usage_tx_to_buf(usage)); + pipe_transfer_buffer_flags(&tx->base)); return &tx->base; } @@ -103,7 +87,7 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, 0, 0, 0, - nv30_usage_tx_to_buf(usage)); + pipe_transfer_buffer_flags(&tx->base)); pipe_texture_reference(&tx_tex, NULL); @@ -114,7 +98,7 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } - if (usage != PIPE_TRANSFER_WRITE) { + if (usage & PIPE_TRANSFER_READ) { struct nv30_screen *nvscreen = nv30_screen(pscreen); struct pipe_surface *src; @@ -140,7 +124,7 @@ nv30_transfer_del(struct pipe_transfer *ptx) { struct nv30_transfer *tx = (struct nv30_transfer *)ptx; - if (!tx->direct && ptx->usage != PIPE_TRANSFER_READ) { + if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) { struct pipe_screen *pscreen = ptx->texture->screen; struct nv30_screen *nvscreen = nv30_screen(pscreen); struct pipe_surface *dst; @@ -170,7 +154,7 @@ nv30_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) struct nv04_surface *ns = (struct nv04_surface *)tx->surface; struct nv30_miptree *mt = (struct nv30_miptree *)tx->surface->texture; void *map = pipe_buffer_map(pscreen, mt->buffer, - nv30_usage_tx_to_buf(ptx->usage)); + pipe_transfer_buffer_flags(ptx)); return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * ptx->block.size; diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c index 6d92ac3db9..92caee6f38 100644 --- a/src/gallium/drivers/nv40/nv40_transfer.c +++ b/src/gallium/drivers/nv40/nv40_transfer.c @@ -13,22 +13,6 @@ struct nv40_transfer { bool direct; }; -static unsigned nv40_usage_tx_to_buf(unsigned tx_usage) -{ - switch (tx_usage) { - case PIPE_TRANSFER_READ: - return PIPE_BUFFER_USAGE_CPU_READ; - case PIPE_TRANSFER_WRITE: - return PIPE_BUFFER_USAGE_CPU_WRITE; - case PIPE_TRANSFER_READ_WRITE: - return PIPE_BUFFER_USAGE_CPU_READ_WRITE; - default: - assert(0); - } - - return -1; -} - static void nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, struct pipe_texture *template) @@ -86,7 +70,7 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = true; tx->surface = pscreen->get_tex_surface(pscreen, pt, face, level, zslice, - nv40_usage_tx_to_buf(usage)); + pipe_transfer_buffer_flags(&tx->base)); return &tx->base; } @@ -103,7 +87,7 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, 0, 0, 0, - nv40_usage_tx_to_buf(usage)); + pipe_transfer_buffer_flags(&tx->base)); pipe_texture_reference(&tx_tex, NULL); @@ -114,7 +98,7 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } - if (usage != PIPE_TRANSFER_WRITE) { + if (usage & PIPE_TRANSFER_READ) { struct nv40_screen *nvscreen = nv40_screen(pscreen); struct pipe_surface *src; @@ -140,7 +124,7 @@ nv40_transfer_del(struct pipe_transfer *ptx) { struct nv40_transfer *tx = (struct nv40_transfer *)ptx; - if (!tx->direct && ptx->usage != PIPE_TRANSFER_READ) { + if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) { struct pipe_screen *pscreen = ptx->texture->screen; struct nv40_screen *nvscreen = nv40_screen(pscreen); struct pipe_surface *dst; @@ -170,7 +154,7 @@ nv40_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) struct nv04_surface *ns = (struct nv04_surface *)tx->surface; struct nv40_miptree *mt = (struct nv40_miptree *)tx->surface->texture; void *map = pipe_buffer_map(pscreen, mt->buffer, - nv40_usage_tx_to_buf(ptx->usage)); + pipe_transfer_buffer_flags(ptx)); return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * ptx->block.size; diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index bb7731855c..9c289026bb 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -161,7 +161,7 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } - if (usage != PIPE_TRANSFER_WRITE) { + if (usage & PIPE_TRANSFER_READ) { nv50_transfer_rect_m2mf(pscreen, mt->base.bo, tx->level_offset, tx->level_pitch, tx->level_tiling, x, y, @@ -183,7 +183,7 @@ nv50_transfer_del(struct pipe_transfer *ptx) struct nv50_transfer *tx = (struct nv50_transfer *)ptx; struct nv50_miptree *mt = nv50_miptree(ptx->texture); - if (ptx->usage != PIPE_TRANSFER_READ) { + if (ptx->usage & PIPE_TRANSFER_WRITE) { struct pipe_screen *pscreen = ptx->texture->screen; nv50_transfer_rect_m2mf(pscreen, tx->bo, 0, tx->base.stride, tx->bo->tile_mode, 0, 0, diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 8296d56840..f2659ca61f 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -345,16 +345,9 @@ static void* r300_transfer_map(struct pipe_screen* screen, { struct r300_texture* tex = (struct r300_texture*)transfer->texture; char* map; - unsigned flags = 0; - if (transfer->usage != PIPE_TRANSFER_WRITE) { - flags |= PIPE_BUFFER_USAGE_CPU_READ; - } - if (transfer->usage != PIPE_TRANSFER_READ) { - flags |= PIPE_BUFFER_USAGE_CPU_WRITE; - } - - map = pipe_buffer_map(screen, tex->buffer, flags); + map = pipe_buffer_map(screen, tex->buffer, + pipe_transfer_buffer_flags(transfer)); if (!map) { return NULL; diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 1c64d58372..2e6c43c7ef 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -329,27 +329,18 @@ softpipe_transfer_map( struct pipe_screen *screen, { ubyte *map, *xfer_map; struct softpipe_texture *spt; - unsigned flags = 0; assert(transfer->texture); spt = softpipe_texture(transfer->texture); - if (transfer->usage != PIPE_TRANSFER_READ) { - flags |= PIPE_BUFFER_USAGE_CPU_WRITE; - } - - if (transfer->usage != PIPE_TRANSFER_WRITE) { - flags |= PIPE_BUFFER_USAGE_CPU_READ; - } - - map = pipe_buffer_map(screen, spt->buffer, flags); + map = pipe_buffer_map(screen, spt->buffer, pipe_transfer_buffer_flags(transfer)); if (map == NULL) return NULL; /* May want to different things here depending on read/write nature * of the map: */ - if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ) { + if (transfer->texture && (transfer->usage & PIPE_TRANSFER_WRITE)) { /* Do something to notify sharing contexts of a texture change. * In softpipe, that would mean flushing the texture cache. */ @@ -375,7 +366,7 @@ softpipe_transfer_unmap(struct pipe_screen *screen, pipe_buffer_unmap( screen, spt->buffer ); - if (transfer->usage != PIPE_TRANSFER_READ) { + if (transfer->usage & PIPE_TRANSFER_WRITE) { /* Mark the texture as dirty to expire the tile caches. */ spt->timestamp++; } diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index 26f1c04594..ab605c7fc8 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -403,7 +403,7 @@ trace_screen_transfer_map(struct pipe_screen *_screen, map = screen->transfer_map(screen, transfer); if(map) { - if(transfer->usage != PIPE_TRANSFER_READ) { + if(transfer->usage & PIPE_TRANSFER_WRITE) { assert(!tr_trans->map); tr_trans->map = map; } diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h index 30a4aaf409..5fbd62a03d 100644 --- a/src/gallium/include/pipe/p_inlines.h +++ b/src/gallium/include/pipe/p_inlines.h @@ -176,6 +176,22 @@ pipe_transfer_destroy( struct pipe_transfer *transf ) screen->tex_transfer_destroy(transf); } +static INLINE unsigned +pipe_transfer_buffer_flags( struct pipe_transfer *transf ) +{ + switch (transf->usage & PIPE_TRANSFER_READ_WRITE) { + case PIPE_TRANSFER_READ_WRITE: + return PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE; + case PIPE_TRANSFER_READ: + return PIPE_BUFFER_USAGE_CPU_READ; + case PIPE_TRANSFER_WRITE: + return PIPE_BUFFER_USAGE_CPU_WRITE; + default: + debug_assert(0); + return 0; + } +} + #ifdef __cplusplus } #endif diff --git a/src/gallium/state_trackers/python/retrace/interpreter.py b/src/gallium/state_trackers/python/retrace/interpreter.py index 6f0bd6ae52..f4ed2fde4d 100755 --- a/src/gallium/state_trackers/python/retrace/interpreter.py +++ b/src/gallium/state_trackers/python/retrace/interpreter.py @@ -314,7 +314,7 @@ class Screen(Object): if texture is None: return None transfer = Transfer(texture.get_surface(face, level, zslice), x, y, w, h) - if transfer and usage != gallium.PIPE_TRANSFER_WRITE: + if transfer and usage & gallium.PIPE_TRANSFER_READ if self.interpreter.options.all: self.interpreter.present(transfer.surface, 'transf_read', x, y, w, h) return transfer diff --git a/src/gallium/state_trackers/vega/st_inlines.h b/src/gallium/state_trackers/vega/st_inlines.h index 1f331dfcdb..610755e063 100644 --- a/src/gallium/state_trackers/vega/st_inlines.h +++ b/src/gallium/state_trackers/vega/st_inlines.h @@ -57,8 +57,7 @@ st_cond_flush_get_tex_transfer(struct vg_context *st, pipe->is_texture_referenced(pipe, pt, face, level); if (referenced && ((referenced & PIPE_REFERENCED_FOR_WRITE) || - usage == PIPE_TRANSFER_WRITE || - usage == PIPE_TRANSFER_READ_WRITE)) + (usage & PIPE_TRANSFER_WRITE))) vgFlush(); return screen->get_tex_transfer(screen, pt, face, level, zslice, usage, diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index 95181578f6..3d1d0f71d5 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -241,7 +241,7 @@ accum_return(GLcontext *ctx, GLfloat value, xpos, ypos, width, height); - if (usage != PIPE_TRANSFER_WRITE) + if (usage & PIPE_TRANSFER_READ) pipe_get_tile_rgba(color_trans, 0, 0, width, height, buf); switch (acc_strb->format) { diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index bbc2830e69..ba8d1e8cc1 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -577,7 +577,6 @@ st_teximage_flush_before_map(struct st_context *st, pipe->is_texture_referenced(pipe, pt, face, level); if (referenced && ((referenced & PIPE_REFERENCED_FOR_WRITE) || - usage == PIPE_TRANSFER_WRITE || - usage == PIPE_TRANSFER_READ_WRITE)) + (usage & PIPE_TRANSFER_WRITE))) st_flush(st, PIPE_FLUSH_RENDER_CACHE, NULL); } -- cgit v1.2.3 From 751aa58e01bd2b4f35aa0e1477d77a0dc5490f39 Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Sat, 3 Oct 2009 17:24:04 +0200 Subject: r300g: Reset vbo_offset after allocation of a new buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the glxgears bug, among other things. Signed-off-by: Nicolai Hähnle --- src/gallium/drivers/r300/r300_render.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 16f6404012..69d162324a 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -79,13 +79,14 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render, struct pipe_screen* screen = r300->context.screen; size_t size = (size_t)vertex_size * (size_t)count; - if (size + r300render->vbo_offset > r300render->vbo_size) + if (size + r300render->vbo_offset > r300render->vbo_size) { pipe_buffer_reference(&r300->vbo, NULL); r300render->vbo = pipe_buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX, R300_MAX_VBO_SIZE); + r300render->vbo_offset = 0; r300render->vbo_size = R300_MAX_VBO_SIZE; } @@ -118,7 +119,7 @@ static void r300_render_unmap_vertices(struct vbuf_render* render, OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max); END_CS; - r300render->vbo_max_used = MAX2(r300render->vbo_max_used, + r300render->vbo_max_used = MAX2(r300render->vbo_max_used, r300render->vertex_size * (max + 1)); pipe_buffer_unmap(screen, r300render->vbo); } -- cgit v1.2.3 From 26df8af4fe4173eb52132dc63ee789b80a7a4db2 Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Sat, 3 Oct 2009 17:49:16 +0200 Subject: r300g: Remove an unnecessarily created pipe buffer (and thus fix a leak) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolai Hähnle --- src/gallium/drivers/r300/r300_render.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 69d162324a..ca44e0f661 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -223,13 +223,6 @@ static void r300_render_draw(struct vbuf_render* render, r300_prepare_render(r300render, count); - /* Send our indices into an index buffer. */ - index_buffer = pipe_buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX, - count * 2); - if (!index_buffer) { - return; - } - BEGIN_CS(2 + (count+1)/2); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | -- cgit v1.2.3 From 81e5188f66248424d54fcf1d85a81510694bd472 Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Sat, 3 Oct 2009 19:20:31 +0200 Subject: r300g: Do not abort on fragment program compiler error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolai Hähnle --- src/gallium/drivers/r300/r300_fs.c | 11 ++++++++--- src/gallium/drivers/r300/r300_fs.h | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index a0e848a59a..546ad545a5 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -126,9 +126,14 @@ void r300_translate_fragment_shader(struct r300_context* r300, /* Invoke the compiler */ r3xx_compile_fragment_program(&compiler); if (compiler.Base.Error) { - /* Todo: Fail gracefully */ - fprintf(stderr, "r300 FP: Compiler error\n"); - abort(); + /* Todo: Fallback to software rendering gracefully? */ + fprintf(stderr, "r300 FP: Compiler error: %s\n", compiler.Base.ErrorMsg); + + if (compiler.is_r500) { + memcpy(compiler.code, &r5xx_passthrough_fragment_shader, sizeof(r5xx_passthrough_fragment_shader)); + } else { + memcpy(compiler.code, &r3xx_passthrough_fragment_shader, sizeof(r3xx_passthrough_fragment_shader)); + } } /* And, finally... */ diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h index 9fab789402..967e9f697e 100644 --- a/src/gallium/drivers/r300/r300_fs.h +++ b/src/gallium/drivers/r300/r300_fs.h @@ -48,4 +48,4 @@ struct r300_fragment_shader { void r300_translate_fragment_shader(struct r300_context* r300, struct r300_fragment_shader* fs); - #endif /* R300_FS_H */ +#endif /* R300_FS_H */ -- cgit v1.2.3 From cbb57bf726619a34a244acaebf0dcd77750cba54 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 3 Oct 2009 19:42:22 +0100 Subject: llvmpipe: Fetch tile only if a color buffer is bound. --- src/gallium/drivers/llvmpipe/lp_setup.c | 10 ++++++++-- src/gallium/drivers/llvmpipe/lp_tile_cache.c | 3 +++ 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 2d2fc19a65..60107214df 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -124,7 +124,7 @@ shade_quads(struct llvmpipe_context *llvmpipe, struct quad_header *quad = quads[0]; const unsigned x = quad->input.x0; const unsigned y = quad->input.y0; - uint8_t *tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y); + uint8_t *tile; uint8_t *color; void *depth; uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; @@ -150,7 +150,13 @@ shade_quads(struct llvmpipe_context *llvmpipe, mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? ~0 : 0; /* color buffer */ - color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0); + if(llvmpipe->framebuffer.nr_cbufs >= 1 && + llvmpipe->framebuffer.cbufs[0]) { + tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y); + color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0); + } + else + color = NULL; /* depth buffer */ if(llvmpipe->zsbuf_map) { diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c index 0c06b659a1..2ac8cb5c82 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c @@ -294,6 +294,9 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc, struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; struct pipe_transfer *pt = tc->transfer; + assert(tc->surface); + assert(tc->transfer); + switch(tile->status) { case LP_TILE_STATUS_CLEAR: /* don't get tile from framebuffer, just clear it */ -- cgit v1.2.3 From b7cf887ca74561469c144f1d12227e1bcf277e7e Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Sat, 3 Oct 2009 21:28:59 +0200 Subject: r300/compiler: Introduce control flow instructions and refactor dataflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Note that control flow instruction support isn't actually fully functional yet. Signed-off-by: Nicolai Hähnle --- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 6 +- src/mesa/drivers/dri/r300/compiler/Makefile | 3 +- src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 15 +- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 18 +- .../drivers/dri/r300/compiler/radeon_dataflow.c | 74 ----- .../drivers/dri/r300/compiler/radeon_dataflow.h | 69 +--- .../dri/r300/compiler/radeon_dataflow_annotate.c | 365 --------------------- .../dri/r300/compiler/radeon_dataflow_deadcode.c | 253 ++++++++++++++ .../dri/r300/compiler/radeon_dataflow_dealias.c | 150 --------- .../dri/r300/compiler/radeon_dataflow_swizzles.c | 24 -- .../drivers/dri/r300/compiler/radeon_opcodes.c | 78 +++++ .../drivers/dri/r300/compiler/radeon_opcodes.h | 20 ++ .../drivers/dri/r300/compiler/radeon_program.c | 19 +- .../drivers/dri/r300/compiler/radeon_program.h | 71 ++-- .../dri/r300/compiler/radeon_program_print.c | 48 +-- 15 files changed, 434 insertions(+), 779 deletions(-) delete mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_dataflow_annotate.c create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c delete mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_dataflow_dealias.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 7f12008830..1246ffe8c2 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -106,11 +106,11 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_DP2: return RC_OPCODE_DP2; */ case TGSI_OPCODE_TXL: return RC_OPCODE_TXL; /* case TGSI_OPCODE_BRK: return RC_OPCODE_BRK; */ - /* case TGSI_OPCODE_IF: return RC_OPCODE_IF; */ + case TGSI_OPCODE_IF: return RC_OPCODE_IF; /* case TGSI_OPCODE_LOOP: return RC_OPCODE_LOOP; */ /* case TGSI_OPCODE_REP: return RC_OPCODE_REP; */ - /* case TGSI_OPCODE_ELSE: return RC_OPCODE_ELSE; */ - /* case TGSI_OPCODE_ENDIF: return RC_OPCODE_ENDIF; */ + case TGSI_OPCODE_ELSE: return RC_OPCODE_ELSE; + case TGSI_OPCODE_ENDIF: return RC_OPCODE_ENDIF; /* case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP; */ /* case TGSI_OPCODE_ENDREP: return RC_OPCODE_ENDREP; */ /* case TGSI_OPCODE_PUSHA: return RC_OPCODE_PUSHA; */ diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index 53fb7caa95..0d8dcb9f87 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -14,8 +14,7 @@ C_SOURCES = \ radeon_program_alu.c \ radeon_program_pair.c \ radeon_dataflow.c \ - radeon_dataflow_annotate.c \ - radeon_dataflow_dealias.c \ + radeon_dataflow_deadcode.c \ radeon_dataflow_swizzles.c \ r3xx_fragprog.c \ r300_fragprog.c \ diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index bf9bea685a..590201a9ba 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -24,6 +24,7 @@ #include +#include "radeon_dataflow.h" #include "radeon_program_alu.h" #include "r300_fragprog.h" #include "r300_fragprog_swizzle.h" @@ -107,17 +108,23 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) if (c->Base.Debug) { fprintf(stderr, "Fragment Program: After native rewrite:\n"); - rc_print_program(&c->Base.Program, 0); + rc_print_program(&c->Base.Program); + fflush(stderr); + } + + rc_dataflow_deadcode(&c->Base, &dataflow_outputs_mark_use, c); + + if (c->Base.Debug) { + fprintf(stderr, "Fragment Program: After deadcode:\n"); + rc_print_program(&c->Base.Program); fflush(stderr); } - rc_dataflow_annotate(&c->Base, &dataflow_outputs_mark_use, c); - rc_dataflow_dealias(&c->Base); rc_dataflow_swizzles(&c->Base); if (c->Base.Debug) { fprintf(stderr, "Compiler: after dataflow passes:\n"); - rc_print_program(&c->Base.Program, 0); + rc_print_program(&c->Base.Program); fflush(stderr); } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index ac72f8cbb6..e1e735568d 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -588,7 +588,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) if (compiler->Base.Debug) { fprintf(stderr, "Vertex program after native rewrite:\n"); - rc_print_program(&compiler->Base.Program, 0); + rc_print_program(&compiler->Base.Program); fflush(stderr); } @@ -605,21 +605,25 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) if (compiler->Base.Debug) { fprintf(stderr, "Vertex program after source conflict resolve:\n"); - rc_print_program(&compiler->Base.Program, 0); + rc_print_program(&compiler->Base.Program); + fflush(stderr); + } + + rc_dataflow_deadcode(&compiler->Base, &dataflow_outputs_mark_used, compiler); + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after deadcode:\n"); + rc_print_program(&compiler->Base.Program); fflush(stderr); } - rc_dataflow_annotate(&compiler->Base, &dataflow_outputs_mark_used, compiler); - rc_dataflow_dealias(&compiler->Base); rc_dataflow_swizzles(&compiler->Base); - /* This invalidates dataflow annotations and should be replaced - * by a future generic register allocation pass. */ allocate_temporary_registers(compiler); if (compiler->Base.Debug) { fprintf(stderr, "Vertex program after dataflow:\n"); - rc_print_program(&compiler->Base.Program, 0); + rc_print_program(&compiler->Base.Program); fflush(stderr); } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index af6777a7bd..a171d8ab54 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -30,77 +30,3 @@ #include "radeon_compiler.h" -static void add_ref_to_vector(struct rc_dataflow_ref * ref, struct rc_dataflow_vector * vector) -{ - ref->Vector = vector; - ref->Prev = &vector->Refs; - ref->Next = vector->Refs.Next; - ref->Prev->Next = ref; - ref->Next->Prev = ref; -} - -struct rc_dataflow_ref * rc_dataflow_create_ref(struct radeon_compiler * c, - struct rc_dataflow_vector * vector, struct rc_instruction * inst) -{ - struct rc_dataflow_ref * ref = memory_pool_malloc(&c->Pool, sizeof(struct rc_dataflow_ref)); - ref->ReadInstruction = inst; - ref->UseMask = 0; - - add_ref_to_vector(ref, vector); - - return ref; -} - -struct rc_dataflow_vector * rc_dataflow_create_vector(struct radeon_compiler * c, - rc_register_file file, unsigned int index, struct rc_instruction * inst) -{ - struct rc_dataflow_vector * vec = memory_pool_malloc(&c->Pool, sizeof(struct rc_dataflow_vector)); - - memset(vec, 0, sizeof(struct rc_dataflow_vector)); - vec->File = file; - vec->Index = index; - vec->WriteInstruction = inst; - - vec->Refs.Next = vec->Refs.Prev = &vec->Refs; - - return vec; -} - -void rc_dataflow_remove_ref(struct rc_dataflow_ref * ref) -{ - ref->Prev->Next = ref->Next; - ref->Next->Prev = ref->Prev; -} - -void rc_dataflow_remove_instruction(struct rc_instruction * inst) -{ - for(unsigned int i = 0; i < 3; ++i) { - if (inst->Dataflow.SrcReg[i]) { - rc_dataflow_remove_ref(inst->Dataflow.SrcReg[i]); - inst->Dataflow.SrcReg[i] = 0; - } - if (inst->Dataflow.SrcRegAddress[i]) { - rc_dataflow_remove_ref(inst->Dataflow.SrcRegAddress[i]); - inst->Dataflow.SrcRegAddress[i] = 0; - } - } - - if (inst->Dataflow.DstReg) { - while(inst->Dataflow.DstReg->Refs.Next != &inst->Dataflow.DstReg->Refs) { - struct rc_dataflow_ref * ref = inst->Dataflow.DstReg->Refs.Next; - rc_dataflow_remove_ref(ref); - if (inst->Dataflow.DstRegPrev) - add_ref_to_vector(ref, inst->Dataflow.DstRegPrev->Vector); - } - - inst->Dataflow.DstReg->WriteInstruction = 0; - inst->Dataflow.DstReg = 0; - } - - if (inst->Dataflow.DstRegPrev) { - rc_dataflow_remove_ref(inst->Dataflow.DstRegPrev); - inst->Dataflow.DstRegPrev = 0; - } - - inst->Dataflow.DstRegAliased = 0; -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h index c9856affe8..76c323d057 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h @@ -34,79 +34,14 @@ struct radeon_compiler; struct rc_instruction; struct rc_swizzle_caps; -struct rc_dataflow_vector; - -struct rc_dataflow_ref { - struct rc_dataflow_vector * Vector; - - /** - * Linked list of references to the above-mentioned vector. - * The linked list is \em not sorted. - */ - /*@{*/ - struct rc_dataflow_ref * Prev; - struct rc_dataflow_ref * Next; - /*@}*/ - - unsigned int UseMask:4; - struct rc_instruction * ReadInstruction; -}; - -struct rc_dataflow_vector { - rc_register_file File:3; - unsigned int Index:RC_REGISTER_INDEX_BITS; - - /** For private use in compiler passes. MUST BE RESET TO 0 by the end of each pass. - * The annotate pass uses this bit to track whether a vector is in the - * update stack. - */ - unsigned int PassBit:1; - /** Which of the components have been written with useful values */ - unsigned int ValidMask:4; - /** Which of the components are used downstream */ - unsigned int UseMask:4; - /** The instruction that produced this vector */ - struct rc_instruction * WriteInstruction; - - /** Linked list of references to this vector */ - struct rc_dataflow_ref Refs; -}; - -struct rc_instruction_dataflow { - struct rc_dataflow_ref * SrcReg[3]; - struct rc_dataflow_ref * SrcRegAddress[3]; - - /** Reference the components of the destination register - * that are carried over without being overwritten */ - struct rc_dataflow_ref * DstRegPrev; - /** Indicates whether the destination register was in use - * before this instruction */ - unsigned int DstRegAliased:1; - struct rc_dataflow_vector * DstReg; -}; - -/** - * General functions for manipulating the dataflow structures. - */ -/*@{*/ -struct rc_dataflow_ref * rc_dataflow_create_ref(struct radeon_compiler * c, - struct rc_dataflow_vector * vector, struct rc_instruction * inst); -struct rc_dataflow_vector * rc_dataflow_create_vector(struct radeon_compiler * c, - rc_register_file file, unsigned int index, struct rc_instruction * inst); -void rc_dataflow_remove_ref(struct rc_dataflow_ref * ref); - -void rc_dataflow_remove_instruction(struct rc_instruction * inst); -/*@}*/ - /** - * Compiler passes based on dataflow structures. + * Compiler passes based on dataflow analysis. */ /*@{*/ typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data, void (*mark_fn)(void * data, unsigned int index, unsigned int mask)); -void rc_dataflow_annotate(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata); -void rc_dataflow_dealias(struct radeon_compiler * c); +void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata); void rc_dataflow_swizzles(struct radeon_compiler * c); /*@}*/ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_annotate.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_annotate.c deleted file mode 100644 index 41d175a22f..0000000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_annotate.c +++ /dev/null @@ -1,365 +0,0 @@ -/* - * Copyright (C) 2009 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "radeon_dataflow.h" - -#include "radeon_compiler.h" - - -struct dataflow_state { - struct radeon_compiler * C; - unsigned int DCE:1; - unsigned int UpdateRunning:1; - - struct rc_dataflow_vector * Input[RC_REGISTER_MAX_INDEX]; - struct rc_dataflow_vector * Output[RC_REGISTER_MAX_INDEX]; - struct rc_dataflow_vector * Temporary[RC_REGISTER_MAX_INDEX]; - struct rc_dataflow_vector * Address; - - struct rc_dataflow_vector ** UpdateStack; - unsigned int UpdateStackSize; - unsigned int UpdateStackReserved; -}; - -static void mark_vector_use(struct dataflow_state * s, struct rc_dataflow_vector * vector, unsigned int mask); - -static struct rc_dataflow_vector * get_register_contents(struct dataflow_state * s, - rc_register_file file, unsigned int index) -{ - if (file == RC_FILE_INPUT || file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) { - if (index >= RC_REGISTER_MAX_INDEX) - return 0; /* cannot happen, but be defensive */ - - if (file == RC_FILE_TEMPORARY) - return s->Temporary[index]; - if (file == RC_FILE_INPUT) - return s->Input[index]; - if (file == RC_FILE_OUTPUT) - return s->Output[index]; - } - - if (file == RC_FILE_ADDRESS) - return s->Address; - - return 0; /* can happen, constant register file */ -} - -static void mark_ref_use(struct dataflow_state * s, struct rc_dataflow_ref * ref, unsigned int mask) -{ - if (!(mask & ~ref->UseMask)) - return; - - ref->UseMask |= mask; - mark_vector_use(s, ref->Vector, ref->UseMask); -} - -static void mark_source_use(struct dataflow_state * s, struct rc_instruction * inst, - unsigned int src, unsigned int srcmask) -{ - unsigned int refmask = 0; - - for(unsigned int i = 0; i < 4; ++i) { - if (GET_BIT(srcmask, i)) - refmask |= 1 << GET_SWZ(inst->I.SrcReg[src].Swizzle, i); - } - - /* get rid of spurious bits from ZERO, ONE, etc. swizzles */ - refmask &= RC_MASK_XYZW; - - if (!refmask) - return; /* can happen if the swizzle contains constant components */ - - if (inst->Dataflow.SrcReg[src]) - mark_ref_use(s, inst->Dataflow.SrcReg[src], refmask); - - if (inst->Dataflow.SrcRegAddress[src]) - mark_ref_use(s, inst->Dataflow.SrcRegAddress[src], RC_MASK_X); -} - -static void compute_sources_for_writemask( - struct rc_instruction * inst, - unsigned int writemask, - unsigned int *srcmasks) -{ - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); - - srcmasks[0] = 0; - srcmasks[1] = 0; - srcmasks[2] = 0; - - if (inst->I.Opcode == RC_OPCODE_KIL) - srcmasks[0] |= RC_MASK_XYZW; - - if (!writemask) - return; - - if (opcode->IsComponentwise) { - for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) - srcmasks[src] |= writemask; - } else if (opcode->IsStandardScalar) { - for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) - srcmasks[src] |= RC_MASK_X; - } else { - switch(inst->I.Opcode) { - case RC_OPCODE_ARL: - srcmasks[0] |= RC_MASK_X; - break; - case RC_OPCODE_DP3: - srcmasks[0] |= RC_MASK_XYZ; - srcmasks[1] |= RC_MASK_XYZ; - break; - case RC_OPCODE_DP4: - srcmasks[0] |= RC_MASK_XYZW; - srcmasks[1] |= RC_MASK_XYZW; - break; - case RC_OPCODE_TEX: - case RC_OPCODE_TXB: - case RC_OPCODE_TXP: - srcmasks[0] |= RC_MASK_XYZW; - break; - case RC_OPCODE_DST: - srcmasks[0] |= 0x6; - srcmasks[1] |= 0xa; - break; - case RC_OPCODE_EXP: - case RC_OPCODE_LOG: - srcmasks[0] |= RC_MASK_XY; - break; - case RC_OPCODE_LIT: - srcmasks[0] |= 0xb; - break; - default: - break; - } - } -} - -static void mark_instruction_source_use(struct dataflow_state * s, - struct rc_instruction * inst, unsigned int writemask) -{ - unsigned int srcmasks[3]; - - compute_sources_for_writemask(inst, writemask, srcmasks); - - for(unsigned int src = 0; src < 3; ++src) - mark_source_use(s, inst, src, srcmasks[src]); -} - -static void run_update(struct dataflow_state * s) -{ - s->UpdateRunning = 1; - - while(s->UpdateStackSize) { - struct rc_dataflow_vector * vector = s->UpdateStack[--s->UpdateStackSize]; - vector->PassBit = 0; - - if (vector->WriteInstruction) { - struct rc_instruction * inst = vector->WriteInstruction; - - if (inst->Dataflow.DstRegPrev) { - unsigned int carryover = vector->UseMask & ~inst->I.DstReg.WriteMask; - - if (carryover) - mark_ref_use(s, inst->Dataflow.DstRegPrev, carryover); - } - - mark_instruction_source_use( - s, vector->WriteInstruction, - vector->UseMask & inst->I.DstReg.WriteMask); - } - } - - s->UpdateRunning = 0; -} - -static void mark_vector_use(struct dataflow_state * s, struct rc_dataflow_vector * vector, unsigned int mask) -{ - if (!(mask & ~vector->UseMask)) - return; /* no new used bits */ - - vector->UseMask |= mask; - if (vector->PassBit) - return; - - if (s->UpdateStackSize >= s->UpdateStackReserved) { - unsigned int new_reserve = 2 * s->UpdateStackReserved; - struct rc_dataflow_vector ** new_stack; - - if (!new_reserve) - new_reserve = 16; - - new_stack = memory_pool_malloc(&s->C->Pool, new_reserve * sizeof(struct rc_dataflow_vector *)); - memcpy(new_stack, s->UpdateStack, s->UpdateStackSize * sizeof(struct rc_dataflow_vector *)); - - s->UpdateStack = new_stack; - s->UpdateStackReserved = new_reserve; - } - - s->UpdateStack[s->UpdateStackSize++] = vector; - vector->PassBit = 1; - - if (!s->UpdateRunning) - run_update(s); -} - -static void annotate_instruction(struct dataflow_state * s, struct rc_instruction * inst) -{ - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); - unsigned int src; - - for(src = 0; src < opcode->NumSrcRegs; ++src) { - struct rc_dataflow_vector * vector = get_register_contents(s, inst->I.SrcReg[src].File, inst->I.SrcReg[src].Index); - if (vector) { - inst->Dataflow.SrcReg[src] = rc_dataflow_create_ref(s->C, vector, inst); - } - if (inst->I.SrcReg[src].RelAddr) { - struct rc_dataflow_vector * addr = get_register_contents(s, RC_FILE_ADDRESS, 0); - if (addr) - inst->Dataflow.SrcRegAddress[src] = rc_dataflow_create_ref(s->C, addr, inst); - } - } - - mark_instruction_source_use(s, inst, 0); /* for KIL */ - - if (opcode->HasDstReg) { - struct rc_dataflow_vector * oldvec = get_register_contents(s, inst->I.DstReg.File, inst->I.DstReg.Index); - struct rc_dataflow_vector * newvec = rc_dataflow_create_vector(s->C, inst->I.DstReg.File, inst->I.DstReg.Index, inst); - - newvec->ValidMask = inst->I.DstReg.WriteMask; - - if (oldvec) { - unsigned int carryover = oldvec->ValidMask & ~inst->I.DstReg.WriteMask; - - if (oldvec->ValidMask) - inst->Dataflow.DstRegAliased = 1; - - if (carryover) { - inst->Dataflow.DstRegPrev = rc_dataflow_create_ref(s->C, oldvec, inst); - newvec->ValidMask |= carryover; - - if (!s->DCE) - mark_ref_use(s, inst->Dataflow.DstRegPrev, carryover); - } - } - - inst->Dataflow.DstReg = newvec; - - if (newvec->File == RC_FILE_TEMPORARY) - s->Temporary[newvec->Index] = newvec; - else if (newvec->File == RC_FILE_OUTPUT) - s->Output[newvec->Index] = newvec; - else - s->Address = newvec; - - if (!s->DCE) - mark_vector_use(s, newvec, inst->I.DstReg.WriteMask); - } -} - -static void init_inputs(struct dataflow_state * s) -{ - unsigned int index; - - for(index = 0; index < 32; ++index) { - if (s->C->Program.InputsRead & (1 << index)) { - s->Input[index] = rc_dataflow_create_vector(s->C, RC_FILE_INPUT, index, 0); - s->Input[index]->ValidMask = RC_MASK_XYZW; - } - } -} - -static void mark_output_use(void * data, unsigned int index, unsigned int mask) -{ - struct dataflow_state * s = data; - struct rc_dataflow_vector * vec = s->Output[index]; - - if (vec) - mark_vector_use(s, vec, mask); -} - -void rc_dataflow_annotate(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata) -{ - struct dataflow_state s; - struct rc_instruction * inst; - - memset(&s, 0, sizeof(s)); - s.C = c; - s.DCE = dce ? 1 : 0; - - init_inputs(&s); - - for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - annotate_instruction(&s, inst); - } - - if (s.DCE) { - dce(userdata, &s, &mark_output_use); - - for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); - - if (opcode->HasDstReg) { - unsigned int redundant_writes = inst->I.DstReg.WriteMask & ~inst->Dataflow.DstReg->UseMask; - - inst->I.DstReg.WriteMask &= ~redundant_writes; - - if (!inst->I.DstReg.WriteMask) { - struct rc_instruction * todelete = inst; - inst = inst->Prev; - rc_remove_instruction(todelete); - continue; - } - } - - unsigned int srcmasks[3]; - compute_sources_for_writemask(inst, inst->I.DstReg.WriteMask, srcmasks); - - for(unsigned int src = 0; src < 3; ++src) { - for(unsigned int chan = 0; chan < 4; ++chan) { - if (!GET_BIT(srcmasks[src], chan)) - SET_SWZ(inst->I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); - } - - if (inst->Dataflow.SrcReg[src]) { - if (!inst->Dataflow.SrcReg[src]->UseMask) { - rc_dataflow_remove_ref(inst->Dataflow.SrcReg[src]); - inst->Dataflow.SrcReg[src] = 0; - } - } - - if (inst->Dataflow.SrcRegAddress[src]) { - if (!inst->Dataflow.SrcRegAddress[src]->UseMask) { - rc_dataflow_remove_ref(inst->Dataflow.SrcRegAddress[src]); - inst->Dataflow.SrcRegAddress[src] = 0; - } - } - } - } - - rc_calculate_inputs_outputs(c); - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c new file mode 100644 index 0000000000..95af6fd411 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -0,0 +1,253 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" + + +struct updatemask_state { + unsigned char Output[RC_REGISTER_MAX_INDEX]; + unsigned char Temporary[RC_REGISTER_MAX_INDEX]; + unsigned char Address; +}; + +struct instruction_state { + unsigned char WriteMask; + unsigned char SrcReg[3]; +}; + +struct branchinfo { + unsigned int HaveElse:1; + + struct updatemask_state StoreEndif; + struct updatemask_state StoreElse; +}; + +struct deadcode_state { + struct radeon_compiler * C; + struct instruction_state * Instructions; + + struct updatemask_state R; + + struct branchinfo * BranchStack; + unsigned int BranchStackSize; + unsigned int BranchStackReserved; +}; + + +static void or_updatemasks( + struct updatemask_state * dst, + struct updatemask_state * a, + struct updatemask_state * b) +{ + for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) { + dst->Output[i] = a->Output[i] | b->Output[i]; + dst->Temporary[i] = a->Temporary[i] | b->Temporary[i]; + } + + dst->Address = a->Address | b->Address; +} + +static void push_branch(struct deadcode_state * s) +{ + if (s->BranchStackSize >= s->BranchStackReserved) { + unsigned int new_reserve = 2 * s->BranchStackReserved; + struct branchinfo * new_stack; + + if (!new_reserve) + new_reserve = 4; + + new_stack = memory_pool_malloc(&s->C->Pool, new_reserve * sizeof(struct branchinfo)); + memcpy(new_stack, s->BranchStack, s->BranchStackSize * sizeof(struct branchinfo)); + + s->BranchStack = new_stack; + s->BranchStackReserved = new_reserve; + } + + struct branchinfo * branch = &s->BranchStack[s->BranchStackSize++]; + branch->HaveElse = 0; + memcpy(&branch->StoreEndif, &s->R, sizeof(s->R)); +} + +static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index) +{ + if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) { + if (index >= RC_REGISTER_MAX_INDEX) { + rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file); + return 0; + } + + if (file == RC_FILE_OUTPUT) + return &s->R.Output[index]; + else + return &s->R.Temporary[index]; + } else if (file == RC_FILE_ADDRESS) { + return &s->R.Address; + } + + return 0; +} + +static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask) +{ + unsigned char * pused = get_used_ptr(s, file, index); + if (pused) + *pused |= mask; +} + +static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + struct instruction_state * insts = &s->Instructions[inst->IP]; + unsigned int usedmask = 0; + + if (opcode->HasDstReg) { + unsigned char * pused = get_used_ptr(s, inst->I.DstReg.File, inst->I.DstReg.Index); + if (pused) { + usedmask = *pused & inst->I.DstReg.WriteMask; + *pused &= ~usedmask; + } + } + + insts->WriteMask |= usedmask; + + unsigned int srcmasks[3]; + rc_compute_sources_for_writemask(opcode, usedmask, srcmasks); + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + unsigned int refmask = 0; + unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src]; + insts->SrcReg[src] |= newsrcmask; + + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_BIT(newsrcmask, chan)) + refmask |= 1 << GET_SWZ(inst->I.SrcReg[src].Swizzle, chan); + } + + /* get rid of spurious bits from ZERO, ONE, etc. swizzles */ + refmask &= RC_MASK_XYZW; + + if (!refmask) + continue; + + mark_used(s, inst->I.SrcReg[src].File, inst->I.SrcReg[src].Index, refmask); + + if (inst->I.SrcReg[src].RelAddr) + mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X); + } +} + +static void mark_output_use(void * data, unsigned int index, unsigned int mask) +{ + struct deadcode_state * s = data; + + mark_used(s, RC_FILE_OUTPUT, index, mask); +} + +void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata) +{ + struct deadcode_state s; + unsigned int nr_instructions; + + memset(&s, 0, sizeof(s)); + s.C = c; + + nr_instructions = rc_recompute_ips(c); + s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions); + memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions); + + dce(userdata, &s, &mark_output_use); + + for(struct rc_instruction * inst = c->Program.Instructions.Prev; + inst != &c->Program.Instructions; + inst = inst->Prev) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + + if (opcode->IsControlFlow) { + if (opcode->Opcode == RC_OPCODE_ENDIF) { + push_branch(&s); + } else { + if (s.BranchStackSize) { + struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1]; + + if (opcode->Opcode == RC_OPCODE_IF) { + or_updatemasks(&s.R, + &s.R, + branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif); + + s.BranchStackSize--; + } else if (opcode->Opcode == RC_OPCODE_ELSE) { + if (branch->HaveElse) { + rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__); + } else { + memcpy(&branch->StoreElse, &s.R, sizeof(s.R)); + memcpy(&s.R, &branch->StoreEndif, sizeof(s.R)); + branch->HaveElse = 1; + } + } else { + rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name); + } + } else { + rc_error(c, "%s: Unexpected control flow instruction\n", __FUNCTION__); + } + } + } + + update_instruction(&s, inst); + } + + unsigned int ip = 0; + for(struct rc_instruction * inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next, ++ip) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + + if (opcode->HasDstReg) { + if (s.Instructions[ip].WriteMask) { + inst->I.DstReg.WriteMask = s.Instructions[ip].WriteMask; + } else { + struct rc_instruction * todelete = inst; + inst = inst->Prev; + rc_remove_instruction(todelete); + continue; + } + } + + unsigned int srcmasks[3]; + rc_compute_sources_for_writemask(opcode, s.Instructions[ip].WriteMask, srcmasks); + + for(unsigned int src = 0; src < 3; ++src) { + for(unsigned int chan = 0; chan < 4; ++chan) { + if (!GET_BIT(srcmasks[src], chan)) + SET_SWZ(inst->I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); + } + } + } + + rc_calculate_inputs_outputs(c); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_dealias.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_dealias.c deleted file mode 100644 index 4596636970..0000000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_dealias.c +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (C) 2009 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "radeon_dataflow.h" - -#include "radeon_compiler.h" - - -#define DEALIAS_LIST_SIZE 128 - -struct dealias_state { - struct radeon_compiler * C; - - unsigned int OldIndex:RC_REGISTER_INDEX_BITS; - unsigned int NewIndex:RC_REGISTER_INDEX_BITS; - unsigned int DealiasFail:1; - - struct rc_dataflow_vector * List[DEALIAS_LIST_SIZE]; - unsigned int Length; -}; - -static void push_dealias_vector(struct dealias_state * s, struct rc_dataflow_vector * vec) -{ - if (s->Length >= DEALIAS_LIST_SIZE) { - rc_debug(s->C, "%s: list size exceeded\n", __FUNCTION__); - s->DealiasFail = 1; - return; - } - - if (rc_assert(s->C, vec->File == RC_FILE_TEMPORARY && vec->Index == s->OldIndex)) - return; - - s->List[s->Length++] = vec; -} - -static void run_dealias(struct dealias_state * s) -{ - unsigned int i; - - for(i = 0; i < s->Length && !s->DealiasFail; ++i) { - struct rc_dataflow_vector * vec = s->List[i]; - struct rc_dataflow_ref * ref; - - for(ref = vec->Refs.Next; ref != &vec->Refs; ref = ref->Next) { - if (ref->ReadInstruction->Dataflow.DstRegPrev == ref) - push_dealias_vector(s, ref->ReadInstruction->Dataflow.DstReg); - } - } - - if (s->DealiasFail) - return; - - for(i = 0; i < s->Length; ++i) { - struct rc_dataflow_vector * vec = s->List[i]; - struct rc_dataflow_ref * ref; - - vec->Index = s->NewIndex; - vec->WriteInstruction->I.DstReg.Index = s->NewIndex; - - for(ref = vec->Refs.Next; ref != &vec->Refs; ref = ref->Next) { - struct rc_instruction * inst = ref->ReadInstruction; - unsigned int i; - - for(i = 0; i < 3; ++i) { - if (inst->Dataflow.SrcReg[i] == ref) { - if (rc_assert(s->C, inst->I.SrcReg[i].File == RC_FILE_TEMPORARY && - inst->I.SrcReg[i].Index == s->OldIndex)) - return; - - inst->I.SrcReg[i].Index = s->NewIndex; - } - } - } - } -} - -/** - * Breaks register aliasing to reduce multiple assignments to a single register. - * - * This affects sequences like: - * MUL r0, ...; - * MAD r0, r1, r2, r0; - * In this example, a new register will be used for the destination of the - * second MAD. - * - * The purpose of this dealiasing is to make the resulting code more SSA-like - * and therefore make it easier to move instructions around. - * This is of crucial importance for R300 fragment programs, where de-aliasing - * can help to reduce texture indirections, but other targets can benefit from - * it as well. - * - * \note When compiling GLSL, there may be some benefit gained from breaking - * up vectors whose components are unrelated. This is not done yet and should - * be investigated at some point (of course, a matching pass to re-merge - * components would be required). - */ -void rc_dataflow_dealias(struct radeon_compiler * c) -{ - struct dealias_state s; - - memset(&s, 0, sizeof(s)); - s.C = c; - - struct rc_instruction * inst; - for(inst = c->Program.Instructions.Prev; inst != &c->Program.Instructions; inst = inst->Prev) { - if (!inst->Dataflow.DstRegAliased || inst->Dataflow.DstReg->File != RC_FILE_TEMPORARY) - continue; - - if (inst->Dataflow.DstReg->UseMask & ~inst->I.DstReg.WriteMask) - continue; - - s.OldIndex = inst->I.DstReg.Index; - s.NewIndex = rc_find_free_temporary(c); - s.DealiasFail = 0; - s.Length = 0; - - inst->Dataflow.DstRegAliased = 0; - if (inst->Dataflow.DstRegPrev) { - rc_dataflow_remove_ref(inst->Dataflow.DstRegPrev); - inst->Dataflow.DstRegPrev = 0; - } - - push_dealias_vector(&s, inst->Dataflow.DstReg); - run_dealias(&s); - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c index 1aa91eff7c..fcef218b59 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c @@ -37,8 +37,6 @@ static void rewrite_source(struct radeon_compiler * c, struct rc_swizzle_split split; unsigned int tempreg = rc_find_free_temporary(c); unsigned int usemask; - struct rc_dataflow_ref * oldref = inst->Dataflow.SrcReg[src]; - struct rc_dataflow_vector * vector = 0; usemask = 0; for(unsigned int chan = 0; chan < 4; ++chan) { @@ -75,30 +73,8 @@ static void rewrite_source(struct radeon_compiler * c, else if (masked_negate == split.Phase[phase]) mov->I.SrcReg[0].Negate = RC_MASK_XYZW; - if (oldref) { - mov->Dataflow.SrcReg[0] = rc_dataflow_create_ref(c, oldref->Vector, mov); - mov->Dataflow.SrcReg[0]->UseMask = phase_refmask; - } - - mov->Dataflow.DstReg = rc_dataflow_create_vector(c, RC_FILE_TEMPORARY, tempreg, mov); - mov->Dataflow.DstReg->ValidMask = split.Phase[phase]; - - if (vector) { - mov->Dataflow.DstRegPrev = rc_dataflow_create_ref(c, vector, mov); - mov->Dataflow.DstRegPrev->UseMask = vector->ValidMask; - mov->Dataflow.DstReg->ValidMask |= vector->ValidMask; - mov->Dataflow.DstRegAliased = 1; - } - - mov->Dataflow.DstReg->UseMask = mov->Dataflow.DstReg->ValidMask; - vector = mov->Dataflow.DstReg; } - if (oldref) - rc_dataflow_remove_ref(oldref); - inst->Dataflow.SrcReg[src] = rc_dataflow_create_ref(c, vector, inst); - inst->Dataflow.SrcReg[src]->UseMask = usemask; - inst->I.SrcReg[src].File = RC_FILE_TEMPORARY; inst->I.SrcReg[src].Index = tempreg; inst->I.SrcReg[src].Swizzle = 0; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index b7200990c2..e097a62b55 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -27,6 +27,8 @@ #include "radeon_opcodes.h" +#include "radeon_program_constants.h" + struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { { .Opcode = RC_OPCODE_NOP, @@ -339,9 +341,85 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .NumSrcRegs = 1, .HasDstReg = 1 }, + { + .Opcode = RC_OPCODE_IF, + .Name = "IF", + .IsControlFlow = 1, + .NumSrcRegs = 1 + }, + { + .Opcode = RC_OPCODE_ELSE, + .Name = "ELSE", + .IsControlFlow = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_ENDIF, + .Name = "ENDIF", + .IsControlFlow = 1, + .NumSrcRegs = 0 + }, { .Opcode = RC_OPCODE_REPL_ALPHA, .Name = "REPL_ALPHA", .HasDstReg = 1 } }; + +void rc_compute_sources_for_writemask( + const struct rc_opcode_info * opcode, + unsigned int writemask, + unsigned int *srcmasks) +{ + srcmasks[0] = 0; + srcmasks[1] = 0; + srcmasks[2] = 0; + + if (opcode->Opcode == RC_OPCODE_KIL) + srcmasks[0] |= RC_MASK_XYZW; + else if (opcode->Opcode == RC_OPCODE_IF) + srcmasks[0] |= RC_MASK_X; + + if (!writemask) + return; + + if (opcode->IsComponentwise) { + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) + srcmasks[src] |= writemask; + } else if (opcode->IsStandardScalar) { + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) + srcmasks[src] |= RC_MASK_X; + } else { + switch(opcode->Opcode) { + case RC_OPCODE_ARL: + srcmasks[0] |= RC_MASK_X; + break; + case RC_OPCODE_DP3: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZ; + break; + case RC_OPCODE_DP4: + srcmasks[0] |= RC_MASK_XYZW; + srcmasks[1] |= RC_MASK_XYZW; + break; + case RC_OPCODE_TEX: + case RC_OPCODE_TXB: + case RC_OPCODE_TXP: + srcmasks[0] |= RC_MASK_XYZW; + break; + case RC_OPCODE_DST: + srcmasks[0] |= 0x6; + srcmasks[1] |= 0xa; + break; + case RC_OPCODE_EXP: + case RC_OPCODE_LOG: + srcmasks[0] |= RC_MASK_XY; + break; + case RC_OPCODE_LIT: + srcmasks[0] |= 0xb; + break; + default: + break; + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index 8e30bef1e3..f8ba5255ca 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -166,6 +166,18 @@ typedef enum { RC_OPCODE_TXL, RC_OPCODE_TXP, + /** branch instruction: + * If src0.x != 0.0, continue with the next instruction; + * otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF. + */ + RC_OPCODE_IF, + + /** branch instruction: jump to matching RC_OPCODE_ENDIF */ + RC_OPCODE_ELSE, + + /** branch instruction: has no effect */ + RC_OPCODE_ENDIF, + /** special instruction, used in R300-R500 fragment program pair instructions * indicates that the result of the alpha operation shall be replicated * across all other channels */ @@ -188,6 +200,9 @@ struct rc_opcode_info { unsigned int NumSrcRegs:2; unsigned int HasDstReg:1; + /** true if this instruction affects control flow */ + unsigned int IsControlFlow:1; + /** true if this is a vector instruction that operates on components in parallel * without any cross-component interaction */ unsigned int IsComponentwise:1; @@ -207,4 +222,9 @@ static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode) return &rc_opcodes[opcode]; } +void rc_compute_sources_for_writemask( + const struct rc_opcode_info * opcode, + unsigned int writemask, + unsigned int *srcmasks); + #endif /* RADEON_OPCODES_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c index b97c48084b..a1ee7c0cab 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -154,7 +154,24 @@ struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, str void rc_remove_instruction(struct rc_instruction * inst) { - rc_dataflow_remove_instruction(inst); inst->Prev->Next = inst->Next; inst->Next->Prev = inst->Prev; } + +/** + * Return the number of instructions in the program. + */ +unsigned int rc_recompute_ips(struct radeon_compiler * c) +{ + unsigned int ip = 0; + + for(struct rc_instruction * inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + inst->IP = ip++; + } + + c->Program.Instructions.IP = 0xcafedead; + + return ip; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index d38c9a420c..efa2b0dfe3 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -34,7 +34,6 @@ #include "radeon_opcodes.h" #include "radeon_code.h" #include "radeon_program_constants.h" -#include "radeon_dataflow.h" struct radeon_compiler; @@ -73,33 +72,33 @@ struct rc_dst_register { * instruction types may be valid. */ struct rc_sub_instruction { - struct rc_src_register SrcReg[3]; - struct rc_dst_register DstReg; - - /** - * Opcode of this instruction, according to \ref rc_opcode enums. - */ - rc_opcode Opcode:8; - - /** - * Saturate each value of the result to the range [0,1] or [-1,1], - * according to \ref rc_saturate_mode enums. - */ - rc_saturate_mode SaturateMode:2; - - /** - * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions. - */ - /*@{*/ - /** Source texture unit. */ - unsigned int TexSrcUnit:5; - - /** Source texture target, one of the \ref rc_texture_target enums */ - rc_texture_target TexSrcTarget:3; - - /** True if tex instruction should do shadow comparison */ - unsigned int TexShadow:1; - /*@}*/ + struct rc_src_register SrcReg[3]; + struct rc_dst_register DstReg; + + /** + * Opcode of this instruction, according to \ref rc_opcode enums. + */ + rc_opcode Opcode:8; + + /** + * Saturate each value of the result to the range [0,1] or [-1,1], + * according to \ref rc_saturate_mode enums. + */ + rc_saturate_mode SaturateMode:2; + + /** + * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions. + */ + /*@{*/ + /** Source texture unit. */ + unsigned int TexSrcUnit:5; + + /** Source texture target, one of the \ref rc_texture_target enums */ + rc_texture_target TexSrcTarget:3; + + /** True if tex instruction should do shadow comparison */ + unsigned int TexShadow:1; + /*@}*/ }; struct rc_instruction { @@ -109,13 +108,11 @@ struct rc_instruction { struct rc_sub_instruction I; /** - * Dataflow annotations. - * - * These are not supplied by the caller of the compiler, - * but filled in during compilation stages that make use of - * dataflow analysis. + * Warning: IPs are not stable. If you want to use them, + * you need to recompute them at the beginning of each pass + * using \ref rc_recompute_ips */ - struct rc_instruction_dataflow Dataflow; + unsigned int IP; }; struct rc_program { @@ -210,10 +207,8 @@ struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c); struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after); void rc_remove_instruction(struct rc_instruction * inst); -enum { - RC_PRINT_DATAFLOW = 0x1 -}; +unsigned int rc_recompute_ips(struct radeon_compiler * c); -void rc_print_program(const struct rc_program *prog, unsigned int flags); +void rc_print_program(const struct rc_program *prog); #endif diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c index 38060ea3ad..0485286451 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c @@ -24,11 +24,6 @@ #include -static void print_comment(FILE * f) -{ - fprintf(f, " # "); -} - static const char * textarget_to_string(rc_texture_target target) { switch(target) { @@ -121,19 +116,7 @@ static void rc_print_src_register(FILE * f, struct rc_src_register src) fprintf(f, "|"); } -static void rc_print_ref(FILE * f, struct rc_dataflow_ref * ref) -{ - fprintf(f, "ref(%p", ref->Vector); - - if (ref->UseMask != RC_MASK_XYZW) { - fprintf(f, "."); - rc_print_mask(f, ref->UseMask); - } - - fprintf(f, ")"); -} - -static void rc_print_instruction(FILE * f, unsigned int flags, struct rc_instruction * inst) +static void rc_print_instruction(FILE * f, struct rc_instruction * inst) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); unsigned int reg; @@ -169,45 +152,22 @@ static void rc_print_instruction(FILE * f, unsigned int flags, struct rc_instruc } fprintf(f, ";\n"); - - if (flags & RC_PRINT_DATAFLOW) { - print_comment(f); - - fprintf(f, "Dst = %p", inst->Dataflow.DstReg); - if (inst->Dataflow.DstRegAliased) - fprintf(f, " aliased"); - if (inst->Dataflow.DstRegPrev) { - fprintf(f, " from "); - rc_print_ref(f, inst->Dataflow.DstRegPrev); - } - - for(reg = 0; reg < opcode->NumSrcRegs; ++reg) { - fprintf(f, ", "); - if (inst->Dataflow.SrcReg[reg]) - rc_print_ref(f, inst->Dataflow.SrcReg[reg]); - else - fprintf(f, ""); - } - - fprintf(f, "\n"); - } } /** * Print program to stderr, default options. */ -void rc_print_program(const struct rc_program *prog, unsigned int flags) +void rc_print_program(const struct rc_program *prog) { unsigned int linenum = 0; struct rc_instruction *inst; - fprintf(stderr, "# Radeon Compiler Program%s\n", - flags & RC_PRINT_DATAFLOW ? " (with dataflow annotations)" : ""); + fprintf(stderr, "# Radeon Compiler Program\n"); for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) { fprintf(stderr, "%3d: ", linenum); - rc_print_instruction(stderr, flags, inst); + rc_print_instruction(stderr, inst); linenum++; } -- cgit v1.2.3 From 995135479d5662d1b1970c0f233c3c3d944d8b4d Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Sun, 4 Oct 2009 11:25:48 +0200 Subject: r300/compiler: Refactor to allow different instruction types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolai Hähnle --- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 32 ++-- src/mesa/drivers/dri/r300/compiler/r300_fragprog.c | 144 ++++++++-------- src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 2 +- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 90 +++++----- src/mesa/drivers/dri/r300/compiler/r500_fragprog.c | 144 ++++++++-------- .../drivers/dri/r300/compiler/radeon_compiler.c | 136 +++++++-------- .../dri/r300/compiler/radeon_dataflow_deadcode.c | 32 ++-- .../dri/r300/compiler/radeon_dataflow_swizzles.c | 40 ++--- .../drivers/dri/r300/compiler/radeon_program.c | 12 +- .../drivers/dri/r300/compiler/radeon_program.h | 10 +- .../drivers/dri/r300/compiler/radeon_program_alu.c | 186 ++++++++++----------- .../dri/r300/compiler/radeon_program_pair.c | 2 +- .../dri/r300/compiler/radeon_program_pair.h | 2 - .../dri/r300/compiler/radeon_program_print.c | 20 +-- src/mesa/drivers/dri/r300/r300_vertprog.c | 22 +-- src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c | 18 +- 16 files changed, 449 insertions(+), 443 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 1246ffe8c2..cc5e5c19e9 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -226,31 +226,31 @@ static void transform_texture(struct rc_instruction * dst, struct tgsi_instructi { switch(src.Texture) { case TGSI_TEXTURE_1D: - dst->I.TexSrcTarget = RC_TEXTURE_1D; + dst->U.I.TexSrcTarget = RC_TEXTURE_1D; break; case TGSI_TEXTURE_2D: - dst->I.TexSrcTarget = RC_TEXTURE_2D; + dst->U.I.TexSrcTarget = RC_TEXTURE_2D; break; case TGSI_TEXTURE_3D: - dst->I.TexSrcTarget = RC_TEXTURE_3D; + dst->U.I.TexSrcTarget = RC_TEXTURE_3D; break; case TGSI_TEXTURE_CUBE: - dst->I.TexSrcTarget = RC_TEXTURE_CUBE; + dst->U.I.TexSrcTarget = RC_TEXTURE_CUBE; break; case TGSI_TEXTURE_RECT: - dst->I.TexSrcTarget = RC_TEXTURE_RECT; + dst->U.I.TexSrcTarget = RC_TEXTURE_RECT; break; case TGSI_TEXTURE_SHADOW1D: - dst->I.TexSrcTarget = RC_TEXTURE_1D; - dst->I.TexShadow = 1; + dst->U.I.TexSrcTarget = RC_TEXTURE_1D; + dst->U.I.TexShadow = 1; break; case TGSI_TEXTURE_SHADOW2D: - dst->I.TexSrcTarget = RC_TEXTURE_2D; - dst->I.TexShadow = 1; + dst->U.I.TexSrcTarget = RC_TEXTURE_2D; + dst->U.I.TexShadow = 1; break; case TGSI_TEXTURE_SHADOWRECT: - dst->I.TexSrcTarget = RC_TEXTURE_RECT; - dst->I.TexShadow = 1; + dst->U.I.TexSrcTarget = RC_TEXTURE_RECT; + dst->U.I.TexShadow = 1; break; } } @@ -263,17 +263,17 @@ static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_inst struct rc_instruction * dst = rc_insert_new_instruction(ttr->compiler, ttr->compiler->Program.Instructions.Prev); int i; - dst->I.Opcode = translate_opcode(src->Instruction.Opcode); - dst->I.SaturateMode = translate_saturate(src->Instruction.Saturate); + dst->U.I.Opcode = translate_opcode(src->Instruction.Opcode); + dst->U.I.SaturateMode = translate_saturate(src->Instruction.Saturate); if (src->Instruction.NumDstRegs) - transform_dstreg(ttr, &dst->I.DstReg, &src->FullDstRegisters[0]); + transform_dstreg(ttr, &dst->U.I.DstReg, &src->FullDstRegisters[0]); for(i = 0; i < src->Instruction.NumSrcRegs; ++i) { if (src->FullSrcRegisters[i].SrcRegister.File == TGSI_FILE_SAMPLER) - dst->I.TexSrcUnit = src->FullSrcRegisters[i].SrcRegister.Index; + dst->U.I.TexSrcUnit = src->FullSrcRegisters[i].SrcRegister.Index; else - transform_srcreg(ttr, &dst->I.SrcReg[i], &src->FullSrcRegisters[i]); + transform_srcreg(ttr, &dst->U.I.SrcReg[i], &src->FullSrcRegisters[i]); } /* Texturing. */ diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c index 94f8fdfea3..aa69b0fc72 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c @@ -55,75 +55,75 @@ int r300_transform_TEX( struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)data; - if (inst->I.Opcode != RC_OPCODE_TEX && - inst->I.Opcode != RC_OPCODE_TXB && - inst->I.Opcode != RC_OPCODE_TXP && - inst->I.Opcode != RC_OPCODE_KIL) + if (inst->U.I.Opcode != RC_OPCODE_TEX && + inst->U.I.Opcode != RC_OPCODE_TXB && + inst->U.I.Opcode != RC_OPCODE_TXP && + inst->U.I.Opcode != RC_OPCODE_KIL) return 0; /* ARB_shadow & EXT_shadow_funcs */ - if (inst->I.Opcode != RC_OPCODE_KIL && - c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) { - rc_compare_func comparefunc = compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + if (inst->U.I.Opcode != RC_OPCODE_KIL && + c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) { + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { - inst->I.Opcode = RC_OPCODE_MOV; + inst->U.I.Opcode = RC_OPCODE_MOV; if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { - inst->I.SrcReg[0].File = RC_FILE_NONE; - inst->I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + inst->U.I.SrcReg[0].File = RC_FILE_NONE; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; } else { - inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit); + inst->U.I.SrcReg[0] = shadow_ambient(c, inst->U.I.TexSrcUnit); } return 1; } else { - rc_compare_func comparefunc = compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; - unsigned int depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode; + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; + unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode; struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); int pass, fail; - inst_rcp->I.Opcode = RC_OPCODE_RCP; - inst_rcp->I.DstReg.File = RC_FILE_TEMPORARY; - inst_rcp->I.DstReg.Index = rc_find_free_temporary(c); - inst_rcp->I.DstReg.WriteMask = RC_MASK_W; - inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0]; - inst_rcp->I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; - - inst_cmp->I.DstReg = inst->I.DstReg; - inst->I.DstReg.File = RC_FILE_TEMPORARY; - inst->I.DstReg.Index = rc_find_free_temporary(c); - inst->I.DstReg.WriteMask = RC_MASK_XYZW; - - inst_mad->I.Opcode = RC_OPCODE_MAD; - inst_mad->I.DstReg.File = RC_FILE_TEMPORARY; - inst_mad->I.DstReg.Index = rc_find_free_temporary(c); - inst_mad->I.SrcReg[0] = inst->I.SrcReg[0]; - inst_mad->I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ; - inst_mad->I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index; - inst_mad->I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; - inst_mad->I.SrcReg[2].File = RC_FILE_TEMPORARY; - inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index; + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + + inst_cmp->U.I.DstReg = inst->U.I.DstReg; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = rc_find_free_temporary(c); + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ; + inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index; + inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index; if (depthmode == 0) /* GL_LUMINANCE */ - inst_mad->I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z); + inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z); else if (depthmode == 2) /* GL_ALPHA */ - inst_mad->I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW; + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW; /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: * r < tex <=> -tex+r < 0 * r >= tex <=> not (-tex+r < 0 */ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL) - inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ RC_MASK_XYZW; + inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW; else - inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ RC_MASK_XYZW; + inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; - inst_cmp->I.Opcode = RC_OPCODE_CMP; + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; /* DstReg has been filled out above */ - inst_cmp->I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index; + inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index; if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) { pass = 1; @@ -133,9 +133,9 @@ int r300_transform_TEX( fail = 1; } - inst_cmp->I.SrcReg[pass].File = RC_FILE_NONE; - inst_cmp->I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; - inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit); + inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE; + inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; + inst_cmp->U.I.SrcReg[fail] = shadow_ambient(c, inst->U.I.TexSrcUnit); } } @@ -143,49 +143,49 @@ int r300_transform_TEX( * instead of [0..Width]x[0..Height]. * Add a scaling instruction. */ - if (inst->I.Opcode != RC_OPCODE_KIL && inst->I.TexSrcTarget == RC_TEXTURE_RECT) { + if (inst->U.I.Opcode != RC_OPCODE_KIL && inst->U.I.TexSrcTarget == RC_TEXTURE_RECT) { struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst->Prev); - inst_mul->I.Opcode = RC_OPCODE_MUL; - inst_mul->I.DstReg.File = RC_FILE_TEMPORARY; - inst_mul->I.DstReg.Index = rc_find_free_temporary(c); - inst_mul->I.SrcReg[0] = inst->I.SrcReg[0]; - inst_mul->I.SrcReg[1].File = RC_FILE_CONSTANT; - inst_mul->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->I.TexSrcUnit); + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mul->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->U.I.TexSrcUnit); - reset_srcreg(&inst->I.SrcReg[0]); - inst->I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->I.SrcReg[0].Index = inst_mul->I.DstReg.Index; + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mul->U.I.DstReg.Index; } /* Cannot write texture to output registers or with masks */ - if (inst->I.Opcode != RC_OPCODE_KIL && - (inst->I.DstReg.File != RC_FILE_TEMPORARY || inst->I.DstReg.WriteMask != RC_MASK_XYZW)) { + if (inst->U.I.Opcode != RC_OPCODE_KIL && + (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || inst->U.I.DstReg.WriteMask != RC_MASK_XYZW)) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); - inst_mov->I.Opcode = RC_OPCODE_MOV; - inst_mov->I.DstReg = inst->I.DstReg; - inst_mov->I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg = inst->U.I.DstReg; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); - inst->I.DstReg.File = RC_FILE_TEMPORARY; - inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; - inst->I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; } /* Cannot read texture coordinate from constants file */ - if (inst->I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->I.SrcReg[0].File != RC_FILE_INPUT) { + if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->I.Opcode = RC_OPCODE_MOV; - inst_mov->I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->I.DstReg.Index = rc_find_free_temporary(c); - inst_mov->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - reset_srcreg(&inst->I.SrcReg[0]); - inst->I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index; + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; } return 1; diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 614c2e3d24..746e4495fe 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -44,7 +44,7 @@ static void rewrite_depth_out(struct r300_fragment_program_compiler * c) struct rc_instruction *rci; for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) { - struct rc_sub_instruction * inst = &rci->I; + struct rc_sub_instruction * inst = &rci->U.I; if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth) continue; diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index e1e735568d..1b2cb8dde7 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -341,7 +341,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi compiler->SetHwInputOutput(compiler); for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) { - struct rc_sub_instruction *vpi = &rci->I; + struct rc_sub_instruction *vpi = &rci->U.I; unsigned int *inst = compiler->code->body.d + compiler->code->length; /* Skip instructions writing to non-existing destination */ @@ -405,19 +405,19 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c /* Pass 1: Count original temporaries and allocate structures */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); for (i = 0; i < opcode->NumSrcRegs; ++i) { - if (inst->I.SrcReg[i].File == RC_FILE_TEMPORARY) { - if (inst->I.SrcReg[i].Index >= num_orig_temps) - num_orig_temps = inst->I.SrcReg[i].Index + 1; + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + if (inst->U.I.SrcReg[i].Index >= num_orig_temps) + num_orig_temps = inst->U.I.SrcReg[i].Index + 1; } } if (opcode->HasDstReg) { - if (inst->I.DstReg.File == RC_FILE_TEMPORARY) { - if (inst->I.DstReg.Index >= num_orig_temps) - num_orig_temps = inst->I.DstReg.Index + 1; + if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { + if (inst->U.I.DstReg.Index >= num_orig_temps) + num_orig_temps = inst->U.I.DstReg.Index + 1; } } } @@ -428,22 +428,22 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c /* Pass 2: Determine original temporary lifetimes */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); for (i = 0; i < opcode->NumSrcRegs; ++i) { - if (inst->I.SrcReg[i].File == RC_FILE_TEMPORARY) - ta[inst->I.SrcReg[i].Index].LastRead = inst; + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) + ta[inst->U.I.SrcReg[i].Index].LastRead = inst; } } /* Pass 3: Register allocation */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); for (i = 0; i < opcode->NumSrcRegs; ++i) { - if (inst->I.SrcReg[i].File == RC_FILE_TEMPORARY) { - unsigned int orig = inst->I.SrcReg[i].Index; - inst->I.SrcReg[i].Index = ta[orig].HwTemp; + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->U.I.SrcReg[i].Index; + inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; if (ta[orig].Allocated && inst == ta[orig].LastRead) hwtemps[ta[orig].HwTemp] = 0; @@ -451,8 +451,8 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c } if (opcode->HasDstReg) { - if (inst->I.DstReg.File == RC_FILE_TEMPORARY) { - unsigned int orig = inst->I.DstReg.Index; + if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->U.I.DstReg.Index; if (!ta[orig].Allocated) { for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) { @@ -471,7 +471,7 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c } } - inst->I.DstReg.Index = ta[orig].HwTemp; + inst->U.I.DstReg.Index = ta[orig].HwTemp; } } } @@ -487,36 +487,36 @@ static int transform_source_conflicts( struct rc_instruction* inst, void* unused) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); if (opcode->NumSrcRegs == 3) { - if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2]) - || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) { + if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]) + || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) { int tmpreg = rc_find_free_temporary(c); struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->I.Opcode = RC_OPCODE_MOV; - inst_mov->I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->I.DstReg.Index = tmpreg; - inst_mov->I.SrcReg[0] = inst->I.SrcReg[2]; - - reset_srcreg(&inst->I.SrcReg[2]); - inst->I.SrcReg[2].File = RC_FILE_TEMPORARY; - inst->I.SrcReg[2].Index = tmpreg; + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = tmpreg; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + + reset_srcreg(&inst->U.I.SrcReg[2]); + inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[2].Index = tmpreg; } } if (opcode->NumSrcRegs >= 2) { - if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) { + if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) { int tmpreg = rc_find_free_temporary(c); struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->I.Opcode = RC_OPCODE_MOV; - inst_mov->I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->I.DstReg.Index = tmpreg; - inst_mov->I.SrcReg[0] = inst->I.SrcReg[1]; - - reset_srcreg(&inst->I.SrcReg[1]); - inst->I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst->I.SrcReg[1].Index = tmpreg; + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = tmpreg; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + + reset_srcreg(&inst->U.I.SrcReg[1]); + inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[1].Index = tmpreg; } } @@ -531,15 +531,15 @@ static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) if ((compiler->RequiredOutputs & (1 << i)) && !(compiler->Base.Program.OutputsWritten & (1 << i))) { struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev); - inst->I.Opcode = RC_OPCODE_MOV; + inst->U.I.Opcode = RC_OPCODE_MOV; - inst->I.DstReg.File = RC_FILE_OUTPUT; - inst->I.DstReg.Index = i; - inst->I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = i; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - inst->I.SrcReg[0].File = RC_FILE_CONSTANT; - inst->I.SrcReg[0].Index = 0; - inst->I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT; + inst->U.I.SrcReg[0].Index = 0; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; compiler->Base.Program.OutputsWritten |= 1 << i; } diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index 39f2445bd4..d87acecdab 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -54,75 +54,75 @@ int r500_transform_TEX( struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)data; - if (inst->I.Opcode != RC_OPCODE_TEX && - inst->I.Opcode != RC_OPCODE_TXB && - inst->I.Opcode != RC_OPCODE_TXP && - inst->I.Opcode != RC_OPCODE_KIL) + if (inst->U.I.Opcode != RC_OPCODE_TEX && + inst->U.I.Opcode != RC_OPCODE_TXB && + inst->U.I.Opcode != RC_OPCODE_TXP && + inst->U.I.Opcode != RC_OPCODE_KIL) return 0; /* ARB_shadow & EXT_shadow_funcs */ - if (inst->I.Opcode != RC_OPCODE_KIL && - c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) { - rc_compare_func comparefunc = compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + if (inst->U.I.Opcode != RC_OPCODE_KIL && + c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) { + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { - inst->I.Opcode = RC_OPCODE_MOV; + inst->U.I.Opcode = RC_OPCODE_MOV; if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { - inst->I.SrcReg[0].File = RC_FILE_NONE; - inst->I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + inst->U.I.SrcReg[0].File = RC_FILE_NONE; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; } else { - inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit); + inst->U.I.SrcReg[0] = shadow_ambient(c, inst->U.I.TexSrcUnit); } return 1; } else { - rc_compare_func comparefunc = compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; - unsigned int depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode; + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; + unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode; struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); int pass, fail; - inst_rcp->I.Opcode = RC_OPCODE_RCP; - inst_rcp->I.DstReg.File = RC_FILE_TEMPORARY; - inst_rcp->I.DstReg.Index = rc_find_free_temporary(c); - inst_rcp->I.DstReg.WriteMask = RC_MASK_W; - inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0]; - inst_rcp->I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; - - inst_cmp->I.DstReg = inst->I.DstReg; - inst->I.DstReg.File = RC_FILE_TEMPORARY; - inst->I.DstReg.Index = rc_find_free_temporary(c); - inst->I.DstReg.WriteMask = RC_MASK_XYZW; - - inst_mad->I.Opcode = RC_OPCODE_MAD; - inst_mad->I.DstReg.File = RC_FILE_TEMPORARY; - inst_mad->I.DstReg.Index = rc_find_free_temporary(c); - inst_mad->I.SrcReg[0] = inst->I.SrcReg[0]; - inst_mad->I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ; - inst_mad->I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index; - inst_mad->I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; - inst_mad->I.SrcReg[2].File = RC_FILE_TEMPORARY; - inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index; + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + + inst_cmp->U.I.DstReg = inst->U.I.DstReg; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = rc_find_free_temporary(c); + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ; + inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index; + inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index; if (depthmode == 0) /* GL_LUMINANCE */ - inst_mad->I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z); + inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z); else if (depthmode == 2) /* GL_ALPHA */ - inst_mad->I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW; + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW; /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: * r < tex <=> -tex+r < 0 * r >= tex <=> not (-tex+r < 0 */ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL) - inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ RC_MASK_XYZW; + inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW; else - inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ RC_MASK_XYZW; + inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; - inst_cmp->I.Opcode = RC_OPCODE_CMP; + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; /* DstReg has been filled out above */ - inst_cmp->I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index; + inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index; if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) { pass = 1; @@ -132,38 +132,38 @@ int r500_transform_TEX( fail = 1; } - inst_cmp->I.SrcReg[pass].File = RC_FILE_NONE; - inst_cmp->I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; - inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit); + inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE; + inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; + inst_cmp->U.I.SrcReg[fail] = shadow_ambient(c, inst->U.I.TexSrcUnit); } } /* Cannot write texture to output registers */ - if (inst->I.Opcode != RC_OPCODE_KIL && inst->I.DstReg.File != RC_FILE_TEMPORARY) { + if (inst->U.I.Opcode != RC_OPCODE_KIL && inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); - inst_mov->I.Opcode = RC_OPCODE_MOV; - inst_mov->I.DstReg = inst->I.DstReg; - inst_mov->I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg = inst->U.I.DstReg; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); - inst->I.DstReg.File = RC_FILE_TEMPORARY; - inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; - inst->I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; } /* Cannot read texture coordinate from constants file */ - if (inst->I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->I.SrcReg[0].File != RC_FILE_INPUT) { + if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->I.Opcode = RC_OPCODE_MOV; - inst_mov->I.DstReg.File = RC_FILE_TEMPORARY; - inst_mov->I.DstReg.Index = rc_find_free_temporary(c); - inst_mov->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - reset_srcreg(&inst->I.SrcReg[0]); - inst->I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index; + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; } return 1; @@ -177,22 +177,22 @@ int r500_transform_IF( struct rc_instruction * inst, void* data) { - if (inst->I.Opcode != RC_OPCODE_IF) + if (inst->U.I.Opcode != RC_OPCODE_IF) return 0; struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->I.Opcode = RC_OPCODE_MOV; - inst_mov->I.DstReg.WriteMask = 0; - inst_mov->I.WriteALUResult = RC_ALURESULT_W; - inst_mov->I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL; - inst_mov->I.SrcReg[0] = inst->I.SrcReg[0]; - inst_mov->I.SrcReg[0].Swizzle = combine_swizzles4(inst_mov->I.SrcReg[0].Swizzle, + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.WriteMask = 0; + inst_mov->U.I.WriteALUResult = RC_ALURESULT_W; + inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(inst_mov->U.I.SrcReg[0].Swizzle, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_X); - inst->I.SrcReg[0].File = RC_FILE_SPECIAL; - inst->I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT; - inst->I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; - inst->I.SrcReg[0].Negate = 0; + inst->U.I.SrcReg[0].File = RC_FILE_SPECIAL; + inst->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[0].Negate = 0; return 1; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index d0b78ec1c8..c0e7a7f7a0 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -36,7 +36,7 @@ void rc_init(struct radeon_compiler * c) memory_pool_init(&c->Pool); c->Program.Instructions.Prev = &c->Program.Instructions; c->Program.Instructions.Next = &c->Program.Instructions; - c->Program.Instructions.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; + c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; } void rc_destroy(struct radeon_compiler * c) @@ -113,17 +113,17 @@ void rc_calculate_inputs_outputs(struct radeon_compiler * c) for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); int i; for (i = 0; i < opcode->NumSrcRegs; ++i) { - if (inst->I.SrcReg[i].File == RC_FILE_INPUT) - c->Program.InputsRead |= 1 << inst->I.SrcReg[i].Index; + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT) + c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index; } if (opcode->HasDstReg) { - if (inst->I.DstReg.File == RC_FILE_OUTPUT) - c->Program.OutputsWritten |= 1 << inst->I.DstReg.Index; + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) + c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index; } } } @@ -139,17 +139,17 @@ void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_reg c->Program.InputsRead &= ~(1 << input); for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); unsigned i; for(i = 0; i < opcode->NumSrcRegs; ++i) { - if (inst->I.SrcReg[i].File == RC_FILE_INPUT && inst->I.SrcReg[i].Index == input) { - inst->I.SrcReg[i].File = new_input.File; - inst->I.SrcReg[i].Index = new_input.Index; - inst->I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->I.SrcReg[i].Swizzle); - if (!inst->I.SrcReg[i].Abs) { - inst->I.SrcReg[i].Negate ^= new_input.Negate; - inst->I.SrcReg[i].Abs = new_input.Abs; + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) { + inst->U.I.SrcReg[i].File = new_input.File; + inst->U.I.SrcReg[i].Index = new_input.Index; + inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle); + if (!inst->U.I.SrcReg[i].Abs) { + inst->U.I.SrcReg[i].Negate ^= new_input.Negate; + inst->U.I.SrcReg[i].Abs = new_input.Abs; } c->Program.InputsRead |= 1 << new_input.Index; @@ -171,12 +171,12 @@ void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_ou c->Program.OutputsWritten &= ~(1 << output); for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); if (opcode->HasDstReg) { - if (inst->I.DstReg.File == RC_FILE_OUTPUT && inst->I.DstReg.Index == output) { - inst->I.DstReg.Index = new_output; - inst->I.DstReg.WriteMask &= writemask; + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { + inst->U.I.DstReg.Index = new_output; + inst->U.I.DstReg.WriteMask &= writemask; c->Program.OutputsWritten |= 1 << new_output; } @@ -194,33 +194,33 @@ void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_ou struct rc_instruction * inst; for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); if (opcode->HasDstReg) { - if (inst->I.DstReg.File == RC_FILE_OUTPUT && inst->I.DstReg.Index == output) { - inst->I.DstReg.File = RC_FILE_TEMPORARY; - inst->I.DstReg.Index = tempreg; + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = tempreg; } } } inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); - inst->I.Opcode = RC_OPCODE_MOV; - inst->I.DstReg.File = RC_FILE_OUTPUT; - inst->I.DstReg.Index = output; + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = output; - inst->I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->I.SrcReg[0].Index = tempreg; - inst->I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = tempreg; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); - inst->I.Opcode = RC_OPCODE_MOV; - inst->I.DstReg.File = RC_FILE_OUTPUT; - inst->I.DstReg.Index = dup_output; + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = dup_output; - inst->I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst->I.SrcReg[0].Index = tempreg; - inst->I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = tempreg; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; c->Program.OutputsWritten |= 1 << dup_output; } @@ -238,60 +238,60 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig /* perspective divide */ struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); - inst_rcp->I.Opcode = RC_OPCODE_RCP; + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; - inst_rcp->I.DstReg.File = RC_FILE_TEMPORARY; - inst_rcp->I.DstReg.Index = tempregi; - inst_rcp->I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = tempregi; + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; - inst_rcp->I.SrcReg[0].File = RC_FILE_INPUT; - inst_rcp->I.SrcReg[0].Index = new_input; - inst_rcp->I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst_rcp->U.I.SrcReg[0].Index = new_input; + inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst_rcp); - inst_mul->I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.Opcode = RC_OPCODE_MUL; - inst_mul->I.DstReg.File = RC_FILE_TEMPORARY; - inst_mul->I.DstReg.Index = tempregi; - inst_mul->I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = tempregi; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_mul->I.SrcReg[0].File = RC_FILE_INPUT; - inst_mul->I.SrcReg[0].Index = new_input; + inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst_mul->U.I.SrcReg[0].Index = new_input; - inst_mul->I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_mul->I.SrcReg[1].Index = tempregi; - inst_mul->I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[1].Index = tempregi; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; /* viewport transformation */ struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_mul); - inst_mad->I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.Opcode = RC_OPCODE_MAD; - inst_mad->I.DstReg.File = RC_FILE_TEMPORARY; - inst_mad->I.DstReg.Index = tempregi; - inst_mad->I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = tempregi; + inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_mad->I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_mad->I.SrcReg[0].Index = tempregi; - inst_mad->I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); + inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[0].Index = tempregi; + inst_mad->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); - inst_mad->I.SrcReg[1].File = RC_FILE_CONSTANT; - inst_mad->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); - inst_mad->I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); + inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); + inst_mad->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); - inst_mad->I.SrcReg[2].File = RC_FILE_CONSTANT; - inst_mad->I.SrcReg[2].Index = inst_mad->I.SrcReg[1].Index; - inst_mad->I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); + inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[2].Index = inst_mad->U.I.SrcReg[1].Index; + inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); struct rc_instruction * inst; for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); unsigned i; for(i = 0; i < opcode->NumSrcRegs; i++) { - if (inst->I.SrcReg[i].File == RC_FILE_INPUT && - inst->I.SrcReg[i].Index == wpos) { - inst->I.SrcReg[i].File = RC_FILE_TEMPORARY; - inst->I.SrcReg[i].Index = tempregi; + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && + inst->U.I.SrcReg[i].Index == wpos) { + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = tempregi; } } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c index 2ae3c56689..f30b1ff067 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -134,26 +134,26 @@ static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); struct instruction_state * insts = &s->Instructions[inst->IP]; unsigned int usedmask = 0; if (opcode->HasDstReg) { - unsigned char * pused = get_used_ptr(s, inst->I.DstReg.File, inst->I.DstReg.Index); + unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index); if (pused) { - usedmask = *pused & inst->I.DstReg.WriteMask; + usedmask = *pused & inst->U.I.DstReg.WriteMask; *pused &= ~usedmask; } } insts->WriteMask |= usedmask; - if (inst->I.WriteALUResult) { + if (inst->U.I.WriteALUResult) { unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT); if (pused && *pused) { - if (inst->I.WriteALUResult == RC_ALURESULT_X) + if (inst->U.I.WriteALUResult == RC_ALURESULT_X) usedmask |= RC_MASK_X; - else if (inst->I.WriteALUResult == RC_ALURESULT_W) + else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) usedmask |= RC_MASK_W; *pused = 0; @@ -171,7 +171,7 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction for(unsigned int chan = 0; chan < 4; ++chan) { if (GET_BIT(newsrcmask, chan)) - refmask |= 1 << GET_SWZ(inst->I.SrcReg[src].Swizzle, chan); + refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); } /* get rid of spurious bits from ZERO, ONE, etc. swizzles */ @@ -180,9 +180,9 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction if (!refmask) continue; - mark_used(s, inst->I.SrcReg[src].File, inst->I.SrcReg[src].Index, refmask); + mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask); - if (inst->I.SrcReg[src].RelAddr) + if (inst->U.I.SrcReg[src].RelAddr) mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X); } } @@ -211,7 +211,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f for(struct rc_instruction * inst = c->Program.Instructions.Prev; inst != &c->Program.Instructions; inst = inst->Prev) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); if (opcode->IsControlFlow) { if (opcode->Opcode == RC_OPCODE_ENDIF) { @@ -250,20 +250,20 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f for(struct rc_instruction * inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next, ++ip) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);\ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);\ int dead = 1; if (!opcode->HasDstReg) { dead = 0; } else { - inst->I.DstReg.WriteMask = s.Instructions[ip].WriteMask; + inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask; if (s.Instructions[ip].WriteMask) dead = 0; if (s.Instructions[ip].WriteALUResult) dead = 0; else - inst->I.WriteALUResult = RC_ALURESULT_NONE; + inst->U.I.WriteALUResult = RC_ALURESULT_NONE; } if (dead) { @@ -276,9 +276,9 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f unsigned int srcmasks[3]; unsigned int usemask = s.Instructions[ip].WriteMask; - if (inst->I.WriteALUResult == RC_ALURESULT_X) + if (inst->U.I.WriteALUResult == RC_ALURESULT_X) usemask |= RC_MASK_X; - else if (inst->I.WriteALUResult == RC_ALURESULT_W) + else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) usemask |= RC_MASK_W; rc_compute_sources_for_writemask(opcode, usemask, srcmasks); @@ -286,7 +286,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f for(unsigned int src = 0; src < 3; ++src) { for(unsigned int chan = 0; chan < 4; ++chan) { if (!GET_BIT(srcmasks[src], chan)) - SET_SWZ(inst->I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); + SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); } } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c index fcef218b59..33acbd30f4 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c @@ -40,48 +40,48 @@ static void rewrite_source(struct radeon_compiler * c, usemask = 0; for(unsigned int chan = 0; chan < 4; ++chan) { - if (GET_SWZ(inst->I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED) + if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED) usemask |= 1 << chan; } - c->SwizzleCaps->Split(inst->I.SrcReg[src], usemask, &split); + c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split); for(unsigned int phase = 0; phase < split.NumPhases; ++phase) { struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev); unsigned int phase_refmask; unsigned int masked_negate; - mov->I.Opcode = RC_OPCODE_MOV; - mov->I.DstReg.File = RC_FILE_TEMPORARY; - mov->I.DstReg.Index = tempreg; - mov->I.DstReg.WriteMask = split.Phase[phase]; - mov->I.SrcReg[0] = inst->I.SrcReg[src]; + mov->U.I.Opcode = RC_OPCODE_MOV; + mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + mov->U.I.DstReg.Index = tempreg; + mov->U.I.DstReg.WriteMask = split.Phase[phase]; + mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src]; phase_refmask = 0; for(unsigned int chan = 0; chan < 4; ++chan) { if (!GET_BIT(split.Phase[phase], chan)) - SET_SWZ(mov->I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED); + SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED); else - phase_refmask |= 1 << GET_SWZ(mov->I.SrcReg[0].Swizzle, chan); + phase_refmask |= 1 << GET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan); } phase_refmask &= RC_MASK_XYZW; - masked_negate = split.Phase[phase] & mov->I.SrcReg[0].Negate; + masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate; if (masked_negate == 0) - mov->I.SrcReg[0].Negate = 0; + mov->U.I.SrcReg[0].Negate = 0; else if (masked_negate == split.Phase[phase]) - mov->I.SrcReg[0].Negate = RC_MASK_XYZW; + mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW; } - inst->I.SrcReg[src].File = RC_FILE_TEMPORARY; - inst->I.SrcReg[src].Index = tempreg; - inst->I.SrcReg[src].Swizzle = 0; - inst->I.SrcReg[src].Negate = RC_MASK_NONE; - inst->I.SrcReg[src].Abs = 0; + inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[src].Index = tempreg; + inst->U.I.SrcReg[src].Swizzle = 0; + inst->U.I.SrcReg[src].Negate = RC_MASK_NONE; + inst->U.I.SrcReg[src].Abs = 0; for(unsigned int chan = 0; chan < 4; ++chan) { - SET_SWZ(inst->I.SrcReg[src].Swizzle, chan, + SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED); } } @@ -91,11 +91,11 @@ void rc_dataflow_swizzles(struct radeon_compiler * c) struct rc_instruction * inst; for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); unsigned int src; for(src = 0; src < opcode->NumSrcRegs; ++src) { - if (!c->SwizzleCaps->IsNative(inst->I.Opcode, inst->I.SrcReg[src])) + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) rewrite_source(c, inst, src); } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c index a1ee7c0cab..68a093b8c0 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -98,7 +98,7 @@ unsigned int rc_find_free_temporary(struct radeon_compiler * c) memset(used, 0, sizeof(used)); for (struct rc_instruction * rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) { - const struct rc_sub_instruction *inst = &rcinst->I; + const struct rc_sub_instruction *inst = &rcinst->U.I; const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode); unsigned int k; @@ -129,11 +129,11 @@ struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c) memset(inst, 0, sizeof(struct rc_instruction)); - inst->I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; - inst->I.DstReg.WriteMask = RC_MASK_XYZW; - inst->I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; - inst->I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW; - inst->I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW; return inst; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index 071b0a0ca9..067cb545fd 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -109,11 +109,19 @@ struct rc_sub_instruction { /*@}*/ }; +typedef enum { + RC_INSTRUCTION_NORMAL = 0, + RC_INSTRUCTION_PAIR +} rc_instruction_type; + struct rc_instruction { struct rc_instruction * Prev; struct rc_instruction * Next; - struct rc_sub_instruction I; + rc_instruction_type Type; + union { + struct rc_sub_instruction I; + } U; /** * Warning: IPs are not stable. If you want to use them, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index e25bc4ec87..425b929668 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -45,10 +45,10 @@ static struct rc_instruction *emit1( { struct rc_instruction *fpi = rc_insert_new_instruction(c, after); - fpi->I.Opcode = Opcode; - fpi->I.SaturateMode = Saturate; - fpi->I.DstReg = DstReg; - fpi->I.SrcReg[0] = SrcReg; + fpi->U.I.Opcode = Opcode; + fpi->U.I.SaturateMode = Saturate; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg; return fpi; } @@ -59,11 +59,11 @@ static struct rc_instruction *emit2( { struct rc_instruction *fpi = rc_insert_new_instruction(c, after); - fpi->I.Opcode = Opcode; - fpi->I.SaturateMode = Saturate; - fpi->I.DstReg = DstReg; - fpi->I.SrcReg[0] = SrcReg0; - fpi->I.SrcReg[1] = SrcReg1; + fpi->U.I.Opcode = Opcode; + fpi->U.I.SaturateMode = Saturate; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg0; + fpi->U.I.SrcReg[1] = SrcReg1; return fpi; } @@ -75,12 +75,12 @@ static struct rc_instruction *emit3( { struct rc_instruction *fpi = rc_insert_new_instruction(c, after); - fpi->I.Opcode = Opcode; - fpi->I.SaturateMode = Saturate; - fpi->I.DstReg = DstReg; - fpi->I.SrcReg[0] = SrcReg0; - fpi->I.SrcReg[1] = SrcReg1; - fpi->I.SrcReg[2] = SrcReg2; + fpi->U.I.Opcode = Opcode; + fpi->U.I.SaturateMode = Saturate; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg0; + fpi->U.I.SrcReg[1] = SrcReg1; + fpi->U.I.SrcReg[2] = SrcReg2; return fpi; } @@ -168,36 +168,36 @@ static struct rc_src_register scalar(struct rc_src_register reg) static void transform_ABS(struct radeon_compiler* c, struct rc_instruction* inst) { - struct rc_src_register src = inst->I.SrcReg[0]; + struct rc_src_register src = inst->U.I.SrcReg[0]; src.Abs = 1; src.Negate = RC_MASK_NONE; - emit1(c, inst->Prev, RC_OPCODE_MOV, inst->I.SaturateMode, inst->I.DstReg, src); + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src); rc_remove_instruction(inst); } static void transform_DP3(struct radeon_compiler* c, struct rc_instruction* inst) { - struct rc_src_register src0 = inst->I.SrcReg[0]; - struct rc_src_register src1 = inst->I.SrcReg[1]; + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + struct rc_src_register src1 = inst->U.I.SrcReg[1]; src0.Negate &= ~RC_MASK_W; src0.Swizzle &= ~(7 << (3 * 3)); src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); src1.Negate &= ~RC_MASK_W; src1.Swizzle &= ~(7 << (3 * 3)); src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); - emit2(c, inst->Prev, RC_OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, src1); + emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); rc_remove_instruction(inst); } static void transform_DPH(struct radeon_compiler* c, struct rc_instruction* inst) { - struct rc_src_register src0 = inst->I.SrcReg[0]; + struct rc_src_register src0 = inst->U.I.SrcReg[0]; src0.Negate &= ~RC_MASK_W; src0.Swizzle &= ~(7 << (3 * 3)); src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3); - emit2(c, inst->Prev, RC_OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, inst->I.SrcReg[1]); + emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]); rc_remove_instruction(inst); } @@ -208,9 +208,9 @@ static void transform_DPH(struct radeon_compiler* c, static void transform_DST(struct radeon_compiler* c, struct rc_instruction* inst) { - emit2(c, inst->Prev, RC_OPCODE_MUL, inst->I.SaturateMode, inst->I.DstReg, - swizzle(inst->I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE), - swizzle(inst->I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W)); + emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg, + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W)); rc_remove_instruction(inst); } @@ -218,9 +218,9 @@ static void transform_FLR(struct radeon_compiler* c, struct rc_instruction* inst) { int tempreg = rc_find_free_temporary(c); - emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->I.SrcReg[0]); - emit2(c, inst->Prev, RC_OPCODE_ADD, inst->I.SaturateMode, inst->I.DstReg, - inst->I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, tempreg))); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0]); + emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, + inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, tempreg))); rc_remove_instruction(inst); } @@ -252,19 +252,19 @@ static void transform_LIT(struct radeon_compiler* c, constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle); - if (inst->I.DstReg.WriteMask != RC_MASK_XYZW || inst->I.DstReg.File != RC_FILE_TEMPORARY) { + if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { struct rc_instruction * inst_mov; inst_mov = emit1(c, inst, - RC_OPCODE_MOV, 0, inst->I.DstReg, + RC_OPCODE_MOV, 0, inst->U.I.DstReg, srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c))); - inst->I.DstReg.File = RC_FILE_TEMPORARY; - inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; - inst->I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; } - temp = inst->I.DstReg.Index; + temp = inst->U.I.DstReg.Index; srctemp = srcreg(RC_FILE_TEMPORARY, temp); // tmp.x = max(0.0, Src.x); @@ -272,7 +272,7 @@ static void transform_LIT(struct radeon_compiler* c, // tmp.w = clamp(Src.z, -128+eps, 128-eps); emit2(c, inst->Prev, RC_OPCODE_MAX, 0, dstregtmpmask(temp, RC_MASK_XYW), - inst->I.SrcReg[0], + inst->U.I.SrcReg[0], swizzle(srcreg(RC_FILE_CONSTANT, constant), RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3)); emit2(c, inst->Prev, RC_OPCODE_MIN, 0, @@ -293,14 +293,14 @@ static void transform_LIT(struct radeon_compiler* c, swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W)); // tmp.z = (tmp.x > 0) ? tmp.w : 0.0 - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->I.SaturateMode, + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, dstregtmpmask(temp, RC_MASK_Z), negate(swizzle(srctemp, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)), swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), builtin_zero); // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 - emit1(c, inst->Prev, RC_OPCODE_MOV, inst->I.SaturateMode, + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, dstregtmpmask(temp, RC_MASK_XYW), swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE)); @@ -314,10 +314,10 @@ static void transform_LRP(struct radeon_compiler* c, emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), - inst->I.SrcReg[1], negate(inst->I.SrcReg[2])); - emit3(c, inst->Prev, RC_OPCODE_MAD, inst->I.SaturateMode, - inst->I.DstReg, - inst->I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg), inst->I.SrcReg[2]); + inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2])); + emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, + inst->U.I.DstReg, + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[2]); rc_remove_instruction(inst); } @@ -331,9 +331,9 @@ static void transform_POW(struct radeon_compiler* c, tempdst.WriteMask = RC_MASK_W; tempsrc.Swizzle = RC_SWIZZLE_WWWW; - emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, scalar(inst->I.SrcReg[0])); - emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->I.SrcReg[1])); - emit1(c, inst->Prev, RC_OPCODE_EX2, inst->I.SaturateMode, inst->I.DstReg, tempsrc); + emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, scalar(inst->U.I.SrcReg[0])); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->U.I.SrcReg[1])); + emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc); rc_remove_instruction(inst); } @@ -341,7 +341,7 @@ static void transform_POW(struct radeon_compiler* c, static void transform_RSQ(struct radeon_compiler* c, struct rc_instruction* inst) { - inst->I.SrcReg[0] = absolute(inst->I.SrcReg[0]); + inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]); } static void transform_SGE(struct radeon_compiler* c, @@ -349,8 +349,8 @@ static void transform_SGE(struct radeon_compiler* c, { int tempreg = rc_find_free_temporary(c); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1])); - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg, + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one); rc_remove_instruction(inst); @@ -361,8 +361,8 @@ static void transform_SLT(struct radeon_compiler* c, { int tempreg = rc_find_free_temporary(c); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1])); - emit3(c, inst->Prev, RC_OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg, + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero); rc_remove_instruction(inst); @@ -371,14 +371,14 @@ static void transform_SLT(struct radeon_compiler* c, static void transform_SUB(struct radeon_compiler* c, struct rc_instruction* inst) { - inst->I.Opcode = RC_OPCODE_ADD; - inst->I.SrcReg[1] = negate(inst->I.SrcReg[1]); + inst->U.I.Opcode = RC_OPCODE_ADD; + inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]); } static void transform_SWZ(struct radeon_compiler* c, struct rc_instruction* inst) { - inst->I.Opcode = RC_OPCODE_MOV; + inst->U.I.Opcode = RC_OPCODE_MOV; } static void transform_XPD(struct radeon_compiler* c, @@ -387,11 +387,11 @@ static void transform_XPD(struct radeon_compiler* c, int tempreg = rc_find_free_temporary(c); emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstreg(RC_FILE_TEMPORARY, tempreg), - swizzle(inst->I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), - swizzle(inst->I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); - emit3(c, inst->Prev, RC_OPCODE_MAD, inst->I.SaturateMode, inst->I.DstReg, - swizzle(inst->I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), - swizzle(inst->I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); + emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), negate(srcreg(RC_FILE_TEMPORARY, tempreg))); rc_remove_instruction(inst); @@ -417,7 +417,7 @@ int radeonTransformALU( struct rc_instruction* inst, void* unused) { - switch(inst->I.Opcode) { + switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_DST: transform_DST(c, inst); return 1; @@ -441,9 +441,9 @@ static void transform_r300_vertex_ABS(struct radeon_compiler* c, struct rc_instruction* inst) { /* Note: r500 can take absolute values, but r300 cannot. */ - inst->I.Opcode = RC_OPCODE_MAX; - inst->I.SrcReg[1] = inst->I.SrcReg[0]; - inst->I.SrcReg[1].Negate ^= RC_MASK_XYZW; + inst->U.I.Opcode = RC_OPCODE_MAX; + inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0]; + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; } /** @@ -455,7 +455,7 @@ int r300_transform_vertex_alu( struct rc_instruction* inst, void* unused) { - switch(inst->I.Opcode) { + switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; case RC_OPCODE_DP3: transform_DP3(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; @@ -531,9 +531,9 @@ int radeonTransformTrigSimple(struct radeon_compiler* c, struct rc_instruction* inst, void* unused) { - if (inst->I.Opcode != RC_OPCODE_COS && - inst->I.Opcode != RC_OPCODE_SIN && - inst->I.Opcode != RC_OPCODE_SCS) + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) return 0; unsigned int constants[2]; @@ -541,12 +541,12 @@ int radeonTransformTrigSimple(struct radeon_compiler* c, sincos_constants(c, constants); - if (inst->I.Opcode == RC_OPCODE_COS) { + if (inst->U.I.Opcode == RC_OPCODE_COS) { // MAD tmp.x, src, 1/(2*PI), 0.75 // FRC tmp.x, tmp.x // MAD tmp.z, tmp.x, 2*PI, -PI emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), - swizzle(inst->I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z), swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)); emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), @@ -556,12 +556,12 @@ int radeonTransformTrigSimple(struct radeon_compiler* c, swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), negate(swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z))); - sin_approx(c, inst, inst->I.DstReg, + sin_approx(c, inst, inst->U.I.DstReg, swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), constants); - } else if (inst->I.Opcode == RC_OPCODE_SIN) { + } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), - swizzle(inst->I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z), swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y)); emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), @@ -571,12 +571,12 @@ int radeonTransformTrigSimple(struct radeon_compiler* c, swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), negate(swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z))); - sin_approx(c, inst, inst->I.DstReg, + sin_approx(c, inst, inst->U.I.DstReg, swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), constants); } else { emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), - swizzle(inst->I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z), swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)); emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY), @@ -586,14 +586,14 @@ int radeonTransformTrigSimple(struct radeon_compiler* c, swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), negate(swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z))); - struct rc_dst_register dst = inst->I.DstReg; + struct rc_dst_register dst = inst->U.I.DstReg; - dst.WriteMask = inst->I.DstReg.WriteMask & RC_MASK_X; + dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X; sin_approx(c, inst, dst, swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), constants); - dst.WriteMask = inst->I.DstReg.WriteMask & RC_MASK_Y; + dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y; sin_approx(c, inst, dst, swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y), constants); @@ -617,9 +617,9 @@ int radeonTransformTrigScale(struct radeon_compiler* c, struct rc_instruction* inst, void* unused) { - if (inst->I.Opcode != RC_OPCODE_COS && - inst->I.Opcode != RC_OPCODE_SIN && - inst->I.Opcode != RC_OPCODE_SCS) + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) return 0; static const float RCP_2PI = 0.15915494309189535; @@ -631,28 +631,28 @@ int radeonTransformTrigScale(struct radeon_compiler* c, constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle); emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W), - swizzle(inst->I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), srcreg(RC_FILE_TEMPORARY, temp)); - if (inst->I.Opcode == RC_OPCODE_COS) { - emit1(c, inst->Prev, RC_OPCODE_COS, inst->I.SaturateMode, inst->I.DstReg, + if (inst->U.I.Opcode == RC_OPCODE_COS) { + emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW)); - } else if (inst->I.Opcode == RC_OPCODE_SIN) { - emit1(c, inst->Prev, RC_OPCODE_SIN, inst->I.SaturateMode, - inst->I.DstReg, srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW)); - } else if (inst->I.Opcode == RC_OPCODE_SCS) { - struct rc_dst_register moddst = inst->I.DstReg; + } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { + emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, + inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW)); + } else if (inst->U.I.Opcode == RC_OPCODE_SCS) { + struct rc_dst_register moddst = inst->U.I.DstReg; - if (inst->I.DstReg.WriteMask & RC_MASK_X) { + if (inst->U.I.DstReg.WriteMask & RC_MASK_X) { moddst.WriteMask = RC_MASK_X; - emit1(c, inst->Prev, RC_OPCODE_COS, inst->I.SaturateMode, moddst, + emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst, srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW)); } - if (inst->I.DstReg.WriteMask & RC_MASK_Y) { + if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) { moddst.WriteMask = RC_MASK_Y; - emit1(c, inst->Prev, RC_OPCODE_SIN, inst->I.SaturateMode, moddst, + emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst, srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW)); } } @@ -674,11 +674,11 @@ int radeonTransformDeriv(struct radeon_compiler* c, struct rc_instruction* inst, void* unused) { - if (inst->I.Opcode != RC_OPCODE_DDX && inst->I.Opcode != RC_OPCODE_DDY) + if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY) return 0; - inst->I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_ONE, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE); - inst->I.SrcReg[1].Negate = RC_MASK_XYZW; + inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_ONE, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE); + inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW; return 1; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c index 6cda208600..0d8d8e0b3b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -341,7 +341,7 @@ static void scan_instructions(struct pair_state *s) struct pair_state_instruction *pairinst = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*pairinst)); memset(pairinst, 0, sizeof(struct pair_state_instruction)); - pairinst->Instruction = source->I; + pairinst->Instruction = source->U.I; pairinst->IP = ip; final_rewrite(s, &pairinst->Instruction); classify_instruction(s, pairinst); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index da2bcc5d89..440069d558 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -28,8 +28,6 @@ #ifndef __RADEON_PROGRAM_PAIR_H_ #define __RADEON_PROGRAM_PAIR_H_ -#include "radeon_program.h" - struct r300_fragment_program_compiler; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c index 6645d7cacb..fe90a5900e 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c @@ -144,12 +144,12 @@ static void rc_print_src_register(FILE * f, struct rc_src_register src) static void rc_print_instruction(FILE * f, struct rc_instruction * inst) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); unsigned int reg; fprintf(f, "%s", opcode->Name); - switch(inst->I.SaturateMode) { + switch(inst->U.I.SaturateMode) { case RC_SATURATE_NONE: break; case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break; case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break; @@ -158,7 +158,7 @@ static void rc_print_instruction(FILE * f, struct rc_instruction * inst) if (opcode->HasDstReg) { fprintf(f, " "); - rc_print_dst_register(f, inst->I.DstReg); + rc_print_dst_register(f, inst->U.I.DstReg); if (opcode->NumSrcRegs) fprintf(f, ","); } @@ -167,23 +167,23 @@ static void rc_print_instruction(FILE * f, struct rc_instruction * inst) if (reg > 0) fprintf(f, ","); fprintf(f, " "); - rc_print_src_register(f, inst->I.SrcReg[reg]); + rc_print_src_register(f, inst->U.I.SrcReg[reg]); } if (opcode->HasTexture) { fprintf(f, ", %s%s[%u]", - textarget_to_string(inst->I.TexSrcTarget), - inst->I.TexShadow ? "SHADOW" : "", - inst->I.TexSrcUnit); + textarget_to_string(inst->U.I.TexSrcTarget), + inst->U.I.TexShadow ? "SHADOW" : "", + inst->U.I.TexSrcUnit); } fprintf(f, ";"); - if (inst->I.WriteALUResult) { + if (inst->U.I.WriteALUResult) { fprintf(f, " [aluresult = ("); rc_print_comparefunc(f, - (inst->I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w", - inst->I.ALUResultCompare, "0"); + (inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w", + inst->U.I.ALUResultCompare, "0"); fprintf(f, ")]"); } diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index fb8d6bceda..43629d643b 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -217,20 +217,20 @@ static void initialize_NV_registers(struct radeon_compiler * compiler) for(reg = 0; reg < 12; ++reg) { inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions); - inst->I.Opcode = RC_OPCODE_MOV; - inst->I.DstReg.File = RC_FILE_TEMPORARY; - inst->I.DstReg.Index = reg; - inst->I.SrcReg[0].File = RC_FILE_NONE; - inst->I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = reg; + inst->U.I.SrcReg[0].File = RC_FILE_NONE; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; } inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions); - inst->I.Opcode = RC_OPCODE_ARL; - inst->I.DstReg.File = RC_FILE_ADDRESS; - inst->I.DstReg.Index = 0; - inst->I.DstReg.WriteMask = WRITEMASK_X; - inst->I.SrcReg[0].File = RC_FILE_NONE; - inst->I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + inst->U.I.Opcode = RC_OPCODE_ARL; + inst->U.I.DstReg.File = RC_FILE_ADDRESS; + inst->U.I.DstReg.Index = 0; + inst->U.I.DstReg.WriteMask = WRITEMASK_X; + inst->U.I.SrcReg[0].File = RC_FILE_NONE; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; } static struct r300_vertex_program *build_program(GLcontext *ctx, diff --git a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c index fb9bb9ce91..9f9dec840b 100644 --- a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c +++ b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c @@ -152,25 +152,25 @@ static void translate_instruction(struct radeon_compiler * c, const struct rc_opcode_info * opcode; unsigned int i; - dest->I.Opcode = translate_opcode(src->Opcode); - if (dest->I.Opcode == RC_OPCODE_ILLEGAL_OPCODE) { + dest->U.I.Opcode = translate_opcode(src->Opcode); + if (dest->U.I.Opcode == RC_OPCODE_ILLEGAL_OPCODE) { rc_error(c, "Unsupported opcode %i\n", src->Opcode); return; } - dest->I.SaturateMode = translate_saturate(src->SaturateMode); + dest->U.I.SaturateMode = translate_saturate(src->SaturateMode); - opcode = rc_get_opcode_info(dest->I.Opcode); + opcode = rc_get_opcode_info(dest->U.I.Opcode); for(i = 0; i < opcode->NumSrcRegs; ++i) - translate_srcreg(&dest->I.SrcReg[i], &src->SrcReg[i]); + translate_srcreg(&dest->U.I.SrcReg[i], &src->SrcReg[i]); if (opcode->HasDstReg) - translate_dstreg(&dest->I.DstReg, &src->DstReg); + translate_dstreg(&dest->U.I.DstReg, &src->DstReg); if (opcode->HasTexture) { - dest->I.TexSrcUnit = src->TexSrcUnit; - dest->I.TexSrcTarget = translate_tex_target(src->TexSrcTarget); - dest->I.TexShadow = src->TexShadow; + dest->U.I.TexSrcUnit = src->TexSrcUnit; + dest->U.I.TexSrcTarget = translate_tex_target(src->TexSrcTarget); + dest->U.I.TexShadow = src->TexShadow; } } -- cgit v1.2.3 From 2a929a08ab4fa4501dca88cc988cbf469b7deeb5 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 30 Sep 2009 19:44:38 -0700 Subject: r300g: xRGB and RGBx formats. We now have 48 GLX visuals. Pretty soon, we'll have 90+ visuals, only five of which ever get tested. :3 --- src/gallium/drivers/r300/r300_screen.c | 2 ++ src/gallium/drivers/r300/r300_state_inlines.h | 9 +++++++++ src/gallium/drivers/r300/r300_texture.h | 4 ++++ 3 files changed, 15 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index f2659ca61f..81d01b1320 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -205,7 +205,9 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage, /* Colorbuffer or texture */ case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: case PIPE_FORMAT_I8_UNORM: return usage & (PIPE_TEXTURE_USAGE_RENDER_TARGET | diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 91b93fc367..88eb66b79e 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -292,6 +292,9 @@ static INLINE uint32_t r300_translate_colorformat(enum pipe_format format) return R300_COLOR_FORMAT_ARGB4444; /* 32-bit buffers */ case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: case PIPE_FORMAT_Z24S8_UNORM: return R300_COLOR_FORMAT_ARGB8888; /* XXX Not in pipe_format @@ -338,10 +341,16 @@ static INLINE uint32_t r300_translate_out_fmt(enum pipe_format format) { switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: case PIPE_FORMAT_Z24S8_UNORM: return R300_US_OUT_FMT_C4_8 | R300_C0_SEL_B | R300_C1_SEL_G | R300_C2_SEL_R | R300_C3_SEL_A; + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + return R300_US_OUT_FMT_C4_8 | + R300_C0_SEL_A | R300_C1_SEL_B | + R300_C2_SEL_G | R300_C3_SEL_R; default: debug_printf("r300: Implementation error: " "Got unsupported output format %s in %s\n", diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 78ee0f1611..bd87790bc3 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -48,6 +48,10 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); case PIPE_FORMAT_R8G8B8A8_UNORM: return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8); + case PIPE_FORMAT_X8R8G8B8_UNORM: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); + case PIPE_FORMAT_R8G8B8X8_UNORM: + return R300_EASY_TX_FORMAT(Y, Z, ONE, X, W8Z8Y8X8); case PIPE_FORMAT_A8R8G8B8_SRGB: return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA; -- cgit v1.2.3 From cd0a39681377644b7d4574c9a33acbc9c844bb59 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 3 Oct 2009 22:15:17 +0100 Subject: llvmpipe: Adjust format assertion. We support array layout too -- if it has a single channel. --- src/gallium/drivers/llvmpipe/lp_bld_format_soa.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c index b5ff434e1a..66bebdcdec 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c @@ -119,7 +119,9 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, unsigned chan; /* FIXME: Support more formats */ - assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH || + (format_desc->layout == UTIL_FORMAT_LAYOUT_ARRAY && + format_desc->block.bits == format_desc->channel[0].size)); assert(format_desc->block.width == 1); assert(format_desc->block.height == 1); assert(format_desc->block.bits <= 32); @@ -195,10 +197,9 @@ lp_build_load_rgba_soa(LLVMBuilderRef builder, { LLVMValueRef packed; - assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); assert(format_desc->block.width == 1); assert(format_desc->block.height == 1); - assert(format_desc->block.bits <= 32); + assert(format_desc->block.bits <= type.width); packed = lp_build_gather(builder, type.length, format_desc->block.bits, type.width, -- cgit v1.2.3 From 10981c0a767f146ca649e50af9871cd499b0617e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 4 Oct 2009 11:35:50 +0100 Subject: llvmpipe: Match header's protection macro with filename. --- src/gallium/drivers/llvmpipe/lp_bld_format.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h index 6d3f692619..c087fc986e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h @@ -25,8 +25,8 @@ * **************************************************************************/ -#ifndef LP_BLD_H -#define LP_BLD_H +#ifndef LP_BLD_FORMAT_H +#define LP_BLD_FORMAT_H /** @@ -116,4 +116,4 @@ lp_build_load_rgba_soa(LLVMBuilderRef builder, LLVMValueRef offsets, LLVMValueRef *rgba); -#endif /* !LP_BLD_H */ +#endif /* !LP_BLD_FORMAT_H */ -- cgit v1.2.3 From eb2e41f0c636eb77634ec7ada93b869a43f11e9f Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 4 Oct 2009 11:36:42 +0100 Subject: llvmpipe: Remove loop testing from format testing. Loop building will be rewritten. --- src/gallium/drivers/llvmpipe/lp_test_format.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 7d83f899e6..ab80c0143f 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -37,7 +37,6 @@ #include "util/u_format.h" -#include "lp_bld_flow.h" #include "lp_bld_format.h" @@ -103,7 +102,6 @@ add_load_rgba_test(LLVMModuleRef module, LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef rgba; - struct lp_build_loop_state loop; args[0] = LLVMPointerType(LLVMInt8Type(), 0); args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); @@ -117,13 +115,9 @@ add_load_rgba_test(LLVMModuleRef module, builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), &loop); - rgba = lp_build_load_rgba_aos(builder, format, ptr); LLVMBuildStore(builder, rgba, rgba_ptr); - lp_build_loop_end(builder, LLVMConstInt(LLVMInt32Type(), 4, 0), NULL, &loop); - LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); -- cgit v1.2.3 From 589ec337f0080893baba996201cf65bb6e1a2fec Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 4 Oct 2009 13:04:08 +0100 Subject: llvmpipe: Autogenerate lp_tile_soa.c from u_format.csv. This is just a temporary change until we code generate the tile read/write functions in runtime. The new code avoids an extra memcpy that exists in u_tile.c functions, from which lp_tile_soa.c was originally based. This achieves up to 5% improvement, particularly in frames with little geometry overlap. --- src/gallium/drivers/llvmpipe/.gitignore | 1 + src/gallium/drivers/llvmpipe/Makefile | 3 + src/gallium/drivers/llvmpipe/SConscript | 7 + src/gallium/drivers/llvmpipe/lp_tile_cache.c | 77 +-- src/gallium/drivers/llvmpipe/lp_tile_soa.c | 931 --------------------------- src/gallium/drivers/llvmpipe/lp_tile_soa.h | 16 +- src/gallium/drivers/llvmpipe/lp_tile_soa.py | 278 ++++++++ 7 files changed, 339 insertions(+), 974 deletions(-) create mode 100644 src/gallium/drivers/llvmpipe/.gitignore delete mode 100644 src/gallium/drivers/llvmpipe/lp_tile_soa.c create mode 100644 src/gallium/drivers/llvmpipe/lp_tile_soa.py (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/.gitignore b/src/gallium/drivers/llvmpipe/.gitignore new file mode 100644 index 0000000000..257b72d7b2 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/.gitignore @@ -0,0 +1 @@ +lp_tile_soa.c diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index cd7b6356d2..21aff1967a 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -55,3 +55,6 @@ C_SOURCES = \ lp_tile_soa.c include ../../Makefile.template + +lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_access.py ../../auxiliary/util/u_format.csv + python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index f4a9a3b22e..13cd465838 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -9,6 +9,13 @@ if not env.has_key('LLVM_VERSION'): env.Tool('udis86') +env.CodeGenerate( + target = 'lp_tile_soa.c', + script = 'lp_tile_soa.py', + source = ['#src/gallium/auxiliary/util/u_format.csv'], + command = 'python $SCRIPT $SOURCE > $TARGET' +) + llvmpipe = env.ConvenienceLibrary( target = 'llvmpipe', source = [ diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c index 2ac8cb5c82..68d3fa3282 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c @@ -241,42 +241,34 @@ lp_flush_tile_cache(struct llvmpipe_tile_cache *tc) for (x = 0; x < pt->width; x += TILE_SIZE) { struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - switch(tile->status) { - case LP_TILE_STATUS_UNDEFINED: - break; - - case LP_TILE_STATUS_CLEAR: { - /** - * Actually clear the tiles which were flagged as being in a clear state. - */ - - struct pipe_screen *screen = pt->texture->screen; - unsigned tw = TILE_SIZE; - unsigned th = TILE_SIZE; - void *dst; - - if (pipe_clip_tile(x, y, &tw, &th, pt)) - continue; - - dst = screen->transfer_map(screen, pt); - assert(dst); - if(!dst) - continue; - - util_fill_rect(dst, &pt->block, pt->stride, - x, y, tw, th, - tc->clear_val); - - screen->transfer_unmap(screen, pt); + if(tile->status != LP_TILE_STATUS_UNDEFINED) { + unsigned w = TILE_SIZE; + unsigned h = TILE_SIZE; + + if (!pipe_clip_tile(x, y, &w, &h, pt)) { + switch(tile->status) { + case LP_TILE_STATUS_CLEAR: + /* Actually clear the tiles which were flagged as being in a + * clear state. */ + util_fill_rect(tc->transfer_map, &pt->block, pt->stride, + x, y, w, h, + tc->clear_val); + break; + + case LP_TILE_STATUS_DEFINED: + lp_tile_write_4ub(pt->format, + tile->color, + tc->transfer_map, pt->stride, + x, y, w, h); + break; + + default: + assert(0); + break; + } + } tile->status = LP_TILE_STATUS_UNDEFINED; - break; - } - - case LP_TILE_STATUS_DEFINED: - lp_put_tile_rgba_soa(pt, x, y, tile->color); - tile->status = LP_TILE_STATUS_UNDEFINED; - break; } } } @@ -304,11 +296,22 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc, tile->status = LP_TILE_STATUS_DEFINED; break; - case LP_TILE_STATUS_UNDEFINED: - /* get new tile data from transfer */ - lp_get_tile_rgba_soa(pt, x & ~(TILE_SIZE - 1), y & ~(TILE_SIZE - 1), tile->color); + case LP_TILE_STATUS_UNDEFINED: { + unsigned w = TILE_SIZE; + unsigned h = TILE_SIZE; + + x &= ~(TILE_SIZE - 1); + y &= ~(TILE_SIZE - 1); + + if (!pipe_clip_tile(x, y, &w, &h, tc->transfer)) + lp_tile_read_4ub(pt->format, + tile->color, + tc->transfer_map, tc->transfer->stride, + x, y, w, h); + tile->status = LP_TILE_STATUS_DEFINED; break; + } case LP_TILE_STATUS_DEFINED: /* nothing to do */ diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.c b/src/gallium/drivers/llvmpipe/lp_tile_soa.c deleted file mode 100644 index 4e4ccb31cc..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.c +++ /dev/null @@ -1,931 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * RGBA/float tile get/put functions. - * Usable both by drivers and state trackers. - */ - - -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" - -#include "util/u_math.h" -#include "util/u_memory.h" -#include "util/u_rect.h" -#include "util/u_tile.h" -#include "lp_tile_cache.h" -#include "lp_tile_soa.h" - - -const unsigned char -tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = { - { 0, 1, 4, 5, 8, 9, 12, 13}, - { 2, 3, 6, 7, 10, 11, 14, 15} -}; - - - -/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/ - -static void -a8r8g8b8_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - const unsigned pixel = *src++; - TILE_PIXEL(p, j, i, 0) = (pixel >> 16) & 0xff; - TILE_PIXEL(p, j, i, 1) = (pixel >> 8) & 0xff; - TILE_PIXEL(p, j, i, 2) = (pixel >> 0) & 0xff; - TILE_PIXEL(p, j, i, 3) = (pixel >> 24) & 0xff; - } - } -} - - -static void -a8r8g8b8_put_tile_rgba(unsigned *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - unsigned r, g, b, a; - r = TILE_PIXEL(p, j, i, 0); - g = TILE_PIXEL(p, j, i, 1); - b = TILE_PIXEL(p, j, i, 2); - a = TILE_PIXEL(p, j, i, 3); - *dst++ = (a << 24) | (r << 16) | (g << 8) | b; - } - } -} - - -/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/ - -static void -x8r8g8b8_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - const unsigned pixel = *src++; - TILE_PIXEL(p, j, i, 0) = (pixel >> 16) & 0xff; - TILE_PIXEL(p, j, i, 1) = (pixel >> 8) & 0xff; - TILE_PIXEL(p, j, i, 2) = (pixel >> 0) & 0xff; - TILE_PIXEL(p, j, i, 3) = 0xff; - } - } -} - - -static void -x8r8g8b8_put_tile_rgba(unsigned *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - unsigned r, g, b; - r = TILE_PIXEL(p, j, i, 0); - g = TILE_PIXEL(p, j, i, 1); - b = TILE_PIXEL(p, j, i, 2); - *dst++ = (0xff << 24) | (r << 16) | (g << 8) | b; - } - } -} - - -/*** PIPE_FORMAT_B8G8R8A8_UNORM ***/ - -static void -b8g8r8a8_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - const unsigned pixel = *src++; - TILE_PIXEL(p, j, i, 0) = (pixel >> 8) & 0xff; - TILE_PIXEL(p, j, i, 1) = (pixel >> 16) & 0xff; - TILE_PIXEL(p, j, i, 2) = (pixel >> 24) & 0xff; - TILE_PIXEL(p, j, i, 3) = (pixel >> 0) & 0xff; - } - } -} - - -static void -b8g8r8a8_put_tile_rgba(unsigned *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - unsigned r, g, b, a; - r = TILE_PIXEL(p, j, i, 0); - g = TILE_PIXEL(p, j, i, 1); - b = TILE_PIXEL(p, j, i, 2); - a = TILE_PIXEL(p, j, i, 3); - *dst++ = (b << 24) | (g << 16) | (r << 8) | a; - } - } -} - - -/*** PIPE_FORMAT_A1R5G5B5_UNORM ***/ - -static void -a1r5g5b5_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - const ushort pixel = *src++; - TILE_PIXEL(p, j, i, 0) = ((pixel >> 10) & 0x1f) * 255 / 31; - TILE_PIXEL(p, j, i, 1) = ((pixel >> 5) & 0x1f) * 255 / 31; - TILE_PIXEL(p, j, i, 2) = ((pixel ) & 0x1f) * 255 / 31; - TILE_PIXEL(p, j, i, 3) = ((pixel >> 15) ) * 255; - } - } -} - - -static void -a1r5g5b5_put_tile_rgba(ushort *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - unsigned r, g, b, a; - r = TILE_PIXEL(p, j, i, 0); - g = TILE_PIXEL(p, j, i, 1); - b = TILE_PIXEL(p, j, i, 2); - a = TILE_PIXEL(p, j, i, 3); - r = r >> 3; /* 5 bits */ - g = g >> 3; /* 5 bits */ - b = b >> 3; /* 5 bits */ - a = a >> 7; /* 1 bit */ - *dst++ = (a << 15) | (r << 10) | (g << 5) | b; - } - } -} - - -/*** PIPE_FORMAT_A4R4G4B4_UNORM ***/ - -static void -a4r4g4b4_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - const ushort pixel = *src++; - TILE_PIXEL(p, j, i, 0) = ((pixel >> 8) & 0xf) * 255 / 15; - TILE_PIXEL(p, j, i, 1) = ((pixel >> 4) & 0xf) * 255 / 15; - TILE_PIXEL(p, j, i, 2) = ((pixel ) & 0xf) * 255 / 15; - TILE_PIXEL(p, j, i, 3) = ((pixel >> 12) ) * 255 / 15; - } - } -} - - -static void -a4r4g4b4_put_tile_rgba(ushort *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - unsigned r, g, b, a; - r = TILE_PIXEL(p, j, i, 0); - g = TILE_PIXEL(p, j, i, 1); - b = TILE_PIXEL(p, j, i, 2); - a = TILE_PIXEL(p, j, i, 3); - r >>= 4; - g >>= 4; - b >>= 4; - a >>= 4; - *dst++ = (a << 12) | (r << 16) | (g << 4) | b; - } - } -} - - -/*** PIPE_FORMAT_R5G6B5_UNORM ***/ - -static void -r5g6b5_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - const ushort pixel = *src++; - TILE_PIXEL(p, j, i, 0) = ((pixel >> 11) & 0x1f) * 255 / 31; - TILE_PIXEL(p, j, i, 1) = ((pixel >> 5) & 0x3f) * 255 / 63; - TILE_PIXEL(p, j, i, 2) = ((pixel ) & 0x1f) * 255 / 31; - TILE_PIXEL(p, j, i, 3) = 255; - } - } -} - - -static void -r5g6b5_put_tile_rgba(ushort *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - uint r = (uint) TILE_PIXEL(p, j, i, 0) * 31 / 255; - uint g = (uint) TILE_PIXEL(p, j, i, 1) * 63 / 255; - uint b = (uint) TILE_PIXEL(p, j, i, 2) * 31 / 255; - *dst++ = (r << 11) | (g << 5) | (b); - } - } -} - - - -/*** PIPE_FORMAT_Z16_UNORM ***/ - -/** - * Return each Z value as four floats in [0,1]. - */ -static void -z16_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - uint8_t *p) -{ - const float scale = 1.0f / 65535.0f; - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - TILE_PIXEL(p, j, i, 0) = - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = - TILE_PIXEL(p, j, i, 3) = *src++ * scale; - } - } -} - - - - -/*** PIPE_FORMAT_L8_UNORM ***/ - -static void -l8_get_tile_rgba(const ubyte *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++, src++) { - TILE_PIXEL(p, j, i, 0) = - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = *src; - TILE_PIXEL(p, j, i, 3) = 255; - } - } -} - - -static void -l8_put_tile_rgba(ubyte *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - unsigned r; - r = TILE_PIXEL(p, j, i, 0); - *dst++ = (ubyte) r; - } - } -} - - - -/*** PIPE_FORMAT_A8_UNORM ***/ - -static void -a8_get_tile_rgba(const ubyte *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++, src++) { - TILE_PIXEL(p, j, i, 0) = - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = 0; - TILE_PIXEL(p, j, i, 3) = *src; - } - } -} - - -static void -a8_put_tile_rgba(ubyte *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - unsigned a; - a = TILE_PIXEL(p, j, i, 3); - *dst++ = (ubyte) a; - } - } -} - - - -/*** PIPE_FORMAT_R16_SNORM ***/ - -static void -r16_get_tile_rgba(const short *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++, src++) { - TILE_PIXEL(p, j, i, 0) = MAX2(src[0] >> 7, 0); - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = 0; - TILE_PIXEL(p, j, i, 3) = 255; - } - } -} - - -static void -r16_put_tile_rgba(short *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++, dst++) { - dst[0] = TILE_PIXEL(p, j, i, 0) << 7; - } - } -} - - -/*** PIPE_FORMAT_R16G16B16A16_SNORM ***/ - -static void -r16g16b16a16_get_tile_rgba(const short *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++, src += 4) { - TILE_PIXEL(p, j, i, 0) = src[0] >> 8; - TILE_PIXEL(p, j, i, 1) = src[1] >> 8; - TILE_PIXEL(p, j, i, 2) = src[2] >> 8; - TILE_PIXEL(p, j, i, 3) = src[3] >> 8; - } - } -} - - -static void -r16g16b16a16_put_tile_rgba(short *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++, dst += 4) { - dst[0] = TILE_PIXEL(p, j, i, 0) << 8; - dst[1] = TILE_PIXEL(p, j, i, 1) << 8; - dst[2] = TILE_PIXEL(p, j, i, 2) << 8; - dst[3] = TILE_PIXEL(p, j, i, 3) << 8; - } - } -} - - - -/*** PIPE_FORMAT_I8_UNORM ***/ - -static void -i8_get_tile_rgba(const ubyte *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++, src++) { - TILE_PIXEL(p, j, i, 0) = - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = - TILE_PIXEL(p, j, i, 3) = *src; - } - } -} - - -static void -i8_put_tile_rgba(ubyte *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - unsigned r; - r = TILE_PIXEL(p, j, i, 0); - *dst++ = (ubyte) r; - } - } -} - - -/*** PIPE_FORMAT_A8L8_UNORM ***/ - -static void -a8l8_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - ushort ra = *src++; - TILE_PIXEL(p, j, i, 0) = - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = ra & 0xff; - TILE_PIXEL(p, j, i, 3) = ra >> 8; - } - } -} - - -static void -a8l8_put_tile_rgba(ushort *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - unsigned r, a; - r = TILE_PIXEL(p, j, i, 0); - a = TILE_PIXEL(p, j, i, 3); - *dst++ = (a << 8) | r; - } - } -} - - - - -/*** PIPE_FORMAT_Z32_UNORM ***/ - -/** - * Return each Z value as four floats in [0,1]. - */ -static void -z32_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - uint8_t *p) -{ - const double scale = 1.0 / (double) 0xffffffff; - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - TILE_PIXEL(p, j, i, 0) = - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = - TILE_PIXEL(p, j, i, 3) = (float) (*src++ * scale); - } - } -} - - -/*** PIPE_FORMAT_S8Z24_UNORM ***/ - -/** - * Return Z component as four float in [0,1]. Stencil part ignored. - */ -static void -s8z24_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - uint8_t *p) -{ - const double scale = 1.0 / ((1 << 24) - 1); - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - TILE_PIXEL(p, j, i, 0) = - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = - TILE_PIXEL(p, j, i, 3) = (float) (scale * (*src++ & 0xffffff)); - } - } -} - - -/*** PIPE_FORMAT_Z24S8_UNORM ***/ - -/** - * Return Z component as four float in [0,1]. Stencil part ignored. - */ -static void -z24s8_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - uint8_t *p) -{ - const double scale = 1.0 / ((1 << 24) - 1); - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - TILE_PIXEL(p, j, i, 0) = - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = - TILE_PIXEL(p, j, i, 3) = (float) (scale * (*src++ >> 8)); - } - } -} - - -/*** PIPE_FORMAT_Z32_FLOAT ***/ - -/** - * Return each Z value as four floats in [0,1]. - */ -static void -z32f_get_tile_rgba(const float *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - TILE_PIXEL(p, j, i, 0) = - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = - TILE_PIXEL(p, j, i, 3) = *src++; - } - } -} - - -/*** PIPE_FORMAT_YCBCR / PIPE_FORMAT_YCBCR_REV ***/ - -/** - * Convert YCbCr (or YCrCb) to RGBA. - */ -static void -ycbcr_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - uint8_t *p, - boolean rev) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - /* do two texels at a time */ - for (j = 0; j < (w & ~1); j += 2, src += 2) { - const ushort t0 = src[0]; - const ushort t1 = src[1]; - const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */ - const ubyte y1 = (t1 >> 8) & 0xff; /* luminance */ - ubyte cb, cr; - float r, g, b; - - if (rev) { - cb = t1 & 0xff; /* chroma U */ - cr = t0 & 0xff; /* chroma V */ - } - else { - cb = t0 & 0xff; /* chroma U */ - cr = t1 & 0xff; /* chroma V */ - } - - /* even pixel: y0,cr,cb */ - r = 1.164f * (y0-16) + 1.596f * (cr-128); - g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128); - b = 1.164f * (y0-16) + 2.018f * (cb-128); - TILE_PIXEL(p, j, i, 0) = r; - TILE_PIXEL(p, j, i, 1) = g; - TILE_PIXEL(p, j, i, 2) = b; - TILE_PIXEL(p, j, i, 3) = 255; - - /* odd pixel: use y1,cr,cb */ - r = 1.164f * (y1-16) + 1.596f * (cr-128); - g = 1.164f * (y1-16) - 0.813f * (cr-128) - 0.391f * (cb-128); - b = 1.164f * (y1-16) + 2.018f * (cb-128); - TILE_PIXEL(p, j + 1, i, 0) = r; - TILE_PIXEL(p, j + 1, i, 1) = g; - TILE_PIXEL(p, j + 1, i, 2) = b; - TILE_PIXEL(p, j + 1, i, 3) = 255; - } - /* do the last texel */ - if (w & 1) { - const ushort t0 = src[0]; - const ushort t1 = src[1]; - const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */ - ubyte cb, cr; - float r, g, b; - - if (rev) { - cb = t1 & 0xff; /* chroma U */ - cr = t0 & 0xff; /* chroma V */ - } - else { - cb = t0 & 0xff; /* chroma U */ - cr = t1 & 0xff; /* chroma V */ - } - - /* even pixel: y0,cr,cb */ - r = 1.164f * (y0-16) + 1.596f * (cr-128); - g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128); - b = 1.164f * (y0-16) + 2.018f * (cb-128); - TILE_PIXEL(p, j, i, 0) = r; - TILE_PIXEL(p, j, i, 1) = g; - TILE_PIXEL(p, j, i, 2) = b; - TILE_PIXEL(p, j, i, 3) = 255; - } - } -} - - -static void -fake_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - TILE_PIXEL(p, j, i, 0) = - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = - TILE_PIXEL(p, j, i, 3) = (i ^ j) & 1 ? 255 : 0; - } - } -} - - -static void -lp_tile_raw_to_rgba_soa(enum pipe_format format, - void *src, - uint w, uint h, - uint8_t *p) -{ - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - a8r8g8b8_get_tile_rgba((unsigned *) src, w, h, p); - break; - case PIPE_FORMAT_X8R8G8B8_UNORM: - x8r8g8b8_get_tile_rgba((unsigned *) src, w, h, p); - break; - case PIPE_FORMAT_B8G8R8A8_UNORM: - b8g8r8a8_get_tile_rgba((unsigned *) src, w, h, p); - break; - case PIPE_FORMAT_A1R5G5B5_UNORM: - a1r5g5b5_get_tile_rgba((ushort *) src, w, h, p); - break; - case PIPE_FORMAT_A4R4G4B4_UNORM: - a4r4g4b4_get_tile_rgba((ushort *) src, w, h, p); - break; - case PIPE_FORMAT_R5G6B5_UNORM: - r5g6b5_get_tile_rgba((ushort *) src, w, h, p); - break; - case PIPE_FORMAT_L8_UNORM: - l8_get_tile_rgba((ubyte *) src, w, h, p); - break; - case PIPE_FORMAT_A8_UNORM: - a8_get_tile_rgba((ubyte *) src, w, h, p); - break; - case PIPE_FORMAT_I8_UNORM: - i8_get_tile_rgba((ubyte *) src, w, h, p); - break; - case PIPE_FORMAT_A8L8_UNORM: - a8l8_get_tile_rgba((ushort *) src, w, h, p); - break; - case PIPE_FORMAT_R16_SNORM: - r16_get_tile_rgba((short *) src, w, h, p); - break; - case PIPE_FORMAT_R16G16B16A16_SNORM: - r16g16b16a16_get_tile_rgba((short *) src, w, h, p); - break; - case PIPE_FORMAT_Z16_UNORM: - z16_get_tile_rgba((ushort *) src, w, h, p); - break; - case PIPE_FORMAT_Z32_UNORM: - z32_get_tile_rgba((unsigned *) src, w, h, p); - break; - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - s8z24_get_tile_rgba((unsigned *) src, w, h, p); - break; - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: - z24s8_get_tile_rgba((unsigned *) src, w, h, p); - break; - case PIPE_FORMAT_Z32_FLOAT: - z32f_get_tile_rgba((float *) src, w, h, p); - break; - case PIPE_FORMAT_YCBCR: - ycbcr_get_tile_rgba((ushort *) src, w, h, p, FALSE); - break; - case PIPE_FORMAT_YCBCR_REV: - ycbcr_get_tile_rgba((ushort *) src, w, h, p, TRUE); - break; - default: - debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(format)); - fake_get_tile_rgba(src, w, h, p); - } -} - - -void -lp_get_tile_rgba_soa(struct pipe_transfer *pt, - uint x, uint y, - uint8_t *p) -{ - uint w = TILE_SIZE, h = TILE_SIZE; - void *packed; - - if (pipe_clip_tile(x, y, &w, &h, pt)) - return; - - packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size); - - if (!packed) - return; - - if(pt->format == PIPE_FORMAT_YCBCR || pt->format == PIPE_FORMAT_YCBCR_REV) - assert((x & 1) == 0); - - pipe_get_tile_raw(pt, x, y, w, h, packed, 0); - - lp_tile_raw_to_rgba_soa(pt->format, packed, w, h, p); - - FREE(packed); -} - - -void -lp_put_tile_rgba_soa(struct pipe_transfer *pt, - uint x, uint y, - const uint8_t *p) -{ - uint w = TILE_SIZE, h = TILE_SIZE; - void *packed; - - if (pipe_clip_tile(x, y, &w, &h, pt)) - return; - - packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size); - - if (!packed) - return; - - switch (pt->format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p); - break; - case PIPE_FORMAT_X8R8G8B8_UNORM: - x8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p); - break; - case PIPE_FORMAT_B8G8R8A8_UNORM: - b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p); - break; - case PIPE_FORMAT_A1R5G5B5_UNORM: - a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p); - break; - case PIPE_FORMAT_R5G6B5_UNORM: - r5g6b5_put_tile_rgba((ushort *) packed, w, h, p); - break; - case PIPE_FORMAT_R8G8B8A8_UNORM: - assert(0); - break; - case PIPE_FORMAT_A4R4G4B4_UNORM: - a4r4g4b4_put_tile_rgba((ushort *) packed, w, h, p); - break; - case PIPE_FORMAT_L8_UNORM: - l8_put_tile_rgba((ubyte *) packed, w, h, p); - break; - case PIPE_FORMAT_A8_UNORM: - a8_put_tile_rgba((ubyte *) packed, w, h, p); - break; - case PIPE_FORMAT_I8_UNORM: - i8_put_tile_rgba((ubyte *) packed, w, h, p); - break; - case PIPE_FORMAT_A8L8_UNORM: - a8l8_put_tile_rgba((ushort *) packed, w, h, p); - break; - case PIPE_FORMAT_R16_SNORM: - r16_put_tile_rgba((short *) packed, w, h, p); - break; - case PIPE_FORMAT_R16G16B16A16_SNORM: - r16g16b16a16_put_tile_rgba((short *) packed, w, h, p); - break; - case PIPE_FORMAT_Z16_UNORM: - /*z16_put_tile_rgba((ushort *) packed, w, h, p);*/ - break; - case PIPE_FORMAT_Z32_UNORM: - /*z32_put_tile_rgba((unsigned *) packed, w, h, p);*/ - break; - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - /*s8z24_put_tile_rgba((unsigned *) packed, w, h, p);*/ - break; - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: - /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p);*/ - break; - default: - debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(pt->format)); - } - - pipe_put_tile_raw(pt, x, y, w, h, packed, 0); - - FREE(packed); -} - - diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index 3d8c703b73..040b01865d 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -64,14 +64,18 @@ tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH]; void -lp_get_tile_rgba_soa(struct pipe_transfer *pt, - uint x, uint y, - uint8_t *p); +lp_tile_read_4ub(enum pipe_format format, + uint8_t *dst, + const void *src, unsigned src_stride, + unsigned x, unsigned y, unsigned w, unsigned h); + void -lp_put_tile_rgba_soa(struct pipe_transfer *pt, - uint x, uint y, - const uint8_t *p); +lp_tile_write_4ub(enum pipe_format format, + const uint8_t *src, + void *dst, unsigned dst_stride, + unsigned x, unsigned y, unsigned w, unsigned h); + #ifdef __cplusplus diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py new file mode 100644 index 0000000000..004c5c979e --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python + +''' +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Pixel format accessor functions. + * + * @author Jose Fonseca + */ +''' + + +import sys +import os.path + +sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '../../auxiliary/util')) + +from u_format_access import * + + +def generate_format_read(format, dst_type, dst_native_type, dst_suffix): + '''Generate the function to read pixels from a particular format''' + + name = short_name(format) + + src_native_type = native_type(format) + + print 'static void' + print 'lp_tile_%s_read_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, dst_suffix, dst_native_type) + print '{' + print ' unsigned x, y;' + print ' const uint8_t *src_row = src + y0*src_stride;' + print ' for (y = 0; y < h; ++y) {' + print ' const %s *src_pixel = (const %s *)(src_row + x0*%u);' % (src_native_type, src_native_type, format.stride()) + print ' for (x = 0; x < w; ++x) {' + + names = ['']*4 + if format.colorspace == 'rgb': + for i in range(4): + swizzle = format.out_swizzle[i] + if swizzle < 4: + names[swizzle] += 'rgba'[i] + elif format.colorspace == 'zs': + swizzle = format.out_swizzle[0] + if swizzle < 4: + names[swizzle] = 'z' + else: + assert False + else: + assert False + + if format.layout == ARITH: + print ' %s pixel = *src_pixel++;' % src_native_type + shift = 0; + for i in range(4): + src_type = format.in_types[i] + width = src_type.size + if names[i]: + value = 'pixel' + mask = (1 << width) - 1 + if shift: + value = '(%s >> %u)' % (value, shift) + if shift + width < format.block_size(): + value = '(%s & 0x%x)' % (value, mask) + value = conversion_expr(src_type, dst_type, dst_native_type, value) + print ' %s %s = %s;' % (dst_native_type, names[i], value) + shift += width + elif format.layout == ARRAY: + for i in range(4): + src_type = format.in_types[i] + if names[i]: + value = '(*src_pixel++)' + value = conversion_expr(src_type, dst_type, dst_native_type, value) + print ' %s %s = %s;' % (dst_native_type, names[i], value) + else: + assert False + + for i in range(4): + if format.colorspace == 'rgb': + swizzle = format.out_swizzle[i] + if swizzle < 4: + value = names[swizzle] + elif swizzle == SWIZZLE_0: + value = '0' + elif swizzle == SWIZZLE_1: + value = '1' + else: + assert False + elif format.colorspace == 'zs': + if i < 3: + value = 'z' + else: + value = '1' + else: + assert False + print ' TILE_PIXEL(dst, x, y, %u) = %s; /* %s */' % (i, value, 'rgba'[i]) + + print ' }' + print ' src_row += src_stride;' + print ' }' + print '}' + print + + +def generate_format_write(format, src_type, src_native_type, src_suffix): + '''Generate the function to write pixels to a particular format''' + + name = short_name(format) + + dst_native_type = native_type(format) + + print 'static void' + print 'lp_tile_%s_write_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type) + print '{' + print ' unsigned x, y;' + print ' uint8_t *dst_row = dst + y0*dst_stride;' + print ' for (y = 0; y < h; ++y) {' + print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride()) + print ' for (x = 0; x < w; ++x) {' + + inv_swizzle = [None]*4 + if format.colorspace == 'rgb': + for i in range(4): + swizzle = format.out_swizzle[i] + if swizzle < 4: + inv_swizzle[swizzle] = i + elif format.colorspace == 'zs': + swizzle = format.out_swizzle[0] + if swizzle < 4: + inv_swizzle[swizzle] = 0 + else: + assert False + + if format.layout == ARITH: + print ' %s pixel = 0;' % dst_native_type + shift = 0; + for i in range(4): + dst_type = format.in_types[i] + width = dst_type.size + if inv_swizzle[i] is not None: + value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] + value = conversion_expr(src_type, dst_type, dst_native_type, value) + if shift: + value = '(%s << %u)' % (value, shift) + print ' pixel |= %s;' % value + shift += width + print ' *dst_pixel++ = pixel;' + elif format.layout == ARRAY: + for i in range(4): + dst_type = format.in_types[i] + if inv_swizzle[i] is not None: + value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] + value = conversion_expr(src_type, dst_type, dst_native_type, value) + print ' *dst_pixel++ = %s;' % value + else: + assert False + + print ' }' + print ' dst_row += dst_stride;' + print ' }' + print '}' + print + + +def generate_read(formats, dst_type, dst_native_type, dst_suffix): + '''Generate the dispatch function to read pixels from any format''' + + for format in formats: + if is_format_supported(format): + generate_format_read(format, dst_type, dst_native_type, dst_suffix) + + print 'void' + print 'lp_tile_read_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type) + print '{' + print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % dst_native_type + print ' switch(format) {' + for format in formats: + if is_format_supported(format): + print ' case %s:' % format.name + print ' func = &lp_tile_%s_read_%s;' % (short_name(format), dst_suffix) + print ' break;' + print ' default:' + print ' debug_printf("unsupported format\\n");' + print ' return;' + print ' }' + print ' func(dst, (const uint8_t *)src, src_stride, x, y, w, h);' + print '}' + print + + +def generate_write(formats, src_type, src_native_type, src_suffix): + '''Generate the dispatch function to write pixels to any format''' + + for format in formats: + if is_format_supported(format): + generate_format_write(format, src_type, src_native_type, src_suffix) + + print 'void' + print 'lp_tile_write_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type) + + print '{' + print ' void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % src_native_type + print ' switch(format) {' + for format in formats: + if is_format_supported(format): + print ' case %s:' % format.name + print ' func = &lp_tile_%s_write_%s;' % (short_name(format), src_suffix) + print ' break;' + print ' default:' + print ' debug_printf("unsupported format\\n");' + print ' return;' + print ' }' + print ' func(src, (uint8_t *)dst, dst_stride, x, y, w, h);' + print '}' + print + + +def main(): + formats = [] + for arg in sys.argv[1:]: + formats.extend(parse(arg)) + + print '/* This file is autogenerated by lp_tile_soa.py from u_format.csv. Do not edit directly. */' + print + # This will print the copyright message on the top of this file + print __doc__.strip() + print + print '#include "pipe/p_compiler.h"' + print '#include "util/u_format.h"' + print '#include "util/u_math.h"' + print '#include "lp_tile_soa.h"' + print + print 'const unsigned char' + print 'tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = {' + print ' { 0, 1, 4, 5, 8, 9, 12, 13},' + print ' { 2, 3, 6, 7, 10, 11, 14, 15}' + print '};' + print + + generate_clamp() + + type = Type(UNSIGNED, True, 8) + native_type = 'uint8_t' + suffix = '4ub' + + generate_read(formats, type, native_type, suffix) + generate_write(formats, type, native_type, suffix) + + +if __name__ == '__main__': + main() -- cgit v1.2.3 From 77ef7050587bba43c219e9d22170237898b2bb23 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 4 Oct 2009 13:25:24 +0100 Subject: llvmpipe: Ensure tile cache transfers are mapped before flushing it. --- src/gallium/drivers/llvmpipe/lp_flush.c | 8 ++++++-- src/gallium/drivers/llvmpipe/lp_state_surface.c | 1 + src/gallium/drivers/llvmpipe/lp_tile_cache.c | 2 ++ 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index b5c1c95bb7..cd8381fe30 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -58,8 +58,10 @@ llvmpipe_flush( struct pipe_context *pipe, * in the hope that a later clear will wipe them out. */ for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) - if (llvmpipe->cbuf_cache[i]) + if (llvmpipe->cbuf_cache[i]) { + lp_tile_cache_map_transfers(llvmpipe->cbuf_cache[i]); lp_flush_tile_cache(llvmpipe->cbuf_cache[i]); + } /* Need this call for hardware buffers before swapbuffers. * @@ -71,8 +73,10 @@ llvmpipe_flush( struct pipe_context *pipe, } else if (flags & PIPE_FLUSH_RENDER_CACHE) { for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) - if (llvmpipe->cbuf_cache[i]) + if (llvmpipe->cbuf_cache[i]) { + lp_tile_cache_map_transfers(llvmpipe->cbuf_cache[i]); lp_flush_tile_cache(llvmpipe->cbuf_cache[i]); + } /* FIXME: untile zsbuf! */ diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index 2c29144c03..c06ce8b75c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -53,6 +53,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, /* check if changing cbuf */ if (lp->framebuffer.cbufs[i] != fb->cbufs[i]) { /* flush old */ + lp_tile_cache_map_transfers(lp->cbuf_cache[i]); lp_flush_tile_cache(lp->cbuf_cache[i]); /* assign new */ diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c index 68d3fa3282..ec3e002d62 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c @@ -236,6 +236,8 @@ lp_flush_tile_cache(struct llvmpipe_tile_cache *tc) if(!pt) return; + assert(tc->transfer_map); + /* push the tile to all positions marked as clear */ for (y = 0; y < pt->height; y += TILE_SIZE) { for (x = 0; x < pt->width; x += TILE_SIZE) { -- cgit v1.2.3 From 7bfc3172e88cc7ad8be9ab81de56f7e263c15824 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 5 Oct 2009 13:41:33 +1000 Subject: nv50: support PIPE_FORMAT_X8R8G8B8_UNORM Signed-off-by: Ben Skeggs --- src/gallium/drivers/nv50/nv50_screen.c | 2 ++ src/gallium/drivers/nv50/nv50_state_validate.c | 3 +++ src/gallium/drivers/nv50/nv50_tex.c | 7 +++++++ 3 files changed, 12 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 3b08e1b89f..dd7baecba7 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -35,6 +35,7 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, { if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { switch (format) { + case PIPE_FORMAT_X8R8G8B8_UNORM: case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: return TRUE; @@ -55,6 +56,7 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, } else { switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: case PIPE_FORMAT_A1R5G5B5_UNORM: case PIPE_FORMAT_A4R4G4B4_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 4ed76973c4..867b1ea872 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -57,6 +57,9 @@ nv50_state_validate_fb(struct nv50_context *nv50) case PIPE_FORMAT_A8R8G8B8_UNORM: so_data(so, NV50TCL_RT_FORMAT_A8R8G8B8_UNORM); break; + case PIPE_FORMAT_X8R8G8B8_UNORM: + so_data(so, NV50TCL_RT_FORMAT_X8R8G8B8_UNORM); + break; case PIPE_FORMAT_R5G6B5_UNORM: so_data(so, NV50TCL_RT_FORMAT_R5G6B5_UNORM); break; diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index 033cb50c11..21825a0411 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -37,6 +37,13 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so, NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | NV50TIC_0_0_FMT_8_8_8_8); break; + case PIPE_FORMAT_X8R8G8B8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8_8_8_8); + break; case PIPE_FORMAT_A1R5G5B5_UNORM: so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM | -- cgit v1.2.3 From 1f39d59a2996e2acf6893a8dd1a0293bd8790cc2 Mon Sep 17 00:00:00 2001 From: Joakim Sindholt Date: Mon, 5 Oct 2009 19:25:04 +0200 Subject: r300g: fix scons build So I didn't touch r300compiler, but r300g now compiles after having declarations and code untangled. As nha so gently points out, we shouldn't have to do this just to comply with MSVC compilers. --- src/gallium/drivers/r300/SConscript | 7 ++++++- src/gallium/drivers/r300/r300_debug.c | 7 ++++--- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 7 ++++--- src/gallium/drivers/r300/r300_vs.c | 6 +++--- src/mesa/drivers/dri/r300/compiler/SConscript | 30 +++++++++++++++++++++++++++ 5 files changed, 47 insertions(+), 10 deletions(-) create mode 100755 src/mesa/drivers/dri/r300/compiler/SConscript (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 493d7b28bc..b4c8ba2015 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -1,6 +1,10 @@ Import('*') +r300compiler = SConscript('#/src/mesa/drivers/dri/r300/compiler/SConscript') + env = env.Clone() +# add the paths for r300compiler +env.Append(CPPPATH = ['#/src/mesa/drivers/dri/r300/compiler', '#/include', '#/src/mesa']) r300 = env.ConvenienceLibrary( target = 'r300', @@ -23,7 +27,8 @@ r300 = env.ConvenienceLibrary( 'r300_vs.c', 'r300_surface.c', 'r300_texture.c', - ]) + 'r300_tgsi_to_rc.c', + ] + r300compiler) + r300compiler Export('r300') diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 15308dda1d..85d69c0747 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -48,6 +48,8 @@ void r300_init_debug(struct r300_context * ctx) { const char * options = debug_get_option("RADEON_DEBUG", 0); boolean printhint = false; + size_t length; + struct debug_option * opt; if (options) { while(*options) { @@ -56,8 +58,7 @@ void r300_init_debug(struct r300_context * ctx) continue; } - size_t length = strcspn(options, " ,"); - struct debug_option * opt; + length = strcspn(options, " ,"); for(opt = debug_options; opt->name; ++opt) { if (!strncmp(options, opt->name, length)) { @@ -81,7 +82,7 @@ void r300_init_debug(struct r300_context * ctx) if (printhint || ctx->debug & DBG_HELP) { debug_printf("You can enable debug output by setting the RADEON_DEBUG environment variable\n" "to a comma-separated list of debug options. Available options are:\n"); - for(struct debug_option * opt = debug_options; opt->name; ++opt) { + for(opt = debug_options; opt->name; ++opt) { debug_printf(" %s: %s\n", opt->name, opt->description); } } diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 0913ca1bd5..4534a6dd80 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -257,12 +257,13 @@ static void transform_texture(struct rc_instruction * dst, struct tgsi_instructi static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_instruction * src) { + struct rc_instruction * dst; + int i; + if (src->Instruction.Opcode == TGSI_OPCODE_END) return; - struct rc_instruction * dst = rc_insert_new_instruction(ttr->compiler, ttr->compiler->Program.Instructions.Prev); - int i; - + dst = rc_insert_new_instruction(ttr->compiler, ttr->compiler->Program.Instructions.Prev); dst->I.Opcode = translate_opcode(src->Instruction.Opcode); dst->I.SaturateMode = translate_saturate(src->Instruction.Saturate); diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 12a6e37be6..8460cfaf51 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -35,6 +35,8 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) { struct r300_vertex_shader * vs = c->UserData; struct tgsi_shader_info* info = &vs->info; + struct tgsi_parse_context parser; + struct tgsi_full_declaration * decl; boolean pointsize = false; int out_colors = 0; int colors = 0; @@ -62,8 +64,6 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) } } - struct tgsi_parse_context parser; - tgsi_parse_init(&parser, vs->state.tokens); while (!tgsi_parse_end_of_tokens(&parser)) { @@ -72,7 +72,7 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) if (parser.FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION) continue; - struct tgsi_full_declaration * decl = &parser.FullToken.FullDeclaration; + decl = &parser.FullToken.FullDeclaration; if (decl->Declaration.File != TGSI_FILE_OUTPUT) continue; diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript new file mode 100755 index 0000000000..48fd65fb71 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/SConscript @@ -0,0 +1,30 @@ +Import('*') + +env = env.Clone() +env.Append(CPPPATH = '#/include') +env.Append(CPPPATH = '#/src/mesa') + +# temporary fix +env['CFLAGS'] = str(env['CFLAGS']).replace('-Werror=declaration-after-statement', '') + +r300compiler = env.ConvenienceLibrary( + target = 'r300compiler', + source = [ + 'radeon_code.c', + 'radeon_compiler.c', + 'radeon_nqssadce.c', + 'radeon_program.c', + 'radeon_program_alu.c', + 'radeon_program_pair.c', + 'r3xx_fragprog.c', + 'r300_fragprog.c', + 'r300_fragprog_swizzle.c', + 'r300_fragprog_emit.c', + 'r500_fragprog.c', + 'r500_fragprog_emit.c', + 'r3xx_vertprog.c', + 'r3xx_vertprog_dump.c', + 'memory_pool.c', + ]) + +Return('r300compiler') -- cgit v1.2.3 From c4b821a4c64d75d944653d665bede946763ed95b Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Tue, 29 Sep 2009 10:22:15 -0700 Subject: i965g: Drop i965simple The driver never work with real hardware and has bitrotted for quite some time now, might as well drop it. If somebody wants to look at it just use git. --- SConstruct | 2 +- src/gallium/drivers/i965simple/Makefile | 52 - src/gallium/drivers/i965simple/SConscript | 54 - src/gallium/drivers/i965simple/brw_batch.h | 59 - src/gallium/drivers/i965simple/brw_blit.c | 218 ---- src/gallium/drivers/i965simple/brw_blit.h | 33 - src/gallium/drivers/i965simple/brw_cc.c | 269 ---- src/gallium/drivers/i965simple/brw_clip.c | 206 --- src/gallium/drivers/i965simple/brw_clip.h | 170 --- src/gallium/drivers/i965simple/brw_clip_line.c | 245 ---- src/gallium/drivers/i965simple/brw_clip_point.c | 47 - src/gallium/drivers/i965simple/brw_clip_state.c | 93 -- src/gallium/drivers/i965simple/brw_clip_tri.c | 566 -------- src/gallium/drivers/i965simple/brw_clip_unfilled.c | 477 ------- src/gallium/drivers/i965simple/brw_clip_util.c | 351 ----- src/gallium/drivers/i965simple/brw_context.c | 139 -- src/gallium/drivers/i965simple/brw_context.h | 684 ---------- src/gallium/drivers/i965simple/brw_curbe.c | 369 ------ src/gallium/drivers/i965simple/brw_defines.h | 870 ------------- src/gallium/drivers/i965simple/brw_draw.c | 226 ---- src/gallium/drivers/i965simple/brw_draw.h | 55 - src/gallium/drivers/i965simple/brw_draw_upload.c | 300 ----- src/gallium/drivers/i965simple/brw_eu.c | 130 -- src/gallium/drivers/i965simple/brw_eu.h | 888 ------------- src/gallium/drivers/i965simple/brw_eu_debug.c | 90 -- src/gallium/drivers/i965simple/brw_eu_emit.c | 1080 ---------------- src/gallium/drivers/i965simple/brw_eu_util.c | 126 -- src/gallium/drivers/i965simple/brw_flush.c | 73 -- src/gallium/drivers/i965simple/brw_gs.c | 196 --- src/gallium/drivers/i965simple/brw_gs.h | 75 -- src/gallium/drivers/i965simple/brw_gs_emit.c | 148 --- src/gallium/drivers/i965simple/brw_gs_state.c | 90 -- src/gallium/drivers/i965simple/brw_misc_state.c | 488 ------- src/gallium/drivers/i965simple/brw_reg.h | 76 -- src/gallium/drivers/i965simple/brw_screen.c | 244 ---- src/gallium/drivers/i965simple/brw_screen.h | 68 - src/gallium/drivers/i965simple/brw_sf.c | 351 ----- src/gallium/drivers/i965simple/brw_sf.h | 122 -- src/gallium/drivers/i965simple/brw_sf_emit.c | 382 ------ src/gallium/drivers/i965simple/brw_sf_state.c | 181 --- src/gallium/drivers/i965simple/brw_shader_info.c | 48 - src/gallium/drivers/i965simple/brw_state.c | 469 ------- src/gallium/drivers/i965simple/brw_state.h | 151 --- src/gallium/drivers/i965simple/brw_state_batch.c | 113 -- src/gallium/drivers/i965simple/brw_state_cache.c | 443 ------- src/gallium/drivers/i965simple/brw_state_pool.c | 138 -- src/gallium/drivers/i965simple/brw_state_upload.c | 202 --- src/gallium/drivers/i965simple/brw_structs.h | 1348 -------------------- src/gallium/drivers/i965simple/brw_surface.c | 126 -- src/gallium/drivers/i965simple/brw_tex_layout.c | 380 ------ src/gallium/drivers/i965simple/brw_tex_layout.h | 44 - src/gallium/drivers/i965simple/brw_urb.c | 186 --- src/gallium/drivers/i965simple/brw_util.c | 104 -- src/gallium/drivers/i965simple/brw_util.h | 43 - src/gallium/drivers/i965simple/brw_vs.c | 120 -- src/gallium/drivers/i965simple/brw_vs.h | 82 -- src/gallium/drivers/i965simple/brw_vs_emit.c | 1330 ------------------- src/gallium/drivers/i965simple/brw_vs_state.c | 103 -- src/gallium/drivers/i965simple/brw_winsys.h | 209 --- src/gallium/drivers/i965simple/brw_wm.c | 209 --- src/gallium/drivers/i965simple/brw_wm.h | 142 --- src/gallium/drivers/i965simple/brw_wm_decl.c | 392 ------ src/gallium/drivers/i965simple/brw_wm_glsl.c | 1076 ---------------- src/gallium/drivers/i965simple/brw_wm_iz.c | 214 ---- .../drivers/i965simple/brw_wm_sampler_state.c | 275 ---- src/gallium/drivers/i965simple/brw_wm_state.c | 195 --- .../drivers/i965simple/brw_wm_surface_state.c | 305 ----- src/gallium/winsys/xlib/SConscript | 9 - src/gallium/winsys/xlib/xlib.c | 9 - src/gallium/winsys/xlib/xlib.h | 1 - src/gallium/winsys/xlib/xlib_brw.h | 30 - src/gallium/winsys/xlib/xlib_brw_aub.c | 399 ------ src/gallium/winsys/xlib/xlib_brw_aub.h | 114 -- src/gallium/winsys/xlib/xlib_brw_context.c | 209 --- src/gallium/winsys/xlib/xlib_brw_screen.c | 469 ------- 75 files changed, 1 insertion(+), 19979 deletions(-) delete mode 100644 src/gallium/drivers/i965simple/Makefile delete mode 100644 src/gallium/drivers/i965simple/SConscript delete mode 100644 src/gallium/drivers/i965simple/brw_batch.h delete mode 100644 src/gallium/drivers/i965simple/brw_blit.c delete mode 100644 src/gallium/drivers/i965simple/brw_blit.h delete mode 100644 src/gallium/drivers/i965simple/brw_cc.c delete mode 100644 src/gallium/drivers/i965simple/brw_clip.c delete mode 100644 src/gallium/drivers/i965simple/brw_clip.h delete mode 100644 src/gallium/drivers/i965simple/brw_clip_line.c delete mode 100644 src/gallium/drivers/i965simple/brw_clip_point.c delete mode 100644 src/gallium/drivers/i965simple/brw_clip_state.c delete mode 100644 src/gallium/drivers/i965simple/brw_clip_tri.c delete mode 100644 src/gallium/drivers/i965simple/brw_clip_unfilled.c delete mode 100644 src/gallium/drivers/i965simple/brw_clip_util.c delete mode 100644 src/gallium/drivers/i965simple/brw_context.c delete mode 100644 src/gallium/drivers/i965simple/brw_context.h delete mode 100644 src/gallium/drivers/i965simple/brw_curbe.c delete mode 100644 src/gallium/drivers/i965simple/brw_defines.h delete mode 100644 src/gallium/drivers/i965simple/brw_draw.c delete mode 100644 src/gallium/drivers/i965simple/brw_draw.h delete mode 100644 src/gallium/drivers/i965simple/brw_draw_upload.c delete mode 100644 src/gallium/drivers/i965simple/brw_eu.c delete mode 100644 src/gallium/drivers/i965simple/brw_eu.h delete mode 100644 src/gallium/drivers/i965simple/brw_eu_debug.c delete mode 100644 src/gallium/drivers/i965simple/brw_eu_emit.c delete mode 100644 src/gallium/drivers/i965simple/brw_eu_util.c delete mode 100644 src/gallium/drivers/i965simple/brw_flush.c delete mode 100644 src/gallium/drivers/i965simple/brw_gs.c delete mode 100644 src/gallium/drivers/i965simple/brw_gs.h delete mode 100644 src/gallium/drivers/i965simple/brw_gs_emit.c delete mode 100644 src/gallium/drivers/i965simple/brw_gs_state.c delete mode 100644 src/gallium/drivers/i965simple/brw_misc_state.c delete mode 100644 src/gallium/drivers/i965simple/brw_reg.h delete mode 100644 src/gallium/drivers/i965simple/brw_screen.c delete mode 100644 src/gallium/drivers/i965simple/brw_screen.h delete mode 100644 src/gallium/drivers/i965simple/brw_sf.c delete mode 100644 src/gallium/drivers/i965simple/brw_sf.h delete mode 100644 src/gallium/drivers/i965simple/brw_sf_emit.c delete mode 100644 src/gallium/drivers/i965simple/brw_sf_state.c delete mode 100644 src/gallium/drivers/i965simple/brw_shader_info.c delete mode 100644 src/gallium/drivers/i965simple/brw_state.c delete mode 100644 src/gallium/drivers/i965simple/brw_state.h delete mode 100644 src/gallium/drivers/i965simple/brw_state_batch.c delete mode 100644 src/gallium/drivers/i965simple/brw_state_cache.c delete mode 100644 src/gallium/drivers/i965simple/brw_state_pool.c delete mode 100644 src/gallium/drivers/i965simple/brw_state_upload.c delete mode 100644 src/gallium/drivers/i965simple/brw_structs.h delete mode 100644 src/gallium/drivers/i965simple/brw_surface.c delete mode 100644 src/gallium/drivers/i965simple/brw_tex_layout.c delete mode 100644 src/gallium/drivers/i965simple/brw_tex_layout.h delete mode 100644 src/gallium/drivers/i965simple/brw_urb.c delete mode 100644 src/gallium/drivers/i965simple/brw_util.c delete mode 100644 src/gallium/drivers/i965simple/brw_util.h delete mode 100644 src/gallium/drivers/i965simple/brw_vs.c delete mode 100644 src/gallium/drivers/i965simple/brw_vs.h delete mode 100644 src/gallium/drivers/i965simple/brw_vs_emit.c delete mode 100644 src/gallium/drivers/i965simple/brw_vs_state.c delete mode 100644 src/gallium/drivers/i965simple/brw_winsys.h delete mode 100644 src/gallium/drivers/i965simple/brw_wm.c delete mode 100644 src/gallium/drivers/i965simple/brw_wm.h delete mode 100644 src/gallium/drivers/i965simple/brw_wm_decl.c delete mode 100644 src/gallium/drivers/i965simple/brw_wm_glsl.c delete mode 100644 src/gallium/drivers/i965simple/brw_wm_iz.c delete mode 100644 src/gallium/drivers/i965simple/brw_wm_sampler_state.c delete mode 100644 src/gallium/drivers/i965simple/brw_wm_state.c delete mode 100644 src/gallium/drivers/i965simple/brw_wm_surface_state.c delete mode 100644 src/gallium/winsys/xlib/xlib_brw.h delete mode 100644 src/gallium/winsys/xlib/xlib_brw_aub.c delete mode 100644 src/gallium/winsys/xlib/xlib_brw_aub.h delete mode 100644 src/gallium/winsys/xlib/xlib_brw_context.c delete mode 100644 src/gallium/winsys/xlib/xlib_brw_screen.c (limited to 'src/gallium/drivers') diff --git a/SConstruct b/SConstruct index e9baab0947..9ef7c01f0b 100644 --- a/SConstruct +++ b/SConstruct @@ -46,7 +46,7 @@ common.AddOptions(opts) opts.Add(ListVariable('statetrackers', 'state trackers to build', default_statetrackers, ['mesa', 'python', 'xorg'])) opts.Add(ListVariable('drivers', 'pipe drivers to build', default_drivers, - ['softpipe', 'failover', 'i915simple', 'i965simple', 'cell', 'trace', 'r300', 'identity', 'llvmpipe'])) + ['softpipe', 'failover', 'i915simple', 'cell', 'trace', 'r300', 'identity', 'llvmpipe'])) opts.Add(ListVariable('winsys', 'winsys drivers to build', default_winsys, ['xlib', 'intel', 'gdi', 'radeon'])) diff --git a/src/gallium/drivers/i965simple/Makefile b/src/gallium/drivers/i965simple/Makefile deleted file mode 100644 index 19182afa75..0000000000 --- a/src/gallium/drivers/i965simple/Makefile +++ /dev/null @@ -1,52 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = i965simple - -C_SOURCES = \ - brw_blit.c \ - brw_flush.c \ - brw_screen.c \ - brw_surface.c \ - brw_cc.c \ - brw_clip.c \ - brw_clip_line.c \ - brw_clip_point.c \ - brw_clip_state.c \ - brw_clip_tri.c \ - brw_clip_util.c \ - brw_context.c \ - brw_curbe.c \ - brw_draw.c \ - brw_draw_upload.c \ - brw_eu.c \ - brw_eu_debug.c \ - brw_eu_emit.c \ - brw_eu_util.c \ - brw_gs.c \ - brw_gs_emit.c \ - brw_gs_state.c \ - brw_misc_state.c \ - brw_sf.c \ - brw_sf_emit.c \ - brw_sf_state.c \ - brw_state.c \ - brw_state_batch.c \ - brw_state_cache.c \ - brw_state_pool.c \ - brw_state_upload.c \ - brw_tex_layout.c \ - brw_urb.c \ - brw_util.c \ - brw_vs.c \ - brw_vs_emit.c \ - brw_vs_state.c \ - brw_wm.c \ - brw_wm_iz.c \ - brw_wm_decl.c \ - brw_wm_glsl.c \ - brw_wm_sampler_state.c \ - brw_wm_state.c \ - brw_wm_surface_state.c - -include ../../Makefile.template diff --git a/src/gallium/drivers/i965simple/SConscript b/src/gallium/drivers/i965simple/SConscript deleted file mode 100644 index 43fc2a4005..0000000000 --- a/src/gallium/drivers/i965simple/SConscript +++ /dev/null @@ -1,54 +0,0 @@ -Import('*') - -env = env.Clone() - -i965simple = env.ConvenienceLibrary( - target = 'i965simple', - source = [ - 'brw_blit.c', - 'brw_cc.c', - 'brw_clip.c', - 'brw_clip_line.c', - 'brw_clip_point.c', - 'brw_clip_state.c', - 'brw_clip_tri.c', - 'brw_clip_util.c', - 'brw_context.c', - 'brw_curbe.c', - 'brw_draw.c', - 'brw_draw_upload.c', - 'brw_eu.c', - 'brw_eu_debug.c', - 'brw_eu_emit.c', - 'brw_eu_util.c', - 'brw_flush.c', - 'brw_gs.c', - 'brw_gs_emit.c', - 'brw_gs_state.c', - 'brw_misc_state.c', - 'brw_screen.c', - 'brw_sf.c', - 'brw_sf_emit.c', - 'brw_sf_state.c', - 'brw_state.c', - 'brw_state_batch.c', - 'brw_state_cache.c', - 'brw_state_pool.c', - 'brw_state_upload.c', - 'brw_surface.c', - 'brw_tex_layout.c', - 'brw_urb.c', - 'brw_util.c', - 'brw_vs.c', - 'brw_vs_emit.c', - 'brw_vs_state.c', - 'brw_wm.c', - 'brw_wm_decl.c', - 'brw_wm_glsl.c', - 'brw_wm_iz.c', - 'brw_wm_sampler_state.c', - 'brw_wm_state.c', - 'brw_wm_surface_state.c', - ]) - -Export('i965simple') diff --git a/src/gallium/drivers/i965simple/brw_batch.h b/src/gallium/drivers/i965simple/brw_batch.h deleted file mode 100644 index 5f5932a488..0000000000 --- a/src/gallium/drivers/i965simple/brw_batch.h +++ /dev/null @@ -1,59 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef BRW_BATCH_H -#define BRW_BATCH_H - -#include "brw_winsys.h" - -#define BATCH_LOCALS - -#define INTEL_BATCH_NO_CLIPRECTS 0x1 -#define INTEL_BATCH_CLIPRECTS 0x2 - -#define BEGIN_BATCH( dwords, relocs ) \ - brw->winsys->batch_start(brw->winsys, dwords, relocs) - -#define OUT_BATCH( dword ) \ - brw->winsys->batch_dword(brw->winsys, dword) - -#define OUT_RELOC( buf, flags, delta ) \ - brw->winsys->batch_reloc(brw->winsys, buf, flags, delta) - -#define ADVANCE_BATCH() \ - brw->winsys->batch_end( brw->winsys ) - -/* XXX: this is bogus - need proper handling for out-of-memory in batchbuffer. - */ -#define FLUSH_BATCH(fence) do { \ - brw->winsys->batch_flush(brw->winsys, fence); \ - brw->hardware_dirty = ~0; \ -} while (0) - -#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->winsys, (s), sizeof(*(s))) - -#endif diff --git a/src/gallium/drivers/i965simple/brw_blit.c b/src/gallium/drivers/i965simple/brw_blit.c deleted file mode 100644 index 4d11f8d2ab..0000000000 --- a/src/gallium/drivers/i965simple/brw_blit.c +++ /dev/null @@ -1,218 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include -#include - -#include "brw_batch.h" -#include "brw_blit.h" -#include "brw_context.h" -#include "brw_reg.h" - -#include "pipe/p_context.h" -#include "pipe/internal/p_winsys_screen.h" - -#define FILE_DEBUG_FLAG DEBUG_BLIT - -void brw_fill_blit(struct brw_context *brw, - unsigned cpp, - short dst_pitch, - struct pipe_buffer *dst_buffer, - unsigned dst_offset, - boolean dst_tiled, - short x, short y, - short w, short h, - unsigned color) -{ - unsigned BR13, CMD; - BATCH_LOCALS; - - dst_pitch *= cpp; - - switch(cpp) { - case 1: - case 2: - case 3: - BR13 = (0xF0 << 16) | (1<<24); - CMD = XY_COLOR_BLT_CMD; - break; - case 4: - BR13 = (0xF0 << 16) | (1<<24) | (1<<25); - CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; - break; - default: - return; - } - - if (dst_tiled) { - CMD |= XY_DST_TILED; - dst_pitch /= 4; - } - - BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH( CMD ); - OUT_BATCH( dst_pitch | BR13 ); - OUT_BATCH( (y << 16) | x ); - OUT_BATCH( ((y+h) << 16) | (x+w) ); - OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, dst_offset ); - OUT_BATCH( color ); - ADVANCE_BATCH(); -} - -static unsigned translate_raster_op(unsigned logicop) -{ - switch(logicop) { - case PIPE_LOGICOP_CLEAR: return 0x00; - case PIPE_LOGICOP_AND: return 0x88; - case PIPE_LOGICOP_AND_REVERSE: return 0x44; - case PIPE_LOGICOP_COPY: return 0xCC; - case PIPE_LOGICOP_AND_INVERTED: return 0x22; - case PIPE_LOGICOP_NOOP: return 0xAA; - case PIPE_LOGICOP_XOR: return 0x66; - case PIPE_LOGICOP_OR: return 0xEE; - case PIPE_LOGICOP_NOR: return 0x11; - case PIPE_LOGICOP_EQUIV: return 0x99; - case PIPE_LOGICOP_INVERT: return 0x55; - case PIPE_LOGICOP_OR_REVERSE: return 0xDD; - case PIPE_LOGICOP_COPY_INVERTED: return 0x33; - case PIPE_LOGICOP_OR_INVERTED: return 0xBB; - case PIPE_LOGICOP_NAND: return 0x77; - case PIPE_LOGICOP_SET: return 0xFF; - default: return 0; - } -} - - -/* Copy BitBlt - */ -void brw_copy_blit(struct brw_context *brw, - unsigned do_flip, - unsigned cpp, - short src_pitch, - struct pipe_buffer *src_buffer, - unsigned src_offset, - boolean src_tiled, - short dst_pitch, - struct pipe_buffer *dst_buffer, - unsigned dst_offset, - boolean dst_tiled, - short src_x, short src_y, - short dst_x, short dst_y, - short w, short h, - unsigned logic_op) -{ - unsigned CMD, BR13; - int dst_y2 = dst_y + h; - int dst_x2 = dst_x + w; - BATCH_LOCALS; - - - DBG("%s src:buf(%d)/%d %d,%d dst:buf(%d)/%d %d,%d sz:%dx%d op:%d\n", - __FUNCTION__, - src_buffer, src_pitch, src_x, src_y, - dst_buffer, dst_pitch, dst_x, dst_y, - w,h,logic_op); - - assert( logic_op - PIPE_LOGICOP_CLEAR >= 0 ); - assert( logic_op - PIPE_LOGICOP_CLEAR < 0x10 ); - - src_pitch *= cpp; - dst_pitch *= cpp; - - switch(cpp) { - case 1: - case 2: - case 3: - BR13 = (translate_raster_op(logic_op) << 16) | (1<<24); - CMD = XY_SRC_COPY_BLT_CMD; - break; - case 4: - BR13 = (translate_raster_op(logic_op) << 16) | (1<<24) | - (1<<25); - CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; - break; - default: - return; - } - - if (src_tiled) { - CMD |= XY_SRC_TILED; - src_pitch /= 4; - } - - if (dst_tiled) { - CMD |= XY_DST_TILED; - dst_pitch /= 4; - } - - if (dst_y2 < dst_y || - dst_x2 < dst_x) { - return; - } - - dst_pitch &= 0xffff; - src_pitch &= 0xffff; - - /* Initial y values don't seem to work with negative pitches. If - * we adjust the offsets manually (below), it seems to work fine. - * - * On the other hand, if we always adjust, the hardware doesn't - * know which blit directions to use, so overlapping copypixels get - * the wrong result. - */ - if (dst_pitch > 0 && src_pitch > 0) { - BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH( CMD ); - OUT_BATCH( dst_pitch | BR13 ); - OUT_BATCH( (dst_y << 16) | dst_x ); - OUT_BATCH( (dst_y2 << 16) | dst_x2 ); - OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, - dst_offset ); - OUT_BATCH( (src_y << 16) | src_x ); - OUT_BATCH( src_pitch ); - OUT_RELOC( src_buffer, BRW_BUFFER_ACCESS_READ, - src_offset ); - ADVANCE_BATCH(); - } - else { - BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH( CMD ); - OUT_BATCH( (dst_pitch & 0xffff) | BR13 ); - OUT_BATCH( (0 << 16) | dst_x ); - OUT_BATCH( (h << 16) | dst_x2 ); - OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, - dst_offset + dst_y * dst_pitch ); - OUT_BATCH( (src_pitch & 0xffff) ); - OUT_RELOC( src_buffer, BRW_BUFFER_ACCESS_READ, - src_offset + src_y * src_pitch ); - ADVANCE_BATCH(); - } -} - - - diff --git a/src/gallium/drivers/i965simple/brw_blit.h b/src/gallium/drivers/i965simple/brw_blit.h deleted file mode 100644 index 111c5d91d3..0000000000 --- a/src/gallium/drivers/i965simple/brw_blit.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef BRW_BLIT_H -#define BRW_BLIT_H - -#include "pipe/p_compiler.h" - -struct pipe_buffer; -struct brw_context; - -void brw_fill_blit(struct brw_context *intel, - unsigned cpp, - short dst_pitch, - struct pipe_buffer *dst_buffer, - unsigned dst_offset, - boolean dst_tiled, - short x, short y, - short w, short h, - unsigned color); -void brw_copy_blit(struct brw_context *intel, - unsigned do_flip, - unsigned cpp, - short src_pitch, - struct pipe_buffer *src_buffer, - unsigned src_offset, - boolean src_tiled, - short dst_pitch, - struct pipe_buffer *dst_buffer, - unsigned dst_offset, - boolean dst_tiled, - short src_x, short src_y, - short dst_x, short dst_y, - short w, short h, - unsigned logic_op); -#endif diff --git a/src/gallium/drivers/i965simple/brw_cc.c b/src/gallium/drivers/i965simple/brw_cc.c deleted file mode 100644 index 3668123e2e..0000000000 --- a/src/gallium/drivers/i965simple/brw_cc.c +++ /dev/null @@ -1,269 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "util/u_math.h" -#include "util/u_memory.h" - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "brw_util.h" - - -static int brw_translate_compare_func(int func) -{ - switch(func) { - case PIPE_FUNC_NEVER: - return BRW_COMPAREFUNCTION_NEVER; - case PIPE_FUNC_LESS: - return BRW_COMPAREFUNCTION_LESS; - case PIPE_FUNC_LEQUAL: - return BRW_COMPAREFUNCTION_LEQUAL; - case PIPE_FUNC_GREATER: - return BRW_COMPAREFUNCTION_GREATER; - case PIPE_FUNC_GEQUAL: - return BRW_COMPAREFUNCTION_GEQUAL; - case PIPE_FUNC_NOTEQUAL: - return BRW_COMPAREFUNCTION_NOTEQUAL; - case PIPE_FUNC_EQUAL: - return BRW_COMPAREFUNCTION_EQUAL; - case PIPE_FUNC_ALWAYS: - return BRW_COMPAREFUNCTION_ALWAYS; - } - - debug_printf("Unknown value in %s: %x\n", __FUNCTION__, func); - return BRW_COMPAREFUNCTION_ALWAYS; -} - -static int brw_translate_stencil_op(int op) -{ - switch(op) { - case PIPE_STENCIL_OP_KEEP: - return BRW_STENCILOP_KEEP; - case PIPE_STENCIL_OP_ZERO: - return BRW_STENCILOP_ZERO; - case PIPE_STENCIL_OP_REPLACE: - return BRW_STENCILOP_REPLACE; - case PIPE_STENCIL_OP_INCR: - return BRW_STENCILOP_INCRSAT; - case PIPE_STENCIL_OP_DECR: - return BRW_STENCILOP_DECRSAT; - case PIPE_STENCIL_OP_INCR_WRAP: - return BRW_STENCILOP_INCR; - case PIPE_STENCIL_OP_DECR_WRAP: - return BRW_STENCILOP_DECR; - case PIPE_STENCIL_OP_INVERT: - return BRW_STENCILOP_INVERT; - default: - return BRW_STENCILOP_ZERO; - } -} - - -static int brw_translate_logic_op(int opcode) -{ - switch(opcode) { - case PIPE_LOGICOP_CLEAR: - return BRW_LOGICOPFUNCTION_CLEAR; - case PIPE_LOGICOP_AND: - return BRW_LOGICOPFUNCTION_AND; - case PIPE_LOGICOP_AND_REVERSE: - return BRW_LOGICOPFUNCTION_AND_REVERSE; - case PIPE_LOGICOP_COPY: - return BRW_LOGICOPFUNCTION_COPY; - case PIPE_LOGICOP_COPY_INVERTED: - return BRW_LOGICOPFUNCTION_COPY_INVERTED; - case PIPE_LOGICOP_AND_INVERTED: - return BRW_LOGICOPFUNCTION_AND_INVERTED; - case PIPE_LOGICOP_NOOP: - return BRW_LOGICOPFUNCTION_NOOP; - case PIPE_LOGICOP_XOR: - return BRW_LOGICOPFUNCTION_XOR; - case PIPE_LOGICOP_OR: - return BRW_LOGICOPFUNCTION_OR; - case PIPE_LOGICOP_OR_INVERTED: - return BRW_LOGICOPFUNCTION_OR_INVERTED; - case PIPE_LOGICOP_NOR: - return BRW_LOGICOPFUNCTION_NOR; - case PIPE_LOGICOP_EQUIV: - return BRW_LOGICOPFUNCTION_EQUIV; - case PIPE_LOGICOP_INVERT: - return BRW_LOGICOPFUNCTION_INVERT; - case PIPE_LOGICOP_OR_REVERSE: - return BRW_LOGICOPFUNCTION_OR_REVERSE; - case PIPE_LOGICOP_NAND: - return BRW_LOGICOPFUNCTION_NAND; - case PIPE_LOGICOP_SET: - return BRW_LOGICOPFUNCTION_SET; - default: - return BRW_LOGICOPFUNCTION_SET; - } -} - - -static void upload_cc_vp( struct brw_context *brw ) -{ - struct brw_cc_viewport ccv; - - memset(&ccv, 0, sizeof(ccv)); - - ccv.min_depth = 0.0; - ccv.max_depth = 1.0; - - brw->cc.vp_gs_offset = brw_cache_data( &brw->cache[BRW_CC_VP], &ccv ); -} - -const struct brw_tracked_state brw_cc_vp = { - .dirty = { - .brw = BRW_NEW_SCENE, - .cache = 0 - }, - .update = upload_cc_vp -}; - - -static void upload_cc_unit( struct brw_context *brw ) -{ - struct brw_cc_unit_state cc; - - memset(&cc, 0, sizeof(cc)); - - /* BRW_NEW_DEPTH_STENCIL */ - if (brw->attribs.DepthStencil->stencil[0].enabled) { - cc.cc0.stencil_enable = brw->attribs.DepthStencil->stencil[0].enabled; - cc.cc0.stencil_func = brw_translate_compare_func(brw->attribs.DepthStencil->stencil[0].func); - cc.cc0.stencil_fail_op = brw_translate_stencil_op(brw->attribs.DepthStencil->stencil[0].fail_op); - cc.cc0.stencil_pass_depth_fail_op = brw_translate_stencil_op( - brw->attribs.DepthStencil->stencil[0].zfail_op); - cc.cc0.stencil_pass_depth_pass_op = brw_translate_stencil_op( - brw->attribs.DepthStencil->stencil[0].zpass_op); - cc.cc1.stencil_ref = brw->attribs.DepthStencil->stencil[0].ref_value; - cc.cc1.stencil_write_mask = brw->attribs.DepthStencil->stencil[0].writemask; - cc.cc1.stencil_test_mask = brw->attribs.DepthStencil->stencil[0].valuemask; - - if (brw->attribs.DepthStencil->stencil[1].enabled) { - cc.cc0.bf_stencil_enable = brw->attribs.DepthStencil->stencil[1].enabled; - cc.cc0.bf_stencil_func = brw_translate_compare_func( - brw->attribs.DepthStencil->stencil[1].func); - cc.cc0.bf_stencil_fail_op = brw_translate_stencil_op( - brw->attribs.DepthStencil->stencil[1].fail_op); - cc.cc0.bf_stencil_pass_depth_fail_op = brw_translate_stencil_op( - brw->attribs.DepthStencil->stencil[1].zfail_op); - cc.cc0.bf_stencil_pass_depth_pass_op = brw_translate_stencil_op( - brw->attribs.DepthStencil->stencil[1].zpass_op); - cc.cc1.bf_stencil_ref = brw->attribs.DepthStencil->stencil[1].ref_value; - cc.cc2.bf_stencil_write_mask = brw->attribs.DepthStencil->stencil[1].writemask; - cc.cc2.bf_stencil_test_mask = brw->attribs.DepthStencil->stencil[1].valuemask; - } - - /* Not really sure about this: - */ - if (brw->attribs.DepthStencil->stencil[0].writemask || - brw->attribs.DepthStencil->stencil[1].writemask) - cc.cc0.stencil_write_enable = 1; - } - - /* BRW_NEW_BLEND */ - if (brw->attribs.Blend->logicop_enable) { - cc.cc2.logicop_enable = 1; - cc.cc5.logicop_func = brw_translate_logic_op( brw->attribs.Blend->logicop_func ); - } - else if (brw->attribs.Blend->blend_enable) { - int eqRGB = brw->attribs.Blend->rgb_func; - int eqA = brw->attribs.Blend->alpha_func; - int srcRGB = brw->attribs.Blend->rgb_src_factor; - int dstRGB = brw->attribs.Blend->rgb_dst_factor; - int srcA = brw->attribs.Blend->alpha_src_factor; - int dstA = brw->attribs.Blend->alpha_dst_factor; - - if (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX) { - srcRGB = dstRGB = PIPE_BLENDFACTOR_ONE; - } - - if (eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX) { - srcA = dstA = PIPE_BLENDFACTOR_ONE; - } - - cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); - cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); - cc.cc6.blend_function = brw_translate_blend_equation( eqRGB ); - - cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); - cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); - cc.cc5.ia_blend_function = brw_translate_blend_equation( eqA ); - - cc.cc3.blend_enable = 1; - cc.cc3.ia_blend_enable = (srcA != srcRGB || - dstA != dstRGB || - eqA != eqRGB); - } - - /* BRW_NEW_ALPHATEST - */ - if (brw->attribs.DepthStencil->alpha.enabled) { - cc.cc3.alpha_test = 1; - cc.cc3.alpha_test_func = - brw_translate_compare_func(brw->attribs.DepthStencil->alpha.func); - - cc.cc7.alpha_ref.ub[0] = float_to_ubyte(brw->attribs.DepthStencil->alpha.ref_value); - - cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; - } - - if (brw->attribs.Blend->dither) { - cc.cc5.dither_enable = 1; - cc.cc6.y_dither_offset = 0; - cc.cc6.x_dither_offset = 0; - } - - if (brw->attribs.DepthStencil->depth.enabled) { - cc.cc2.depth_test = brw->attribs.DepthStencil->depth.enabled; - cc.cc2.depth_test_function = brw_translate_compare_func(brw->attribs.DepthStencil->depth.func); - cc.cc2.depth_write_enable = brw->attribs.DepthStencil->depth.writemask; - } - - /* CACHE_NEW_CC_VP */ - cc.cc4.cc_viewport_state_offset = brw->cc.vp_gs_offset >> 5; - - if (BRW_DEBUG & DEBUG_STATS) - cc.cc5.statistics_enable = 1; - - brw->cc.state_gs_offset = brw_cache_data( &brw->cache[BRW_CC_UNIT], &cc ); -} - -const struct brw_tracked_state brw_cc_unit = { - .dirty = { - .brw = BRW_NEW_DEPTH_STENCIL | BRW_NEW_BLEND | BRW_NEW_ALPHA_TEST, - .cache = CACHE_NEW_CC_VP - }, - .update = upload_cc_unit -}; - diff --git a/src/gallium/drivers/i965simple/brw_clip.c b/src/gallium/drivers/i965simple/brw_clip.c deleted file mode 100644 index 268124cc53..0000000000 --- a/src/gallium/drivers/i965simple/brw_clip.c +++ /dev/null @@ -1,206 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_util.h" -#include "brw_state.h" -#include "brw_clip.h" - -#define FRONT_UNFILLED_BIT 0x1 -#define BACK_UNFILLED_BIT 0x2 - - -static void compile_clip_prog( struct brw_context *brw, - struct brw_clip_prog_key *key ) -{ - struct brw_clip_compile c; - const unsigned *program; - unsigned program_size; - unsigned delta; - unsigned i; - - memset(&c, 0, sizeof(c)); - - /* Begin the compilation: - */ - brw_init_compile(&c.func); - - c.func.single_program_flow = 1; - - c.key = *key; - - - /* Need to locate the two positions present in vertex + header. - * These are currently hardcoded: - */ - c.header_position_offset = ATTR_SIZE; - - for (i = 0, delta = REG_SIZE; i < PIPE_MAX_SHADER_OUTPUTS; i++) - if (c.key.attrs & (1<primitive) { - case PIPE_PRIM_TRIANGLES: -#if 0 - if (key->do_unfilled) - brw_emit_unfilled_clip( &c ); - else -#endif - brw_emit_tri_clip( &c ); - break; - case PIPE_PRIM_LINES: - brw_emit_line_clip( &c ); - break; - case PIPE_PRIM_POINTS: - brw_emit_point_clip( &c ); - break; - default: - assert(0); - return; - } - - - - /* get the program - */ - program = brw_get_program(&c.func, &program_size); - - /* Upload - */ - brw->clip.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_CLIP_PROG], - &c.key, - sizeof(c.key), - program, - program_size, - &c.prog_data, - &brw->clip.prog_data ); -} - - -static boolean search_cache( struct brw_context *brw, - struct brw_clip_prog_key *key ) -{ - return brw_search_cache(&brw->cache[BRW_CLIP_PROG], - key, sizeof(*key), - &brw->clip.prog_data, - &brw->clip.prog_gs_offset); -} - - - - -/* Calculate interpolants for triangle and line rasterization. - */ -static void upload_clip_prog(struct brw_context *brw) -{ - struct brw_clip_prog_key key; - - memset(&key, 0, sizeof(key)); - - /* Populate the key: - */ - /* BRW_NEW_REDUCED_PRIMITIVE */ - key.primitive = brw->reduced_primitive; - /* CACHE_NEW_VS_PROG */ - key.attrs = brw->vs.prog_data->outputs_written; - /* BRW_NEW_RASTER */ - key.do_flat_shading = (brw->attribs.Raster->flatshade); - /* BRW_NEW_CLIP */ - key.nr_userclip = brw->attribs.Clip.nr; /* XXX */ - -#if 0 - key.clip_mode = BRW_CLIPMODE_NORMAL; - - if (key.primitive == PIPE_PRIM_TRIANGLES) { - if (brw->attribs.Raster->cull_mode == PIPE_WINDING_BOTH) - key.clip_mode = BRW_CLIPMODE_REJECT_ALL; - else { - if (brw->attribs.Raster->fill_cw != PIPE_POLYGON_MODE_FILL || - brw->attribs.Raster->fill_ccw != PIPE_POLYGON_MODE_FILL) - key.do_unfilled = 1; - - /* Most cases the fixed function units will handle. Cases where - * one or more polygon faces are unfilled will require help: - */ - if (key.do_unfilled) { - key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; - - if (brw->attribs.Raster->offset_cw || - brw->attribs.Raster->offset_ccw) { - key.offset_units = brw->attribs.Raster->offset_units; - key.offset_factor = brw->attribs.Raster->offset_scale; - } - key.fill_ccw = brw->attribs.Raster->fill_ccw; - key.fill_cw = brw->attribs.Raster->fill_cw; - key.offset_ccw = brw->attribs.Raster->offset_ccw; - key.offset_cw = brw->attribs.Raster->offset_cw; - if (brw->attribs.Raster->light_twoside && - key.fill_cw != CLIP_CULL) - key.copy_bfc_cw = 1; - } - } - } -#else - key.clip_mode = BRW_CLIPMODE_ACCEPT_ALL; -#endif - - if (!search_cache(brw, &key)) - compile_clip_prog( brw, &key ); -} - -const struct brw_tracked_state brw_clip_prog = { - .dirty = { - .brw = (BRW_NEW_RASTERIZER | - BRW_NEW_CLIP | - BRW_NEW_REDUCED_PRIMITIVE), - .cache = CACHE_NEW_VS_PROG - }, - .update = upload_clip_prog -}; diff --git a/src/gallium/drivers/i965simple/brw_clip.h b/src/gallium/drivers/i965simple/brw_clip.h deleted file mode 100644 index d70fc094ff..0000000000 --- a/src/gallium/drivers/i965simple/brw_clip.h +++ /dev/null @@ -1,170 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#ifndef BRW_CLIP_H -#define BRW_CLIP_H - - -#include "brw_context.h" -#include "brw_eu.h" - -#define MAX_VERTS (3+6+6) - -/* Note that if unfilled primitives are being emitted, we have to fix - * up polygon offset and flatshading at this point: - */ -struct brw_clip_prog_key { - unsigned attrs:32; - unsigned primitive:4; - unsigned nr_userclip:3; - unsigned do_flat_shading:1; - unsigned do_unfilled:1; - unsigned fill_cw:2; /* includes cull information */ - unsigned fill_ccw:2; /* includes cull information */ - unsigned offset_cw:1; - unsigned offset_ccw:1; - unsigned pad0:17; - - unsigned copy_bfc_cw:1; - unsigned copy_bfc_ccw:1; - unsigned clip_mode:3; - unsigned pad1:27; - - float offset_factor; - float offset_units; -}; - - -#define CLIP_LINE 0 -#define CLIP_POINT 1 -#define CLIP_FILL 2 -#define CLIP_CULL 3 - - -#define PRIM_MASK (0x1f) - -struct brw_clip_compile { - struct brw_compile func; - struct brw_clip_prog_key key; - struct brw_clip_prog_data prog_data; - - struct { - struct brw_reg R0; - struct brw_reg vertex[MAX_VERTS]; - - struct brw_reg t; - struct brw_reg t0, t1; - struct brw_reg dp0, dp1; - - struct brw_reg dpPrev; - struct brw_reg dp; - struct brw_reg loopcount; - struct brw_reg nr_verts; - struct brw_reg planemask; - - struct brw_reg inlist; - struct brw_reg outlist; - struct brw_reg freelist; - - struct brw_reg dir; - struct brw_reg tmp0, tmp1; - struct brw_reg offset; - - struct brw_reg fixed_planes; - struct brw_reg plane_equation; - } reg; - - /* 3 different ways of expressing vertex size: - */ - unsigned nr_attrs; - unsigned nr_regs; - unsigned nr_bytes; - - unsigned first_tmp; - unsigned last_tmp; - - boolean need_direction; - - unsigned last_mrf; - - unsigned header_position_offset; - unsigned offset[PIPE_MAX_ATTRIBS]; -}; - -#define ATTR_SIZE (4*4) - -/* Points are only culled, so no need for a clip routine, however it - * works out easier to have a dummy one. - */ -void brw_emit_unfilled_clip( struct brw_clip_compile *c ); -void brw_emit_tri_clip( struct brw_clip_compile *c ); -void brw_emit_line_clip( struct brw_clip_compile *c ); -void brw_emit_point_clip( struct brw_clip_compile *c ); - -/* brw_clip_tri.c, for use by the unfilled clip routine: - */ -void brw_clip_tri_init_vertices( struct brw_clip_compile *c ); -void brw_clip_tri_flat_shade( struct brw_clip_compile *c ); -void brw_clip_tri( struct brw_clip_compile *c ); -void brw_clip_tri_emit_polygon( struct brw_clip_compile *c ); -void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, - unsigned nr_verts ); - - -/* Utils: - */ - -void brw_clip_interp_vertex( struct brw_clip_compile *c, - struct brw_indirect dest_ptr, - struct brw_indirect v0_ptr, /* from */ - struct brw_indirect v1_ptr, /* to */ - struct brw_reg t0, - boolean force_edgeflag ); - -void brw_clip_init_planes( struct brw_clip_compile *c ); - -void brw_clip_emit_vue(struct brw_clip_compile *c, - struct brw_indirect vert, - boolean allocate, - boolean eot, - unsigned header); - -void brw_clip_kill_thread(struct brw_clip_compile *c); - -struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ); -struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ); - -void brw_clip_copy_colors( struct brw_clip_compile *c, - unsigned to, unsigned from ); - -void brw_clip_init_clipmask( struct brw_clip_compile *c ); - -#endif diff --git a/src/gallium/drivers/i965simple/brw_clip_line.c b/src/gallium/drivers/i965simple/brw_clip_line.c deleted file mode 100644 index 75d9e5fcda..0000000000 --- a/src/gallium/drivers/i965simple/brw_clip_line.c +++ /dev/null @@ -1,245 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_util.h" -#include "brw_clip.h" - - - -static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) -{ - unsigned i = 0,j; - - /* Register usage is static, precompute here: - */ - c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; - - if (c->key.nr_userclip) { - c->reg.fixed_planes = brw_vec4_grf(i, 0); - i += (6 + c->key.nr_userclip + 1) / 2; - - c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2; - } - else - c->prog_data.curb_read_length = 0; - - - /* Payload vertices plus space for more generated vertices: - */ - for (j = 0; j < 4; j++) { - c->reg.vertex[j] = brw_vec4_grf(i, 0); - i += c->nr_regs; - } - - c->reg.t = brw_vec1_grf(i, 0); - c->reg.t0 = brw_vec1_grf(i, 1); - c->reg.t1 = brw_vec1_grf(i, 2); - c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD); - c->reg.plane_equation = brw_vec4_grf(i, 4); - i++; - - c->reg.dp0 = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */ - c->reg.dp1 = brw_vec1_grf(i, 4); - i++; - - if (!c->key.nr_userclip) { - c->reg.fixed_planes = brw_vec8_grf(i, 0); - i++; - } - - - c->first_tmp = i; - c->last_tmp = i; - - c->prog_data.urb_read_length = c->nr_regs; /* ? */ - c->prog_data.total_grf = i; -} - - - -/* Line clipping, more or less following the following algorithm: - * - * for (p=0;p t1) t1 = t; - * } else { - * float t = dp0 / (dp0 - dp1); - * if (t > t0) t0 = t; - * } - * - * if (t0 + t1 >= 1.0) - * return; - * } - * } - * - * interp( ctx, newvtx0, vtx0, vtx1, t0 ); - * interp( ctx, newvtx1, vtx1, vtx0, t1 ); - * - */ -static void clip_and_emit_line( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_indirect vtx0 = brw_indirect(0, 0); - struct brw_indirect vtx1 = brw_indirect(1, 0); - struct brw_indirect newvtx0 = brw_indirect(2, 0); - struct brw_indirect newvtx1 = brw_indirect(3, 0); - struct brw_indirect plane_ptr = brw_indirect(4, 0); - struct brw_instruction *plane_loop; - struct brw_instruction *plane_active; - struct brw_instruction *is_negative; - struct brw_instruction *is_neg2; - struct brw_instruction *not_culled; - struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); - - brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0])); - brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1])); - brw_MOV(p, get_addr_reg(newvtx0), brw_address(c->reg.vertex[2])); - brw_MOV(p, get_addr_reg(newvtx1), brw_address(c->reg.vertex[3])); - brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); - - /* Note: init t0, t1 together: - */ - brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0)); - - brw_clip_init_planes(c); - brw_clip_init_clipmask(c); - - /* -ve rhw workaround */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), - brw_imm_ud(1<<20)); - brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - plane_loop = brw_DO(p, BRW_EXECUTE_1); - { - /* if (planemask & 1) - */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1)); - - plane_active = brw_IF(p, BRW_EXECUTE_1); - { - if (c->key.nr_userclip) - brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); - else - brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); - -#if 0 - /* dp = DP4(vtx->position, plane) - */ - brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); - - /* if (IS_NEGATIVE(dp1)) - */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_L); - brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); -#else - #warning "disabled" -#endif - is_negative = brw_IF(p, BRW_EXECUTE_1); - { - brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0)); - brw_math_invert(p, c->reg.t, c->reg.t); - brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1); - - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 ); - brw_MOV(p, c->reg.t1, c->reg.t); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } - is_negative = brw_ELSE(p, is_negative); - { - /* Coming back in. We know that both cannot be negative - * because the line would have been culled in that case. - */ - - /* If both are positive, do nothing */ - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); - is_neg2 = brw_IF(p, BRW_EXECUTE_1); - { - brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); - brw_math_invert(p, c->reg.t, c->reg.t); - brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); - - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); - brw_MOV(p, c->reg.t0, c->reg.t); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } - brw_ENDIF(p, is_neg2); - } - brw_ENDIF(p, is_negative); - } - brw_ENDIF(p, plane_active); - - /* plane_ptr++; - */ - brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); - - /* while (planemask>>=1) != 0 - */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); - } - brw_WHILE(p, plane_loop); - - brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1); - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0)); - not_culled = brw_IF(p, BRW_EXECUTE_1); - { - brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, FALSE); - brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, FALSE); - - brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); - brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); - } - brw_ENDIF(p, not_culled); - brw_clip_kill_thread(c); -} - - - -void brw_emit_line_clip( struct brw_clip_compile *c ) -{ - brw_clip_line_alloc_regs(c); - - if (c->key.do_flat_shading) - brw_clip_copy_colors(c, 0, 1); - - clip_and_emit_line(c); -} diff --git a/src/gallium/drivers/i965simple/brw_clip_point.c b/src/gallium/drivers/i965simple/brw_clip_point.c deleted file mode 100644 index 6fce7210d1..0000000000 --- a/src/gallium/drivers/i965simple/brw_clip_point.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_util.h" -#include "brw_clip.h" - - -/* Point clipping, nothing to do? - */ -void brw_emit_point_clip( struct brw_clip_compile *c ) -{ - /* Send an empty message to kill the thread: - */ - brw_clip_tri_alloc_regs(c, 0); - brw_clip_kill_thread(c); -} diff --git a/src/gallium/drivers/i965simple/brw_clip_state.c b/src/gallium/drivers/i965simple/brw_clip_state.c deleted file mode 100644 index 8e78dd51be..0000000000 --- a/src/gallium/drivers/i965simple/brw_clip_state.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "util/u_math.h" -#include "util/u_memory.h" - - -static void upload_clip_unit( struct brw_context *brw ) -{ - struct brw_clip_unit_state clip; - - memset(&clip, 0, sizeof(clip)); - - /* CACHE_NEW_CLIP_PROG */ - clip.thread0.grf_reg_count = - align(brw->clip.prog_data->total_grf, 16) / 16 - 1; - clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6; - clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length; - clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length; - clip.clip5.clip_mode = brw->clip.prog_data->clip_mode; - - /* BRW_NEW_CURBE_OFFSETS */ - clip.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; - - /* BRW_NEW_URB_FENCE */ - clip.thread4.nr_urb_entries = brw->urb.nr_clip_entries; - clip.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; - clip.thread4.max_threads = 1; /* 2 threads */ - - if (BRW_DEBUG & DEBUG_STATS) - clip.thread4.stats_enable = 1; - - /* CONSTANT */ - clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - clip.thread1.single_program_flow = 1; - clip.thread3.dispatch_grf_start_reg = 1; - clip.thread3.urb_entry_read_offset = 0; - clip.clip5.userclip_enable_flags = 0x7f; - clip.clip5.userclip_must_clip = 1; - clip.clip5.guard_band_enable = 0; - clip.clip5.viewport_z_clip_enable = 1; - clip.clip5.viewport_xy_clip_enable = 1; - clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE; - clip.clip5.api_mode = BRW_CLIP_API_OGL; - clip.clip6.clipper_viewport_state_ptr = 0; - clip.viewport_xmin = -1; - clip.viewport_xmax = 1; - clip.viewport_ymin = -1; - clip.viewport_ymax = 1; - - brw->clip.state_gs_offset = brw_cache_data( &brw->cache[BRW_CLIP_UNIT], &clip ); -} - - -const struct brw_tracked_state brw_clip_unit = { - .dirty = { - .brw = (BRW_NEW_CURBE_OFFSETS | - BRW_NEW_URB_FENCE), - .cache = CACHE_NEW_CLIP_PROG - }, - .update = upload_clip_unit -}; diff --git a/src/gallium/drivers/i965simple/brw_clip_tri.c b/src/gallium/drivers/i965simple/brw_clip_tri.c deleted file mode 100644 index c5da7b825e..0000000000 --- a/src/gallium/drivers/i965simple/brw_clip_tri.c +++ /dev/null @@ -1,566 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_util.h" -#include "brw_clip.h" - -static struct brw_reg get_tmp( struct brw_clip_compile *c ) -{ - struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); - - if (++c->last_tmp > c->prog_data.total_grf) - c->prog_data.total_grf = c->last_tmp; - - return tmp; -} - -static void release_tmps( struct brw_clip_compile *c ) -{ - c->last_tmp = c->first_tmp; -} - - -void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, - unsigned nr_verts ) -{ - unsigned i = 0,j; - - /* Register usage is static, precompute here: - */ - c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; - - if (c->key.nr_userclip) { - c->reg.fixed_planes = brw_vec4_grf(i, 0); - i += (6 + c->key.nr_userclip + 1) / 2; - - c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2; - } - else - c->prog_data.curb_read_length = 0; - - - /* Payload vertices plus space for more generated vertices: - */ - for (j = 0; j < nr_verts; j++) { - c->reg.vertex[j] = brw_vec4_grf(i, 0); - i += c->nr_regs; - } - - if (c->nr_attrs & 1) { - for (j = 0; j < 3; j++) { - unsigned delta = c->nr_attrs*16 + 32; - brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0)); - } - } - - c->reg.t = brw_vec1_grf(i, 0); - c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_UD); - c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD); - c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD); - c->reg.plane_equation = brw_vec4_grf(i, 4); - i++; - - c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */ - c->reg.dp = brw_vec1_grf(i, 4); - i++; - - c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); - i++; - - c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); - i++; - - c->reg.freelist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); - i++; - - if (!c->key.nr_userclip) { - c->reg.fixed_planes = brw_vec8_grf(i, 0); - i++; - } - - if (c->key.do_unfilled) { - c->reg.dir = brw_vec4_grf(i, 0); - c->reg.offset = brw_vec4_grf(i, 4); - i++; - c->reg.tmp0 = brw_vec4_grf(i, 0); - c->reg.tmp1 = brw_vec4_grf(i, 4); - i++; - } - - c->first_tmp = i; - c->last_tmp = i; - - c->prog_data.urb_read_length = c->nr_regs; /* ? */ - c->prog_data.total_grf = i; -} - - - -void brw_clip_tri_init_vertices( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ - struct brw_instruction *is_rev; - - /* Initial list of indices for incoming vertexes: - */ - brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); - brw_CMP(p, - vec1(brw_null_reg()), - BRW_CONDITIONAL_EQ, - tmp0, - brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE)); - - /* XXX: Is there an easier way to do this? Need to reverse every - * second tristrip element: Can ignore sometimes? - */ - is_rev = brw_IF(p, BRW_EXECUTE_1); - { - brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[1]) ); - brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[0]) ); - if (c->need_direction) - brw_MOV(p, c->reg.dir, brw_imm_f(-1)); - } - is_rev = brw_ELSE(p, is_rev); - { - brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[0]) ); - brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[1]) ); - if (c->need_direction) - brw_MOV(p, c->reg.dir, brw_imm_f(1)); - } - brw_ENDIF(p, is_rev); - - brw_MOV(p, get_element(c->reg.inlist, 2), brw_address(c->reg.vertex[2]) ); - brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0)); - brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3)); -} - - - -void brw_clip_tri_flat_shade( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *is_poly; - struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ - - brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); - brw_CMP(p, - vec1(brw_null_reg()), - BRW_CONDITIONAL_EQ, - tmp0, - brw_imm_ud(_3DPRIM_POLYGON)); - - is_poly = brw_IF(p, BRW_EXECUTE_1); - { - brw_clip_copy_colors(c, 1, 0); - brw_clip_copy_colors(c, 2, 0); - } - is_poly = brw_ELSE(p, is_poly); - { - brw_clip_copy_colors(c, 0, 2); - brw_clip_copy_colors(c, 1, 2); - } - brw_ENDIF(p, is_poly); -} - - - -/* Use mesa's clipping algorithms, translated to GEN4 assembly. - */ -void brw_clip_tri( struct brw_clip_compile *c ) -{ -#if 0 - struct brw_compile *p = &c->func; - struct brw_indirect vtx = brw_indirect(0, 0); - struct brw_indirect vtxPrev = brw_indirect(1, 0); - struct brw_indirect vtxOut = brw_indirect(2, 0); - struct brw_indirect plane_ptr = brw_indirect(3, 0); - struct brw_indirect inlist_ptr = brw_indirect(4, 0); - struct brw_indirect outlist_ptr = brw_indirect(5, 0); - struct brw_indirect freelist_ptr = brw_indirect(6, 0); - struct brw_instruction *plane_loop; - struct brw_instruction *plane_active; - struct brw_instruction *vertex_loop; - struct brw_instruction *next_test; - struct brw_instruction *prev_test; - - brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) ); - brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); - brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); - brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); - - brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) ); - - plane_loop = brw_DO(p, BRW_EXECUTE_1); - { - /* if (planemask & 1) - */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1)); - - plane_active = brw_IF(p, BRW_EXECUTE_1); - { - /* vtxOut = freelist_ptr++ - */ - brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) ); - brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE)); - - if (c->key.nr_userclip) - brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); - else - brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); - - brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); - brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0)); - - vertex_loop = brw_DO(p, BRW_EXECUTE_1); - { - /* vtx = *input_ptr; - */ - brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0)); - - /* IS_NEGATIVE(prev) */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_L); - brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); - prev_test = brw_IF(p, BRW_EXECUTE_1); - { - /* IS_POSITIVE(next) - */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_GE); - brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); - next_test = brw_IF(p, BRW_EXECUTE_1); - { - - /* Coming back in. - */ - brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp)); - brw_math_invert(p, c->reg.t, c->reg.t); - brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev); - - /* If (vtxOut == 0) vtxOut = vtxPrev - */ - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); - brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) ); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, FALSE); - - /* *outlist_ptr++ = vtxOut; - * nr_verts++; - * vtxOut = 0; - */ - brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); - brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); - brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); - brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); - } - brw_ENDIF(p, next_test); - - } - prev_test = brw_ELSE(p, prev_test); - { - /* *outlist_ptr++ = vtxPrev; - * nr_verts++; - */ - brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev)); - brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); - brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); - - /* IS_NEGATIVE(next) - */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_L); - brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); - next_test = brw_IF(p, BRW_EXECUTE_1); - { - /* Going out of bounds. Avoid division by zero as we - * know dp != dpPrev from DIFFERENT_SIGNS, above. - */ - brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev)); - brw_math_invert(p, c->reg.t, c->reg.t); - brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp); - - /* If (vtxOut == 0) vtxOut = vtx - */ - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); - brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) ); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, TRUE); - - /* *outlist_ptr++ = vtxOut; - * nr_verts++; - * vtxOut = 0; - */ - brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); - brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); - brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); - brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); - } - brw_ENDIF(p, next_test); - } - brw_ENDIF(p, prev_test); - - /* vtxPrev = vtx; - * inlist_ptr++; - */ - brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx)); - brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short))); - - /* while (--loopcount != 0) - */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); - } - brw_WHILE(p, vertex_loop); - - /* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1] - * inlist = outlist - * inlist_ptr = &inlist[0] - * outlist_ptr = &outlist[0] - */ - brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2)); - brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0)); - brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0)); - brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); - brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); - } - brw_ENDIF(p, plane_active); - - /* plane_ptr++; - */ - brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); - - /* nr_verts >= 3 - */ - brw_CMP(p, - vec1(brw_null_reg()), - BRW_CONDITIONAL_GE, - c->reg.nr_verts, - brw_imm_ud(3)); - - /* && (planemask>>=1) != 0 - */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); - } - brw_WHILE(p, plane_loop); -#else - #warning "disabled" -#endif -} - - - -void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *loop, *if_insn; - - /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--) - */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_G); - brw_ADD(p, - c->reg.loopcount, - c->reg.nr_verts, - brw_imm_d(-2)); - - if_insn = brw_IF(p, BRW_EXECUTE_1); - { - struct brw_indirect v0 = brw_indirect(0, 0); - struct brw_indirect vptr = brw_indirect(1, 0); - - brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist)); - brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); - - brw_clip_emit_vue(c, v0, 1, 0, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_START)); - - brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); - brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); - - loop = brw_DO(p, BRW_EXECUTE_1); - { - brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << 2)); - - brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); - brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); - - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); - } - brw_WHILE(p, loop); - - brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END)); - } - brw_ENDIF(p, if_insn); -} - -static void do_clip_tri( struct brw_clip_compile *c ) -{ - brw_clip_init_planes(c); - - brw_clip_tri(c); -} - - -static void maybe_do_clip_tri( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *do_clip; - - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); - do_clip = brw_IF(p, BRW_EXECUTE_1); - { - do_clip_tri(c); - } - brw_ENDIF(p, do_clip); -} - -static void brw_clip_test( struct brw_clip_compile *c ) -{ -#if 0 - struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); - struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); - struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); - struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); - - struct brw_reg v0 = get_tmp(c); - struct brw_reg v1 = get_tmp(c); - struct brw_reg v2 = get_tmp(c); - - struct brw_indirect vt0 = brw_indirect(0, 0); - struct brw_indirect vt1 = brw_indirect(1, 0); - struct brw_indirect vt2 = brw_indirect(2, 0); - - struct brw_compile *p = &c->func; - - brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); - brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); - brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2])); - brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS])); - brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS])); - brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS])); - - /* test nearz, xmin, ymin plane */ - brw_CMP(p, t1, BRW_CONDITIONAL_LE, negate(v0), get_element(v0, 3)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, t2, BRW_CONDITIONAL_LE, negate(v1), get_element(v1, 3)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, t3, BRW_CONDITIONAL_LE, negate(v2), get_element(v2, 3)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_XOR(p, t, t1, t2); - brw_XOR(p, t1, t2, t3); - brw_OR(p, t, t, t1); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 0), brw_imm_ud(0)); - brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 1), brw_imm_ud(0)); - brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 2), brw_imm_ud(0)); - brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - /* test farz, xmax, ymax plane */ - brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, get_element(v0, 3)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, get_element(v1, 3)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, get_element(v2, 3)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - brw_XOR(p, t, t1, t2); - brw_XOR(p, t1, t2, t3); - brw_OR(p, t, t, t1); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 0), brw_imm_ud(0)); - brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 1), brw_imm_ud(0)); - brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 2), brw_imm_ud(0)); - brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - release_tmps(c); -#else - #warning "disabled" -#endif -} - - -void brw_emit_tri_clip( struct brw_clip_compile *c ) -{ - struct brw_instruction *neg_rhw; - struct brw_compile *p = &c->func; - brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); - brw_clip_tri_init_vertices(c); - brw_clip_init_clipmask(c); - - /* if -ve rhw workaround bit is set, - do cliptest */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), - brw_imm_ud(1<<20)); - neg_rhw = brw_IF(p, BRW_EXECUTE_1); - { - brw_clip_test(c); - } - brw_ENDIF(p, neg_rhw); - - /* Can't push into do_clip_tri because with polygon (or quad) - * flatshading, need to apply the flatshade here because we don't - * respect the PV when converting to trifan for emit: - */ - if (c->key.do_flat_shading) - brw_clip_tri_flat_shade(c); - - if (c->key.clip_mode == BRW_CLIPMODE_NORMAL) - do_clip_tri(c); - else - maybe_do_clip_tri(c); - - brw_clip_tri_emit_polygon(c); - - /* Send an empty message to kill the thread: - */ - brw_clip_kill_thread(c); -} diff --git a/src/gallium/drivers/i965simple/brw_clip_unfilled.c b/src/gallium/drivers/i965simple/brw_clip_unfilled.c deleted file mode 100644 index b774a76dd6..0000000000 --- a/src/gallium/drivers/i965simple/brw_clip_unfilled.c +++ /dev/null @@ -1,477 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_util.h" -#include "brw_clip.h" - - - -/* This is performed against the original triangles, so no indirection - * required: -BZZZT! - */ -static void compute_tri_direction( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg e = c->reg.tmp0; - struct brw_reg f = c->reg.tmp1; - struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]); - struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]); - struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]); - - - /* Calculate the vectors of two edges of the triangle: - */ - brw_ADD(p, e, v0, negate(v2)); - brw_ADD(p, f, v1, negate(v2)); - - /* Take their crossproduct: - */ - brw_set_access_mode(p, BRW_ALIGN_16); - brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3), brw_swizzle(f,2,0,1,3)); - brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3)); - brw_set_access_mode(p, BRW_ALIGN_1); - - brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e)); -} - - -static void cull_direction( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *ccw; - unsigned conditional; - - assert (!(c->key.fill_ccw == CLIP_CULL && - c->key.fill_cw == CLIP_CULL)); - - if (c->key.fill_ccw == CLIP_CULL) - conditional = BRW_CONDITIONAL_GE; - else - conditional = BRW_CONDITIONAL_L; - - brw_CMP(p, - vec1(brw_null_reg()), - conditional, - get_element(c->reg.dir, 2), - brw_imm_f(0)); - - ccw = brw_IF(p, BRW_EXECUTE_1); - { - brw_clip_kill_thread(c); - } - brw_ENDIF(p, ccw); -} - - - -static void copy_bfc( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *ccw; - unsigned conditional; - - /* Do we have any colors to copy? - */ - if (!(c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) && - !(c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1])) - return; - - /* In some wierd degnerate cases we can end up testing the - * direction twice, once for culling and once for bfc copying. Oh - * well, that's what you get for setting wierd GL state. - */ - if (c->key.copy_bfc_ccw) - conditional = BRW_CONDITIONAL_GE; - else - conditional = BRW_CONDITIONAL_L; - - brw_CMP(p, - vec1(brw_null_reg()), - conditional, - get_element(c->reg.dir, 2), - brw_imm_f(0)); - - ccw = brw_IF(p, BRW_EXECUTE_1); - { - unsigned i; - - for (i = 0; i < 3; i++) { - if (c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) - brw_MOV(p, - byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL0]), - byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC0])); - - if (c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1]) - brw_MOV(p, - byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL1]), - byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC1])); - } - } - brw_ENDIF(p, ccw); -} - - - - -/* - float iz = 1.0 / dir.z; - float ac = dir.x * iz; - float bc = dir.y * iz; - offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE; - offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor; - offset *= MRD; -*/ -static void compute_offset( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg off = c->reg.offset; - struct brw_reg dir = c->reg.dir; - - brw_math_invert(p, get_element(off, 2), get_element(dir, 2)); - brw_MUL(p, vec2(off), dir, get_element(off, 2)); - - brw_CMP(p, - vec1(brw_null_reg()), - BRW_CONDITIONAL_GE, - brw_abs(get_element(off, 0)), - brw_abs(get_element(off, 1))); - - brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1))); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor)); - brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units)); -} - - -static void merge_edgeflags( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *is_poly; - struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0); - - brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); - brw_CMP(p, - vec1(brw_null_reg()), - BRW_CONDITIONAL_EQ, - tmp0, - brw_imm_ud(_3DPRIM_POLYGON)); - - /* Get away with using reg.vertex because we know that this is not - * a _3DPRIM_TRISTRIP_REVERSE: - */ - is_poly = brw_IF(p, BRW_EXECUTE_1); - { - brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); - brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8)); - brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); - brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9)); - brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } - brw_ENDIF(p, is_poly); -} - - - -static void apply_one_offset( struct brw_clip_compile *c, - struct brw_indirect vert ) -{ - struct brw_compile *p = &c->func; - struct brw_reg pos = deref_4f(vert, c->offset[VERT_RESULT_HPOS]); - struct brw_reg z = get_element(pos, 2); - - brw_ADD(p, z, z, vec1(c->reg.offset)); -} - - - -/*********************************************************************** - * Output clipped polygon as an unfilled primitive: - */ -static void emit_lines(struct brw_clip_compile *c, - boolean do_offset) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *loop; - struct brw_instruction *draw_edge; - struct brw_indirect v0 = brw_indirect(0, 0); - struct brw_indirect v1 = brw_indirect(1, 0); - struct brw_indirect v0ptr = brw_indirect(2, 0); - struct brw_indirect v1ptr = brw_indirect(3, 0); - - /* Need a seperate loop for offset: - */ - if (do_offset) { - brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); - brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); - - loop = brw_DO(p, BRW_EXECUTE_1); - { - brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); - brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); - - apply_one_offset(c, v0); - - brw_set_conditionalmod(p, BRW_CONDITIONAL_G); - brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); - } - brw_WHILE(p, loop); - } - - /* v1ptr = &inlist[nr_verts] - * *v1ptr = v0 - */ - brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); - brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); - brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); - brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); - brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0)); - - loop = brw_DO(p, BRW_EXECUTE_1); - { - brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); - brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2)); - brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); - - /* draw edge if edgeflag != 0 */ - brw_CMP(p, - vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, - deref_1f(v0, c->offset[VERT_RESULT_EDGE]), - brw_imm_f(0)); - draw_edge = brw_IF(p, BRW_EXECUTE_1); - { - brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); - brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); - } - brw_ENDIF(p, draw_edge); - - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); - } - brw_WHILE(p, loop); -} - - - -static void emit_points(struct brw_clip_compile *c, - boolean do_offset ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *loop; - struct brw_instruction *draw_point; - - struct brw_indirect v0 = brw_indirect(0, 0); - struct brw_indirect v0ptr = brw_indirect(2, 0); - - brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); - brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); - - loop = brw_DO(p, BRW_EXECUTE_1); - { - brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); - brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); - - /* draw if edgeflag != 0 - */ - brw_CMP(p, - vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, - deref_1f(v0, c->offset[VERT_RESULT_EDGE]), - brw_imm_f(0)); - draw_point = brw_IF(p, BRW_EXECUTE_1); - { - if (do_offset) - apply_one_offset(c, v0); - - brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END); - } - brw_ENDIF(p, draw_point); - - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); - } - brw_WHILE(p, loop); -} - - - - - - - -static void emit_primitives( struct brw_clip_compile *c, - unsigned mode, - boolean do_offset ) -{ - switch (mode) { - case CLIP_FILL: - brw_clip_tri_emit_polygon(c); - break; - - case CLIP_LINE: - emit_lines(c, do_offset); - break; - - case CLIP_POINT: - emit_points(c, do_offset); - break; - - case CLIP_CULL: - assert(0); - break; - } -} - - - -static void emit_unfilled_primitives( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *ccw; - - /* Direction culling has already been done. - */ - if (c->key.fill_ccw != c->key.fill_cw && - c->key.fill_ccw != CLIP_CULL && - c->key.fill_cw != CLIP_CULL) - { - brw_CMP(p, - vec1(brw_null_reg()), - BRW_CONDITIONAL_GE, - get_element(c->reg.dir, 2), - brw_imm_f(0)); - - ccw = brw_IF(p, BRW_EXECUTE_1); - { - emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); - } - ccw = brw_ELSE(p, ccw); - { - emit_primitives(c, c->key.fill_cw, c->key.offset_cw); - } - brw_ENDIF(p, ccw); - } - else if (c->key.fill_cw != CLIP_CULL) { - emit_primitives(c, c->key.fill_cw, c->key.offset_cw); - } - else if (c->key.fill_ccw != CLIP_CULL) { - emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); - } -} - - - - -static void check_nr_verts( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *if_insn; - - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3)); - if_insn = brw_IF(p, BRW_EXECUTE_1); - { - brw_clip_kill_thread(c); - } - brw_ENDIF(p, if_insn); -} - - -void brw_emit_unfilled_clip( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *do_clip; - - - c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) || - (c->key.fill_ccw != c->key.fill_cw) || - c->key.fill_ccw == CLIP_CULL || - c->key.fill_cw == CLIP_CULL || - c->key.copy_bfc_cw || - c->key.copy_bfc_ccw); - - brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); - brw_clip_tri_init_vertices(c); - - assert(c->offset[VERT_RESULT_EDGE]); - - if (c->key.fill_ccw == CLIP_CULL && - c->key.fill_cw == CLIP_CULL) { - brw_clip_kill_thread(c); - return; - } - - merge_edgeflags(c); - - /* Need to use the inlist indirection here: - */ - if (c->need_direction) - compute_tri_direction(c); - - if (c->key.fill_ccw == CLIP_CULL || - c->key.fill_cw == CLIP_CULL) - cull_direction(c); - - if (c->key.offset_ccw || - c->key.offset_cw) - compute_offset(c); - - if (c->key.copy_bfc_ccw || - c->key.copy_bfc_cw) - copy_bfc(c); - - /* Need to do this whether we clip or not: - */ - if (c->key.do_flat_shading) - brw_clip_tri_flat_shade(c); - - brw_clip_init_clipmask(c); - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); - do_clip = brw_IF(p, BRW_EXECUTE_1); - { - brw_clip_init_planes(c); - brw_clip_tri(c); - check_nr_verts(c); - } - brw_ENDIF(p, do_clip); - - emit_unfilled_primitives(c); - brw_clip_kill_thread(c); -} - - - diff --git a/src/gallium/drivers/i965simple/brw_clip_util.c b/src/gallium/drivers/i965simple/brw_clip_util.c deleted file mode 100644 index 6d58ceafff..0000000000 --- a/src/gallium/drivers/i965simple/brw_clip_util.c +++ /dev/null @@ -1,351 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_util.h" -#include "brw_clip.h" - - - - - -static struct brw_reg get_tmp( struct brw_clip_compile *c ) -{ - struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); - - if (++c->last_tmp > c->prog_data.total_grf) - c->prog_data.total_grf = c->last_tmp; - - return tmp; -} - -static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp ) -{ - if (tmp.nr == c->last_tmp-1) - c->last_tmp--; -} - - -static struct brw_reg make_plane_ud(unsigned x, unsigned y, unsigned z, unsigned w) -{ - return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x); -} - - -void brw_clip_init_planes( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - - if (!c->key.nr_userclip) { - brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0, 0, 0xff, 1)); - brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0, 0, 1, 1)); - brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff, 0, 1)); - brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0, 1, 0, 1)); - brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff, 0, 0, 1)); - brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1, 0, 0, 1)); - } -} - - - -#define W 3 - -/* Project 'pos' to screen space (or back again), overwrite with results: - */ -static void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ) -{ - struct brw_compile *p = &c->func; - - /* calc rhw - */ - brw_math_invert(p, get_element(pos, W), get_element(pos, W)); - - /* value.xyz *= value.rhw - */ - brw_set_access_mode(p, BRW_ALIGN_16); - brw_MUL(p, brw_writemask(pos, TGSI_WRITEMASK_XYZ), pos, brw_swizzle1(pos, W)); - brw_set_access_mode(p, BRW_ALIGN_1); -} - - -static void brw_clip_project_vertex( struct brw_clip_compile *c, - struct brw_indirect vert_addr ) -{ -#if 0 - struct brw_compile *p = &c->func; - struct brw_reg tmp = get_tmp(c); - - /* Fixup position. Extract from the original vertex and re-project - * to screen space: - */ - brw_MOV(p, tmp, deref_4f(vert_addr, c->offset[VERT_RESULT_HPOS])); - brw_clip_project_position(c, tmp); - brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp); - - release_tmp(c, tmp); -#else - #warning "disabled" -#endif -} - - - - -/* Interpolate between two vertices and put the result into a0.0. - * Increment a0.0 accordingly. - */ -void brw_clip_interp_vertex( struct brw_clip_compile *c, - struct brw_indirect dest_ptr, - struct brw_indirect v0_ptr, /* from */ - struct brw_indirect v1_ptr, /* to */ - struct brw_reg t0, - boolean force_edgeflag) -{ -#if 0 - struct brw_compile *p = &c->func; - struct brw_reg tmp = get_tmp(c); - unsigned i; - - /* Just copy the vertex header: - */ - brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1); - - /* Iterate over each attribute (could be done in pairs?) - */ - for (i = 0; i < c->nr_attrs; i++) { - unsigned delta = i*16 + 32; - - if (delta == c->offset[VERT_RESULT_EDGE]) { - if (force_edgeflag) - brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1)); - else - brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta)); - } - else { - /* Interpolate: - * - * New = attr0 + t*attr1 - t*attr0 - */ - brw_MUL(p, - vec4(brw_null_reg()), - deref_4f(v1_ptr, delta), - t0); - - brw_MAC(p, - tmp, - negate(deref_4f(v0_ptr, delta)), - t0); - - brw_ADD(p, - deref_4f(dest_ptr, delta), - deref_4f(v0_ptr, delta), - tmp); - } - } - - if (i & 1) { - unsigned delta = i*16 + 32; - brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); - } - - release_tmp(c, tmp); - - /* Recreate the projected (NDC) coordinate in the new vertex - * header: - */ - brw_clip_project_vertex(c, dest_ptr ); -#else - #warning "disabled" -#endif -} - - - - -#define MAX_MRF 16 - -void brw_clip_emit_vue(struct brw_clip_compile *c, - struct brw_indirect vert, - boolean allocate, - boolean eot, - unsigned header) -{ - struct brw_compile *p = &c->func; - unsigned start = c->last_mrf; - - assert(!(allocate && eot)); - - /* Cycle through mrf regs - probably futile as we have to wait for - * the allocation response anyway. Also, the order this function - * is invoked doesn't correspond to the order the instructions will - * be executed, so it won't have any effect in many cases. - */ -#if 0 - if (start + c->nr_regs + 1 >= MAX_MRF) - start = 0; - - c->last_mrf = start + c->nr_regs + 1; -#endif - - /* Copy the vertex from vertn into m1..mN+1: - */ - brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs); - - /* Overwrite PrimType and PrimStart in the message header, for - * each vertex in turn: - */ - brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); - - - /* Send each vertex as a seperate write to the urb. This - * is different to the concept in brw_sf_emit.c, where - * subsequent writes are used to build up a single urb - * entry. Each of these writes instantiates a seperate - * urb entry - (I think... what about 'allocate'?) - */ - brw_urb_WRITE(p, - allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), - start, - c->reg.R0, - allocate, - 1, /* used */ - c->nr_regs + 1, /* msg length */ - allocate ? 1 : 0, /* response_length */ - eot, /* eot */ - 1, /* writes_complete */ - 0, /* urb offset */ - BRW_URB_SWIZZLE_NONE); -} - - - -void brw_clip_kill_thread(struct brw_clip_compile *c) -{ - struct brw_compile *p = &c->func; - - /* Send an empty message to kill the thread and release any - * allocated urb entry: - */ - brw_urb_WRITE(p, - retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), - 0, - c->reg.R0, - 0, /* allocate */ - 0, /* used */ - 0, /* msg len */ - 0, /* response len */ - 1, /* eot */ - 1, /* writes complete */ - 0, - BRW_URB_SWIZZLE_NONE); -} - - - - -struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ) -{ - return brw_address(c->reg.fixed_planes); -} - - -struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ) -{ - if (c->key.nr_userclip) { - return brw_imm_uw(16); - } - else { - return brw_imm_uw(4); - } -} - - -/* If flatshading, distribute color from provoking vertex prior to - * clipping. - */ -void brw_clip_copy_colors( struct brw_clip_compile *c, - unsigned to, unsigned from ) -{ -#if 0 - struct brw_compile *p = &c->func; - - if (c->offset[VERT_RESULT_COL0]) - brw_MOV(p, - byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL0]), - byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL0])); - - if (c->offset[VERT_RESULT_COL1]) - brw_MOV(p, - byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL1]), - byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL1])); - - if (c->offset[VERT_RESULT_BFC0]) - brw_MOV(p, - byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC0]), - byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC0])); - - if (c->offset[VERT_RESULT_BFC1]) - brw_MOV(p, - byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC1]), - byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC1])); -#else - #warning "disabled" -#endif -} - - - -void brw_clip_init_clipmask( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg incoming = get_element_ud(c->reg.R0, 2); - - /* Shift so that lowest outcode bit is rightmost: - */ - brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26)); - - if (c->key.nr_userclip) { - struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD); - - /* Rearrange userclip outcodes so that they come directly after - * the fixed plane bits. - */ - brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14)); - brw_SHR(p, tmp, tmp, brw_imm_ud(8)); - brw_OR(p, c->reg.planemask, c->reg.planemask, tmp); - - release_tmp(c, tmp); - } -} - diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c deleted file mode 100644 index 9b33285bc7..0000000000 --- a/src/gallium/drivers/i965simple/brw_context.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_draw.h" -#include "brw_vs.h" -#include "brw_tex_layout.h" -#include "brw_winsys.h" - -#include "pipe/internal/p_winsys_screen.h" -#include "pipe/p_context.h" -#include "util/u_memory.h" -#include "pipe/p_screen.h" - - -#ifndef BRW_DEBUG -int BRW_DEBUG = (0); -#endif - - -static void brw_destroy(struct pipe_context *pipe) -{ - struct brw_context *brw = brw_context(pipe); - - if(brw->winsys->destroy) - brw->winsys->destroy(brw->winsys); - - FREE(brw); -} - - -static void brw_clear(struct pipe_context *pipe, struct pipe_surface *ps, - unsigned clearValue) -{ - int x, y, w, h; - /* FIXME: corny... */ - - x = 0; - y = 0; - w = ps->width; - h = ps->height; - - pipe->surface_fill(pipe, ps, x, y, w, h, clearValue); -} - -static unsigned int -brw_is_texture_referenced( struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - -static unsigned int -brw_is_buffer_referenced( struct pipe_context *pipe, - struct pipe_buffer *buf) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - -struct pipe_context *brw_create(struct pipe_screen *screen, - struct brw_winsys *brw_winsys, - unsigned pci_id) -{ - struct brw_context *brw; - - debug_printf("%s: creating brw_context with pci id 0x%x\n", - __FUNCTION__, pci_id); - - brw = CALLOC_STRUCT(brw_context); - if (brw == NULL) - return NULL; - - brw->winsys = brw_winsys; - brw->pipe.winsys = screen->winsys; - brw->pipe.screen = screen; - - brw->pipe.destroy = brw_destroy; - brw->pipe.clear = brw_clear; - - brw->pipe.is_texture_referenced = brw_is_texture_referenced; - brw->pipe.is_buffer_referenced = brw_is_buffer_referenced; - - brw_init_surface_functions(brw); - brw_init_texture_functions(brw); - brw_init_state_functions(brw); - brw_init_flush_functions(brw); - brw_init_draw_functions( brw ); - - - brw_init_state( brw ); - - brw->pci_id = pci_id; - brw->dirty = ~0; - brw->hardware_dirty = ~0; - - memset(&brw->wm.bind, ~0, sizeof(brw->wm.bind)); - - return &brw->pipe; -} - diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h deleted file mode 100644 index 3079485180..0000000000 --- a/src/gallium/drivers/i965simple/brw_context.h +++ /dev/null @@ -1,684 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRWCONTEXT_INC -#define BRWCONTEXT_INC - - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "tgsi/tgsi_scan.h" - -#include "brw_structs.h" -#include "brw_winsys.h" - - -/* Glossary: - * - * URB - uniform resource buffer. A mid-sized buffer which is - * partitioned between the fixed function units and used for passing - * values (vertices, primitives, constants) between them. - * - * CURBE - constant URB entry. An urb region (entry) used to hold - * constant values which the fixed function units can be instructed to - * preload into the GRF when spawining a thread. - * - * VUE - vertex URB entry. An urb entry holding a vertex and usually - * a vertex header. The header contains control information and - * things like primitive type, Begin/end flags and clip codes. - * - * PUE - primitive URB entry. An urb entry produced by the setup (SF) - * unit holding rasterization and interpolation parameters. - * - * GRF - general register file. One of several register files - * addressable by programmed threads. The inputs (r0, payload, curbe, - * urb) of the thread are preloaded to this area before the thread is - * spawned. The registers are individually 8 dwords wide and suitable - * for general usage. Registers holding thread input values are not - * special and may be overwritten. - * - * MRF - message register file. Threads communicate (and terminate) - * by sending messages. Message parameters are placed in contigous - * MRF registers. All program output is via these messages. URB - * entries are populated by sending a message to the shared URB - * function containing the new data, together with a control word, - * often an unmodified copy of R0. - * - * R0 - GRF register 0. Typically holds control information used when - * sending messages to other threads. - * - * EU or GEN4 EU: The name of the programmable subsystem of the - * i965 hardware. Threads are executed by the EU, the registers - * described above are part of the EU architecture. - * - * Fixed function units: - * - * CS - Command streamer. Notional first unit, little software - * interaction. Holds the URB entries used for constant data, ie the - * CURBEs. - * - * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of - * this unit is responsible for pulling vertices out of vertex buffers - * in vram and injecting them into the processing pipe as VUEs. If - * enabled, it first passes them to a VS thread which is a good place - * for the driver to implement any active vertex shader. - * - * GS - Geometry Shader. This corresponds to a new DX10 concept. If - * enabled, incoming strips etc are passed to GS threads in individual - * line/triangle/point units. The GS thread may perform arbitary - * computation and emit whatever primtives with whatever vertices it - * chooses. This makes GS an excellent place to implement GL's - * unfilled polygon modes, though of course it is capable of much - * more. Additionally, GS is used to translate away primitives not - * handled by latter units, including Quads and Lineloops. - * - * CS - Clipper. Mesa's clipping algorithms are imported to run on - * this unit. The fixed function part performs cliptesting against - * the 6 fixed clipplanes and makes descisions on whether or not the - * incoming primitive needs to be passed to a thread for clipping. - * User clip planes are handled via cooperation with the VS thread. - * - * SF - Strips Fans or Setup: Triangles are prepared for - * rasterization. Interpolation coefficients are calculated. - * Flatshading and two-side lighting usually performed here. - * - * WM - Windower. Interpolation of vertex attributes performed here. - * Fragment shader implemented here. SIMD aspects of EU taken full - * advantage of, as pixels are processed in blocks of 16. - * - * CC - Color Calculator. No EU threads associated with this unit. - * Handles blending and (presumably) depth and stencil testing. - */ - -#define BRW_MAX_CURBE (32*16) - -struct brw_context; -struct brw_winsys; - - -/* Raised when we receive new state across the pipe interface: - */ -#define BRW_NEW_VIEWPORT 0x1 -#define BRW_NEW_RASTERIZER 0x2 -#define BRW_NEW_FS 0x4 -#define BRW_NEW_BLEND 0x8 -#define BRW_NEW_CLIP 0x10 -#define BRW_NEW_SCISSOR 0x20 -#define BRW_NEW_STIPPLE 0x40 -#define BRW_NEW_FRAMEBUFFER 0x80 -#define BRW_NEW_ALPHA_TEST 0x100 -#define BRW_NEW_DEPTH_STENCIL 0x200 -#define BRW_NEW_SAMPLER 0x400 -#define BRW_NEW_TEXTURE 0x800 -#define BRW_NEW_CONSTANTS 0x1000 -#define BRW_NEW_VBO 0x2000 -#define BRW_NEW_VS 0x4000 - -/* Raised for other internal events: - */ -#define BRW_NEW_URB_FENCE 0x10000 -#define BRW_NEW_PSP 0x20000 -#define BRW_NEW_CURBE_OFFSETS 0x40000 -#define BRW_NEW_REDUCED_PRIMITIVE 0x80000 -#define BRW_NEW_PRIMITIVE 0x100000 -#define BRW_NEW_SCENE 0x200000 -#define BRW_NEW_SF_LINKAGE 0x400000 - -extern int BRW_DEBUG; - -#define DEBUG_TEXTURE 0x1 -#define DEBUG_STATE 0x2 -#define DEBUG_IOCTL 0x4 -#define DEBUG_PRIMS 0x8 -#define DEBUG_VERTS 0x10 -#define DEBUG_FALLBACKS 0x20 -#define DEBUG_VERBOSE 0x40 -#define DEBUG_DRI 0x80 -#define DEBUG_DMA 0x100 -#define DEBUG_SANITY 0x200 -#define DEBUG_SYNC 0x400 -#define DEBUG_SLEEP 0x800 -#define DEBUG_PIXEL 0x1000 -#define DEBUG_STATS 0x2000 -#define DEBUG_TILE 0x4000 -#define DEBUG_SINGLE_THREAD 0x8000 -#define DEBUG_WM 0x10000 -#define DEBUG_URB 0x20000 -#define DEBUG_VS 0x40000 -#define DEBUG_BATCH 0x80000 -#define DEBUG_BUFMGR 0x100000 -#define DEBUG_BLIT 0x200000 -#define DEBUG_REGION 0x400000 -#define DEBUG_MIPTREE 0x800000 - -#define DBG(...) do { \ - if (BRW_DEBUG & FILE_DEBUG_FLAG) \ - debug_printf(__VA_ARGS__); \ -} while(0) - -#define PRINT(...) do { \ - debug_printf(__VA_ARGS__); \ -} while(0) - -struct brw_state_flags { - unsigned cache; - unsigned brw; -}; - - -struct brw_vertex_program { - struct pipe_shader_state program; - struct tgsi_shader_info info; - int id; -}; - - -struct brw_fragment_program { - struct pipe_shader_state program; - struct tgsi_shader_info info; - - boolean UsesDepth; /* XXX add this to tgsi_shader_info? */ - int id; -}; - - -struct pipe_setup_linkage { - struct { - unsigned vp_output:5; - unsigned interp_mode:4; - unsigned bf_vp_output:5; - } fp_input[PIPE_MAX_SHADER_INPUTS]; - - unsigned fp_input_count:5; - unsigned max_vp_output:5; -}; - - - -struct brw_texture { - struct pipe_texture base; - - /* Derived from the above: - */ - unsigned stride; - unsigned depth_pitch; /* per-image on i945? */ - unsigned total_nblocksy; - - unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; - - /* Explicitly store the offset of each image for each cube face or - * depth value. Pretty much have to accept that hardware formats - * are going to be so diverse that there is no unified way to - * compute the offsets of depth/cube images within a mipmap level, - * so have to store them as a lookup table: - */ - unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ - - /* Includes image offset tables: - */ - unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; - - /* The data is held here: - */ - struct pipe_buffer *buffer; -}; - -/* Data about a particular attempt to compile a program. Note that - * there can be many of these, each in a different GL state - * corresponding to a different brw_wm_prog_key struct, with different - * compiled programs: - */ -/* Data about a particular attempt to compile a program. Note that - * there can be many of these, each in a different GL state - * corresponding to a different brw_wm_prog_key struct, with different - * compiled programs: - */ - -struct brw_wm_prog_data { - unsigned curb_read_length; - unsigned urb_read_length; - - unsigned first_curbe_grf; - unsigned total_grf; - unsigned total_scratch; - - /* Internally generated constants for the CURBE. These are loaded - * ahead of the data from the constant buffer. - */ - const float internal_const[8]; - unsigned nr_internal_consts; - unsigned max_const; - - boolean error; -}; - -struct brw_sf_prog_data { - unsigned urb_read_length; - unsigned total_grf; - - /* Each vertex may have upto 12 attributes, 4 components each, - * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11 - * rows. - * - * Actually we use 4 for each, so call it 12 rows. - */ - unsigned urb_entry_size; -}; - -struct brw_clip_prog_data { - unsigned curb_read_length; /* user planes? */ - unsigned clip_mode; - unsigned urb_read_length; - unsigned total_grf; -}; - -struct brw_gs_prog_data { - unsigned urb_read_length; - unsigned total_grf; -}; - -struct brw_vs_prog_data { - unsigned curb_read_length; - unsigned urb_read_length; - unsigned total_grf; - unsigned outputs_written; - - unsigned inputs_read; - - unsigned max_const; - - float imm_buf[PIPE_MAX_CONSTANT][4]; - unsigned num_imm; - unsigned num_consts; - - /* Used for calculating urb partitions: - */ - unsigned urb_entry_size; -}; - - -#define BRW_MAX_TEX_UNIT 8 -#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 - -/* Create a fixed sized struct for caching binding tables: - */ -struct brw_surface_binding_table { - unsigned surf_ss_offset[BRW_WM_MAX_SURF]; -}; - - -struct brw_cache; - -struct brw_mem_pool { - struct pipe_buffer *buffer; - - unsigned size; - unsigned offset; /* offset of first free byte */ - - struct brw_context *brw; -}; - -struct brw_cache_item { - unsigned hash; - unsigned key_size; /* for variable-sized keys */ - const void *key; - - unsigned offset; /* offset within pool's buffer */ - unsigned data_size; - - struct brw_cache_item *next; -}; - - - -struct brw_cache { - unsigned id; - - const char *name; - - struct brw_context *brw; - struct brw_mem_pool *pool; - - struct brw_cache_item **items; - unsigned size, n_items; - - unsigned key_size; /* for fixed-size keys */ - unsigned aux_size; - - unsigned last_addr; /* offset of active item */ -}; - - - - -/* Considered adding a member to this struct to document which flags - * an update might raise so that ordering of the state atoms can be - * checked or derived at runtime. Dropped the idea in favor of having - * a debug mode where the state is monitored for flags which are - * raised that have already been tested against. - */ -struct brw_tracked_state { - struct brw_state_flags dirty; - void (*update)( struct brw_context *brw ); -}; - - -/* Flags for brw->state.cache. - */ -#define CACHE_NEW_CC_VP (1< 32. Wouldn't life - * be easier if C allowed arrays of packed elements? - */ -#define ATTRIB_BIT_DWORDS ((PIPE_MAX_ATTRIBS+31)/32) - - - - -struct brw_vertex_info { - unsigned varying; /* varying:1[PIPE_MAX_ATTRIBS] */ - unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_MAX_ATTRIBS] */ -}; - - - - - -struct brw_context -{ - struct pipe_context pipe; - struct brw_winsys *winsys; - - unsigned primitive; - unsigned reduced_primitive; - - boolean emit_state_always; - - struct { - struct brw_state_flags dirty; - } state; - - - struct { - const struct pipe_blend_state *Blend; - const struct pipe_depth_stencil_alpha_state *DepthStencil; - const struct pipe_poly_stipple *PolygonStipple; - const struct pipe_rasterizer_state *Raster; - const struct pipe_sampler_state *Samplers[PIPE_MAX_SAMPLERS]; - const struct brw_vertex_program *VertexProgram; - const struct brw_fragment_program *FragmentProgram; - - struct pipe_clip_state Clip; - struct pipe_blend_color BlendColor; - struct pipe_scissor_state Scissor; - struct pipe_viewport_state Viewport; - struct pipe_framebuffer_state FrameBuffer; - - const struct pipe_constant_buffer *Constants[2]; - const struct brw_texture *Texture[PIPE_MAX_SAMPLERS]; - } attribs; - - unsigned num_samplers; - unsigned num_textures; - - struct brw_mem_pool pool[BRW_MAX_POOL]; - struct brw_cache cache[BRW_MAX_CACHE]; - struct brw_cached_batch_item *cached_batch_items; - - struct { - - /* Arrays with buffer objects to copy non-bufferobj arrays into - * for upload: - */ - const struct pipe_vertex_buffer *vbo_array[PIPE_MAX_ATTRIBS]; - - struct brw_vertex_element_state inputs[PIPE_MAX_ATTRIBS]; - -#define BRW_NR_UPLOAD_BUFS 17 -#define BRW_UPLOAD_INIT_SIZE (128*1024) - - /* Summary of size and varying of active arrays, so we can check - * for changes to this state: - */ - struct brw_vertex_info info; - } vb; - - - unsigned hardware_dirty; - unsigned dirty; - unsigned pci_id; - /* BRW_NEW_URB_ALLOCATIONS: - */ - struct { - unsigned vsize; /* vertex size plus header in urb registers */ - unsigned csize; /* constant buffer size in urb registers */ - unsigned sfsize; /* setup data size in urb registers */ - - boolean constrained; - - unsigned nr_vs_entries; - unsigned nr_gs_entries; - unsigned nr_clip_entries; - unsigned nr_sf_entries; - unsigned nr_cs_entries; - -/* unsigned vs_size; */ -/* unsigned gs_size; */ -/* unsigned clip_size; */ -/* unsigned sf_size; */ -/* unsigned cs_size; */ - - unsigned vs_start; - unsigned gs_start; - unsigned clip_start; - unsigned sf_start; - unsigned cs_start; - } urb; - - - /* BRW_NEW_CURBE_OFFSETS: - */ - struct { - unsigned wm_start; - unsigned wm_size; - unsigned clip_start; - unsigned clip_size; - unsigned vs_start; - unsigned vs_size; - unsigned total_size; - - unsigned gs_offset; - - float *last_buf; - unsigned last_bufsz; - } curbe; - - struct { - struct brw_vs_prog_data *prog_data; - - unsigned prog_gs_offset; - unsigned state_gs_offset; - } vs; - - struct { - struct brw_gs_prog_data *prog_data; - - boolean prog_active; - unsigned prog_gs_offset; - unsigned state_gs_offset; - } gs; - - struct { - struct brw_clip_prog_data *prog_data; - - unsigned prog_gs_offset; - unsigned vp_gs_offset; - unsigned state_gs_offset; - } clip; - - - struct { - struct brw_sf_prog_data *prog_data; - - struct pipe_setup_linkage linkage; - - unsigned prog_gs_offset; - unsigned vp_gs_offset; - unsigned state_gs_offset; - } sf; - - struct { - struct brw_wm_prog_data *prog_data; - -// struct brw_wm_compiler *compile_data; - - - /** - * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER - * cache - */ - struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; - - unsigned render_surf; - unsigned nr_surfaces; - - unsigned max_threads; - struct pipe_buffer *scratch_buffer; - unsigned scratch_buffer_size; - - unsigned sampler_count; - unsigned sampler_gs_offset; - - struct brw_surface_binding_table bind; - unsigned bind_ss_offset; - - unsigned prog_gs_offset; - unsigned state_gs_offset; - } wm; - - - struct { - unsigned vp_gs_offset; - unsigned state_gs_offset; - } cc; - - - /* Used to give every program string a unique id - */ - unsigned program_id; -}; - - -#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) - - -/*====================================================================== - * brw_vtbl.c - */ -void brw_do_flush( struct brw_context *brw, - unsigned flags ); - - -/*====================================================================== - * brw_state.c - */ -void brw_validate_state(struct brw_context *brw); -void brw_init_state(struct brw_context *brw); -void brw_destroy_state(struct brw_context *brw); - - -/*====================================================================== - * brw_tex.c - */ -void brwUpdateTextureState( struct brw_context *brw ); - - -/* brw_urb.c - */ -void brw_upload_urb_fence(struct brw_context *brw); - -void brw_upload_constant_buffer_state(struct brw_context *brw); - -void brw_init_surface_functions(struct brw_context *brw); -void brw_init_state_functions(struct brw_context *brw); -void brw_init_flush_functions(struct brw_context *brw); -void brw_init_string_functions(struct brw_context *brw); - -/*====================================================================== - * Inline conversion functions. These are better-typed than the - * macros used previously: - */ -static inline struct brw_context * -brw_context( struct pipe_context *ctx ) -{ - return (struct brw_context *)ctx; -} - -#endif - diff --git a/src/gallium/drivers/i965simple/brw_curbe.c b/src/gallium/drivers/i965simple/brw_curbe.c deleted file mode 100644 index 904cde8e30..0000000000 --- a/src/gallium/drivers/i965simple/brw_curbe.c +++ /dev/null @@ -1,369 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "brw_batch.h" -#include "brw_util.h" -#include "brw_wm.h" -#include "pipe/p_state.h" -#include "pipe/internal/p_winsys_screen.h" -#include "util/u_math.h" -#include "util/u_memory.h" - -#define FILE_DEBUG_FLAG DEBUG_FALLBACKS - -/* Partition the CURBE between the various users of constant values: - */ -static void calculate_curbe_offsets( struct brw_context *brw ) -{ - /* CACHE_NEW_WM_PROG */ - unsigned nr_fp_regs = align(brw->wm.prog_data->max_const, 16); - - /* BRW_NEW_VERTEX_PROGRAM */ - unsigned nr_vp_regs = align(brw->vs.prog_data->max_const, 16); - unsigned nr_clip_regs = 0; - unsigned total_regs; - -#if 0 - /* BRW_NEW_CLIP ? */ - if (brw->attribs.Transform->ClipPlanesEnabled) { - unsigned nr_planes = 6 + brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled); - nr_clip_regs = align(nr_planes * 4, 16); - } -#endif - - - total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs; - - /* This can happen - what to do? Probably rather than falling - * back, the best thing to do is emit programs which code the - * constants as immediate values. Could do this either as a static - * cap on WM and VS, or adaptively. - * - * Unfortunately, this is currently dependent on the results of the - * program generation process (in the case of wm), so this would - * introduce the need to re-generate programs in the event of a - * curbe allocation failure. - */ - /* Max size is 32 - just large enough to - * hold the 128 parameters allowed by - * the fragment and vertex program - * api's. It's not clear what happens - * when both VP and FP want to use 128 - * parameters, though. - */ - assert(total_regs <= 32); - - /* Lazy resize: - */ - if (nr_fp_regs > brw->curbe.wm_size || - nr_vp_regs > brw->curbe.vs_size || - nr_clip_regs != brw->curbe.clip_size || - (total_regs < brw->curbe.total_size / 4 && - brw->curbe.total_size > 16)) { - - unsigned reg = 0; - - /* Calculate a new layout: - */ - reg = 0; - brw->curbe.wm_start = reg; - brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs; - brw->curbe.clip_start = reg; - brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs; - brw->curbe.vs_start = reg; - brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs; - brw->curbe.total_size = reg; - -#if 0 - if (0) - DBG("curbe wm %d+%d clip %d+%d vs %d+%d\n", - brw->curbe.wm_start, - brw->curbe.wm_size, - brw->curbe.clip_start, - brw->curbe.clip_size, - brw->curbe.vs_start, - brw->curbe.vs_size ); -#endif - - brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS; - } -} - - -const struct brw_tracked_state brw_curbe_offsets = { - .dirty = { - .brw = (BRW_NEW_CLIP | - BRW_NEW_VS), - .cache = CACHE_NEW_WM_PROG - }, - .update = calculate_curbe_offsets -}; - - - -/* Define the number of curbes within CS's urb allocation. Multiple - * urb entries -> multiple curbes. These will be used by - * fixed-function hardware in a double-buffering scheme to avoid a - * pipeline stall each time the contents of the curbe is changed. - */ -void brw_upload_constant_buffer_state(struct brw_context *brw) -{ - struct brw_constant_buffer_state cbs; - memset(&cbs, 0, sizeof(cbs)); - - /* It appears that this is the state packet for the CS unit, ie. the - * urb entries detailed here are housed in the CS range from the - * URB_FENCE command. - */ - cbs.header.opcode = CMD_CONST_BUFFER_STATE; - cbs.header.length = sizeof(cbs)/4 - 2; - - /* BRW_NEW_URB_FENCE */ - cbs.bits0.nr_urb_entries = brw->urb.nr_cs_entries; - cbs.bits0.urb_entry_size = brw->urb.csize - 1; - - assert(brw->urb.nr_cs_entries); - BRW_CACHED_BATCH_STRUCT(brw, &cbs); -} - - -static float fixed_plane[6][4] = { - { 0, 0, -1, 1 }, - { 0, 0, 1, 1 }, - { 0, -1, 0, 1 }, - { 0, 1, 0, 1 }, - {-1, 0, 0, 1 }, - { 1, 0, 0, 1 } -}; - -/* Upload a new set of constants. Too much variability to go into the - * cache mechanism, but maybe would benefit from a comparison against - * the current uploaded set of constants. - */ -static void upload_constant_buffer(struct brw_context *brw) -{ - struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL]; - unsigned sz = brw->curbe.total_size; - unsigned bufsz = sz * sizeof(float); - float *buf; - unsigned i; - - - if (sz == 0) { - struct brw_constant_buffer cb; - cb.header.opcode = CMD_CONST_BUFFER; - cb.header.length = sizeof(cb)/4 - 2; - cb.header.valid = 0; - cb.bits0.buffer_length = 0; - cb.bits0.buffer_address = 0; - BRW_BATCH_STRUCT(brw, &cb); - - if (brw->curbe.last_buf) { - free(brw->curbe.last_buf); - brw->curbe.last_buf = NULL; - brw->curbe.last_bufsz = 0; - } - - return; - } - - buf = (float *)malloc(bufsz); - - memset(buf, 0, bufsz); - - if (brw->curbe.wm_size) { - unsigned offset = brw->curbe.wm_start * 16; - - /* First the constant buffer constants: - */ - - /* Then any internally generated constants: - */ - for (i = 0; i < brw->wm.prog_data->nr_internal_consts; i++) - buf[offset + i] = brw->wm.prog_data->internal_const[i]; - - assert(brw->wm.prog_data->max_const == - brw->wm.prog_data->nr_internal_consts); - } - - - /* The clipplanes are actually delivered to both CLIP and VS units. - * VS uses them to calculate the outcode bitmasks. - */ - if (brw->curbe.clip_size) { - unsigned offset = brw->curbe.clip_start * 16; - unsigned j; - - /* If any planes are going this way, send them all this way: - */ - for (i = 0; i < 6; i++) { - buf[offset + i * 4 + 0] = fixed_plane[i][0]; - buf[offset + i * 4 + 1] = fixed_plane[i][1]; - buf[offset + i * 4 + 2] = fixed_plane[i][2]; - buf[offset + i * 4 + 3] = fixed_plane[i][3]; - } - - /* Clip planes: BRW_NEW_CLIP: - */ - for (j = 0; j < brw->attribs.Clip.nr; j++) { - buf[offset + i * 4 + 0] = brw->attribs.Clip.ucp[j][0]; - buf[offset + i * 4 + 1] = brw->attribs.Clip.ucp[j][1]; - buf[offset + i * 4 + 2] = brw->attribs.Clip.ucp[j][2]; - buf[offset + i * 4 + 3] = brw->attribs.Clip.ucp[j][3]; - i++; - } - } - - - if (brw->curbe.vs_size) { - unsigned offset = brw->curbe.vs_start * 16; - /*unsigned nr = vp->max_const;*/ - const struct pipe_constant_buffer *cbuffer = brw->attribs.Constants[0]; - struct pipe_winsys *ws = brw->pipe.winsys; - /* FIXME: buffer size is num_consts + num_immediates */ - if (brw->vs.prog_data->num_consts) { - /* map the vertex constant buffer and copy to curbe: */ - void *data = ws->buffer_map(ws, cbuffer->buffer, 0); - /* FIXME: this is wrong. the cbuffer->buffer->size currently - * represents size of consts + immediates. so if we'll - * have both we'll copy over the end of the buffer - * with the subsequent memcpy */ - memcpy(&buf[offset], data, cbuffer->buffer->size); - ws->buffer_unmap(ws, cbuffer->buffer); - offset += cbuffer->buffer->size; - } - /*immediates*/ - if (brw->vs.prog_data->num_imm) { - memcpy(&buf[offset], brw->vs.prog_data->imm_buf, - brw->vs.prog_data->num_imm * 4 * sizeof(float)); - } - } - - if (1) { - for (i = 0; i < sz; i+=4) - debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, - buf[i+0], buf[i+1], buf[i+2], buf[i+3]); - - debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n", - brw->curbe.last_buf, buf, - bufsz, brw->curbe.last_bufsz, - brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1); - } - - if (brw->curbe.last_buf && - bufsz == brw->curbe.last_bufsz && - memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { - free(buf); -/* return; */ - } - else { - if (brw->curbe.last_buf) - free(brw->curbe.last_buf); - brw->curbe.last_buf = buf; - brw->curbe.last_bufsz = bufsz; - - - if (!brw_pool_alloc(pool, - bufsz, - 1 << 6, - &brw->curbe.gs_offset)) { - debug_printf("out of GS memory for curbe\n"); - assert(0); - return; - } - - - /* Copy data to the buffer: - */ - brw->winsys->buffer_subdata_typed(brw->winsys, - pool->buffer, - brw->curbe.gs_offset, - bufsz, - buf, - BRW_CONSTANT_BUFFER ); - } - - /* TODO: only emit the constant_buffer packet when necessary, ie: - - contents have changed - - offset has changed - - hw requirements due to other packets emitted. - */ - { - struct brw_constant_buffer cb; - - memset(&cb, 0, sizeof(cb)); - - cb.header.opcode = CMD_CONST_BUFFER; - cb.header.length = sizeof(cb)/4 - 2; - cb.header.valid = 1; - cb.bits0.buffer_length = sz - 1; - cb.bits0.buffer_address = brw->curbe.gs_offset >> 6; - - /* Because this provokes an action (ie copy the constants into the - * URB), it shouldn't be shortcircuited if identical to the - * previous time - because eg. the urb destination may have - * changed, or the urb contents different to last time. - * - * Note that the data referred to is actually copied internally, - * not just used in place according to passed pointer. - * - * It appears that the CS unit takes care of using each available - * URB entry (Const URB Entry == CURBE) in turn, and issuing - * flushes as necessary when doublebuffering of CURBEs isn't - * possible. - */ - BRW_BATCH_STRUCT(brw, &cb); - } -} - -/* This tracked state is unique in that the state it monitors varies - * dynamically depending on the parameters tracked by the fragment and - * vertex programs. This is the template used as a starting point, - * each context will maintain a copy of this internally and update as - * required. - */ -const struct brw_tracked_state brw_constant_buffer = { - .dirty = { - .brw = (BRW_NEW_CLIP | - BRW_NEW_CONSTANTS | - BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ - BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */ - BRW_NEW_CURBE_OFFSETS), - .cache = (CACHE_NEW_WM_PROG) - }, - .update = upload_constant_buffer -}; - diff --git a/src/gallium/drivers/i965simple/brw_defines.h b/src/gallium/drivers/i965simple/brw_defines.h deleted file mode 100644 index 715d2d2d01..0000000000 --- a/src/gallium/drivers/i965simple/brw_defines.h +++ /dev/null @@ -1,870 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_DEFINES_H -#define BRW_DEFINES_H - -/* - */ -#define MI_NOOP 0x00 -#define MI_USER_INTERRUPT 0x02 -#define MI_WAIT_FOR_EVENT 0x03 -#define MI_FLUSH 0x04 -#define MI_REPORT_HEAD 0x07 -#define MI_ARB_ON_OFF 0x08 -#define MI_BATCH_BUFFER_END 0x0A -#define MI_OVERLAY_FLIP 0x11 -#define MI_LOAD_SCAN_LINES_INCL 0x12 -#define MI_LOAD_SCAN_LINES_EXCL 0x13 -#define MI_DISPLAY_BUFFER_INFO 0x14 -#define MI_SET_CONTEXT 0x18 -#define MI_STORE_DATA_IMM 0x20 -#define MI_STORE_DATA_INDEX 0x21 -#define MI_LOAD_REGISTER_IMM 0x22 -#define MI_STORE_REGISTER_MEM 0x24 -#define MI_BATCH_BUFFER_START 0x31 - -#define MI_SYNCHRONOUS_FLIP 0x0 -#define MI_ASYNCHRONOUS_FLIP 0x1 - -#define MI_BUFFER_SECURE 0x0 -#define MI_BUFFER_NONSECURE 0x1 - -#define MI_ARBITRATE_AT_CHAIN_POINTS 0x0 -#define MI_ARBITRATE_BETWEEN_INSTS 0x1 -#define MI_NO_ARBITRATION 0x3 - -#define MI_CONDITION_CODE_WAIT_DISABLED 0x0 -#define MI_CONDITION_CODE_WAIT_0 0x1 -#define MI_CONDITION_CODE_WAIT_1 0x2 -#define MI_CONDITION_CODE_WAIT_2 0x3 -#define MI_CONDITION_CODE_WAIT_3 0x4 -#define MI_CONDITION_CODE_WAIT_4 0x5 - -#define MI_DISPLAY_PIPE_A 0x0 -#define MI_DISPLAY_PIPE_B 0x1 - -#define MI_DISPLAY_PLANE_A 0x0 -#define MI_DISPLAY_PLANE_B 0x1 -#define MI_DISPLAY_PLANE_C 0x2 - -#define MI_STANDARD_FLIP 0x0 -#define MI_ENQUEUE_FLIP_PERFORM_BASE_FRAME_NUMBER_LOAD 0x1 -#define MI_ENQUEUE_FLIP_TARGET_FRAME_NUMBER_RELATIVE 0x2 -#define MI_ENQUEUE_FLIP_ABSOLUTE_TARGET_FRAME_NUMBER 0x3 - -#define MI_PHYSICAL_ADDRESS 0x0 -#define MI_VIRTUAL_ADDRESS 0x1 - -#define MI_BUFFER_MEMORY_MAIN 0x0 -#define MI_BUFFER_MEMORY_GTT 0x2 -#define MI_BUFFER_MEMORY_PER_PROCESS_GTT 0x3 - -#define MI_FLIP_CONTINUE 0x0 -#define MI_FLIP_ON 0x1 -#define MI_FLIP_OFF 0x2 - -#define MI_UNTRUSTED_REGISTER_SPACE 0x0 -#define MI_TRUSTED_REGISTER_SPACE 0x1 - -/* 3D state: - */ -#define _3DOP_3DSTATE_PIPELINED 0x0 -#define _3DOP_3DSTATE_NONPIPELINED 0x1 -#define _3DOP_3DCONTROL 0x2 -#define _3DOP_3DPRIMITIVE 0x3 - -#define _3DSTATE_PIPELINED_POINTERS 0x00 -#define _3DSTATE_BINDING_TABLE_POINTERS 0x01 -#define _3DSTATE_VERTEX_BUFFERS 0x08 -#define _3DSTATE_VERTEX_ELEMENTS 0x09 -#define _3DSTATE_INDEX_BUFFER 0x0A -#define _3DSTATE_VF_STATISTICS 0x0B -#define _3DSTATE_DRAWING_RECTANGLE 0x00 -#define _3DSTATE_CONSTANT_COLOR 0x01 -#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02 -#define _3DSTATE_CHROMA_KEY 0x04 -#define _3DSTATE_DEPTH_BUFFER 0x05 -#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06 -#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07 -#define _3DSTATE_LINE_STIPPLE 0x08 -#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09 -#define _3DCONTROL 0x00 -#define _3DPRIMITIVE 0x00 - -#define PIPE_CONTROL_NOWRITE 0x00 -#define PIPE_CONTROL_WRITEIMMEDIATE 0x01 -#define PIPE_CONTROL_WRITEDEPTH 0x02 -#define PIPE_CONTROL_WRITETIMESTAMP 0x03 - -#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00 -#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01 - -#define _3DPRIM_POINTLIST 0x01 -#define _3DPRIM_LINELIST 0x02 -#define _3DPRIM_LINESTRIP 0x03 -#define _3DPRIM_TRILIST 0x04 -#define _3DPRIM_TRISTRIP 0x05 -#define _3DPRIM_TRIFAN 0x06 -#define _3DPRIM_QUADLIST 0x07 -#define _3DPRIM_QUADSTRIP 0x08 -#define _3DPRIM_LINELIST_ADJ 0x09 -#define _3DPRIM_LINESTRIP_ADJ 0x0A -#define _3DPRIM_TRILIST_ADJ 0x0B -#define _3DPRIM_TRISTRIP_ADJ 0x0C -#define _3DPRIM_TRISTRIP_REVERSE 0x0D -#define _3DPRIM_POLYGON 0x0E -#define _3DPRIM_RECTLIST 0x0F -#define _3DPRIM_LINELOOP 0x10 -#define _3DPRIM_POINTLIST_BF 0x11 -#define _3DPRIM_LINESTRIP_CONT 0x12 -#define _3DPRIM_LINESTRIP_BF 0x13 -#define _3DPRIM_LINESTRIP_CONT_BF 0x14 -#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 - -#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0 -#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1 - -#define BRW_ANISORATIO_2 0 -#define BRW_ANISORATIO_4 1 -#define BRW_ANISORATIO_6 2 -#define BRW_ANISORATIO_8 3 -#define BRW_ANISORATIO_10 4 -#define BRW_ANISORATIO_12 5 -#define BRW_ANISORATIO_14 6 -#define BRW_ANISORATIO_16 7 - -#define BRW_BLENDFACTOR_ONE 0x1 -#define BRW_BLENDFACTOR_SRC_COLOR 0x2 -#define BRW_BLENDFACTOR_SRC_ALPHA 0x3 -#define BRW_BLENDFACTOR_DST_ALPHA 0x4 -#define BRW_BLENDFACTOR_DST_COLOR 0x5 -#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 -#define BRW_BLENDFACTOR_CONST_COLOR 0x7 -#define BRW_BLENDFACTOR_CONST_ALPHA 0x8 -#define BRW_BLENDFACTOR_SRC1_COLOR 0x9 -#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A -#define BRW_BLENDFACTOR_ZERO 0x11 -#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12 -#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13 -#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14 -#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15 -#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17 -#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18 -#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19 -#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A - -#define BRW_BLENDFUNCTION_ADD 0 -#define BRW_BLENDFUNCTION_SUBTRACT 1 -#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2 -#define BRW_BLENDFUNCTION_MIN 3 -#define BRW_BLENDFUNCTION_MAX 4 - -#define BRW_ALPHATEST_FORMAT_UNORM8 0 -#define BRW_ALPHATEST_FORMAT_FLOAT32 1 - -#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0 -#define BRW_CHROMAKEY_REPLACE_BLACK 1 - -#define BRW_CLIP_API_OGL 0 -#define BRW_CLIP_API_DX 1 - -#define BRW_CLIPMODE_NORMAL 0 -#define BRW_CLIPMODE_CLIP_ALL 1 -#define BRW_CLIPMODE_CLIP_NON_REJECTED 2 -#define BRW_CLIPMODE_REJECT_ALL 3 -#define BRW_CLIPMODE_ACCEPT_ALL 4 - -#define BRW_CLIP_NDCSPACE 0 -#define BRW_CLIP_SCREENSPACE 1 - -#define BRW_COMPAREFUNCTION_ALWAYS 0 -#define BRW_COMPAREFUNCTION_NEVER 1 -#define BRW_COMPAREFUNCTION_LESS 2 -#define BRW_COMPAREFUNCTION_EQUAL 3 -#define BRW_COMPAREFUNCTION_LEQUAL 4 -#define BRW_COMPAREFUNCTION_GREATER 5 -#define BRW_COMPAREFUNCTION_NOTEQUAL 6 -#define BRW_COMPAREFUNCTION_GEQUAL 7 - -#define BRW_COVERAGE_PIXELS_HALF 0 -#define BRW_COVERAGE_PIXELS_1 1 -#define BRW_COVERAGE_PIXELS_2 2 -#define BRW_COVERAGE_PIXELS_4 3 - -#define BRW_CULLMODE_BOTH 0 -#define BRW_CULLMODE_NONE 1 -#define BRW_CULLMODE_FRONT 2 -#define BRW_CULLMODE_BACK 3 - -#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0 -#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 - -#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 -#define BRW_DEPTHFORMAT_D32_FLOAT 1 -#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2 -#define BRW_DEPTHFORMAT_D16_UNORM 5 - -#define BRW_FLOATING_POINT_IEEE_754 0 -#define BRW_FLOATING_POINT_NON_IEEE_754 1 - -#define BRW_FRONTWINDING_CW 0 -#define BRW_FRONTWINDING_CCW 1 - -#define BRW_SPRITE_POINT_ENABLE 16 - -#define BRW_INDEX_BYTE 0 -#define BRW_INDEX_WORD 1 -#define BRW_INDEX_DWORD 2 - -#define BRW_LOGICOPFUNCTION_CLEAR 0 -#define BRW_LOGICOPFUNCTION_NOR 1 -#define BRW_LOGICOPFUNCTION_AND_INVERTED 2 -#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3 -#define BRW_LOGICOPFUNCTION_AND_REVERSE 4 -#define BRW_LOGICOPFUNCTION_INVERT 5 -#define BRW_LOGICOPFUNCTION_XOR 6 -#define BRW_LOGICOPFUNCTION_NAND 7 -#define BRW_LOGICOPFUNCTION_AND 8 -#define BRW_LOGICOPFUNCTION_EQUIV 9 -#define BRW_LOGICOPFUNCTION_NOOP 10 -#define BRW_LOGICOPFUNCTION_OR_INVERTED 11 -#define BRW_LOGICOPFUNCTION_COPY 12 -#define BRW_LOGICOPFUNCTION_OR_REVERSE 13 -#define BRW_LOGICOPFUNCTION_OR 14 -#define BRW_LOGICOPFUNCTION_SET 15 - -#define BRW_MAPFILTER_NEAREST 0x0 -#define BRW_MAPFILTER_LINEAR 0x1 -#define BRW_MAPFILTER_ANISOTROPIC 0x2 - -#define BRW_MIPFILTER_NONE 0 -#define BRW_MIPFILTER_NEAREST 1 -#define BRW_MIPFILTER_LINEAR 3 - -#define BRW_POLYGON_FRONT_FACING 0 -#define BRW_POLYGON_BACK_FACING 1 - -#define BRW_PREFILTER_ALWAYS 0x0 -#define BRW_PREFILTER_NEVER 0x1 -#define BRW_PREFILTER_LESS 0x2 -#define BRW_PREFILTER_EQUAL 0x3 -#define BRW_PREFILTER_LEQUAL 0x4 -#define BRW_PREFILTER_GREATER 0x5 -#define BRW_PREFILTER_NOTEQUAL 0x6 -#define BRW_PREFILTER_GEQUAL 0x7 - -#define BRW_PROVOKING_VERTEX_0 0 -#define BRW_PROVOKING_VERTEX_1 1 -#define BRW_PROVOKING_VERTEX_2 2 - -#define BRW_RASTRULE_UPPER_LEFT 0 -#define BRW_RASTRULE_UPPER_RIGHT 1 -/* These are listed as "Reserved, but not seen as useful" - * in Intel documentation (page 212, "Point Rasterization Rule", - * section 7.4 "SF Pipeline State Summary", of document - * "Intel® 965 Express Chipset Family and Intel® G35 Express - * Chipset Graphics Controller Programmer's Reference Manual, - * Volume 2: 3D/Media", Revision 1.0b as of January 2008, - * available at - * http://intellinuxgraphics.org/documentation.html - * at the time of this writing). - * - * These appear to be supported on at least some - * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT - * is useful when using OpenGL to render to a FBO - * (which has the pixel coordinate Y orientation inverted - * with respect to the normal OpenGL pixel coordinate system). - */ -#define BRW_RASTRULE_LOWER_LEFT 2 -#define BRW_RASTRULE_LOWER_RIGHT 3 - -#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 -#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 -#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2 - -#define BRW_STENCILOP_KEEP 0 -#define BRW_STENCILOP_ZERO 1 -#define BRW_STENCILOP_REPLACE 2 -#define BRW_STENCILOP_INCRSAT 3 -#define BRW_STENCILOP_DECRSAT 4 -#define BRW_STENCILOP_INCR 5 -#define BRW_STENCILOP_DECR 6 -#define BRW_STENCILOP_INVERT 7 - -#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0 -#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1 - -#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 -#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001 -#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002 -#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003 -#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004 -#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005 -#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 -#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 -#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008 -#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040 -#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041 -#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042 -#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043 -#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044 -#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045 -#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046 -#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080 -#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081 -#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082 -#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083 -#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 -#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085 -#define BRW_SURFACEFORMAT_R32G32_SINT 0x086 -#define BRW_SURFACEFORMAT_R32G32_UINT 0x087 -#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 -#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 -#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A -#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B -#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C -#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D -#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E -#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F -#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090 -#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091 -#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092 -#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 -#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094 -#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095 -#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096 -#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 -#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 -#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 -#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 -#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 -#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 -#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 -#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 -#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 -#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA -#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB -#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC -#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD -#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE -#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF -#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0 -#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 -#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 -#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 -#define BRW_SURFACEFORMAT_R32_SINT 0x0D6 -#define BRW_SURFACEFORMAT_R32_UINT 0x0D7 -#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8 -#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 -#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA -#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF -#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0 -#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1 -#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2 -#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3 -#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4 -#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5 -#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 -#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA -#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB -#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC -#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED -#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE -#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0 -#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1 -#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2 -#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 -#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 -#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 -#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6 -#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7 -#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8 -#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9 -#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100 -#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 -#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102 -#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 -#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104 -#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 -#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106 -#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107 -#define BRW_SURFACEFORMAT_R8G8_SINT 0x108 -#define BRW_SURFACEFORMAT_R8G8_UINT 0x109 -#define BRW_SURFACEFORMAT_R16_UNORM 0x10A -#define BRW_SURFACEFORMAT_R16_SNORM 0x10B -#define BRW_SURFACEFORMAT_R16_SINT 0x10C -#define BRW_SURFACEFORMAT_R16_UINT 0x10D -#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E -#define BRW_SURFACEFORMAT_I16_UNORM 0x111 -#define BRW_SURFACEFORMAT_L16_UNORM 0x112 -#define BRW_SURFACEFORMAT_A16_UNORM 0x113 -#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114 -#define BRW_SURFACEFORMAT_I16_FLOAT 0x115 -#define BRW_SURFACEFORMAT_L16_FLOAT 0x116 -#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 -#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 -#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A -#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B -#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C -#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D -#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E -#define BRW_SURFACEFORMAT_R16_USCALED 0x11F -#define BRW_SURFACEFORMAT_R8_UNORM 0x140 -#define BRW_SURFACEFORMAT_R8_SNORM 0x141 -#define BRW_SURFACEFORMAT_R8_SINT 0x142 -#define BRW_SURFACEFORMAT_R8_UINT 0x143 -#define BRW_SURFACEFORMAT_A8_UNORM 0x144 -#define BRW_SURFACEFORMAT_I8_UNORM 0x145 -#define BRW_SURFACEFORMAT_L8_UNORM 0x146 -#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147 -#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148 -#define BRW_SURFACEFORMAT_R8_SSCALED 0x149 -#define BRW_SURFACEFORMAT_R8_USCALED 0x14A -#define BRW_SURFACEFORMAT_R1_UINT 0x181 -#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 -#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 -#define BRW_SURFACEFORMAT_BC1_UNORM 0x186 -#define BRW_SURFACEFORMAT_BC2_UNORM 0x187 -#define BRW_SURFACEFORMAT_BC3_UNORM 0x188 -#define BRW_SURFACEFORMAT_BC4_UNORM 0x189 -#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A -#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B -#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C -#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D -#define BRW_SURFACEFORMAT_MONO8 0x18E -#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F -#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190 -#define BRW_SURFACEFORMAT_DXT1_RGB 0x191 -#define BRW_SURFACEFORMAT_FXT1 0x192 -#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193 -#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194 -#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195 -#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196 -#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 -#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198 -#define BRW_SURFACEFORMAT_BC4_SNORM 0x199 -#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A -#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C -#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D -#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E -#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F - -#define BRW_SURFACERETURNFORMAT_FLOAT32 0 -#define BRW_SURFACERETURNFORMAT_S1 1 - -#define BRW_SURFACE_1D 0 -#define BRW_SURFACE_2D 1 -#define BRW_SURFACE_3D 2 -#define BRW_SURFACE_CUBE 3 -#define BRW_SURFACE_BUFFER 4 -#define BRW_SURFACE_NULL 7 - -#define BRW_TEXCOORDMODE_WRAP 0 -#define BRW_TEXCOORDMODE_MIRROR 1 -#define BRW_TEXCOORDMODE_CLAMP 2 -#define BRW_TEXCOORDMODE_CUBE 3 -#define BRW_TEXCOORDMODE_CLAMP_BORDER 4 -#define BRW_TEXCOORDMODE_MIRROR_ONCE 5 - -#define BRW_THREAD_PRIORITY_NORMAL 0 -#define BRW_THREAD_PRIORITY_HIGH 1 - -#define BRW_TILEWALK_XMAJOR 0 -#define BRW_TILEWALK_YMAJOR 1 - -#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0 -#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1 - -#define BRW_VERTEXBUFFER_ACCESS_VERTEXDATA 0 -#define BRW_VERTEXBUFFER_ACCESS_INSTANCEDATA 1 - -#define BRW_VFCOMPONENT_NOSTORE 0 -#define BRW_VFCOMPONENT_STORE_SRC 1 -#define BRW_VFCOMPONENT_STORE_0 2 -#define BRW_VFCOMPONENT_STORE_1_FLT 3 -#define BRW_VFCOMPONENT_STORE_1_INT 4 -#define BRW_VFCOMPONENT_STORE_VID 5 -#define BRW_VFCOMPONENT_STORE_IID 6 -#define BRW_VFCOMPONENT_STORE_PID 7 - - - -/* Execution Unit (EU) defines - */ - -#define BRW_ALIGN_1 0 -#define BRW_ALIGN_16 1 - -#define BRW_ADDRESS_DIRECT 0 -#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1 - -#define BRW_CHANNEL_X 0 -#define BRW_CHANNEL_Y 1 -#define BRW_CHANNEL_Z 2 -#define BRW_CHANNEL_W 3 - -#define BRW_COMPRESSION_NONE 0 -#define BRW_COMPRESSION_2NDHALF 1 -#define BRW_COMPRESSION_COMPRESSED 2 - -#define BRW_CONDITIONAL_NONE 0 -#define BRW_CONDITIONAL_Z 1 -#define BRW_CONDITIONAL_NZ 2 -#define BRW_CONDITIONAL_EQ 1 /* Z */ -#define BRW_CONDITIONAL_NEQ 2 /* NZ */ -#define BRW_CONDITIONAL_G 3 -#define BRW_CONDITIONAL_GE 4 -#define BRW_CONDITIONAL_L 5 -#define BRW_CONDITIONAL_LE 6 -#define BRW_CONDITIONAL_C 7 -#define BRW_CONDITIONAL_O 8 - -#define BRW_DEBUG_NONE 0 -#define BRW_DEBUG_BREAKPOINT 1 - -#define BRW_DEPENDENCY_NORMAL 0 -#define BRW_DEPENDENCY_NOTCLEARED 1 -#define BRW_DEPENDENCY_NOTCHECKED 2 -#define BRW_DEPENDENCY_DISABLE 3 - -#define BRW_EXECUTE_1 0 -#define BRW_EXECUTE_2 1 -#define BRW_EXECUTE_4 2 -#define BRW_EXECUTE_8 3 -#define BRW_EXECUTE_16 4 -#define BRW_EXECUTE_32 5 - -#define BRW_HORIZONTAL_STRIDE_0 0 -#define BRW_HORIZONTAL_STRIDE_1 1 -#define BRW_HORIZONTAL_STRIDE_2 2 -#define BRW_HORIZONTAL_STRIDE_4 3 - -#define BRW_INSTRUCTION_NORMAL 0 -#define BRW_INSTRUCTION_SATURATE 1 - -#define BRW_MASK_ENABLE 0 -#define BRW_MASK_DISABLE 1 - -#define BRW_OPCODE_MOV 1 -#define BRW_OPCODE_SEL 2 -#define BRW_OPCODE_NOT 4 -#define BRW_OPCODE_AND 5 -#define BRW_OPCODE_OR 6 -#define BRW_OPCODE_XOR 7 -#define BRW_OPCODE_SHR 8 -#define BRW_OPCODE_SHL 9 -#define BRW_OPCODE_RSR 10 -#define BRW_OPCODE_RSL 11 -#define BRW_OPCODE_ASR 12 -#define BRW_OPCODE_CMP 16 -#define BRW_OPCODE_JMPI 32 -#define BRW_OPCODE_IF 34 -#define BRW_OPCODE_IFF 35 -#define BRW_OPCODE_ELSE 36 -#define BRW_OPCODE_ENDIF 37 -#define BRW_OPCODE_DO 38 -#define BRW_OPCODE_WHILE 39 -#define BRW_OPCODE_BREAK 40 -#define BRW_OPCODE_CONTINUE 41 -#define BRW_OPCODE_HALT 42 -#define BRW_OPCODE_MSAVE 44 -#define BRW_OPCODE_MRESTORE 45 -#define BRW_OPCODE_PUSH 46 -#define BRW_OPCODE_POP 47 -#define BRW_OPCODE_WAIT 48 -#define BRW_OPCODE_SEND 49 -#define BRW_OPCODE_ADD 64 -#define BRW_OPCODE_MUL 65 -#define BRW_OPCODE_AVG 66 -#define BRW_OPCODE_FRC 67 -#define BRW_OPCODE_RNDU 68 -#define BRW_OPCODE_RNDD 69 -#define BRW_OPCODE_RNDE 70 -#define BRW_OPCODE_RNDZ 71 -#define BRW_OPCODE_MAC 72 -#define BRW_OPCODE_MACH 73 -#define BRW_OPCODE_LZD 74 -#define BRW_OPCODE_SAD2 80 -#define BRW_OPCODE_SADA2 81 -#define BRW_OPCODE_DP4 84 -#define BRW_OPCODE_DPH 85 -#define BRW_OPCODE_DP3 86 -#define BRW_OPCODE_DP2 87 -#define BRW_OPCODE_DPA2 88 -#define BRW_OPCODE_LINE 89 -#define BRW_OPCODE_NOP 126 - -#define BRW_PREDICATE_NONE 0 -#define BRW_PREDICATE_NORMAL 1 -#define BRW_PREDICATE_ALIGN1_ANYV 2 -#define BRW_PREDICATE_ALIGN1_ALLV 3 -#define BRW_PREDICATE_ALIGN1_ANY2H 4 -#define BRW_PREDICATE_ALIGN1_ALL2H 5 -#define BRW_PREDICATE_ALIGN1_ANY4H 6 -#define BRW_PREDICATE_ALIGN1_ALL4H 7 -#define BRW_PREDICATE_ALIGN1_ANY8H 8 -#define BRW_PREDICATE_ALIGN1_ALL8H 9 -#define BRW_PREDICATE_ALIGN1_ANY16H 10 -#define BRW_PREDICATE_ALIGN1_ALL16H 11 -#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 -#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3 -#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4 -#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5 -#define BRW_PREDICATE_ALIGN16_ANY4H 6 -#define BRW_PREDICATE_ALIGN16_ALL4H 7 - -#define BRW_ARCHITECTURE_REGISTER_FILE 0 -#define BRW_GENERAL_REGISTER_FILE 1 -#define BRW_MESSAGE_REGISTER_FILE 2 -#define BRW_IMMEDIATE_VALUE 3 - -#define BRW_REGISTER_TYPE_UD 0 -#define BRW_REGISTER_TYPE_D 1 -#define BRW_REGISTER_TYPE_UW 2 -#define BRW_REGISTER_TYPE_W 3 -#define BRW_REGISTER_TYPE_UB 4 -#define BRW_REGISTER_TYPE_B 5 -#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */ -#define BRW_REGISTER_TYPE_HF 6 -#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ -#define BRW_REGISTER_TYPE_F 7 - -#define BRW_ARF_NULL 0x00 -#define BRW_ARF_ADDRESS 0x10 -#define BRW_ARF_ACCUMULATOR 0x20 -#define BRW_ARF_FLAG 0x30 -#define BRW_ARF_MASK 0x40 -#define BRW_ARF_MASK_STACK 0x50 -#define BRW_ARF_MASK_STACK_DEPTH 0x60 -#define BRW_ARF_STATE 0x70 -#define BRW_ARF_CONTROL 0x80 -#define BRW_ARF_NOTIFICATION_COUNT 0x90 -#define BRW_ARF_IP 0xA0 - -#define BRW_AMASK 0 -#define BRW_IMASK 1 -#define BRW_LMASK 2 -#define BRW_CMASK 3 - - - -#define BRW_THREAD_NORMAL 0 -#define BRW_THREAD_ATOMIC 1 -#define BRW_THREAD_SWITCH 2 - -#define BRW_VERTICAL_STRIDE_0 0 -#define BRW_VERTICAL_STRIDE_1 1 -#define BRW_VERTICAL_STRIDE_2 2 -#define BRW_VERTICAL_STRIDE_4 3 -#define BRW_VERTICAL_STRIDE_8 4 -#define BRW_VERTICAL_STRIDE_16 5 -#define BRW_VERTICAL_STRIDE_32 6 -#define BRW_VERTICAL_STRIDE_64 7 -#define BRW_VERTICAL_STRIDE_128 8 -#define BRW_VERTICAL_STRIDE_256 9 -#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF - -#define BRW_WIDTH_1 0 -#define BRW_WIDTH_2 1 -#define BRW_WIDTH_4 2 -#define BRW_WIDTH_8 3 -#define BRW_WIDTH_16 4 - -#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0 -#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1 -#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2 -#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3 -#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4 -#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5 -#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6 -#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7 -#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8 -#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9 -#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10 -#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11 - -#define BRW_POLYGON_FACING_FRONT 0 -#define BRW_POLYGON_FACING_BACK 1 - -#define BRW_MESSAGE_TARGET_NULL 0 -#define BRW_MESSAGE_TARGET_MATH 1 -#define BRW_MESSAGE_TARGET_SAMPLER 2 -#define BRW_MESSAGE_TARGET_GATEWAY 3 -#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 -#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 -#define BRW_MESSAGE_TARGET_URB 6 -#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7 - -#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 -#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 -#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3 - -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 -#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 -#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2 -#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 -#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 -#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 - -#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 -#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 -#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 -#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3 -#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4 - -#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 -#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 - -#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 -#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 - -#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 -#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 -#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2 -#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 - -#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 -#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 -#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 - -#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 -#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 -#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 -#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 -#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 - -#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 -#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 -#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2 -#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 -#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 -#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 -#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 - -#define BRW_MATH_FUNCTION_INV 1 -#define BRW_MATH_FUNCTION_LOG 2 -#define BRW_MATH_FUNCTION_EXP 3 -#define BRW_MATH_FUNCTION_SQRT 4 -#define BRW_MATH_FUNCTION_RSQ 5 -#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ -#define BRW_MATH_FUNCTION_COS 7 /* was 8 */ -#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ -#define BRW_MATH_FUNCTION_TAN 9 -#define BRW_MATH_FUNCTION_POW 10 -#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 -#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 -#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13 - -#define BRW_MATH_INTEGER_UNSIGNED 0 -#define BRW_MATH_INTEGER_SIGNED 1 - -#define BRW_MATH_PRECISION_FULL 0 -#define BRW_MATH_PRECISION_PARTIAL 1 - -#define BRW_MATH_SATURATE_NONE 0 -#define BRW_MATH_SATURATE_SATURATE 1 - -#define BRW_MATH_DATA_VECTOR 0 -#define BRW_MATH_DATA_SCALAR 1 - -#define BRW_URB_OPCODE_WRITE 0 - -#define BRW_URB_SWIZZLE_NONE 0 -#define BRW_URB_SWIZZLE_INTERLEAVE 1 -#define BRW_URB_SWIZZLE_TRANSPOSE 2 - -#define BRW_SCRATCH_SPACE_SIZE_1K 0 -#define BRW_SCRATCH_SPACE_SIZE_2K 1 -#define BRW_SCRATCH_SPACE_SIZE_4K 2 -#define BRW_SCRATCH_SPACE_SIZE_8K 3 -#define BRW_SCRATCH_SPACE_SIZE_16K 4 -#define BRW_SCRATCH_SPACE_SIZE_32K 5 -#define BRW_SCRATCH_SPACE_SIZE_64K 6 -#define BRW_SCRATCH_SPACE_SIZE_128K 7 -#define BRW_SCRATCH_SPACE_SIZE_256K 8 -#define BRW_SCRATCH_SPACE_SIZE_512K 9 -#define BRW_SCRATCH_SPACE_SIZE_1M 10 -#define BRW_SCRATCH_SPACE_SIZE_2M 11 - - - - -#define CMD_URB_FENCE 0x6000 -#define CMD_CONST_BUFFER_STATE 0x6001 -#define CMD_CONST_BUFFER 0x6002 - -#define CMD_STATE_BASE_ADDRESS 0x6101 -#define CMD_STATE_INSN_POINTER 0x6102 -#define CMD_PIPELINE_SELECT 0x6104 - -#define CMD_PIPELINED_STATE_POINTERS 0x7800 -#define CMD_BINDING_TABLE_PTRS 0x7801 -#define CMD_VERTEX_BUFFER 0x7808 -#define CMD_VERTEX_ELEMENT 0x7809 -#define CMD_INDEX_BUFFER 0x780a -#define CMD_VF_STATISTICS 0x780b - -#define CMD_DRAW_RECT 0x7900 -#define CMD_BLEND_CONSTANT_COLOR 0x7901 -#define CMD_CHROMA_KEY 0x7904 -#define CMD_DEPTH_BUFFER 0x7905 -#define CMD_POLY_STIPPLE_OFFSET 0x7906 -#define CMD_POLY_STIPPLE_PATTERN 0x7907 -#define CMD_LINE_STIPPLE_PATTERN 0x7908 -#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 - -#define CMD_PIPE_CONTROL 0x7a00 - -#define CMD_3D_PRIM 0x7b00 - -#define CMD_MI_FLUSH 0x0200 - - -/* Various values from the R0 vertex header: - */ -#define R02_PRIM_END 0x1 -#define R02_PRIM_START 0x2 - - - -#endif diff --git a/src/gallium/drivers/i965simple/brw_draw.c b/src/gallium/drivers/i965simple/brw_draw.c deleted file mode 100644 index 49d80cb41c..0000000000 --- a/src/gallium/drivers/i965simple/brw_draw.c +++ /dev/null @@ -1,226 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include - -#include "brw_batch.h" -#include "brw_draw.h" -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_state.h" - -#include "pipe/p_context.h" -#include "pipe/internal/p_winsys_screen.h" -#include "util/u_prim.h" - -static unsigned hw_prim[PIPE_PRIM_POLYGON+1] = { - _3DPRIM_POINTLIST, - _3DPRIM_LINELIST, - _3DPRIM_LINELOOP, - _3DPRIM_LINESTRIP, - _3DPRIM_TRILIST, - _3DPRIM_TRISTRIP, - _3DPRIM_TRIFAN, - _3DPRIM_QUADLIST, - _3DPRIM_QUADSTRIP, - _3DPRIM_POLYGON -}; - - -/* When the primitive changes, set a state bit and re-validate. Not - * the nicest and would rather deal with this by having all the - * programs be immune to the active primitive (ie. cope with all - * possibilities). That may not be realistic however. - */ -static void brw_set_prim(struct brw_context *brw, int prim) -{ - PRINT("PRIM: %d\n", prim); - - /* Slight optimization to avoid the GS program when not needed: - */ - if (prim == PIPE_PRIM_QUAD_STRIP && - brw->attribs.Raster->flatshade && - brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_FILL && - brw->attribs.Raster->fill_ccw == PIPE_POLYGON_MODE_FILL) - prim = PIPE_PRIM_TRIANGLE_STRIP; - - if (prim != brw->primitive) { - brw->primitive = prim; - brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; - - if (u_reduced_prim(prim) != brw->reduced_primitive) { - brw->reduced_primitive = u_reduced_prim(prim); - brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE; - } - - brw_validate_state(brw); - } - -} - - -static unsigned trim(int prim, unsigned length) -{ - if (prim == PIPE_PRIM_QUAD_STRIP) - return length > 3 ? (length - length % 2) : 0; - else if (prim == PIPE_PRIM_QUADS) - return length - length % 4; - else - return length; -} - - - -static boolean brw_emit_prim( struct brw_context *brw, - boolean indexed, - unsigned start, - unsigned count ) - -{ - struct brw_3d_primitive prim_packet; - - if (BRW_DEBUG & DEBUG_PRIMS) - PRINT("PRIM: %d %d %d\n", brw->primitive, start, count); - - prim_packet.header.opcode = CMD_3D_PRIM; - prim_packet.header.length = sizeof(prim_packet)/4 - 2; - prim_packet.header.pad = 0; - prim_packet.header.topology = hw_prim[brw->primitive]; - prim_packet.header.indexed = indexed; - - prim_packet.verts_per_instance = trim(brw->primitive, count); - prim_packet.start_vert_location = start; - prim_packet.instance_count = 1; - prim_packet.start_instance_location = 0; - prim_packet.base_vert_location = 0; - - if (prim_packet.verts_per_instance == 0) - return TRUE; - - return brw_batchbuffer_data( brw->winsys, - &prim_packet, - sizeof(prim_packet) ); -} - - -/* May fail if out of video memory for texture or vbo upload, or on - * fallback conditions. - */ -static boolean brw_try_draw_elements( struct pipe_context *pipe, - struct pipe_buffer *index_buffer, - unsigned index_size, - unsigned mode, - unsigned start, - unsigned count ) -{ - struct brw_context *brw = brw_context(pipe); - - /* Set the first primitive ahead of validate_state: - */ - brw_set_prim(brw, mode); - - /* Upload index, vertex data: - */ - if (index_buffer && - !brw_upload_indices( brw, index_buffer, index_size, start, count )) - return FALSE; - - if (!brw_upload_vertex_buffers(brw)) - return FALSE; - - if (!brw_upload_vertex_elements( brw )) - return FALSE; - - /* XXX: Need to separate validate and upload of state. - */ - if (brw->state.dirty.brw) - brw_validate_state( brw ); - - if (!brw_emit_prim(brw, index_buffer != NULL, - start, count)) - return FALSE; - - return TRUE; -} - - - -static boolean brw_draw_elements( struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, - unsigned start, - unsigned count ) -{ - if (!brw_try_draw_elements( pipe, - indexBuffer, - indexSize, - mode, start, count )) - { - /* flush ? */ - - if (!brw_try_draw_elements( pipe, - indexBuffer, - indexSize, - mode, start, - count )) { - assert(0); - return FALSE; - } - } - - return TRUE; -} - - - -static boolean brw_draw_arrays( struct pipe_context *pipe, - unsigned mode, - unsigned start, - unsigned count ) -{ - if (!brw_try_draw_elements( pipe, NULL, 0, mode, start, count )) { - /* flush ? */ - - if (!brw_try_draw_elements( pipe, NULL, 0, mode, start, count )) { - assert(0); - return FALSE; - } - } - - return TRUE; -} - - - -void brw_init_draw_functions( struct brw_context *brw ) -{ - brw->pipe.draw_arrays = brw_draw_arrays; - brw->pipe.draw_elements = brw_draw_elements; -} - - diff --git a/src/gallium/drivers/i965simple/brw_draw.h b/src/gallium/drivers/i965simple/brw_draw.h deleted file mode 100644 index 62fe0d5d0e..0000000000 --- a/src/gallium/drivers/i965simple/brw_draw.h +++ /dev/null @@ -1,55 +0,0 @@ - /************************************************************************** - * - * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef BRW_DRAW_H -#define BRW_DRAW_H - -#include "pipe/p_context.h" - -struct brw_context; - - - -void brw_init_draw_functions( struct brw_context *brw ); - - -boolean brw_upload_vertices( struct brw_context *brw, - unsigned min_index, - unsigned max_index ); - -boolean brw_upload_indices(struct brw_context *brw, - const struct pipe_buffer *index_buffer, - int ib_size, int start, int count); - -boolean brw_upload_vertex_buffers( struct brw_context *brw ); -boolean brw_upload_vertex_elements( struct brw_context *brw ); - -unsigned brw_translate_surface_format( unsigned id ); - - - -#endif diff --git a/src/gallium/drivers/i965simple/brw_draw_upload.c b/src/gallium/drivers/i965simple/brw_draw_upload.c deleted file mode 100644 index 2d9ca3f2ea..0000000000 --- a/src/gallium/drivers/i965simple/brw_draw_upload.c +++ /dev/null @@ -1,300 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include - -#include "brw_batch.h" -#include "brw_draw.h" -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_state.h" - - -struct brw_array_state { - union header_union header; - - struct { - union { - struct { - unsigned pitch:11; - unsigned pad:15; - unsigned access_type:1; - unsigned vb_index:5; - } bits; - unsigned dword; - } vb0; - - struct pipe_buffer *buffer; - unsigned offset; - - unsigned max_index; - unsigned instance_data_step_rate; - - } vb[BRW_VBP_MAX]; -}; - - - -unsigned brw_translate_surface_format( unsigned id ) -{ - switch (id) { - case PIPE_FORMAT_R64_FLOAT: - return BRW_SURFACEFORMAT_R64_FLOAT; - case PIPE_FORMAT_R64G64_FLOAT: - return BRW_SURFACEFORMAT_R64G64_FLOAT; - case PIPE_FORMAT_R64G64B64_FLOAT: - return BRW_SURFACEFORMAT_R64G64B64_FLOAT; - case PIPE_FORMAT_R64G64B64A64_FLOAT: - return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT; - - case PIPE_FORMAT_R32_FLOAT: - return BRW_SURFACEFORMAT_R32_FLOAT; - case PIPE_FORMAT_R32G32_FLOAT: - return BRW_SURFACEFORMAT_R32G32_FLOAT; - case PIPE_FORMAT_R32G32B32_FLOAT: - return BRW_SURFACEFORMAT_R32G32B32_FLOAT; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; - - case PIPE_FORMAT_R32_UNORM: - return BRW_SURFACEFORMAT_R32_UNORM; - case PIPE_FORMAT_R32G32_UNORM: - return BRW_SURFACEFORMAT_R32G32_UNORM; - case PIPE_FORMAT_R32G32B32_UNORM: - return BRW_SURFACEFORMAT_R32G32B32_UNORM; - case PIPE_FORMAT_R32G32B32A32_UNORM: - return BRW_SURFACEFORMAT_R32G32B32A32_UNORM; - - case PIPE_FORMAT_R32_USCALED: - return BRW_SURFACEFORMAT_R32_USCALED; - case PIPE_FORMAT_R32G32_USCALED: - return BRW_SURFACEFORMAT_R32G32_USCALED; - case PIPE_FORMAT_R32G32B32_USCALED: - return BRW_SURFACEFORMAT_R32G32B32_USCALED; - case PIPE_FORMAT_R32G32B32A32_USCALED: - return BRW_SURFACEFORMAT_R32G32B32A32_USCALED; - - case PIPE_FORMAT_R32_SNORM: - return BRW_SURFACEFORMAT_R32_SNORM; - case PIPE_FORMAT_R32G32_SNORM: - return BRW_SURFACEFORMAT_R32G32_SNORM; - case PIPE_FORMAT_R32G32B32_SNORM: - return BRW_SURFACEFORMAT_R32G32B32_SNORM; - case PIPE_FORMAT_R32G32B32A32_SNORM: - return BRW_SURFACEFORMAT_R32G32B32A32_SNORM; - - case PIPE_FORMAT_R32_SSCALED: - return BRW_SURFACEFORMAT_R32_SSCALED; - case PIPE_FORMAT_R32G32_SSCALED: - return BRW_SURFACEFORMAT_R32G32_SSCALED; - case PIPE_FORMAT_R32G32B32_SSCALED: - return BRW_SURFACEFORMAT_R32G32B32_SSCALED; - case PIPE_FORMAT_R32G32B32A32_SSCALED: - return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED; - - case PIPE_FORMAT_R16_UNORM: - return BRW_SURFACEFORMAT_R16_UNORM; - case PIPE_FORMAT_R16G16_UNORM: - return BRW_SURFACEFORMAT_R16G16_UNORM; - case PIPE_FORMAT_R16G16B16_UNORM: - return BRW_SURFACEFORMAT_R16G16B16_UNORM; - case PIPE_FORMAT_R16G16B16A16_UNORM: - return BRW_SURFACEFORMAT_R16G16B16A16_UNORM; - - case PIPE_FORMAT_R16_USCALED: - return BRW_SURFACEFORMAT_R16_USCALED; - case PIPE_FORMAT_R16G16_USCALED: - return BRW_SURFACEFORMAT_R16G16_USCALED; - case PIPE_FORMAT_R16G16B16_USCALED: - return BRW_SURFACEFORMAT_R16G16B16_USCALED; - case PIPE_FORMAT_R16G16B16A16_USCALED: - return BRW_SURFACEFORMAT_R16G16B16A16_USCALED; - - case PIPE_FORMAT_R16_SNORM: - return BRW_SURFACEFORMAT_R16_SNORM; - case PIPE_FORMAT_R16G16_SNORM: - return BRW_SURFACEFORMAT_R16G16_SNORM; - case PIPE_FORMAT_R16G16B16_SNORM: - return BRW_SURFACEFORMAT_R16G16B16_SNORM; - case PIPE_FORMAT_R16G16B16A16_SNORM: - return BRW_SURFACEFORMAT_R16G16B16A16_SNORM; - - case PIPE_FORMAT_R16_SSCALED: - return BRW_SURFACEFORMAT_R16_SSCALED; - case PIPE_FORMAT_R16G16_SSCALED: - return BRW_SURFACEFORMAT_R16G16_SSCALED; - case PIPE_FORMAT_R16G16B16_SSCALED: - return BRW_SURFACEFORMAT_R16G16B16_SSCALED; - case PIPE_FORMAT_R16G16B16A16_SSCALED: - return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED; - - case PIPE_FORMAT_R8_UNORM: - return BRW_SURFACEFORMAT_R8_UNORM; - case PIPE_FORMAT_R8G8_UNORM: - return BRW_SURFACEFORMAT_R8G8_UNORM; - case PIPE_FORMAT_R8G8B8_UNORM: - return BRW_SURFACEFORMAT_R8G8B8_UNORM; - case PIPE_FORMAT_R8G8B8A8_UNORM: - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; - - case PIPE_FORMAT_R8_USCALED: - return BRW_SURFACEFORMAT_R8_USCALED; - case PIPE_FORMAT_R8G8_USCALED: - return BRW_SURFACEFORMAT_R8G8_USCALED; - case PIPE_FORMAT_R8G8B8_USCALED: - return BRW_SURFACEFORMAT_R8G8B8_USCALED; - case PIPE_FORMAT_R8G8B8A8_USCALED: - return BRW_SURFACEFORMAT_R8G8B8A8_USCALED; - - case PIPE_FORMAT_R8_SNORM: - return BRW_SURFACEFORMAT_R8_SNORM; - case PIPE_FORMAT_R8G8_SNORM: - return BRW_SURFACEFORMAT_R8G8_SNORM; - case PIPE_FORMAT_R8G8B8_SNORM: - return BRW_SURFACEFORMAT_R8G8B8_SNORM; - case PIPE_FORMAT_R8G8B8A8_SNORM: - return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; - - case PIPE_FORMAT_R8_SSCALED: - return BRW_SURFACEFORMAT_R8_SSCALED; - case PIPE_FORMAT_R8G8_SSCALED: - return BRW_SURFACEFORMAT_R8G8_SSCALED; - case PIPE_FORMAT_R8G8B8_SSCALED: - return BRW_SURFACEFORMAT_R8G8B8_SSCALED; - case PIPE_FORMAT_R8G8B8A8_SSCALED: - return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED; - - default: - assert(0); - return 0; - } -} - -static unsigned get_index_type(int type) -{ - switch (type) { - case 1: return BRW_INDEX_BYTE; - case 2: return BRW_INDEX_WORD; - case 4: return BRW_INDEX_DWORD; - default: assert(0); return 0; - } -} - - -boolean brw_upload_vertex_buffers( struct brw_context *brw ) -{ - struct brw_array_state vbp; - unsigned nr_enabled = 0; - unsigned i; - - memset(&vbp, 0, sizeof(vbp)); - - /* This is a hardware limit: - */ - - for (i = 0; i < BRW_VEP_MAX; i++) - { - if (brw->vb.vbo_array[i] == NULL) { - nr_enabled = i; - break; - } - - vbp.vb[i].vb0.bits.pitch = brw->vb.vbo_array[i]->stride; - vbp.vb[i].vb0.bits.pad = 0; - vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA; - vbp.vb[i].vb0.bits.vb_index = i; - vbp.vb[i].offset = brw->vb.vbo_array[i]->buffer_offset; - vbp.vb[i].buffer = brw->vb.vbo_array[i]->buffer; - vbp.vb[i].max_index = brw->vb.vbo_array[i]->max_index; - } - - - vbp.header.bits.length = (1 + nr_enabled * 4) - 2; - vbp.header.bits.opcode = CMD_VERTEX_BUFFER; - - BEGIN_BATCH(vbp.header.bits.length+2, 0); - OUT_BATCH( vbp.header.dword ); - - for (i = 0; i < nr_enabled; i++) { - OUT_BATCH( vbp.vb[i].vb0.dword ); - OUT_RELOC( vbp.vb[i].buffer, PIPE_BUFFER_USAGE_GPU_READ, - vbp.vb[i].offset); - OUT_BATCH( vbp.vb[i].max_index ); - OUT_BATCH( vbp.vb[i].instance_data_step_rate ); - } - ADVANCE_BATCH(); - return TRUE; -} - - - -boolean brw_upload_vertex_elements( struct brw_context *brw ) -{ - struct brw_vertex_element_packet vep; - - unsigned i; - unsigned nr_enabled = brw->attribs.VertexProgram->info.num_inputs; - - memset(&vep, 0, sizeof(vep)); - - for (i = 0; i < nr_enabled; i++) - vep.ve[i] = brw->vb.inputs[i]; - - - vep.header.length = (1 + nr_enabled * sizeof(vep.ve[0])/4) - 2; - vep.header.opcode = CMD_VERTEX_ELEMENT; - brw_cached_batch_struct(brw, &vep, 4 + nr_enabled * sizeof(vep.ve[0])); - - return TRUE; -} - -boolean brw_upload_indices( struct brw_context *brw, - const struct pipe_buffer *index_buffer, - int ib_size, int start, int count) -{ - /* Emit the indexbuffer packet: - */ - { - struct brw_indexbuffer ib; - - memset(&ib, 0, sizeof(ib)); - - ib.header.bits.opcode = CMD_INDEX_BUFFER; - ib.header.bits.length = sizeof(ib)/4 - 2; - ib.header.bits.index_format = get_index_type(ib_size); - ib.header.bits.cut_index_enable = 0; - - - BEGIN_BATCH(4, 0); - OUT_BATCH( ib.header.dword ); - OUT_RELOC( index_buffer, PIPE_BUFFER_USAGE_GPU_READ, start); - OUT_RELOC( index_buffer, PIPE_BUFFER_USAGE_GPU_READ, start + count); - OUT_BATCH( 0 ); - ADVANCE_BATCH(); - } - return TRUE; -} diff --git a/src/gallium/drivers/i965simple/brw_eu.c b/src/gallium/drivers/i965simple/brw_eu.c deleted file mode 100644 index e2002d1821..0000000000 --- a/src/gallium/drivers/i965simple/brw_eu.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_eu.h" - - - -/* How does predicate control work when execution_size != 8? Do I - * need to test/set for 0xffff when execution_size is 16? - */ -void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ) -{ - p->current->header.predicate_control = BRW_PREDICATE_NONE; - - if (value != 0xff) { - if (value != p->flag_value) { - brw_push_insn_state(p); - brw_MOV(p, brw_flag_reg(), brw_imm_uw(value)); - p->flag_value = value; - brw_pop_insn_state(p); - } - - p->current->header.predicate_control = BRW_PREDICATE_NORMAL; - } -} - -void brw_set_predicate_control( struct brw_compile *p, unsigned pc ) -{ - p->current->header.predicate_control = pc; -} - -void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional ) -{ - p->current->header.destreg__conditonalmod = conditional; -} - -void brw_set_access_mode( struct brw_compile *p, unsigned access_mode ) -{ - p->current->header.access_mode = access_mode; -} - -void brw_set_compression_control( struct brw_compile *p, boolean compression_control ) -{ - p->current->header.compression_control = compression_control; -} - -void brw_set_mask_control( struct brw_compile *p, unsigned value ) -{ - p->current->header.mask_control = value; -} - -void brw_set_saturate( struct brw_compile *p, unsigned value ) -{ - p->current->header.saturate = value; -} - -void brw_push_insn_state( struct brw_compile *p ) -{ - assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); - memcpy(p->current+1, p->current, sizeof(struct brw_instruction)); - p->current++; -} - -void brw_pop_insn_state( struct brw_compile *p ) -{ - assert(p->current != p->stack); - p->current--; -} - - -/*********************************************************************** - */ -void brw_init_compile( struct brw_compile *p ) -{ - p->nr_insn = 0; - p->current = p->stack; - memset(p->current, 0, sizeof(p->current[0])); - - /* Some defaults? - */ - brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */ - brw_set_saturate(p, 0); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_set_predicate_control_flag_value(p, 0xff); -} - - -const unsigned *brw_get_program( struct brw_compile *p, - unsigned *sz ) -{ - unsigned i; - - for (i = 0; i < 8; i++) - brw_NOP(p); - - *sz = p->nr_insn * sizeof(struct brw_instruction); - return (const unsigned *)p->store; -} - diff --git a/src/gallium/drivers/i965simple/brw_eu.h b/src/gallium/drivers/i965simple/brw_eu.h deleted file mode 100644 index 23151ae9ed..0000000000 --- a/src/gallium/drivers/i965simple/brw_eu.h +++ /dev/null @@ -1,888 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_EU_H -#define BRW_EU_H - -#include "brw_structs.h" -#include "brw_defines.h" - -#include "pipe/p_compiler.h" -#include "pipe/p_shader_tokens.h" - -#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) -#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) - -#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) -#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) -#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) -#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) - - -#define REG_SIZE (8*4) - - -/* These aren't hardware structs, just something useful for us to pass around: - * - * Align1 operation has a lot of control over input ranges. Used in - * WM programs to implement shaders decomposed into "channel serial" - * or "structure of array" form: - */ -struct brw_reg -{ - unsigned type:4; - unsigned file:2; - unsigned nr:8; - unsigned subnr:5; /* :1 in align16 */ - unsigned negate:1; /* source only */ - unsigned abs:1; /* source only */ - unsigned vstride:4; /* source only */ - unsigned width:3; /* src only, align1 only */ - unsigned hstride:2; /* src only, align1 only */ - unsigned address_mode:1; /* relative addressing, hopefully! */ - unsigned pad0:1; - - union { - struct { - unsigned swizzle:8; /* src only, align16 only */ - unsigned writemask:4; /* dest only, align16 only */ - int indirect_offset:10; /* relative addressing offset */ - unsigned pad1:10; /* two dwords total */ - } bits; - - float f; - int d; - unsigned ud; - } dw1; -}; - - -struct brw_indirect { - unsigned addr_subnr:4; - int addr_offset:10; - unsigned pad:18; -}; - - -#define BRW_EU_MAX_INSN_STACK 5 -#define BRW_EU_MAX_INSN 1200 - -struct brw_compile { - struct brw_instruction store[BRW_EU_MAX_INSN]; - unsigned nr_insn; - - /* Allow clients to push/pop instruction state: - */ - struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; - struct brw_instruction *current; - - unsigned flag_value; - boolean single_program_flow; -}; - - - -static __inline int type_sz( unsigned type ) -{ - switch( type ) { - case BRW_REGISTER_TYPE_UD: - case BRW_REGISTER_TYPE_D: - case BRW_REGISTER_TYPE_F: - return 4; - case BRW_REGISTER_TYPE_HF: - case BRW_REGISTER_TYPE_UW: - case BRW_REGISTER_TYPE_W: - return 2; - case BRW_REGISTER_TYPE_UB: - case BRW_REGISTER_TYPE_B: - return 1; - default: - return 0; - } -} - -static __inline struct brw_reg brw_reg( unsigned file, - unsigned nr, - unsigned subnr, - unsigned type, - unsigned vstride, - unsigned width, - unsigned hstride, - unsigned swizzle, - unsigned writemask) -{ - - struct brw_reg reg; - reg.type = type; - reg.file = file; - reg.nr = nr; - reg.subnr = subnr * type_sz(type); - reg.negate = 0; - reg.abs = 0; - reg.vstride = vstride; - reg.width = width; - reg.hstride = hstride; - reg.address_mode = BRW_ADDRESS_DIRECT; - reg.pad0 = 0; - - /* Could do better: If the reg is r5.3<0;1,0>, we probably want to - * set swizzle and writemask to W, as the lower bits of subnr will - * be lost when converted to align16. This is probably too much to - * keep track of as you'd want it adjusted by suboffset(), etc. - * Perhaps fix up when converting to align16? - */ - reg.dw1.bits.swizzle = swizzle; - reg.dw1.bits.writemask = writemask; - reg.dw1.bits.indirect_offset = 0; - reg.dw1.bits.pad1 = 0; - return reg; -} - -static __inline struct brw_reg brw_vec16_reg( unsigned file, - unsigned nr, - unsigned subnr ) -{ - return brw_reg(file, - nr, - subnr, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_16, - BRW_WIDTH_16, - BRW_HORIZONTAL_STRIDE_1, - BRW_SWIZZLE_XYZW, - TGSI_WRITEMASK_XYZW); -} - -static __inline struct brw_reg brw_vec8_reg( unsigned file, - unsigned nr, - unsigned subnr ) -{ - return brw_reg(file, - nr, - subnr, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_8, - BRW_WIDTH_8, - BRW_HORIZONTAL_STRIDE_1, - BRW_SWIZZLE_XYZW, - TGSI_WRITEMASK_XYZW); -} - - -static __inline struct brw_reg brw_vec4_reg( unsigned file, - unsigned nr, - unsigned subnr ) -{ - return brw_reg(file, - nr, - subnr, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_4, - BRW_WIDTH_4, - BRW_HORIZONTAL_STRIDE_1, - BRW_SWIZZLE_XYZW, - TGSI_WRITEMASK_XYZW); -} - - -static __inline struct brw_reg brw_vec2_reg( unsigned file, - unsigned nr, - unsigned subnr ) -{ - return brw_reg(file, - nr, - subnr, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_2, - BRW_WIDTH_2, - BRW_HORIZONTAL_STRIDE_1, - BRW_SWIZZLE_XYXY, - TGSI_WRITEMASK_XY); -} - -static __inline struct brw_reg brw_vec1_reg( unsigned file, - unsigned nr, - unsigned subnr ) -{ - return brw_reg(file, - nr, - subnr, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_0, - BRW_WIDTH_1, - BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XXXX, - TGSI_WRITEMASK_X); -} - - -static __inline struct brw_reg retype( struct brw_reg reg, - unsigned type ) -{ - reg.type = type; - return reg; -} - -static __inline struct brw_reg suboffset( struct brw_reg reg, - unsigned delta ) -{ - reg.subnr += delta * type_sz(reg.type); - return reg; -} - - -static __inline struct brw_reg offset( struct brw_reg reg, - unsigned delta ) -{ - reg.nr += delta; - return reg; -} - - -static __inline struct brw_reg byte_offset( struct brw_reg reg, - unsigned bytes ) -{ - unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; - reg.nr = newoffset / REG_SIZE; - reg.subnr = newoffset % REG_SIZE; - return reg; -} - - -static __inline struct brw_reg brw_uw16_reg( unsigned file, - unsigned nr, - unsigned subnr ) -{ - return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); -} - -static __inline struct brw_reg brw_uw8_reg( unsigned file, - unsigned nr, - unsigned subnr ) -{ - return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); -} - -static __inline struct brw_reg brw_uw1_reg( unsigned file, - unsigned nr, - unsigned subnr ) -{ - return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); -} - -static __inline struct brw_reg brw_imm_reg( unsigned type ) -{ - return brw_reg( BRW_IMMEDIATE_VALUE, - 0, - 0, - type, - BRW_VERTICAL_STRIDE_0, - BRW_WIDTH_1, - BRW_HORIZONTAL_STRIDE_0, - 0, - 0); -} - -static __inline struct brw_reg brw_imm_f( float f ) -{ - struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); - imm.dw1.f = f; - return imm; -} - -static __inline struct brw_reg brw_imm_d( int d ) -{ - struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); - imm.dw1.d = d; - return imm; -} - -static __inline struct brw_reg brw_imm_ud( unsigned ud ) -{ - struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); - imm.dw1.ud = ud; - return imm; -} - -static __inline struct brw_reg brw_imm_uw( ushort uw ) -{ - struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); - imm.dw1.ud = uw; - return imm; -} - -static __inline struct brw_reg brw_imm_w( short w ) -{ - struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); - imm.dw1.d = w; - return imm; -} - -/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type - * numbers alias with _V and _VF below: - */ - -/* Vector of eight signed half-byte values: - */ -static __inline struct brw_reg brw_imm_v( unsigned v ) -{ - struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); - imm.vstride = BRW_VERTICAL_STRIDE_0; - imm.width = BRW_WIDTH_8; - imm.hstride = BRW_HORIZONTAL_STRIDE_1; - imm.dw1.ud = v; - return imm; -} - -/* Vector of four 8-bit float values: - */ -static __inline struct brw_reg brw_imm_vf( unsigned v ) -{ - struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); - imm.vstride = BRW_VERTICAL_STRIDE_0; - imm.width = BRW_WIDTH_4; - imm.hstride = BRW_HORIZONTAL_STRIDE_1; - imm.dw1.ud = v; - return imm; -} - -#define VF_ZERO 0x0 -#define VF_ONE 0x30 -#define VF_NEG (1<<7) - -static __inline struct brw_reg brw_imm_vf4( unsigned v0, - unsigned v1, - unsigned v2, - unsigned v3) -{ - struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); - imm.vstride = BRW_VERTICAL_STRIDE_0; - imm.width = BRW_WIDTH_4; - imm.hstride = BRW_HORIZONTAL_STRIDE_1; - imm.dw1.ud = ((v0 << 0) | - (v1 << 8) | - (v2 << 16) | - (v3 << 24)); - return imm; -} - - -static __inline struct brw_reg brw_address( struct brw_reg reg ) -{ - return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); -} - - -static __inline struct brw_reg brw_vec1_grf( unsigned nr, - unsigned subnr ) -{ - return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); -} - -static __inline struct brw_reg brw_vec8_grf( unsigned nr, - unsigned subnr ) -{ - return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); -} - -static __inline struct brw_reg brw_vec4_grf( unsigned nr, - unsigned subnr ) -{ - return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); -} - - -static __inline struct brw_reg brw_vec2_grf( unsigned nr, - unsigned subnr ) -{ - return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); -} - -static __inline struct brw_reg brw_uw8_grf( unsigned nr, - unsigned subnr ) -{ - return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); -} - -static __inline struct brw_reg brw_null_reg( void ) -{ - return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_NULL, - 0); -} - -static __inline struct brw_reg brw_address_reg( unsigned subnr ) -{ - return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_ADDRESS, - subnr); -} - -/* If/else instructions break in align16 mode if writemask & swizzle - * aren't xyzw. This goes against the convention for other scalar - * regs: - */ -static __inline struct brw_reg brw_ip_reg( void ) -{ - return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_IP, - 0, - BRW_REGISTER_TYPE_UD, - BRW_VERTICAL_STRIDE_4, /* ? */ - BRW_WIDTH_1, - BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XYZW, /* NOTE! */ - TGSI_WRITEMASK_XYZW); /* NOTE! */ -} - -static __inline struct brw_reg brw_acc_reg( void ) -{ - return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_ACCUMULATOR, - 0); -} - - -static __inline struct brw_reg brw_flag_reg( void ) -{ - return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_FLAG, - 0); -} - - -static __inline struct brw_reg brw_mask_reg( unsigned subnr ) -{ - return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_MASK, - subnr); -} - -static __inline struct brw_reg brw_message_reg( unsigned nr ) -{ - return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, - nr, - 0); -} - - - - -/* This is almost always called with a numeric constant argument, so - * make things easy to evaluate at compile time: - */ -static __inline unsigned cvt( unsigned val ) -{ - switch (val) { - case 0: return 0; - case 1: return 1; - case 2: return 2; - case 4: return 3; - case 8: return 4; - case 16: return 5; - case 32: return 6; - } - return 0; -} - -static __inline struct brw_reg stride( struct brw_reg reg, - unsigned vstride, - unsigned width, - unsigned hstride ) -{ - - reg.vstride = cvt(vstride); - reg.width = cvt(width) - 1; - reg.hstride = cvt(hstride); - return reg; -} - -static __inline struct brw_reg vec16( struct brw_reg reg ) -{ - return stride(reg, 16,16,1); -} - -static __inline struct brw_reg vec8( struct brw_reg reg ) -{ - return stride(reg, 8,8,1); -} - -static __inline struct brw_reg vec4( struct brw_reg reg ) -{ - return stride(reg, 4,4,1); -} - -static __inline struct brw_reg vec2( struct brw_reg reg ) -{ - return stride(reg, 2,2,1); -} - -static __inline struct brw_reg vec1( struct brw_reg reg ) -{ - return stride(reg, 0,1,0); -} - -static __inline struct brw_reg get_element( struct brw_reg reg, unsigned elt ) -{ - return vec1(suboffset(reg, elt)); -} - -static __inline struct brw_reg get_element_ud( struct brw_reg reg, unsigned elt ) -{ - return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt)); -} - - -static __inline struct brw_reg brw_swizzle( struct brw_reg reg, - unsigned x, - unsigned y, - unsigned z, - unsigned w) -{ - reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x), - BRW_GET_SWZ(reg.dw1.bits.swizzle, y), - BRW_GET_SWZ(reg.dw1.bits.swizzle, z), - BRW_GET_SWZ(reg.dw1.bits.swizzle, w)); - return reg; -} - - -static __inline struct brw_reg brw_swizzle1( struct brw_reg reg, - unsigned x ) -{ - return brw_swizzle(reg, x, x, x, x); -} - -static __inline struct brw_reg brw_writemask( struct brw_reg reg, - unsigned mask ) -{ - reg.dw1.bits.writemask &= mask; - return reg; -} - -static __inline struct brw_reg brw_set_writemask( struct brw_reg reg, - unsigned mask ) -{ - reg.dw1.bits.writemask = mask; - return reg; -} - -static __inline struct brw_reg negate( struct brw_reg reg ) -{ - reg.negate ^= 1; - return reg; -} - -static __inline struct brw_reg brw_abs( struct brw_reg reg ) -{ - reg.abs = 1; - return reg; -} - -/*********************************************************************** - */ -static __inline struct brw_reg brw_vec4_indirect( unsigned subnr, - int offset ) -{ - struct brw_reg reg = brw_vec4_grf(0, 0); - reg.subnr = subnr; - reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; - reg.dw1.bits.indirect_offset = offset; - return reg; -} - -static __inline struct brw_reg brw_vec1_indirect( unsigned subnr, - int offset ) -{ - struct brw_reg reg = brw_vec1_grf(0, 0); - reg.subnr = subnr; - reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; - reg.dw1.bits.indirect_offset = offset; - return reg; -} - -static __inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset) -{ - return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); -} - -static __inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset) -{ - return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); -} - -static __inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset) -{ - return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); -} - -static __inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset) -{ - return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); -} - -static __inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset) -{ - return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); -} - -static __inline struct brw_reg get_addr_reg(struct brw_indirect ptr) -{ - return brw_address_reg(ptr.addr_subnr); -} - -static __inline struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, int offset ) -{ - ptr.addr_offset += offset; - return ptr; -} - -static __inline struct brw_indirect brw_indirect( unsigned addr_subnr, int offset ) -{ - struct brw_indirect ptr; - ptr.addr_subnr = addr_subnr; - ptr.addr_offset = offset; - ptr.pad = 0; - return ptr; -} - -static __inline struct brw_instruction *current_insn( struct brw_compile *p) -{ - return &p->store[p->nr_insn]; -} - -void brw_pop_insn_state( struct brw_compile *p ); -void brw_push_insn_state( struct brw_compile *p ); -void brw_set_mask_control( struct brw_compile *p, unsigned value ); -void brw_set_saturate( struct brw_compile *p, unsigned value ); -void brw_set_access_mode( struct brw_compile *p, unsigned access_mode ); -void brw_set_compression_control( struct brw_compile *p, boolean control ); -void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ); -void brw_set_predicate_control( struct brw_compile *p, unsigned pc ); -void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional ); - -void brw_init_compile( struct brw_compile *p ); -const unsigned *brw_get_program( struct brw_compile *p, unsigned *sz ); - - -struct brw_instruction *brw_alu1( struct brw_compile *p, - unsigned opcode, - struct brw_reg dest, - struct brw_reg src ); - -struct brw_instruction *brw_alu2(struct brw_compile *p, - unsigned opcode, - struct brw_reg dest, - struct brw_reg src0, - struct brw_reg src1 ); - -/* Helpers for regular instructions: - */ -#define ALU1(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ - struct brw_reg dest, \ - struct brw_reg src0); - -#define ALU2(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ - struct brw_reg dest, \ - struct brw_reg src0, \ - struct brw_reg src1); - -ALU1(MOV) -ALU2(SEL) -ALU1(NOT) -ALU2(AND) -ALU2(OR) -ALU2(XOR) -ALU2(SHR) -ALU2(SHL) -ALU2(RSR) -ALU2(RSL) -ALU2(ASR) -ALU2(JMPI) -ALU2(ADD) -ALU2(MUL) -ALU1(FRC) -ALU1(RNDD) -ALU2(MAC) -ALU2(MACH) -ALU1(LZD) -ALU2(DP4) -ALU2(DPH) -ALU2(DP3) -ALU2(DP2) -ALU2(LINE) - -#undef ALU1 -#undef ALU2 - - - -/* Helpers for SEND instruction: - */ -void brw_urb_WRITE(struct brw_compile *p, - struct brw_reg dest, - unsigned msg_reg_nr, - struct brw_reg src0, - boolean allocate, - boolean used, - unsigned msg_length, - unsigned response_length, - boolean eot, - boolean writes_complete, - unsigned offset, - unsigned swizzle); - -void brw_fb_WRITE(struct brw_compile *p, - struct brw_reg dest, - unsigned msg_reg_nr, - struct brw_reg src0, - unsigned binding_table_index, - unsigned msg_length, - unsigned response_length, - boolean eot); - -void brw_SAMPLE(struct brw_compile *p, - struct brw_reg dest, - unsigned msg_reg_nr, - struct brw_reg src0, - unsigned binding_table_index, - unsigned sampler, - unsigned writemask, - unsigned msg_type, - unsigned response_length, - unsigned msg_length, - boolean eot); - -void brw_math_16( struct brw_compile *p, - struct brw_reg dest, - unsigned function, - unsigned saturate, - unsigned msg_reg_nr, - struct brw_reg src, - unsigned precision ); - -void brw_math( struct brw_compile *p, - struct brw_reg dest, - unsigned function, - unsigned saturate, - unsigned msg_reg_nr, - struct brw_reg src, - unsigned data_type, - unsigned precision ); - -void brw_dp_READ_16( struct brw_compile *p, - struct brw_reg dest, - unsigned msg_reg_nr, - unsigned scratch_offset ); - -void brw_dp_WRITE_16( struct brw_compile *p, - struct brw_reg src, - unsigned msg_reg_nr, - unsigned scratch_offset ); - -/* If/else/endif. Works by manipulating the execution flags on each - * channel. - */ -struct brw_instruction *brw_IF(struct brw_compile *p, - unsigned execute_size); - -struct brw_instruction *brw_ELSE(struct brw_compile *p, - struct brw_instruction *if_insn); - -void brw_ENDIF(struct brw_compile *p, - struct brw_instruction *if_or_else_insn); - - -/* DO/WHILE loops: - */ -struct brw_instruction *brw_DO(struct brw_compile *p, - unsigned execute_size); - -struct brw_instruction *brw_WHILE(struct brw_compile *p, - struct brw_instruction *patch_insn); - -struct brw_instruction *brw_BREAK(struct brw_compile *p); -struct brw_instruction *brw_CONT(struct brw_compile *p); -/* Forward jumps: - */ -void brw_land_fwd_jump(struct brw_compile *p, - struct brw_instruction *jmp_insn); - - - -void brw_NOP(struct brw_compile *p); - -/* Special case: there is never a destination, execution size will be - * taken from src0: - */ -void brw_CMP(struct brw_compile *p, - struct brw_reg dest, - unsigned conditional, - struct brw_reg src0, - struct brw_reg src1); - -void brw_print_reg( struct brw_reg reg ); - - -/*********************************************************************** - * brw_eu_util.c: - */ - -void brw_copy_indirect_to_indirect(struct brw_compile *p, - struct brw_indirect dst_ptr, - struct brw_indirect src_ptr, - unsigned count); - -void brw_copy_from_indirect(struct brw_compile *p, - struct brw_reg dst, - struct brw_indirect ptr, - unsigned count); - -void brw_copy4(struct brw_compile *p, - struct brw_reg dst, - struct brw_reg src, - unsigned count); - -void brw_copy8(struct brw_compile *p, - struct brw_reg dst, - struct brw_reg src, - unsigned count); - -void brw_math_invert( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg src); - -void brw_set_src1( struct brw_instruction *insn, - struct brw_reg reg ); -#endif diff --git a/src/gallium/drivers/i965simple/brw_eu_debug.c b/src/gallium/drivers/i965simple/brw_eu_debug.c deleted file mode 100644 index 4adfb0c02f..0000000000 --- a/src/gallium/drivers/i965simple/brw_eu_debug.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "util/u_debug.h" - -#include "brw_eu.h" - -void brw_print_reg( struct brw_reg hwreg ) -{ - static const char *file[] = { - "arf", - "grf", - "msg", - "imm" - }; - - static const char *type[] = { - "ud", - "d", - "uw", - "w", - "ub", - "vf", - "hf", - "f" - }; - - debug_printf("%s%s", - hwreg.abs ? "abs/" : "", - hwreg.negate ? "-" : ""); - - if (hwreg.file == BRW_GENERAL_REGISTER_FILE && - hwreg.nr % 2 == 0 && - hwreg.subnr == 0 && - hwreg.vstride == BRW_VERTICAL_STRIDE_8 && - hwreg.width == BRW_WIDTH_8 && - hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && - hwreg.type == BRW_REGISTER_TYPE_F) { - debug_printf("vec%d", hwreg.nr); - } - else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && - hwreg.vstride == BRW_VERTICAL_STRIDE_0 && - hwreg.width == BRW_WIDTH_1 && - hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && - hwreg.type == BRW_REGISTER_TYPE_F) { - debug_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); - } - else { - debug_printf("%s%d.%d<%d;%d,%d>:%s", - file[hwreg.file], - hwreg.nr, - hwreg.subnr / type_sz(hwreg.type), - hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0, - 1< - */ - - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_eu.h" - - - - -/*********************************************************************** - * Internal helper for constructing instructions - */ - -static void guess_execution_size( struct brw_instruction *insn, - struct brw_reg reg ) -{ - if (reg.width == BRW_WIDTH_8 && - insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) - insn->header.execution_size = BRW_EXECUTE_16; - else - insn->header.execution_size = reg.width; /* note - definitions are compatible */ -} - - -static void brw_set_dest( struct brw_instruction *insn, - struct brw_reg dest ) -{ - insn->bits1.da1.dest_reg_file = dest.file; - insn->bits1.da1.dest_reg_type = dest.type; - insn->bits1.da1.dest_address_mode = dest.address_mode; - - if (dest.address_mode == BRW_ADDRESS_DIRECT) { - insn->bits1.da1.dest_reg_nr = dest.nr; - - if (insn->header.access_mode == BRW_ALIGN_1) { - insn->bits1.da1.dest_subreg_nr = dest.subnr; - insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; - } - else { - insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; - insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; - } - } - else { - insn->bits1.ia1.dest_subreg_nr = dest.subnr; - - /* These are different sizes in align1 vs align16: - */ - if (insn->header.access_mode == BRW_ALIGN_1) { - insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; - insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; - } - else { - insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; - } - } - - /* NEW: Set the execution size based on dest.width and - * insn->compression_control: - */ - guess_execution_size(insn, dest); -} - -static void brw_set_src0( struct brw_instruction *insn, - struct brw_reg reg ) -{ - assert(reg.file != BRW_MESSAGE_REGISTER_FILE); - - insn->bits1.da1.src0_reg_file = reg.file; - insn->bits1.da1.src0_reg_type = reg.type; - insn->bits2.da1.src0_abs = reg.abs; - insn->bits2.da1.src0_negate = reg.negate; - insn->bits2.da1.src0_address_mode = reg.address_mode; - - if (reg.file == BRW_IMMEDIATE_VALUE) { - insn->bits3.ud = reg.dw1.ud; - - /* Required to set some fields in src1 as well: - */ - insn->bits1.da1.src1_reg_file = 0; /* arf */ - insn->bits1.da1.src1_reg_type = reg.type; - } - else - { - if (reg.address_mode == BRW_ADDRESS_DIRECT) { - if (insn->header.access_mode == BRW_ALIGN_1) { - insn->bits2.da1.src0_subreg_nr = reg.subnr; - insn->bits2.da1.src0_reg_nr = reg.nr; - } - else { - insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; - insn->bits2.da16.src0_reg_nr = reg.nr; - } - } - else { - insn->bits2.ia1.src0_subreg_nr = reg.subnr; - - if (insn->header.access_mode == BRW_ALIGN_1) { - insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; - } - else { - insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; - } - } - - if (insn->header.access_mode == BRW_ALIGN_1) { - if (reg.width == BRW_WIDTH_1 && - insn->header.execution_size == BRW_EXECUTE_1) { - insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; - insn->bits2.da1.src0_width = BRW_WIDTH_1; - insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; - } - else { - insn->bits2.da1.src0_horiz_stride = reg.hstride; - insn->bits2.da1.src0_width = reg.width; - insn->bits2.da1.src0_vert_stride = reg.vstride; - } - } - else { - insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); - insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); - insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); - insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); - - /* This is an oddity of the fact we're using the same - * descriptions for registers in align_16 as align_1: - */ - if (reg.vstride == BRW_VERTICAL_STRIDE_8) - insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; - else - insn->bits2.da16.src0_vert_stride = reg.vstride; - } - } -} - - -void brw_set_src1( struct brw_instruction *insn, - struct brw_reg reg ) -{ - assert(reg.file != BRW_MESSAGE_REGISTER_FILE); - - insn->bits1.da1.src1_reg_file = reg.file; - insn->bits1.da1.src1_reg_type = reg.type; - insn->bits3.da1.src1_abs = reg.abs; - insn->bits3.da1.src1_negate = reg.negate; - - /* Only src1 can be immediate in two-argument instructions. - */ - assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); - - if (reg.file == BRW_IMMEDIATE_VALUE) { - insn->bits3.ud = reg.dw1.ud; - } - else { - /* This is a hardware restriction, which may or may not be lifted - * in the future: - */ - assert (reg.address_mode == BRW_ADDRESS_DIRECT); - //assert (reg.file == BRW_GENERAL_REGISTER_FILE); - - if (insn->header.access_mode == BRW_ALIGN_1) { - insn->bits3.da1.src1_subreg_nr = reg.subnr; - insn->bits3.da1.src1_reg_nr = reg.nr; - } - else { - insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; - insn->bits3.da16.src1_reg_nr = reg.nr; - } - - if (insn->header.access_mode == BRW_ALIGN_1) { - if (reg.width == BRW_WIDTH_1 && - insn->header.execution_size == BRW_EXECUTE_1) { - insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; - insn->bits3.da1.src1_width = BRW_WIDTH_1; - insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; - } - else { - insn->bits3.da1.src1_horiz_stride = reg.hstride; - insn->bits3.da1.src1_width = reg.width; - insn->bits3.da1.src1_vert_stride = reg.vstride; - } - } - else { - insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); - insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); - insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); - insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); - - /* This is an oddity of the fact we're using the same - * descriptions for registers in align_16 as align_1: - */ - if (reg.vstride == BRW_VERTICAL_STRIDE_8) - insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; - else - insn->bits3.da16.src1_vert_stride = reg.vstride; - } - } -} - - - -static void brw_set_math_message( struct brw_instruction *insn, - unsigned msg_length, - unsigned response_length, - unsigned function, - unsigned integer_type, - boolean low_precision, - boolean saturate, - unsigned dataType ) -{ - brw_set_src1(insn, brw_imm_d(0)); - - insn->bits3.math.function = function; - insn->bits3.math.int_type = integer_type; - insn->bits3.math.precision = low_precision; - insn->bits3.math.saturate = saturate; - insn->bits3.math.data_type = dataType; - insn->bits3.math.response_length = response_length; - insn->bits3.math.msg_length = msg_length; - insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; - insn->bits3.math.end_of_thread = 0; -} - -static void brw_set_urb_message( struct brw_instruction *insn, - boolean allocate, - boolean used, - unsigned msg_length, - unsigned response_length, - boolean end_of_thread, - boolean complete, - unsigned offset, - unsigned swizzle_control ) -{ - brw_set_src1(insn, brw_imm_d(0)); - - insn->bits3.urb.opcode = 0; /* ? */ - insn->bits3.urb.offset = offset; - insn->bits3.urb.swizzle_control = swizzle_control; - insn->bits3.urb.allocate = allocate; - insn->bits3.urb.used = used; /* ? */ - insn->bits3.urb.complete = complete; - insn->bits3.urb.response_length = response_length; - insn->bits3.urb.msg_length = msg_length; - insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; - insn->bits3.urb.end_of_thread = end_of_thread; -} - -static void brw_set_dp_write_message( struct brw_instruction *insn, - unsigned binding_table_index, - unsigned msg_control, - unsigned msg_type, - unsigned msg_length, - unsigned pixel_scoreboard_clear, - unsigned response_length, - unsigned end_of_thread ) -{ - brw_set_src1(insn, brw_imm_d(0)); - - insn->bits3.dp_write.binding_table_index = binding_table_index; - insn->bits3.dp_write.msg_control = msg_control; - insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; - insn->bits3.dp_write.msg_type = msg_type; - insn->bits3.dp_write.send_commit_msg = 0; - insn->bits3.dp_write.response_length = response_length; - insn->bits3.dp_write.msg_length = msg_length; - insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; - insn->bits3.urb.end_of_thread = end_of_thread; -} - -static void brw_set_dp_read_message( struct brw_instruction *insn, - unsigned binding_table_index, - unsigned msg_control, - unsigned msg_type, - unsigned target_cache, - unsigned msg_length, - unsigned response_length, - unsigned end_of_thread ) -{ - brw_set_src1(insn, brw_imm_d(0)); - - insn->bits3.dp_read.binding_table_index = binding_table_index; - insn->bits3.dp_read.msg_control = msg_control; - insn->bits3.dp_read.msg_type = msg_type; - insn->bits3.dp_read.target_cache = target_cache; - insn->bits3.dp_read.response_length = response_length; - insn->bits3.dp_read.msg_length = msg_length; - insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; - insn->bits3.dp_read.end_of_thread = end_of_thread; -} - -static void brw_set_sampler_message( struct brw_instruction *insn, - unsigned binding_table_index, - unsigned sampler, - unsigned msg_type, - unsigned response_length, - unsigned msg_length, - boolean eot) -{ - brw_set_src1(insn, brw_imm_d(0)); - - insn->bits3.sampler.binding_table_index = binding_table_index; - insn->bits3.sampler.sampler = sampler; - insn->bits3.sampler.msg_type = msg_type; - insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; - insn->bits3.sampler.response_length = response_length; - insn->bits3.sampler.msg_length = msg_length; - insn->bits3.sampler.end_of_thread = eot; - insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; -} - - - -static struct brw_instruction *next_insn( struct brw_compile *p, - unsigned opcode ) -{ - struct brw_instruction *insn; - - assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); - - insn = &p->store[p->nr_insn++]; - memcpy(insn, p->current, sizeof(*insn)); - - /* Reset this one-shot flag: - */ - - if (p->current->header.destreg__conditonalmod) { - p->current->header.destreg__conditonalmod = 0; - p->current->header.predicate_control = BRW_PREDICATE_NORMAL; - } - - insn->header.opcode = opcode; - return insn; -} - - -struct brw_instruction *brw_alu1( struct brw_compile *p, - unsigned opcode, - struct brw_reg dest, - struct brw_reg src ) -{ - struct brw_instruction *insn = next_insn(p, opcode); - brw_set_dest(insn, dest); - brw_set_src0(insn, src); - return insn; -} - -struct brw_instruction *brw_alu2(struct brw_compile *p, - unsigned opcode, - struct brw_reg dest, - struct brw_reg src0, - struct brw_reg src1 ) -{ - struct brw_instruction *insn = next_insn(p, opcode); - brw_set_dest(insn, dest); - brw_set_src0(insn, src0); - brw_set_src1(insn, src1); - return insn; -} - - -/*********************************************************************** - * Convenience routines. - */ -#define ALU1(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ - struct brw_reg dest, \ - struct brw_reg src0) \ -{ \ - return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ -} - -#define ALU2(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ - struct brw_reg dest, \ - struct brw_reg src0, \ - struct brw_reg src1) \ -{ \ - return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ -} - - -ALU1(MOV) -ALU2(SEL) -ALU1(NOT) -ALU2(AND) -ALU2(OR) -ALU2(XOR) -ALU2(SHR) -ALU2(SHL) -ALU2(RSR) -ALU2(RSL) -ALU2(ASR) -ALU2(ADD) -ALU2(MUL) -ALU1(FRC) -ALU1(RNDD) -ALU2(MAC) -ALU2(MACH) -ALU1(LZD) -ALU2(DP4) -ALU2(DPH) -ALU2(DP3) -ALU2(DP2) -ALU2(LINE) - - - - -void brw_NOP(struct brw_compile *p) -{ - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); - brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src1(insn, brw_imm_ud(0x0)); -} - - - - - -/*********************************************************************** - * Comparisons, if/else/endif - */ - -struct brw_instruction *brw_JMPI(struct brw_compile *p, - struct brw_reg dest, - struct brw_reg src0, - struct brw_reg src1) -{ - struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); - - p->current->header.predicate_control = BRW_PREDICATE_NONE; - - return insn; -} - -/* EU takes the value from the flag register and pushes it onto some - * sort of a stack (presumably merging with any flag value already on - * the stack). Within an if block, the flags at the top of the stack - * control execution on each channel of the unit, eg. on each of the - * 16 pixel values in our wm programs. - * - * When the matching 'else' instruction is reached (presumably by - * countdown of the instruction count patched in by our ELSE/ENDIF - * functions), the relevent flags are inverted. - * - * When the matching 'endif' instruction is reached, the flags are - * popped off. If the stack is now empty, normal execution resumes. - * - * No attempt is made to deal with stack overflow (14 elements?). - */ -struct brw_instruction *brw_IF(struct brw_compile *p, unsigned execute_size) -{ - struct brw_instruction *insn; - - if (p->single_program_flow) { - assert(execute_size == BRW_EXECUTE_1); - - insn = next_insn(p, BRW_OPCODE_ADD); - insn->header.predicate_inverse = 1; - } else { - insn = next_insn(p, BRW_OPCODE_IF); - } - - /* Override the defaults for this instruction: - */ - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); - - insn->header.execution_size = execute_size; - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.predicate_control = BRW_PREDICATE_NORMAL; - insn->header.mask_control = BRW_MASK_ENABLE; - - p->current->header.predicate_control = BRW_PREDICATE_NONE; - - return insn; -} - - -struct brw_instruction *brw_ELSE(struct brw_compile *p, - struct brw_instruction *if_insn) -{ - struct brw_instruction *insn; - - if (p->single_program_flow) { - insn = next_insn(p, BRW_OPCODE_ADD); - } else { - insn = next_insn(p, BRW_OPCODE_ELSE); - } - - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); - - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = if_insn->header.execution_size; - insn->header.mask_control = BRW_MASK_ENABLE; - - /* Patch the if instruction to point at this instruction. - */ - if (p->single_program_flow) { - assert(if_insn->header.opcode == BRW_OPCODE_ADD); - - if_insn->bits3.ud = (insn - if_insn + 1) * 16; - } else { - assert(if_insn->header.opcode == BRW_OPCODE_IF); - - if_insn->bits3.if_else.jump_count = insn - if_insn; - if_insn->bits3.if_else.pop_count = 1; - if_insn->bits3.if_else.pad0 = 0; - } - - return insn; -} - -void brw_ENDIF(struct brw_compile *p, - struct brw_instruction *patch_insn) -{ - if (p->single_program_flow) { - /* In single program flow mode, there's no need to execute an ENDIF, - * since we don't need to do any stack operations, and if we're executing - * currently, we want to just continue executing. - */ - struct brw_instruction *next = &p->store[p->nr_insn]; - - assert(patch_insn->header.opcode == BRW_OPCODE_ADD); - - patch_insn->bits3.ud = (next - patch_insn) * 16; - } else { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); - - brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src1(insn, brw_imm_d(0x0)); - - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = patch_insn->header.execution_size; - insn->header.mask_control = BRW_MASK_ENABLE; - - assert(patch_insn->bits3.if_else.jump_count == 0); - - /* Patch the if or else instructions to point at this or the next - * instruction respectively. - */ - if (patch_insn->header.opcode == BRW_OPCODE_IF) { - /* Automagically turn it into an IFF: - */ - patch_insn->header.opcode = BRW_OPCODE_IFF; - patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; - patch_insn->bits3.if_else.pop_count = 0; - patch_insn->bits3.if_else.pad0 = 0; - } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { - patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; - patch_insn->bits3.if_else.pop_count = 1; - patch_insn->bits3.if_else.pad0 = 0; - } else { - assert(0); - } - - /* Also pop item off the stack in the endif instruction: - */ - insn->bits3.if_else.jump_count = 0; - insn->bits3.if_else.pop_count = 1; - insn->bits3.if_else.pad0 = 0; - } -} - -struct brw_instruction *brw_BREAK(struct brw_compile *p) -{ - struct brw_instruction *insn; - insn = next_insn(p, BRW_OPCODE_BREAK); - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = BRW_EXECUTE_8; - insn->header.mask_control = BRW_MASK_DISABLE; - insn->bits3.if_else.pad0 = 0; - return insn; -} - -struct brw_instruction *brw_CONT(struct brw_compile *p) -{ - struct brw_instruction *insn; - insn = next_insn(p, BRW_OPCODE_CONTINUE); - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = BRW_EXECUTE_8; - insn->header.mask_control = BRW_MASK_DISABLE; - insn->bits3.if_else.pad0 = 0; - return insn; -} - -/* DO/WHILE loop: - */ -struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size) -{ - if (p->single_program_flow) { - return &p->store[p->nr_insn]; - } else { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); - - /* Override the defaults for this instruction: - */ - brw_set_dest(insn, brw_null_reg()); - brw_set_src0(insn, brw_null_reg()); - brw_set_src1(insn, brw_null_reg()); - - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = execute_size; - insn->header.predicate_control = BRW_PREDICATE_NONE; - /* insn->header.mask_control = BRW_MASK_ENABLE; */ - insn->header.mask_control = BRW_MASK_DISABLE; - - return insn; - } -} - - - -struct brw_instruction *brw_WHILE(struct brw_compile *p, - struct brw_instruction *do_insn) -{ - struct brw_instruction *insn; - - if (p->single_program_flow) - insn = next_insn(p, BRW_OPCODE_ADD); - else - insn = next_insn(p, BRW_OPCODE_WHILE); - - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); - - insn->header.compression_control = BRW_COMPRESSION_NONE; - - if (p->single_program_flow) { - insn->header.execution_size = BRW_EXECUTE_1; - - insn->bits3.d = (do_insn - insn) * 16; - } else { - insn->header.execution_size = do_insn->header.execution_size; - - assert(do_insn->header.opcode == BRW_OPCODE_DO); - insn->bits3.if_else.jump_count = do_insn - insn; - insn->bits3.if_else.pop_count = 0; - insn->bits3.if_else.pad0 = 0; - } - -/* insn->header.mask_control = BRW_MASK_ENABLE; */ - - insn->header.mask_control = BRW_MASK_DISABLE; - p->current->header.predicate_control = BRW_PREDICATE_NONE; - return insn; -} - - -/* FORWARD JUMPS: - */ -void brw_land_fwd_jump(struct brw_compile *p, - struct brw_instruction *jmp_insn) -{ - struct brw_instruction *landing = &p->store[p->nr_insn]; - - assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); - assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE); - - jmp_insn->bits3.ud = (landing - jmp_insn) - 1; -} - - - -/* To integrate with the above, it makes sense that the comparison - * instruction should populate the flag register. It might be simpler - * just to use the flag reg for most WM tasks? - */ -void brw_CMP(struct brw_compile *p, - struct brw_reg dest, - unsigned conditional, - struct brw_reg src0, - struct brw_reg src1) -{ - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); - - insn->header.destreg__conditonalmod = conditional; - brw_set_dest(insn, dest); - brw_set_src0(insn, src0); - brw_set_src1(insn, src1); - -/* guess_execution_size(insn, src0); */ - - - /* Make it so that future instructions will use the computed flag - * value until brw_set_predicate_control_flag_value() is called - * again. - */ - if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && - dest.nr == 0) { - p->current->header.predicate_control = BRW_PREDICATE_NORMAL; - p->flag_value = 0xff; - } -} - - - -/*********************************************************************** - * Helpers for the various SEND message types: - */ - -/* Invert 8 values - */ -void brw_math( struct brw_compile *p, - struct brw_reg dest, - unsigned function, - unsigned saturate, - unsigned msg_reg_nr, - struct brw_reg src, - unsigned data_type, - unsigned precision ) -{ - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; - unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; - - /* Example code doesn't set predicate_control for send - * instructions. - */ - insn->header.predicate_control = 0; - insn->header.destreg__conditonalmod = msg_reg_nr; - - brw_set_dest(insn, dest); - brw_set_src0(insn, src); - brw_set_math_message(insn, - msg_length, response_length, - function, - BRW_MATH_INTEGER_UNSIGNED, - precision, - saturate, - data_type); -} - -/* Use 2 send instructions to invert 16 elements - */ -void brw_math_16( struct brw_compile *p, - struct brw_reg dest, - unsigned function, - unsigned saturate, - unsigned msg_reg_nr, - struct brw_reg src, - unsigned precision ) -{ - struct brw_instruction *insn; - unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; - unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; - - /* First instruction: - */ - brw_push_insn_state(p); - brw_set_predicate_control_flag_value(p, 0xff); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - - insn = next_insn(p, BRW_OPCODE_SEND); - insn->header.destreg__conditonalmod = msg_reg_nr; - - brw_set_dest(insn, dest); - brw_set_src0(insn, src); - brw_set_math_message(insn, - msg_length, response_length, - function, - BRW_MATH_INTEGER_UNSIGNED, - precision, - saturate, - BRW_MATH_DATA_VECTOR); - - /* Second instruction: - */ - insn = next_insn(p, BRW_OPCODE_SEND); - insn->header.compression_control = BRW_COMPRESSION_2NDHALF; - insn->header.destreg__conditonalmod = msg_reg_nr+1; - - brw_set_dest(insn, offset(dest,1)); - brw_set_src0(insn, src); - brw_set_math_message(insn, - msg_length, response_length, - function, - BRW_MATH_INTEGER_UNSIGNED, - precision, - saturate, - BRW_MATH_DATA_VECTOR); - - brw_pop_insn_state(p); -} - - - - -void brw_dp_WRITE_16( struct brw_compile *p, - struct brw_reg src, - unsigned msg_reg_nr, - unsigned scratch_offset ) -{ - { - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - - brw_MOV(p, - retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), - brw_imm_d(scratch_offset)); - - brw_pop_insn_state(p); - } - - { - unsigned msg_length = 3; - struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - - insn->header.predicate_control = 0; /* XXX */ - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; - - brw_set_dest(insn, dest); - brw_set_src0(insn, src); - - brw_set_dp_write_message(insn, - 255, /* bti */ - BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ - BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ - msg_length, - 0, /* pixel scoreboard */ - 0, /* response_length */ - 0); /* eot */ - } - -} - - -void brw_dp_READ_16( struct brw_compile *p, - struct brw_reg dest, - unsigned msg_reg_nr, - unsigned scratch_offset ) -{ - { - brw_push_insn_state(p); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_set_mask_control(p, BRW_MASK_DISABLE); - - brw_MOV(p, - retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), - brw_imm_d(scratch_offset)); - - brw_pop_insn_state(p); - } - - { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - - insn->header.predicate_control = 0; /* XXX */ - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; - - brw_set_dest(insn, dest); /* UW? */ - brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); - - brw_set_dp_read_message(insn, - 255, /* bti */ - 3, /* msg_control */ - BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ - 1, /* target cache */ - 1, /* msg_length */ - 2, /* response_length */ - 0); /* eot */ - } -} - - -void brw_fb_WRITE(struct brw_compile *p, - struct brw_reg dest, - unsigned msg_reg_nr, - struct brw_reg src0, - unsigned binding_table_index, - unsigned msg_length, - unsigned response_length, - boolean eot) -{ - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - - insn->header.predicate_control = 0; /* XXX */ - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; - - brw_set_dest(insn, dest); - brw_set_src0(insn, src0); - brw_set_dp_write_message(insn, - binding_table_index, - BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */ - BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ - msg_length, - 1, /* pixel scoreboard */ - response_length, - eot); -} - - - -void brw_SAMPLE(struct brw_compile *p, - struct brw_reg dest, - unsigned msg_reg_nr, - struct brw_reg src0, - unsigned binding_table_index, - unsigned sampler, - unsigned writemask, - unsigned msg_type, - unsigned response_length, - unsigned msg_length, - boolean eot) -{ - boolean need_stall = 0; - - if(writemask == 0) { -/* debug_printf("%s: zero writemask??\n", __FUNCTION__); */ - return; - } - - /* Hardware doesn't do destination dependency checking on send - * instructions properly. Add a workaround which generates the - * dependency by other means. In practice it seems like this bug - * only crops up for texture samples, and only where registers are - * written by the send and then written again later without being - * read in between. Luckily for us, we already track that - * information and use it to modify the writemask for the - * instruction, so that is a guide for whether a workaround is - * needed. - */ - if (writemask != TGSI_WRITEMASK_XYZW) { - unsigned dst_offset = 0; - unsigned i, newmask = 0, len = 0; - - for (i = 0; i < 4; i++) { - if (writemask & (1<header.predicate_control = 0; /* XXX */ - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; - - brw_set_dest(insn, dest); - brw_set_src0(insn, src0); - brw_set_sampler_message(insn, - binding_table_index, - sampler, - msg_type, - response_length, - msg_length, - eot); - } - - if (need_stall) - { - struct brw_reg reg = vec8(offset(dest, response_length-1)); - - /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } - */ - brw_push_insn_state(p); - brw_set_compression_control(p, FALSE); - brw_MOV(p, reg, reg); - brw_pop_insn_state(p); - } - -} - -/* All these variables are pretty confusing - we might be better off - * using bitmasks and macros for this, in the old style. Or perhaps - * just having the caller instantiate the fields in dword3 itself. - */ -void brw_urb_WRITE(struct brw_compile *p, - struct brw_reg dest, - unsigned msg_reg_nr, - struct brw_reg src0, - boolean allocate, - boolean used, - unsigned msg_length, - unsigned response_length, - boolean eot, - boolean writes_complete, - unsigned offset, - unsigned swizzle) -{ - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - - assert(msg_length < 16); - - brw_set_dest(insn, dest); - brw_set_src0(insn, src0); - brw_set_src1(insn, brw_imm_d(0)); - - insn->header.destreg__conditonalmod = msg_reg_nr; - - brw_set_urb_message(insn, - allocate, - used, - msg_length, - response_length, - eot, - writes_complete, - offset, - swizzle); -} - diff --git a/src/gallium/drivers/i965simple/brw_eu_util.c b/src/gallium/drivers/i965simple/brw_eu_util.c deleted file mode 100644 index 3a65b141f0..0000000000 --- a/src/gallium/drivers/i965simple/brw_eu_util.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_eu.h" - - -void brw_math_invert( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg src) -{ - brw_math( p, - dst, - BRW_MATH_FUNCTION_INV, - BRW_MATH_SATURATE_NONE, - 0, - src, - BRW_MATH_PRECISION_FULL, - BRW_MATH_DATA_VECTOR ); -} - - - -void brw_copy4(struct brw_compile *p, - struct brw_reg dst, - struct brw_reg src, - unsigned count) -{ - unsigned i; - - dst = vec4(dst); - src = vec4(src); - - for (i = 0; i < count; i++) - { - unsigned delta = i*32; - brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); - brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16)); - } -} - - -void brw_copy8(struct brw_compile *p, - struct brw_reg dst, - struct brw_reg src, - unsigned count) -{ - unsigned i; - - dst = vec8(dst); - src = vec8(src); - - for (i = 0; i < count; i++) - { - unsigned delta = i*32; - brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); - } -} - - -void brw_copy_indirect_to_indirect(struct brw_compile *p, - struct brw_indirect dst_ptr, - struct brw_indirect src_ptr, - unsigned count) -{ - unsigned i; - - for (i = 0; i < count; i++) - { - unsigned delta = i*32; - brw_MOV(p, deref_4f(dst_ptr, delta), deref_4f(src_ptr, delta)); - brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16)); - } -} - - -void brw_copy_from_indirect(struct brw_compile *p, - struct brw_reg dst, - struct brw_indirect ptr, - unsigned count) -{ - unsigned i; - - dst = vec4(dst); - - for (i = 0; i < count; i++) - { - unsigned delta = i*32; - brw_MOV(p, byte_offset(dst, delta), deref_4f(ptr, delta)); - brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16)); - } -} - - - - diff --git a/src/gallium/drivers/i965simple/brw_flush.c b/src/gallium/drivers/i965simple/brw_flush.c deleted file mode 100644 index e6001c30d9..0000000000 --- a/src/gallium/drivers/i965simple/brw_flush.c +++ /dev/null @@ -1,73 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Author: - * Keith Whitwell - */ - - -#include "pipe/p_defines.h" -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_batch.h" - - -static void brw_flush( struct pipe_context *pipe, - unsigned flags, - struct pipe_fence_handle **fence ) -{ - struct brw_context *brw = brw_context(pipe); - - /* Do we need to emit an MI_FLUSH command to flush the hardware - * caches? - */ - if (flags & (PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE)) { - struct brw_mi_flush flush; - - memset(&flush, 0, sizeof(flush)); - flush.opcode = CMD_MI_FLUSH; - - if (!(flags & PIPE_FLUSH_RENDER_CACHE)) - flush.flags |= BRW_INHIBIT_FLUSH_RENDER_CACHE; - - if (flags & PIPE_FLUSH_TEXTURE_CACHE) - flush.flags |= BRW_FLUSH_READ_CACHE; - - BRW_BATCH_STRUCT(brw, &flush); - } - - /* If there are no flags, just flush pending commands to hardware: - */ - FLUSH_BATCH( fence ); -} - - - -void brw_init_flush_functions( struct brw_context *brw ) -{ - brw->pipe.flush = brw_flush; -} diff --git a/src/gallium/drivers/i965simple/brw_gs.c b/src/gallium/drivers/i965simple/brw_gs.c deleted file mode 100644 index de60868ccc..0000000000 --- a/src/gallium/drivers/i965simple/brw_gs.c +++ /dev/null @@ -1,196 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_util.h" -#include "brw_state.h" -#include "brw_gs.h" - - - -static void compile_gs_prog( struct brw_context *brw, - struct brw_gs_prog_key *key ) -{ - struct brw_gs_compile c; - const unsigned *program; - unsigned program_size; - - memset(&c, 0, sizeof(c)); - - c.key = *key; - - /* Need to locate the two positions present in vertex + header. - * These are currently hardcoded: - */ - c.nr_attrs = brw_count_bits(c.key.attrs); - c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ - c.nr_bytes = c.nr_regs * REG_SIZE; - - - /* Begin the compilation: - */ - brw_init_compile(&c.func); - - c.func.single_program_flow = 1; - - /* For some reason the thread is spawned with only 4 channels - * unmasked. - */ - brw_set_mask_control(&c.func, BRW_MASK_DISABLE); - - - /* Note that primitives which don't require a GS program have - * already been weeded out by this stage: - */ - switch (key->primitive) { - case PIPE_PRIM_QUADS: - brw_gs_quads( &c ); - break; - case PIPE_PRIM_QUAD_STRIP: - brw_gs_quad_strip( &c ); - break; - case PIPE_PRIM_LINE_LOOP: - brw_gs_lines( &c ); - break; - case PIPE_PRIM_LINES: - if (key->hint_gs_always) - brw_gs_lines( &c ); - else { - return; - } - break; - case PIPE_PRIM_TRIANGLES: - if (key->hint_gs_always) - brw_gs_tris( &c ); - else { - return; - } - break; - case PIPE_PRIM_POINTS: - if (key->hint_gs_always) - brw_gs_points( &c ); - else { - return; - } - break; - default: - return; - } - - /* get the program - */ - program = brw_get_program(&c.func, &program_size); - - /* Upload - */ - brw->gs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_GS_PROG], - &c.key, - sizeof(c.key), - program, - program_size, - &c.prog_data, - &brw->gs.prog_data ); -} - - -static boolean search_cache( struct brw_context *brw, - struct brw_gs_prog_key *key ) -{ - return brw_search_cache(&brw->cache[BRW_GS_PROG], - key, sizeof(*key), - &brw->gs.prog_data, - &brw->gs.prog_gs_offset); -} - - -static const int gs_prim[PIPE_PRIM_POLYGON+1] = { - PIPE_PRIM_POINTS, - PIPE_PRIM_LINES, - PIPE_PRIM_LINE_LOOP, - PIPE_PRIM_LINES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_QUADS, - PIPE_PRIM_QUAD_STRIP, - PIPE_PRIM_TRIANGLES -}; - -static void populate_key( struct brw_context *brw, - struct brw_gs_prog_key *key ) -{ - memset(key, 0, sizeof(*key)); - - /* CACHE_NEW_VS_PROG */ - key->attrs = brw->vs.prog_data->outputs_written; - - /* BRW_NEW_PRIMITIVE */ - key->primitive = gs_prim[brw->primitive]; - - key->hint_gs_always = 0; /* debug code? */ - - key->need_gs_prog = (key->hint_gs_always || - brw->primitive == PIPE_PRIM_QUADS || - brw->primitive == PIPE_PRIM_QUAD_STRIP || - brw->primitive == PIPE_PRIM_LINE_LOOP); -} - -/* Calculate interpolants for triangle and line rasterization. - */ -static void upload_gs_prog( struct brw_context *brw ) -{ - struct brw_gs_prog_key key; - - /* Populate the key: - */ - populate_key(brw, &key); - - if (brw->gs.prog_active != key.need_gs_prog) { - brw->state.dirty.cache |= CACHE_NEW_GS_PROG; - brw->gs.prog_active = key.need_gs_prog; - } - - if (brw->gs.prog_active) { - if (!search_cache(brw, &key)) - compile_gs_prog( brw, &key ); - } -} - - -const struct brw_tracked_state brw_gs_prog = { - .dirty = { - .brw = BRW_NEW_PRIMITIVE, - .cache = CACHE_NEW_VS_PROG - }, - .update = upload_gs_prog -}; diff --git a/src/gallium/drivers/i965simple/brw_gs.h b/src/gallium/drivers/i965simple/brw_gs.h deleted file mode 100644 index f09141c6aa..0000000000 --- a/src/gallium/drivers/i965simple/brw_gs.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_GS_H -#define BRW_GS_H - - -#include "brw_context.h" -#include "brw_eu.h" - -#define MAX_GS_VERTS (4) - -struct brw_gs_prog_key { - unsigned attrs:32; - unsigned primitive:4; - unsigned hint_gs_always:1; - unsigned need_gs_prog:1; - unsigned pad:26; -}; - -struct brw_gs_compile { - struct brw_compile func; - struct brw_gs_prog_key key; - struct brw_gs_prog_data prog_data; - - struct { - struct brw_reg R0; - struct brw_reg vertex[MAX_GS_VERTS]; - } reg; - - /* 3 different ways of expressing vertex size: - */ - unsigned nr_attrs; - unsigned nr_regs; - unsigned nr_bytes; -}; - -#define ATTR_SIZE (4*4) - -void brw_gs_quads( struct brw_gs_compile *c ); -void brw_gs_quad_strip( struct brw_gs_compile *c ); -void brw_gs_tris( struct brw_gs_compile *c ); -void brw_gs_lines( struct brw_gs_compile *c ); -void brw_gs_points( struct brw_gs_compile *c ); - -#endif diff --git a/src/gallium/drivers/i965simple/brw_gs_emit.c b/src/gallium/drivers/i965simple/brw_gs_emit.c deleted file mode 100644 index c3cc90b10f..0000000000 --- a/src/gallium/drivers/i965simple/brw_gs_emit.c +++ /dev/null @@ -1,148 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_util.h" -#include "brw_gs.h" - -static void brw_gs_alloc_regs( struct brw_gs_compile *c, - unsigned nr_verts ) -{ - unsigned i = 0,j; - - /* Register usage is static, precompute here: - */ - c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; - - /* Payload vertices plus space for more generated vertices: - */ - for (j = 0; j < nr_verts; j++) { - c->reg.vertex[j] = brw_vec4_grf(i, 0); - i += c->nr_regs; - } - - c->prog_data.urb_read_length = c->nr_regs; - c->prog_data.total_grf = i; -} - - -static void brw_gs_emit_vue(struct brw_gs_compile *c, - struct brw_reg vert, - boolean last, - unsigned header) -{ - struct brw_compile *p = &c->func; - boolean allocate = !last; - - /* Overwrite PrimType and PrimStart in the message header, for - * each vertex in turn: - */ - brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); - - /* Copy the vertex from vertn into m1..mN+1: - */ - brw_copy8(p, brw_message_reg(1), vert, c->nr_regs); - - /* Send each vertex as a seperate write to the urb. This is - * different to the concept in brw_sf_emit.c, where subsequent - * writes are used to build up a single urb entry. Each of these - * writes instantiates a seperate urb entry, and a new one must be - * allocated each time. - */ - brw_urb_WRITE(p, - allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), - 0, - c->reg.R0, - allocate, - 1, /* used */ - c->nr_regs + 1, /* msg length */ - allocate ? 1 : 0, /* response length */ - allocate ? 0 : 1, /* eot */ - 1, /* writes_complete */ - 0, /* urb offset */ - BRW_URB_SWIZZLE_NONE); -} - - - -void brw_gs_quads( struct brw_gs_compile *c ) -{ - brw_gs_alloc_regs(c, 4); - - /* Use polygons for correct edgeflag behaviour. Note that vertex 3 - * is the PV for quads, but vertex 0 for polygons: - */ - brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); - brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); -} - -void brw_gs_quad_strip( struct brw_gs_compile *c ) -{ - brw_gs_alloc_regs(c, 4); - - brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); - brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); -} - -void brw_gs_tris( struct brw_gs_compile *c ) -{ - brw_gs_alloc_regs(c, 3); - brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START)); - brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2)); - brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END)); -} - -void brw_gs_lines( struct brw_gs_compile *c ) -{ - brw_gs_alloc_regs(c, 2); - brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START)); - brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END)); -} - -void brw_gs_points( struct brw_gs_compile *c ) -{ - brw_gs_alloc_regs(c, 1); - brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END)); -} - - - - - - - - diff --git a/src/gallium/drivers/i965simple/brw_gs_state.c b/src/gallium/drivers/i965simple/brw_gs_state.c deleted file mode 100644 index 5b8016b2e9..0000000000 --- a/src/gallium/drivers/i965simple/brw_gs_state.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "util/u_math.h" -#include "util/u_memory.h" - - - -static void upload_gs_unit( struct brw_context *brw ) -{ - struct brw_gs_unit_state gs; - - memset(&gs, 0, sizeof(gs)); - - /* CACHE_NEW_GS_PROG */ - if (brw->gs.prog_active) { - gs.thread0.grf_reg_count = - align(brw->gs.prog_data->total_grf, 16) / 16 - 1; - gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6; - gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length; - } - else { - gs.thread0.grf_reg_count = 0; - gs.thread0.kernel_start_pointer = 0; - gs.thread3.urb_entry_read_length = 1; - } - - /* BRW_NEW_URB_FENCE */ - gs.thread4.nr_urb_entries = brw->urb.nr_gs_entries; - gs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; - - gs.thread4.max_threads = 0; /* Hardware requirement */ - - if (BRW_DEBUG & DEBUG_STATS) - gs.thread4.stats_enable = 1; - - /* CONSTANT */ - gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - gs.thread1.single_program_flow = 1; - gs.thread3.dispatch_grf_start_reg = 1; - gs.thread3.const_urb_entry_read_offset = 0; - gs.thread3.const_urb_entry_read_length = 0; - gs.thread3.urb_entry_read_offset = 0; - - - brw->gs.state_gs_offset = brw_cache_data( &brw->cache[BRW_GS_UNIT], &gs ); -} - - -const struct brw_tracked_state brw_gs_unit = { - .dirty = { - .brw = (BRW_NEW_CURBE_OFFSETS | - BRW_NEW_URB_FENCE), - .cache = CACHE_NEW_GS_PROG - }, - .update = upload_gs_unit -}; diff --git a/src/gallium/drivers/i965simple/brw_misc_state.c b/src/gallium/drivers/i965simple/brw_misc_state.c deleted file mode 100644 index 99ff4403a5..0000000000 --- a/src/gallium/drivers/i965simple/brw_misc_state.c +++ /dev/null @@ -1,488 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "brw_batch.h" -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" - - - - - -/*********************************************************************** - * Blend color - */ - -static void upload_blend_constant_color(struct brw_context *brw) -{ - struct brw_blend_constant_color bcc; - - memset(&bcc, 0, sizeof(bcc)); - bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR; - bcc.header.length = sizeof(bcc)/4-2; - bcc.blend_constant_color[0] = brw->attribs.BlendColor.color[0]; - bcc.blend_constant_color[1] = brw->attribs.BlendColor.color[1]; - bcc.blend_constant_color[2] = brw->attribs.BlendColor.color[2]; - bcc.blend_constant_color[3] = brw->attribs.BlendColor.color[3]; - - BRW_CACHED_BATCH_STRUCT(brw, &bcc); -} - - -const struct brw_tracked_state brw_blend_constant_color = { - .dirty = { - .brw = BRW_NEW_BLEND, - .cache = 0 - }, - .update = upload_blend_constant_color -}; - - -/*********************************************************************** - * Drawing rectangle - */ -static void upload_drawing_rect(struct brw_context *brw) -{ - struct brw_drawrect bdr; - - memset(&bdr, 0, sizeof(bdr)); - bdr.header.opcode = CMD_DRAW_RECT; - bdr.header.length = sizeof(bdr)/4 - 2; - bdr.xmin = 0; - bdr.ymin = 0; - bdr.xmax = brw->attribs.FrameBuffer.cbufs[0]->width; - bdr.ymax = brw->attribs.FrameBuffer.cbufs[0]->height; - bdr.xorg = 0; - bdr.yorg = 0; - - /* Can't use BRW_CACHED_BATCH_STRUCT because this is also emitted - * uncached in brw_draw.c: - */ - BRW_BATCH_STRUCT(brw, &bdr); -} - -const struct brw_tracked_state brw_drawing_rect = { - .dirty = { - .brw = BRW_NEW_SCENE, - .cache = 0 - }, - .update = upload_drawing_rect -}; - -/** - * Upload the binding table pointers, which point each stage's array of surface - * state pointers. - * - * The binding table pointers are relative to the surface state base address, - * which is the BRW_SS_POOL cache buffer. - */ -static void upload_binding_table_pointers(struct brw_context *brw) -{ - struct brw_binding_table_pointers btp; - memset(&btp, 0, sizeof(btp)); - - btp.header.opcode = CMD_BINDING_TABLE_PTRS; - btp.header.length = sizeof(btp)/4 - 2; - btp.vs = 0; - btp.gs = 0; - btp.clp = 0; - btp.sf = 0; - btp.wm = brw->wm.bind_ss_offset; - - BRW_CACHED_BATCH_STRUCT(brw, &btp); -} - -const struct brw_tracked_state brw_binding_table_pointers = { - .dirty = { - .brw = 0, - .cache = CACHE_NEW_SURF_BIND - }, - .update = upload_binding_table_pointers, -}; - - -/** - * Upload pointers to the per-stage state. - * - * The state pointers in this packet are all relative to the general state - * base address set by CMD_STATE_BASE_ADDRESS, which is the BRW_GS_POOL buffer. - */ -static void upload_pipelined_state_pointers(struct brw_context *brw ) -{ - struct brw_pipelined_state_pointers psp; - memset(&psp, 0, sizeof(psp)); - - psp.header.opcode = CMD_PIPELINED_STATE_POINTERS; - psp.header.length = sizeof(psp)/4 - 2; - - psp.vs.offset = brw->vs.state_gs_offset >> 5; - psp.sf.offset = brw->sf.state_gs_offset >> 5; - psp.wm.offset = brw->wm.state_gs_offset >> 5; - psp.cc.offset = brw->cc.state_gs_offset >> 5; - - /* GS gets turned on and off regularly. Need to re-emit URB fence - * after this occurs. - */ - if (brw->gs.prog_active) { - psp.gs.offset = brw->gs.state_gs_offset >> 5; - psp.gs.enable = 1; - } - - if (0) { - psp.clp.offset = brw->clip.state_gs_offset >> 5; - psp.clp.enable = 1; - } - - - if (BRW_CACHED_BATCH_STRUCT(brw, &psp)) - brw->state.dirty.brw |= BRW_NEW_PSP; -} - -const struct brw_tracked_state brw_pipelined_state_pointers = { - .dirty = { - .brw = 0, - .cache = (CACHE_NEW_VS_UNIT | - CACHE_NEW_GS_UNIT | - CACHE_NEW_GS_PROG | - CACHE_NEW_CLIP_UNIT | - CACHE_NEW_SF_UNIT | - CACHE_NEW_WM_UNIT | - CACHE_NEW_CC_UNIT) - }, - .update = upload_pipelined_state_pointers -}; - -static void upload_psp_urb_cbs(struct brw_context *brw ) -{ - upload_pipelined_state_pointers(brw); - brw_upload_urb_fence(brw); - brw_upload_constant_buffer_state(brw); -} - - -const struct brw_tracked_state brw_psp_urb_cbs = { - .dirty = { - .brw = BRW_NEW_URB_FENCE, - .cache = (CACHE_NEW_VS_UNIT | - CACHE_NEW_GS_UNIT | - CACHE_NEW_GS_PROG | - CACHE_NEW_CLIP_UNIT | - CACHE_NEW_SF_UNIT | - CACHE_NEW_WM_UNIT | - CACHE_NEW_CC_UNIT) - }, - .update = upload_psp_urb_cbs -}; - -/** - * Upload the depthbuffer offset and format. - * - * We have to do this per state validation as we need to emit the relocation - * in the batch buffer. - */ -static void upload_depthbuffer(struct brw_context *brw) -{ - struct pipe_surface *depth_surface = brw->attribs.FrameBuffer.zsbuf; - - BEGIN_BATCH(5, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (5 - 2)); - if (depth_surface == NULL) { - OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | - (BRW_SURFACE_NULL << 29)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - } else { - unsigned int format; - struct brw_texture *tex = (struct brw_texture *)depth_surface->texture; - assert(depth_surface->block.width == 1); - assert(depth_surface->block.height == 1); - switch (depth_surface->block.size) { - case 2: - format = BRW_DEPTHFORMAT_D16_UNORM; - break; - case 4: - if (depth_surface->format == PIPE_FORMAT_Z32_FLOAT) - format = BRW_DEPTHFORMAT_D32_FLOAT; - else - format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; - break; - default: - assert(0); - return; - } - - OUT_BATCH((depth_surface->stride - 1) | - (format << 18) | - (BRW_TILEWALK_YMAJOR << 26) | -// (depth_surface->region->tiled << 27) | - (BRW_SURFACE_2D << 29)); - OUT_RELOC(tex->buffer, - PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE, 0); - OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | - ((depth_surface->stride/depth_surface->block.size - 1) << 6) | - ((depth_surface->height - 1) << 19)); - OUT_BATCH(0); - } - ADVANCE_BATCH(); -} - -const struct brw_tracked_state brw_depthbuffer = { - .dirty = { - .brw = BRW_NEW_SCENE, - .cache = 0 - }, - .update = upload_depthbuffer, -}; - - - - -/*********************************************************************** - * Polygon stipple packet - */ - -static void upload_polygon_stipple(struct brw_context *brw) -{ - struct brw_polygon_stipple bps; - unsigned i; - - memset(&bps, 0, sizeof(bps)); - bps.header.opcode = CMD_POLY_STIPPLE_PATTERN; - bps.header.length = sizeof(bps)/4-2; - - /* XXX: state tracker should send *all* state down initially! - */ - if (brw->attribs.PolygonStipple) - for (i = 0; i < 32; i++) - bps.stipple[i] = brw->attribs.PolygonStipple->stipple[31 - i]; /* invert */ - - BRW_CACHED_BATCH_STRUCT(brw, &bps); -} - -const struct brw_tracked_state brw_polygon_stipple = { - .dirty = { - .brw = BRW_NEW_STIPPLE, - .cache = 0 - }, - .update = upload_polygon_stipple -}; - - -/*********************************************************************** - * Line stipple packet - */ - -static void upload_line_stipple(struct brw_context *brw) -{ - struct brw_line_stipple bls; - float tmp; - int tmpi; - - memset(&bls, 0, sizeof(bls)); - bls.header.opcode = CMD_LINE_STIPPLE_PATTERN; - bls.header.length = sizeof(bls)/4 - 2; - - bls.bits0.pattern = brw->attribs.Raster->line_stipple_pattern; - bls.bits1.repeat_count = brw->attribs.Raster->line_stipple_factor; - - tmp = 1.0 / (float) brw->attribs.Raster->line_stipple_factor; - tmpi = tmp * (1<<13); - - - bls.bits1.inverse_repeat_count = tmpi; - - BRW_CACHED_BATCH_STRUCT(brw, &bls); -} - -const struct brw_tracked_state brw_line_stipple = { - .dirty = { - .brw = BRW_NEW_STIPPLE, - .cache = 0 - }, - .update = upload_line_stipple -}; - - -/*********************************************************************** - * Misc constant state packets - */ - -static void upload_pipe_control(struct brw_context *brw) -{ - struct brw_pipe_control pc; - - return; - - memset(&pc, 0, sizeof(pc)); - - pc.header.opcode = CMD_PIPE_CONTROL; - pc.header.length = sizeof(pc)/4 - 2; - pc.header.post_sync_operation = PIPE_CONTROL_NOWRITE; - - pc.header.instruction_state_cache_flush_enable = 1; - - pc.bits1.dest_addr_type = PIPE_CONTROL_GTTWRITE_GLOBAL; - - BRW_BATCH_STRUCT(brw, &pc); -} - -const struct brw_tracked_state brw_pipe_control = { - .dirty = { - .brw = BRW_NEW_SCENE, - .cache = 0 - }, - .update = upload_pipe_control -}; - - -/*********************************************************************** - * Misc invarient state packets - */ - -static void upload_invarient_state( struct brw_context *brw ) -{ - { - struct brw_mi_flush flush; - - memset(&flush, 0, sizeof(flush)); - flush.opcode = CMD_MI_FLUSH; - flush.flags = BRW_FLUSH_STATE_CACHE | BRW_FLUSH_READ_CACHE; - BRW_BATCH_STRUCT(brw, &flush); - } - - { - /* 0x61040000 Pipeline Select */ - /* PipelineSelect : 0 */ - struct brw_pipeline_select ps; - - memset(&ps, 0, sizeof(ps)); - ps.header.opcode = CMD_PIPELINE_SELECT; - ps.header.pipeline_select = 0; - BRW_BATCH_STRUCT(brw, &ps); - } - - { - struct brw_global_depth_offset_clamp gdo; - memset(&gdo, 0, sizeof(gdo)); - - /* Disable depth offset clamping. - */ - gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP; - gdo.header.length = sizeof(gdo)/4 - 2; - gdo.depth_offset_clamp = 0.0; - - BRW_BATCH_STRUCT(brw, &gdo); - } - - - /* 0x61020000 State Instruction Pointer */ - { - struct brw_system_instruction_pointer sip; - memset(&sip, 0, sizeof(sip)); - - sip.header.opcode = CMD_STATE_INSN_POINTER; - sip.header.length = 0; - sip.bits0.pad = 0; - sip.bits0.system_instruction_pointer = 0; - BRW_BATCH_STRUCT(brw, &sip); - } - - - { - struct brw_vf_statistics vfs; - memset(&vfs, 0, sizeof(vfs)); - - vfs.opcode = CMD_VF_STATISTICS; - if (BRW_DEBUG & DEBUG_STATS) - vfs.statistics_enable = 1; - - BRW_BATCH_STRUCT(brw, &vfs); - } - - - { - struct brw_polygon_stipple_offset bpso; - - memset(&bpso, 0, sizeof(bpso)); - bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; - bpso.header.length = sizeof(bpso)/4-2; - bpso.bits0.x_offset = 0; - bpso.bits0.y_offset = 0; - - BRW_BATCH_STRUCT(brw, &bpso); - } -} - -const struct brw_tracked_state brw_invarient_state = { - .dirty = { - .brw = BRW_NEW_SCENE, - .cache = 0 - }, - .update = upload_invarient_state -}; - -/** - * Define the base addresses which some state is referenced from. - * - * This allows us to avoid having to emit relocations in many places for - * cached state, and instead emit pointers inside of large, mostly-static - * state pools. This comes at the expense of memory, and more expensive cache - * misses. - */ -static void upload_state_base_address( struct brw_context *brw ) -{ - /* Output the structure (brw_state_base_address) directly to the - * batchbuffer, so we can emit relocations inline. - */ - BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); - OUT_RELOC(brw->pool[BRW_GS_POOL].buffer, - PIPE_BUFFER_USAGE_GPU_READ, - 1); /* General state base address */ - OUT_RELOC(brw->pool[BRW_SS_POOL].buffer, - PIPE_BUFFER_USAGE_GPU_READ, - 1); /* Surface state base address */ - OUT_BATCH(1); /* Indirect object base address */ - OUT_BATCH(1); /* General state upper bound */ - OUT_BATCH(1); /* Indirect object upper bound */ - ADVANCE_BATCH(); -} - - -const struct brw_tracked_state brw_state_base_address = { - .dirty = { - .brw = BRW_NEW_SCENE, - .cache = 0 - }, - .update = upload_state_base_address -}; diff --git a/src/gallium/drivers/i965simple/brw_reg.h b/src/gallium/drivers/i965simple/brw_reg.h deleted file mode 100644 index 9e885c3b3b..0000000000 --- a/src/gallium/drivers/i965simple/brw_reg.h +++ /dev/null @@ -1,76 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#define CMD_MI (0x0 << 29) -#define CMD_2D (0x2 << 29) -#define CMD_3D (0x3 << 29) - -#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23) - -/* Stalls command execution waiting for the given events to have occurred. */ -#define MI_WAIT_FOR_EVENT (CMD_MI | (0x3 << 23)) -#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) -#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) - -/* Primitive dispatch on 830-945 */ -#define _3DPRIMITIVE (CMD_3D | (0x1f << 24)) -#define PRIM_INDIRECT (1<<23) -#define PRIM_INLINE (0<<23) -#define PRIM_INDIRECT_SEQUENTIAL (0<<17) -#define PRIM_INDIRECT_ELTS (1<<17) - -#define PRIM3D_TRILIST (0x0<<18) -#define PRIM3D_TRISTRIP (0x1<<18) -#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) -#define PRIM3D_TRIFAN (0x3<<18) -#define PRIM3D_POLY (0x4<<18) -#define PRIM3D_LINELIST (0x5<<18) -#define PRIM3D_LINESTRIP (0x6<<18) -#define PRIM3D_RECTLIST (0x7<<18) -#define PRIM3D_POINTLIST (0x8<<18) -#define PRIM3D_DIB (0x9<<18) -#define PRIM3D_MASK (0x1f<<18) - -#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22) | 6) - -#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 4) - -#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22) | 6) - -/* BR00 */ -#define XY_BLT_WRITE_ALPHA (1 << 21) -#define XY_BLT_WRITE_RGB (1 << 20) -#define XY_SRC_TILED (1 << 15) -#define XY_DST_TILED (1 << 11) - -/* BR13 */ -#define BR13_565 (0x1 << 24) -#define BR13_8888 (0x3 << 24) - -#define FENCE_LINEAR 0 -#define FENCE_XMAJOR 1 -#define FENCE_YMAJOR 2 diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c deleted file mode 100644 index 4a84c4db23..0000000000 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ /dev/null @@ -1,244 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "util/u_memory.h" -#include "pipe/internal/p_winsys_screen.h" -#include "util/u_string.h" -#include "util/u_simple_screen.h" - -#include "brw_context.h" -#include "brw_screen.h" -#include "brw_tex_layout.h" - - -static const char * -brw_get_vendor( struct pipe_screen *screen ) -{ - return "VMware, Inc."; -} - - -static const char * -brw_get_name( struct pipe_screen *screen ) -{ - static char buffer[128]; - const char *chipset; - - switch (brw_screen(screen)->pci_id) { - case PCI_CHIP_I965_Q: - chipset = "Intel(R) 965Q"; - break; - case PCI_CHIP_I965_G: - case PCI_CHIP_I965_G_1: - chipset = "Intel(R) 965G"; - break; - case PCI_CHIP_I965_GM: - chipset = "Intel(R) 965GM"; - break; - case PCI_CHIP_I965_GME: - chipset = "Intel(R) 965GME/GLE"; - break; - default: - chipset = "unknown"; - break; - } - - util_snprintf(buffer, sizeof(buffer), "i965 (chipset: %s)", chipset); - return buffer; -} - - -static int -brw_get_param(struct pipe_screen *screen, int param) -{ - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 8; - case PIPE_CAP_NPOT_TEXTURES: - return 1; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; - case PIPE_CAP_POINT_SPRITE: - return 0; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; - case PIPE_CAP_OCCLUSION_QUERY: - return 0; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 11; /* max 1024x1024 */ - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 8; /* max 128x128x128 */ - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 11; /* max 1024x1024 */ - default: - return 0; - } -} - - -static float -brw_get_paramf(struct pipe_screen *screen, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 7.5; - - case PIPE_CAP_MAX_POINT_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 255.0; - - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 4.0; - - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0; - - default: - return 0; - } -} - - -static boolean -brw_is_format_supported( struct pipe_screen *screen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned tex_usage, - unsigned geom_flags ) -{ -#if 0 - /* XXX: This is broken -- rewrite if still needed. */ - static const unsigned tex_supported[] = { - PIPE_FORMAT_R8G8B8A8_UNORM, - PIPE_FORMAT_A8R8G8B8_UNORM, - PIPE_FORMAT_R5G6B5_UNORM, - PIPE_FORMAT_L8_UNORM, - PIPE_FORMAT_A8_UNORM, - PIPE_FORMAT_I8_UNORM, - PIPE_FORMAT_L8A8_UNORM, - PIPE_FORMAT_YCBCR, - PIPE_FORMAT_YCBCR_REV, - PIPE_FORMAT_S8_Z24, - }; - - - /* Actually a lot more than this - add later: - */ - static const unsigned render_supported[] = { - PIPE_FORMAT_A8R8G8B8_UNORM, - PIPE_FORMAT_R5G6B5_UNORM, - }; - - /* - */ - static const unsigned z_stencil_supported[] = { - PIPE_FORMAT_Z16_UNORM, - PIPE_FORMAT_Z32_UNORM, - PIPE_FORMAT_S8Z24_UNORM, - }; - - switch (type) { - case PIPE_RENDER_FORMAT: - *numFormats = Elements(render_supported); - return render_supported; - - case PIPE_TEX_FORMAT: - *numFormats = Elements(tex_supported); - return render_supported; - - case PIPE_Z_STENCIL_FORMAT: - *numFormats = Elements(render_supported); - return render_supported; - - default: - *numFormats = 0; - return NULL; - } -#else - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: - return TRUE; - default: - return FALSE; - }; - return FALSE; -#endif -} - - -static void -brw_destroy_screen( struct pipe_screen *screen ) -{ - struct pipe_winsys *winsys = screen->winsys; - - if(winsys->destroy) - winsys->destroy(winsys); - - FREE(screen); -} - - -/** - * Create a new brw_screen object - */ -struct pipe_screen * -brw_create_screen(struct pipe_winsys *winsys, uint pci_id) -{ - struct brw_screen *brwscreen = CALLOC_STRUCT(brw_screen); - - if (!brwscreen) - return NULL; - - brwscreen->pci_id = pci_id; - - brwscreen->screen.winsys = winsys; - - brwscreen->screen.destroy = brw_destroy_screen; - - brwscreen->screen.get_name = brw_get_name; - brwscreen->screen.get_vendor = brw_get_vendor; - brwscreen->screen.get_param = brw_get_param; - brwscreen->screen.get_paramf = brw_get_paramf; - brwscreen->screen.is_format_supported = brw_is_format_supported; - - brw_init_screen_texture_funcs(&brwscreen->screen); - u_simple_screen_init(&brwscreen->screen); - - return &brwscreen->screen; -} diff --git a/src/gallium/drivers/i965simple/brw_screen.h b/src/gallium/drivers/i965simple/brw_screen.h deleted file mode 100644 index d3c70387e6..0000000000 --- a/src/gallium/drivers/i965simple/brw_screen.h +++ /dev/null @@ -1,68 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef BRW_SCREEN_H -#define BRW_SCREEN_H - - -#include "pipe/p_screen.h" - - -#ifdef __cplusplus -extern "C" { -#endif - - -/** - * Subclass of pipe_screen - */ -struct brw_screen -{ - struct pipe_screen screen; - - uint pci_id; -}; - - -/** cast wrapper */ -static INLINE struct brw_screen * -brw_screen(struct pipe_screen *pscreen) -{ - return (struct brw_screen *) pscreen; -} - - -extern struct pipe_screen * -brw_create_screen(struct pipe_winsys *winsys, uint pci_id); - - -#ifdef __cplusplus -} -#endif - -#endif /* BRW_SCREEN_H */ diff --git a/src/gallium/drivers/i965simple/brw_sf.c b/src/gallium/drivers/i965simple/brw_sf.c deleted file mode 100644 index b82a2e143b..0000000000 --- a/src/gallium/drivers/i965simple/brw_sf.c +++ /dev/null @@ -1,351 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_util.h" -#include "brw_sf.h" -#include "brw_state.h" -#include "tgsi/tgsi_parse.h" - - -static void compile_sf_prog( struct brw_context *brw, - struct brw_sf_prog_key *key ) -{ - struct brw_sf_compile c; - const unsigned *program; - unsigned program_size; - - memset(&c, 0, sizeof(c)); - - /* Begin the compilation: - */ - brw_init_compile(&c.func); - - c.key = *key; - - - c.nr_attrs = c.key.vp_output_count; - c.nr_attr_regs = (c.nr_attrs+1)/2; - - c.nr_setup_attrs = c.key.fp_input_count + 1; /* +1 for position */ - c.nr_setup_regs = (c.nr_setup_attrs+1)/2; - - c.prog_data.urb_read_length = c.nr_attr_regs; - c.prog_data.urb_entry_size = c.nr_setup_regs * 2; - - - /* Which primitive? Or all three? - */ - switch (key->primitive) { - case SF_TRIANGLES: - c.nr_verts = 3; - brw_emit_tri_setup( &c ); - break; - case SF_LINES: - c.nr_verts = 2; - brw_emit_line_setup( &c ); - break; - case SF_POINTS: - c.nr_verts = 1; - brw_emit_point_setup( &c ); - break; - - case SF_UNFILLED_TRIS: - default: - assert(0); - return; - } - - - - /* get the program - */ - program = brw_get_program(&c.func, &program_size); - - /* Upload - */ - brw->sf.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_SF_PROG], - &c.key, - sizeof(c.key), - program, - program_size, - &c.prog_data, - &brw->sf.prog_data ); -} - - -static boolean search_cache( struct brw_context *brw, - struct brw_sf_prog_key *key ) -{ - return brw_search_cache(&brw->cache[BRW_SF_PROG], - key, sizeof(*key), - &brw->sf.prog_data, - &brw->sf.prog_gs_offset); -} - - -/* Calculate interpolants for triangle and line rasterization. - */ -static void upload_sf_prog( struct brw_context *brw ) -{ - const struct brw_fragment_program *fs = brw->attribs.FragmentProgram; - struct brw_sf_prog_key key; - struct tgsi_parse_context parse; - int i, done = 0; - - - memset(&key, 0, sizeof(key)); - - /* Populate the key, noting state dependencies: - */ - /* CACHE_NEW_VS_PROG */ - key.vp_output_count = brw->vs.prog_data->outputs_written; - - /* BRW_NEW_FS */ - key.fp_input_count = brw->attribs.FragmentProgram->info.file_max[TGSI_FILE_INPUT] + 1; - - - /* BRW_NEW_REDUCED_PRIMITIVE */ - switch (brw->reduced_primitive) { - case PIPE_PRIM_TRIANGLES: -// if (key.attrs & (1<program.tokens ); - while( !done && - !tgsi_parse_end_of_tokens( &parse ) ) - { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT) - { - int first = parse.FullToken.FullDeclaration.DeclarationRange.First; - int last = parse.FullToken.FullDeclaration.DeclarationRange.Last; - int interp_mode = parse.FullToken.FullDeclaration.Declaration.Interpolate; - //int semantic = parse.FullToken.FullDeclaration.Semantic.SemanticName; - //int semantic_index = parse.FullToken.FullDeclaration.Semantic.SemanticIndex; - - debug_printf("fs input %d..%d interp mode %d\n", first, last, interp_mode); - - switch (interp_mode) { - case TGSI_INTERPOLATE_CONSTANT: - for (i = first; i <= last; i++) - key.const_mask |= (1 << i); - break; - case TGSI_INTERPOLATE_LINEAR: - for (i = first; i <= last; i++) - key.linear_mask |= (1 << i); - break; - case TGSI_INTERPOLATE_PERSPECTIVE: - for (i = first; i <= last; i++) - key.persp_mask |= (1 << i); - break; - default: - break; - } - - /* Also need stuff for flat shading, twosided color. - */ - - } - break; - default: - done = 1; - break; - } - } - - /* Hack: Adjust for position. Optimize away when not required (ie - * for perspective interpolation). - */ - key.persp_mask <<= 1; - key.linear_mask <<= 1; - key.linear_mask |= 1; - key.const_mask <<= 1; - - debug_printf("key.persp_mask: %x\n", key.persp_mask); - debug_printf("key.linear_mask: %x\n", key.linear_mask); - debug_printf("key.const_mask: %x\n", key.const_mask); - - -// key.do_point_sprite = brw->attribs.Point->PointSprite; -// key.SpriteOrigin = brw->attribs.Point->SpriteOrigin; - -// key.do_flat_shading = (brw->attribs.Raster->flatshade); -// key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide); - -// if (key.do_twoside_color) -// key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW); - - - if (!search_cache(brw, &key)) - compile_sf_prog( brw, &key ); -} - - -const struct brw_tracked_state brw_sf_prog = { - .dirty = { - .brw = (BRW_NEW_RASTERIZER | - BRW_NEW_REDUCED_PRIMITIVE | - BRW_NEW_VS | - BRW_NEW_FS), - .cache = 0, - }, - .update = upload_sf_prog -}; - - - -#if 0 -/* Build a struct like the one we'd like the state tracker to pass to - * us. - */ -static void update_sf_linkage( struct brw_context *brw ) -{ - const struct brw_vertex_program *vs = brw->attribs.VertexProgram; - const struct brw_fragment_program *fs = brw->attribs.FragmentProgram; - struct pipe_setup_linkage state; - struct tgsi_parse_context parse; - - int i, j; - int nr_vp_outputs = 0; - int done = 0; - - struct { - unsigned semantic:8; - unsigned semantic_index:16; - } fp_semantic[32], vp_semantic[32]; - - memset(&state, 0, sizeof(state)); - - state.fp_input_count = 0; - - - - - - - assert(state.fp_input_count == fs->program.num_inputs); - - - /* Then scan vp outputs - */ - done = 0; - tgsi_parse_init( &parse, vs->program.tokens ); - while( !done && - !tgsi_parse_end_of_tokens( &parse ) ) - { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT) - { - int first = parse.FullToken.FullDeclaration.DeclarationRange.First; - int last = parse.FullToken.FullDeclaration.DeclarationRange.Last; - - for (i = first; i < last; i++) { - vp_semantic[i].semantic = - parse.FullToken.FullDeclaration.Semantic.SemanticName; - vp_semantic[i].semantic_index = - parse.FullToken.FullDeclaration.Semantic.SemanticIndex; - } - - assert(last > nr_vp_outputs); - nr_vp_outputs = last; - } - break; - default: - done = 1; - break; - } - } - - - /* Now match based on semantic information. - */ - for (i = 0; i< state.fp_input_count; i++) { - for (j = 0; j < nr_vp_outputs; j++) { - if (fp_semantic[i].semantic == vp_semantic[j].semantic && - fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) { - state.fp_input[i].vp_output = j; - } - } - if (fp_semantic[i].semantic == TGSI_SEMANTIC_COLOR) { - for (j = 0; j < nr_vp_outputs; j++) { - if (TGSI_SEMANTIC_BCOLOR == vp_semantic[j].semantic && - fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) { - state.fp_input[i].bf_vp_output = j; - } - } - } - } - - if (memcmp(&brw->sf.linkage, &state, sizeof(state)) != 0) { - brw->sf.linkage = state; - brw->state.dirty.brw |= BRW_NEW_SF_LINKAGE; - } -} - - -const struct brw_tracked_state brw_sf_linkage = { - .dirty = { - .brw = (BRW_NEW_VS | - BRW_NEW_FS), - .cache = 0, - }, - .update = update_sf_linkage -}; - - -#endif diff --git a/src/gallium/drivers/i965simple/brw_sf.h b/src/gallium/drivers/i965simple/brw_sf.h deleted file mode 100644 index b7ada47560..0000000000 --- a/src/gallium/drivers/i965simple/brw_sf.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_SF_H -#define BRW_SF_H - -#include "brw_context.h" -#include "brw_eu.h" - - -#define SF_POINTS 0 -#define SF_LINES 1 -#define SF_TRIANGLES 2 -#define SF_UNFILLED_TRIS 3 - - - -struct brw_sf_prog_key { - unsigned vp_output_count:5; - unsigned fp_input_count:5; - - unsigned primitive:2; - unsigned do_twoside_color:1; - unsigned do_flat_shading:1; - unsigned frontface_ccw:1; - unsigned do_point_sprite:1; - - /* Interpolation masks; - */ - unsigned linear_mask; - unsigned persp_mask; - unsigned const_mask; - - -// int SpriteOrigin; -}; - -struct brw_sf_point_tex { - boolean CoordReplace; -}; - -struct brw_sf_compile { - struct brw_compile func; - struct brw_sf_prog_key key; - struct brw_sf_prog_data prog_data; - - struct brw_reg pv; - struct brw_reg det; - struct brw_reg dx0; - struct brw_reg dx2; - struct brw_reg dy0; - struct brw_reg dy2; - - /* z and 1/w passed in seperately: - */ - struct brw_reg z[3]; - struct brw_reg inv_w[3]; - - /* The vertices: - */ - struct brw_reg vert[3]; - - /* Temporaries, allocated after last vertex reg. - */ - struct brw_reg inv_det; - struct brw_reg a1_sub_a0; - struct brw_reg a2_sub_a0; - struct brw_reg tmp; - - struct brw_reg m1Cx; - struct brw_reg m2Cy; - struct brw_reg m3C0; - - unsigned nr_verts; - unsigned nr_attrs; - unsigned nr_attr_regs; - unsigned nr_setup_attrs; - unsigned nr_setup_regs; -#if 0 - ubyte attr_to_idx[VERT_RESULT_MAX]; - ubyte idx_to_attr[VERT_RESULT_MAX]; - struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX]; -#endif -}; - - -void brw_emit_tri_setup( struct brw_sf_compile *c ); -void brw_emit_line_setup( struct brw_sf_compile *c ); -void brw_emit_point_setup( struct brw_sf_compile *c ); -void brw_emit_point_sprite_setup( struct brw_sf_compile *c ); -void brw_emit_anyprim_setup( struct brw_sf_compile *c ); - -#endif diff --git a/src/gallium/drivers/i965simple/brw_sf_emit.c b/src/gallium/drivers/i965simple/brw_sf_emit.c deleted file mode 100644 index 78d6fa5e9e..0000000000 --- a/src/gallium/drivers/i965simple/brw_sf_emit.c +++ /dev/null @@ -1,382 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_util.h" -#include "brw_sf.h" - - - -/*********************************************************************** - * Triangle setup. - */ - - -static void alloc_regs( struct brw_sf_compile *c ) -{ - unsigned reg, i; - - /* Values computed by fixed function unit: - */ - c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_UD); - c->det = brw_vec1_grf(1, 2); - c->dx0 = brw_vec1_grf(1, 3); - c->dx2 = brw_vec1_grf(1, 4); - c->dy0 = brw_vec1_grf(1, 5); - c->dy2 = brw_vec1_grf(1, 6); - - /* z and 1/w passed in seperately: - */ - c->z[0] = brw_vec1_grf(2, 0); - c->inv_w[0] = brw_vec1_grf(2, 1); - c->z[1] = brw_vec1_grf(2, 2); - c->inv_w[1] = brw_vec1_grf(2, 3); - c->z[2] = brw_vec1_grf(2, 4); - c->inv_w[2] = brw_vec1_grf(2, 5); - - /* The vertices: - */ - reg = 3; - for (i = 0; i < c->nr_verts; i++) { - c->vert[i] = brw_vec8_grf(reg, 0); - reg += c->nr_attr_regs; - } - - /* Temporaries, allocated after last vertex reg. - */ - c->inv_det = brw_vec1_grf(reg, 0); reg++; - c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++; - c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++; - c->tmp = brw_vec8_grf(reg, 0); reg++; - - /* Note grf allocation: - */ - c->prog_data.total_grf = reg; - - - /* Outputs of this program - interpolation coefficients for - * rasterization: - */ - c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0); - c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0); - c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0); -} - - -static void copy_z_inv_w( struct brw_sf_compile *c ) -{ - struct brw_compile *p = &c->func; - unsigned i; - - brw_push_insn_state(p); - - /* Copy both scalars with a single MOV: - */ - for (i = 0; i < c->nr_verts; i++) - brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i])); - - brw_pop_insn_state(p); -} - - -static void invert_det( struct brw_sf_compile *c) -{ - brw_math(&c->func, - c->inv_det, - BRW_MATH_FUNCTION_INV, - BRW_MATH_SATURATE_NONE, - 0, - c->det, - BRW_MATH_DATA_SCALAR, - BRW_MATH_PRECISION_FULL); - -} - -#define NON_PERPECTIVE_ATTRS (FRAG_BIT_WPOS | \ - FRAG_BIT_COL0 | \ - FRAG_BIT_COL1) - -static boolean calculate_masks( struct brw_sf_compile *c, - unsigned reg, - ushort *pc, - ushort *pc_persp, - ushort *pc_linear) -{ - boolean is_last_attr = (reg == c->nr_setup_regs - 1); - unsigned persp_mask = c->key.persp_mask; - unsigned linear_mask = c->key.linear_mask; - - debug_printf("persp_mask: %x\n", persp_mask); - debug_printf("linear_mask: %x\n", linear_mask); - - *pc_persp = 0; - *pc_linear = 0; - *pc = 0xf; - - if (persp_mask & (1 << (reg*2))) - *pc_persp = 0xf; - - if (linear_mask & (1 << (reg*2))) - *pc_linear = 0xf; - - /* Maybe only processs one attribute on the final round: - */ - if (reg*2+1 < c->nr_setup_attrs) { - *pc |= 0xf0; - - if (persp_mask & (1 << (reg*2+1))) - *pc_persp |= 0xf0; - - if (linear_mask & (1 << (reg*2+1))) - *pc_linear |= 0xf0; - } - - debug_printf("pc: %x\n", *pc); - debug_printf("pc_persp: %x\n", *pc_persp); - debug_printf("pc_linear: %x\n", *pc_linear); - - - return is_last_attr; -} - - - -void brw_emit_tri_setup( struct brw_sf_compile *c ) -{ - struct brw_compile *p = &c->func; - unsigned i; - - debug_printf("%s START ==============\n", __FUNCTION__); - - c->nr_verts = 3; - alloc_regs(c); - invert_det(c); - copy_z_inv_w(c); - - - for (i = 0; i < c->nr_setup_regs; i++) - { - /* Pair of incoming attributes: - */ - struct brw_reg a0 = offset(c->vert[0], i); - struct brw_reg a1 = offset(c->vert[1], i); - struct brw_reg a2 = offset(c->vert[2], i); - ushort pc = 0, pc_persp = 0, pc_linear = 0; - boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); - - if (pc_persp) - { - brw_set_predicate_control_flag_value(p, pc_persp); - brw_MUL(p, a0, a0, c->inv_w[0]); - brw_MUL(p, a1, a1, c->inv_w[1]); - brw_MUL(p, a2, a2, c->inv_w[2]); - } - - - /* Calculate coefficients for interpolated values: - */ - if (pc_linear) - { - brw_set_predicate_control_flag_value(p, pc_linear); - - brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); - brw_ADD(p, c->a2_sub_a0, a2, negate(a0)); - - /* calculate dA/dx - */ - brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2); - brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0)); - brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); - - /* calculate dA/dy - */ - brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0); - brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2)); - brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); - } - - { - brw_set_predicate_control_flag_value(p, pc); - /* start point for interpolation - */ - brw_MOV(p, c->m3C0, a0); - - /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in - * the send instruction: - */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ - 0, /* allocate */ - 1, /* used */ - 4, /* msg len */ - 0, /* response len */ - last, /* eot */ - last, /* writes complete */ - i*4, /* offset */ - BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */ - } - } - - debug_printf("%s DONE ==============\n", __FUNCTION__); - -} - - - -void brw_emit_line_setup( struct brw_sf_compile *c ) -{ - struct brw_compile *p = &c->func; - unsigned i; - - - c->nr_verts = 2; - alloc_regs(c); - invert_det(c); - copy_z_inv_w(c); - - for (i = 0; i < c->nr_setup_regs; i++) - { - /* Pair of incoming attributes: - */ - struct brw_reg a0 = offset(c->vert[0], i); - struct brw_reg a1 = offset(c->vert[1], i); - ushort pc, pc_persp, pc_linear; - boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); - - if (pc_persp) - { - brw_set_predicate_control_flag_value(p, pc_persp); - brw_MUL(p, a0, a0, c->inv_w[0]); - brw_MUL(p, a1, a1, c->inv_w[1]); - } - - /* Calculate coefficients for position, color: - */ - if (pc_linear) { - brw_set_predicate_control_flag_value(p, pc_linear); - - brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); - - brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0); - brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); - - brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0); - brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); - } - - { - brw_set_predicate_control_flag_value(p, pc); - - /* start point for interpolation - */ - brw_MOV(p, c->m3C0, a0); - - /* Copy m0..m3 to URB. - */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), - 0, /* allocate */ - 1, /* used */ - 4, /* msg len */ - 0, /* response len */ - last, /* eot */ - last, /* writes complete */ - i*4, /* urb destination offset */ - BRW_URB_SWIZZLE_TRANSPOSE); - } - } -} - - -/* Points setup - several simplifications as all attributes are - * constant across the face of the point (point sprites excluded!) - */ -void brw_emit_point_setup( struct brw_sf_compile *c ) -{ - struct brw_compile *p = &c->func; - unsigned i; - - c->nr_verts = 1; - alloc_regs(c); - copy_z_inv_w(c); - - brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */ - brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */ - - for (i = 0; i < c->nr_setup_regs; i++) - { - struct brw_reg a0 = offset(c->vert[0], i); - ushort pc, pc_persp, pc_linear; - boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); - - if (pc_persp) - { - /* This seems odd as the values are all constant, but the - * fragment shader will be expecting it: - */ - brw_set_predicate_control_flag_value(p, pc_persp); - brw_MUL(p, a0, a0, c->inv_w[0]); - } - - - /* The delta values are always zero, just send the starting - * coordinate. Again, this is to fit in with the interpolation - * code in the fragment shader. - */ - { - brw_set_predicate_control_flag_value(p, pc); - - brw_MOV(p, c->m3C0, a0); /* constant value */ - - /* Copy m0..m3 to URB. - */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), - 0, /* allocate */ - 1, /* used */ - 4, /* msg len */ - 0, /* response len */ - last, /* eot */ - last, /* writes complete */ - i*4, /* urb destination offset */ - BRW_URB_SWIZZLE_TRANSPOSE); - } - } -} diff --git a/src/gallium/drivers/i965simple/brw_sf_state.c b/src/gallium/drivers/i965simple/brw_sf_state.c deleted file mode 100644 index 2a5de61c21..0000000000 --- a/src/gallium/drivers/i965simple/brw_sf_state.c +++ /dev/null @@ -1,181 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "util/u_math.h" -#include "util/u_memory.h" - - -static void upload_sf_vp(struct brw_context *brw) -{ - struct brw_sf_viewport sfv; - - memset(&sfv, 0, sizeof(sfv)); - - - /* BRW_NEW_VIEWPORT */ - { - const float *scale = brw->attribs.Viewport.scale; - const float *trans = brw->attribs.Viewport.translate; - - sfv.viewport.m00 = scale[0]; - sfv.viewport.m11 = scale[1]; - sfv.viewport.m22 = scale[2]; - sfv.viewport.m30 = trans[0]; - sfv.viewport.m31 = trans[1]; - sfv.viewport.m32 = trans[2]; - } - - /* _NEW_SCISSOR */ - sfv.scissor.xmin = brw->attribs.Scissor.minx; - sfv.scissor.xmax = brw->attribs.Scissor.maxx - 1; - sfv.scissor.ymin = brw->attribs.Scissor.miny; - sfv.scissor.ymax = brw->attribs.Scissor.maxy - 1; - - brw->sf.vp_gs_offset = brw_cache_data( &brw->cache[BRW_SF_VP], &sfv ); -} - -const struct brw_tracked_state brw_sf_vp = { - .dirty = { - .brw = (BRW_NEW_SCISSOR | - BRW_NEW_VIEWPORT), - .cache = 0 - }, - .update = upload_sf_vp -}; - -static void upload_sf_unit( struct brw_context *brw ) -{ - struct brw_sf_unit_state sf; - memset(&sf, 0, sizeof(sf)); - - /* CACHE_NEW_SF_PROG */ - sf.thread0.grf_reg_count = align(brw->sf.prog_data->total_grf, 16) / 16 - 1; - sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6; - sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length; - - sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - sf.thread3.dispatch_grf_start_reg = 3; - sf.thread3.urb_entry_read_offset = 1; - - /* BRW_NEW_URB_FENCE */ - sf.thread4.nr_urb_entries = brw->urb.nr_sf_entries; - sf.thread4.urb_entry_allocation_size = brw->urb.sfsize - 1; - sf.thread4.max_threads = MIN2(12, brw->urb.nr_sf_entries / 2) - 1; - - if (BRW_DEBUG & DEBUG_SINGLE_THREAD) - sf.thread4.max_threads = 0; - - if (BRW_DEBUG & DEBUG_STATS) - sf.thread4.stats_enable = 1; - - /* CACHE_NEW_SF_VP */ - sf.sf5.sf_viewport_state_offset = brw->sf.vp_gs_offset >> 5; - sf.sf5.viewport_transform = 1; - - /* BRW_NEW_RASTER */ - if (brw->attribs.Raster->scissor) - sf.sf6.scissor = 1; - -#if 0 - if (brw->attribs.Polygon->FrontFace == GL_CCW) - sf.sf5.front_winding = BRW_FRONTWINDING_CCW; - else - sf.sf5.front_winding = BRW_FRONTWINDING_CW; - - - if (brw->attribs.Polygon->CullFlag) { - switch (brw->attribs.Polygon->CullFaceMode) { - case GL_FRONT: - sf.sf6.cull_mode = BRW_CULLMODE_FRONT; - break; - case GL_BACK: - sf.sf6.cull_mode = BRW_CULLMODE_BACK; - break; - case GL_FRONT_AND_BACK: - sf.sf6.cull_mode = BRW_CULLMODE_BOTH; - break; - default: - assert(0); - break; - } - } - else - sf.sf6.cull_mode = BRW_CULLMODE_NONE; -#else - sf.sf5.front_winding = BRW_FRONTWINDING_CCW; - sf.sf6.cull_mode = BRW_CULLMODE_NONE; -#endif - - sf.sf6.line_width = CLAMP(brw->attribs.Raster->line_width, 1.0, 5.0) * (1<<1); - - sf.sf6.line_endcap_aa_region_width = 1; - if (brw->attribs.Raster->line_smooth) - sf.sf6.aa_enable = 1; - else if (sf.sf6.line_width <= 0x2) - sf.sf6.line_width = 0; - - sf.sf6.point_rast_rule = 1; /* opengl conventions */ - - sf.sf7.sprite_point = brw->attribs.Raster->point_sprite; - sf.sf7.point_size = CLAMP(brw->attribs.Raster->line_width, 1.0, 255.0) * (1<<3); - sf.sf7.use_point_size_state = !brw->attribs.Raster->point_size_per_vertex; - - /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: - */ - sf.sf7.trifan_pv = 2; - sf.sf7.linestrip_pv = 1; - sf.sf7.tristrip_pv = 2; - sf.sf7.line_last_pixel_enable = 0; - - /* Set bias for OpenGL rasterization rules: - */ - sf.sf6.dest_org_vbias = 0x8; - sf.sf6.dest_org_hbias = 0x8; - - brw->sf.state_gs_offset = brw_cache_data( &brw->cache[BRW_SF_UNIT], &sf ); -} - - -const struct brw_tracked_state brw_sf_unit = { - .dirty = { - .brw = (BRW_NEW_RASTERIZER | - BRW_NEW_URB_FENCE), - .cache = (CACHE_NEW_SF_VP | - CACHE_NEW_SF_PROG) - }, - .update = upload_sf_unit -}; - - diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c deleted file mode 100644 index 86d877d7ef..0000000000 --- a/src/gallium/drivers/i965simple/brw_shader_info.c +++ /dev/null @@ -1,48 +0,0 @@ - -#include "brw_context.h" -#include "brw_state.h" -#include "util/u_memory.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" - - -/** - * XXX this obsolete new and no longer compiled. - */ -void brw_shader_info(const struct tgsi_token *tokens, - struct brw_shader_info *info ) -{ - struct tgsi_parse_context parse; - int done = 0; - - tgsi_parse_init( &parse, tokens ); - - while( !done && - !tgsi_parse_end_of_tokens( &parse ) ) - { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; - unsigned last = decl->DeclarationRange.Last; - - // Broken by crazy wpos init: - //assert( info->nr_regs[decl->Declaration.File] <= last); - - info->nr_regs[decl->Declaration.File] = MAX2(info->nr_regs[decl->Declaration.File], - last+1); - break; - } - case TGSI_TOKEN_TYPE_IMMEDIATE: - case TGSI_TOKEN_TYPE_INSTRUCTION: - default: - done = 1; - break; - } - } - - tgsi_parse_free (&parse); - -} diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c deleted file mode 100644 index b47f5373f3..0000000000 --- a/src/gallium/drivers/i965simple/brw_state.c +++ /dev/null @@ -1,469 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Zack Rusin - * Keith Whitwell - */ - - -#include "pipe/internal/p_winsys_screen.h" -#include "util/u_memory.h" -#include "pipe/p_inlines.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_dump.h" -#include "tgsi/tgsi_parse.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "brw_draw.h" - - -#define DUP( TYPE, VAL ) \ -do { \ - struct TYPE *x = malloc(sizeof(*x)); \ - memcpy(x, VAL, sizeof(*x) ); \ - return x; \ -} while (0) - -/************************************************************************ - * Blend - */ -static void * -brw_create_blend_state(struct pipe_context *pipe, - const struct pipe_blend_state *blend) -{ - DUP( pipe_blend_state, blend ); -} - -static void brw_bind_blend_state(struct pipe_context *pipe, - void *blend) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Blend = (struct pipe_blend_state*)blend; - brw->state.dirty.brw |= BRW_NEW_BLEND; -} - - -static void brw_delete_blend_state(struct pipe_context *pipe, void *blend) -{ - free(blend); -} - -static void brw_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.BlendColor = *blend_color; - - brw->state.dirty.brw |= BRW_NEW_BLEND; -} - -/************************************************************************ - * Sampler - */ - -static void * -brw_create_sampler_state(struct pipe_context *pipe, - const struct pipe_sampler_state *sampler) -{ - DUP( pipe_sampler_state, sampler ); -} - -static void brw_bind_sampler_states(struct pipe_context *pipe, - unsigned num, void **sampler) -{ - struct brw_context *brw = brw_context(pipe); - - assert(num <= PIPE_MAX_SAMPLERS); - - /* Check for no-op */ - if (num == brw->num_samplers && - !memcmp(brw->attribs.Samplers, sampler, num * sizeof(void *))) - return; - - memcpy(brw->attribs.Samplers, sampler, num * sizeof(void *)); - memset(&brw->attribs.Samplers[num], 0, (PIPE_MAX_SAMPLERS - num) * - sizeof(void *)); - - brw->num_samplers = num; - - brw->state.dirty.brw |= BRW_NEW_SAMPLER; -} - -static void brw_delete_sampler_state(struct pipe_context *pipe, - void *sampler) -{ - free(sampler); -} - - -/************************************************************************ - * Depth stencil - */ - -static void * -brw_create_depth_stencil_state(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *depth_stencil) -{ - DUP( pipe_depth_stencil_alpha_state, depth_stencil ); -} - -static void brw_bind_depth_stencil_state(struct pipe_context *pipe, - void *depth_stencil) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.DepthStencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; - - brw->state.dirty.brw |= BRW_NEW_DEPTH_STENCIL; -} - -static void brw_delete_depth_stencil_state(struct pipe_context *pipe, - void *depth_stencil) -{ - free(depth_stencil); -} - -/************************************************************************ - * Scissor - */ -static void brw_set_scissor_state( struct pipe_context *pipe, - const struct pipe_scissor_state *scissor ) -{ - struct brw_context *brw = brw_context(pipe); - - memcpy( &brw->attribs.Scissor, scissor, sizeof(*scissor) ); - brw->state.dirty.brw |= BRW_NEW_SCISSOR; -} - - -/************************************************************************ - * Stipple - */ - -static void brw_set_polygon_stipple( struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple ) -{ -} - - -/************************************************************************ - * Fragment shader - */ - -static void * brw_create_fs_state(struct pipe_context *pipe, - const struct pipe_shader_state *shader) -{ - struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program); - - brw_fp->program.tokens = tgsi_dup_tokens(shader->tokens); - brw_fp->id = brw_context(pipe)->program_id++; - - tgsi_scan_shader(shader->tokens, &brw_fp->info); - -#if 0 - brw_shader_info(shader->tokens, - &brw_fp->info2); -#endif - - tgsi_dump(shader->tokens, 0); - - - return (void *)brw_fp; -} - -static void brw_bind_fs_state(struct pipe_context *pipe, void *shader) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.FragmentProgram = (struct brw_fragment_program *)shader; - brw->state.dirty.brw |= BRW_NEW_FS; -} - -static void brw_delete_fs_state(struct pipe_context *pipe, void *shader) -{ - struct brw_fragment_program *brw_fp = (struct brw_fragment_program *) shader; - - FREE((void *) brw_fp->program.tokens); - FREE(brw_fp); -} - - -/************************************************************************ - * Vertex shader and other TNL state - */ - -static void *brw_create_vs_state(struct pipe_context *pipe, - const struct pipe_shader_state *shader) -{ - struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program); - - brw_vp->program.tokens = tgsi_dup_tokens(shader->tokens); - brw_vp->id = brw_context(pipe)->program_id++; - - tgsi_scan_shader(shader->tokens, &brw_vp->info); - -#if 0 - brw_shader_info(shader->tokens, - &brw_vp->info2); -#endif - tgsi_dump(shader->tokens, 0); - - return (void *)brw_vp; -} - -static void brw_bind_vs_state(struct pipe_context *pipe, void *vs) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.VertexProgram = (struct brw_vertex_program *)vs; - brw->state.dirty.brw |= BRW_NEW_VS; - - debug_printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n"); -} - -static void brw_delete_vs_state(struct pipe_context *pipe, void *shader) -{ - struct brw_vertex_program *brw_vp = (struct brw_vertex_program *) shader; - - FREE((void *) brw_vp->program.tokens); - FREE(brw_vp); -} - - -static void brw_set_clip_state( struct pipe_context *pipe, - const struct pipe_clip_state *clip ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Clip = *clip; -} - - -static void brw_set_viewport_state( struct pipe_context *pipe, - const struct pipe_viewport_state *viewport ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Viewport = *viewport; /* struct copy */ - brw->state.dirty.brw |= BRW_NEW_VIEWPORT; - - /* pass the viewport info to the draw module */ - //draw_set_viewport_state(brw->draw, viewport); -} - - -static void brw_set_vertex_buffers(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_buffer *buffers) -{ - struct brw_context *brw = brw_context(pipe); - memcpy(brw->vb.vbo_array, buffers, count * sizeof(buffers[0])); -} - -static void brw_set_vertex_elements(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_element *elements) -{ - /* flush ? */ - struct brw_context *brw = brw_context(pipe); - uint i; - - assert(count <= PIPE_MAX_ATTRIBS); - - for (i = 0; i < count; i++) { - struct brw_vertex_element_state el; - memset(&el, 0, sizeof(el)); - - el.ve0.src_offset = elements[i].src_offset; - el.ve0.src_format = brw_translate_surface_format(elements[i].src_format); - el.ve0.valid = 1; - el.ve0.vertex_buffer_index = elements[i].vertex_buffer_index; - - el.ve1.dst_offset = i * 4; - - el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; - - switch (elements[i].nr_components) { - case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; - case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; - case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; - break; - } - - brw->vb.inputs[i] = el; - } -} - - - -/************************************************************************ - * Constant buffers - */ - -static void brw_set_constant_buffer(struct pipe_context *pipe, - uint shader, uint index, - const struct pipe_constant_buffer *buf) -{ - struct brw_context *brw = brw_context(pipe); - - assert(buf == 0 || index == 0); - - brw->attribs.Constants[shader] = buf; - brw->state.dirty.brw |= BRW_NEW_CONSTANTS; -} - - -/************************************************************************ - * Texture surfaces - */ - - -static void brw_set_sampler_textures(struct pipe_context *pipe, - unsigned num, - struct pipe_texture **texture) -{ - struct brw_context *brw = brw_context(pipe); - uint i; - - assert(num <= PIPE_MAX_SAMPLERS); - - /* Check for no-op */ - if (num == brw->num_textures && - !memcmp(brw->attribs.Texture, texture, num * - sizeof(struct pipe_texture *))) - return; - - for (i = 0; i < num; i++) - pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], - texture[i]); - - for (i = num; i < brw->num_textures; i++) - pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], - NULL); - - brw->num_textures = num; - - brw->state.dirty.brw |= BRW_NEW_TEXTURE; -} - - -/************************************************************************ - * Render targets, etc - */ - -static void brw_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.FrameBuffer = *fb; /* struct copy */ - - brw->state.dirty.brw |= BRW_NEW_FRAMEBUFFER; -} - - - -/************************************************************************ - * Rasterizer state - */ - -static void * -brw_create_rasterizer_state(struct pipe_context *pipe, - const struct pipe_rasterizer_state *rasterizer) -{ - DUP(pipe_rasterizer_state, rasterizer); -} - -static void brw_bind_rasterizer_state( struct pipe_context *pipe, - void *setup ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Raster = (struct pipe_rasterizer_state *)setup; - - /* Also pass-through to draw module: - */ - //draw_set_rasterizer_state(brw->draw, setup); - - brw->state.dirty.brw |= BRW_NEW_RASTERIZER; -} - -static void brw_delete_rasterizer_state(struct pipe_context *pipe, - void *setup) -{ - free(setup); -} - - - -void -brw_init_state_functions( struct brw_context *brw ) -{ - brw->pipe.create_blend_state = brw_create_blend_state; - brw->pipe.bind_blend_state = brw_bind_blend_state; - brw->pipe.delete_blend_state = brw_delete_blend_state; - - brw->pipe.create_sampler_state = brw_create_sampler_state; - brw->pipe.bind_sampler_states = brw_bind_sampler_states; - brw->pipe.delete_sampler_state = brw_delete_sampler_state; - - brw->pipe.create_depth_stencil_alpha_state = brw_create_depth_stencil_state; - brw->pipe.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state; - brw->pipe.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state; - - brw->pipe.create_rasterizer_state = brw_create_rasterizer_state; - brw->pipe.bind_rasterizer_state = brw_bind_rasterizer_state; - brw->pipe.delete_rasterizer_state = brw_delete_rasterizer_state; - brw->pipe.create_fs_state = brw_create_fs_state; - brw->pipe.bind_fs_state = brw_bind_fs_state; - brw->pipe.delete_fs_state = brw_delete_fs_state; - brw->pipe.create_vs_state = brw_create_vs_state; - brw->pipe.bind_vs_state = brw_bind_vs_state; - brw->pipe.delete_vs_state = brw_delete_vs_state; - - brw->pipe.set_blend_color = brw_set_blend_color; - brw->pipe.set_clip_state = brw_set_clip_state; - brw->pipe.set_constant_buffer = brw_set_constant_buffer; - brw->pipe.set_framebuffer_state = brw_set_framebuffer_state; - -// brw->pipe.set_feedback_state = brw_set_feedback_state; -// brw->pipe.set_feedback_buffer = brw_set_feedback_buffer; - - brw->pipe.set_polygon_stipple = brw_set_polygon_stipple; - brw->pipe.set_scissor_state = brw_set_scissor_state; - brw->pipe.set_sampler_textures = brw_set_sampler_textures; - brw->pipe.set_viewport_state = brw_set_viewport_state; - brw->pipe.set_vertex_buffers = brw_set_vertex_buffers; - brw->pipe.set_vertex_elements = brw_set_vertex_elements; -} diff --git a/src/gallium/drivers/i965simple/brw_state.h b/src/gallium/drivers/i965simple/brw_state.h deleted file mode 100644 index de0a6371b8..0000000000 --- a/src/gallium/drivers/i965simple/brw_state.h +++ /dev/null @@ -1,151 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_STATE_H -#define BRW_STATE_H - -#include "brw_context.h" -#include "brw_winsys.h" - - -const struct brw_tracked_state brw_blend_constant_color; -const struct brw_tracked_state brw_cc_unit; -const struct brw_tracked_state brw_cc_vp; -const struct brw_tracked_state brw_clip_prog; -const struct brw_tracked_state brw_clip_unit; -const struct brw_tracked_state brw_constant_buffer_state; -const struct brw_tracked_state brw_constant_buffer; -const struct brw_tracked_state brw_curbe_offsets; -const struct brw_tracked_state brw_invarient_state; -const struct brw_tracked_state brw_gs_prog; -const struct brw_tracked_state brw_gs_unit; -const struct brw_tracked_state brw_drawing_rect; -const struct brw_tracked_state brw_line_stipple; -const struct brw_tracked_state brw_pipelined_state_pointers; -const struct brw_tracked_state brw_binding_table_pointers; -const struct brw_tracked_state brw_depthbuffer; -const struct brw_tracked_state brw_polygon_stipple_offset; -const struct brw_tracked_state brw_polygon_stipple; -const struct brw_tracked_state brw_program_parameters; -const struct brw_tracked_state brw_recalculate_urb_fence; -const struct brw_tracked_state brw_sf_prog; -const struct brw_tracked_state brw_sf_unit; -const struct brw_tracked_state brw_sf_vp; -const struct brw_tracked_state brw_state_base_address; -const struct brw_tracked_state brw_urb_fence; -const struct brw_tracked_state brw_vertex_state; -const struct brw_tracked_state brw_vs_prog; -const struct brw_tracked_state brw_vs_unit; -const struct brw_tracked_state brw_wm_prog; -const struct brw_tracked_state brw_wm_samplers; -const struct brw_tracked_state brw_wm_surfaces; -const struct brw_tracked_state brw_wm_unit; - -const struct brw_tracked_state brw_psp_urb_cbs; - -const struct brw_tracked_state brw_active_vertprog; -const struct brw_tracked_state brw_tnl_vertprog; -const struct brw_tracked_state brw_pipe_control; - -const struct brw_tracked_state brw_clear_surface_cache; -const struct brw_tracked_state brw_clear_batch_cache; - -/*********************************************************************** - * brw_state_cache.c - */ -unsigned brw_cache_data(struct brw_cache *cache, - const void *data ); - -unsigned brw_cache_data_sz(struct brw_cache *cache, - const void *data, - unsigned data_sz); - -unsigned brw_upload_cache( struct brw_cache *cache, - const void *key, - unsigned key_sz, - const void *data, - unsigned data_sz, - const void *aux, - void *aux_return ); - -boolean brw_search_cache( struct brw_cache *cache, - const void *key, - unsigned key_size, - void *aux_return, - unsigned *offset_return); - -void brw_init_caches( struct brw_context *brw ); -void brw_destroy_caches( struct brw_context *brw ); - -static inline struct pipe_buffer *brw_cache_buffer(struct brw_context *brw, - enum brw_cache_id id) -{ - return brw->cache[id].pool->buffer; -} - -/*********************************************************************** - * brw_state_batch.c - */ -#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) ) - -boolean brw_cached_batch_struct( struct brw_context *brw, - const void *data, - unsigned sz ); - -void brw_destroy_batch_cache( struct brw_context *brw ); - - -/*********************************************************************** - * brw_state_pool.c - */ -void brw_init_pools( struct brw_context *brw ); -void brw_destroy_pools( struct brw_context *brw ); - -boolean brw_pool_alloc( struct brw_mem_pool *pool, - unsigned size, - unsigned alignment, - unsigned *offset_return); - -void brw_pool_fence( struct brw_context *brw, - struct brw_mem_pool *pool, - unsigned fence ); - - -void brw_pool_check_wrap( struct brw_context *brw, - struct brw_mem_pool *pool ); - -void brw_clear_all_caches( struct brw_context *brw ); -void brw_invalidate_pools( struct brw_context *brw ); -void brw_clear_batch_cache_flush( struct brw_context *brw ); - - -#endif diff --git a/src/gallium/drivers/i965simple/brw_state_batch.c b/src/gallium/drivers/i965simple/brw_state_batch.c deleted file mode 100644 index 43a1c89fc4..0000000000 --- a/src/gallium/drivers/i965simple/brw_state_batch.c +++ /dev/null @@ -1,113 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "brw_state.h" -#include "brw_winsys.h" - -#include "util/u_memory.h" - -/* A facility similar to the data caching code above, which aims to - * prevent identical commands being issued repeatedly. - */ -boolean brw_cached_batch_struct( struct brw_context *brw, - const void *data, - unsigned sz ) -{ - struct brw_cached_batch_item *item = brw->cached_batch_items; - struct header *newheader = (struct header *)data; - - if (brw->emit_state_always) { - brw_batchbuffer_data(brw->winsys, data, sz); - return TRUE; - } - - while (item) { - if (item->header->opcode == newheader->opcode) { - if (item->sz == sz && memcmp(item->header, newheader, sz) == 0) - return FALSE; - if (item->sz != sz) { - FREE(item->header); - item->header = MALLOC(sz); - item->sz = sz; - } - goto emit; - } - item = item->next; - } - - assert(!item); - item = CALLOC_STRUCT(brw_cached_batch_item); - item->header = MALLOC(sz); - item->sz = sz; - item->next = brw->cached_batch_items; - brw->cached_batch_items = item; - -emit: - memcpy(item->header, newheader, sz); - brw_batchbuffer_data(brw->winsys, data, sz); - return TRUE; -} - -static void clear_batch_cache( struct brw_context *brw ) -{ - struct brw_cached_batch_item *item = brw->cached_batch_items; - - while (item) { - struct brw_cached_batch_item *next = item->next; - free((void *)item->header); - free(item); - item = next; - } - - brw->cached_batch_items = NULL; - - - brw_clear_all_caches(brw); - - brw_invalidate_pools(brw); -} - -void brw_clear_batch_cache_flush( struct brw_context *brw ) -{ - clear_batch_cache(brw); - -/* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */ - - brw->state.dirty.brw |= ~0; - brw->state.dirty.cache |= ~0; -} - - - -void brw_destroy_batch_cache( struct brw_context *brw ) -{ - clear_batch_cache(brw); -} diff --git a/src/gallium/drivers/i965simple/brw_state_cache.c b/src/gallium/drivers/i965simple/brw_state_cache.c deleted file mode 100644 index 094248fa69..0000000000 --- a/src/gallium/drivers/i965simple/brw_state_cache.c +++ /dev/null @@ -1,443 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_state.h" - -#include "brw_wm.h" -#include "brw_vs.h" -#include "brw_clip.h" -#include "brw_sf.h" -#include "brw_gs.h" - -#include "util/u_memory.h" - - - -/*********************************************************************** - * Check cache for uploaded version of struct, else upload new one. - * Fail when memory is exhausted. - * - * XXX: FIXME: Currently search is so slow it would be quicker to - * regenerate the data every time... - */ - -static unsigned hash_key( const void *key, unsigned key_size ) -{ - unsigned *ikey = (unsigned *)key; - unsigned hash = 0, i; - - assert(key_size % 4 == 0); - - /* I'm sure this can be improved on: - */ - for (i = 0; i < key_size/4; i++) - hash ^= ikey[i]; - - return hash; -} - -static struct brw_cache_item *search_cache( struct brw_cache *cache, - unsigned hash, - const void *key, - unsigned key_size) -{ - struct brw_cache_item *c; - - for (c = cache->items[hash % cache->size]; c; c = c->next) { - if (c->hash == hash && - c->key_size == key_size && - memcmp(c->key, key, key_size) == 0) - return c; - } - - return NULL; -} - - -static void rehash( struct brw_cache *cache ) -{ - struct brw_cache_item **items; - struct brw_cache_item *c, *next; - unsigned size, i; - - size = cache->size * 3; - items = (struct brw_cache_item**) MALLOC(size * sizeof(*items)); - memset(items, 0, size * sizeof(*items)); - - for (i = 0; i < cache->size; i++) - for (c = cache->items[i]; c; c = next) { - next = c->next; - c->next = items[c->hash % size]; - items[c->hash % size] = c; - } - - FREE(cache->items); - cache->items = items; - cache->size = size; -} - - -boolean brw_search_cache( struct brw_cache *cache, - const void *key, - unsigned key_size, - void *aux_return, - unsigned *offset_return) -{ - struct brw_cache_item *item; - unsigned addr = 0; - unsigned hash = hash_key(key, key_size); - - item = search_cache(cache, hash, key, key_size); - - if (item) { - if (aux_return) - *(void **)aux_return = (void *)((char *)item->key + item->key_size); - - *offset_return = addr = item->offset; - } - - if (item == NULL || addr != cache->last_addr) { - cache->brw->state.dirty.cache |= 1<id; - cache->last_addr = addr; - } - - return item != NULL; -} - -unsigned brw_upload_cache( struct brw_cache *cache, - const void *key, - unsigned key_size, - const void *data, - unsigned data_size, - const void *aux, - void *aux_return ) -{ - unsigned offset; - struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); - unsigned hash = hash_key(key, key_size); - void *tmp = MALLOC(key_size + cache->aux_size); - - if (!brw_pool_alloc(cache->pool, data_size, 1 << 6, &offset)) { - /* Should not be possible: - */ - debug_printf("brw_pool_alloc failed\n"); - exit(1); - } - - memcpy(tmp, key, key_size); - - if (cache->aux_size) - memcpy(tmp+key_size, aux, cache->aux_size); - - item->key = tmp; - item->hash = hash; - item->key_size = key_size; - item->offset = offset; - item->data_size = data_size; - - if (++cache->n_items > cache->size * 1.5) - rehash(cache); - - hash %= cache->size; - item->next = cache->items[hash]; - cache->items[hash] = item; - - if (aux_return) { - assert(cache->aux_size); - *(void **)aux_return = (void *)((char *)item->key + item->key_size); - } - - if (BRW_DEBUG & DEBUG_STATE) - debug_printf("upload %s: %d bytes to pool buffer %p offset %x\n", - cache->name, - data_size, - (void*)cache->pool->buffer, - offset); - - /* Copy data to the buffer: - */ - cache->brw->winsys->buffer_subdata_typed(cache->brw->winsys, - cache->pool->buffer, - offset, - data_size, - data, - cache->id); - - cache->brw->state.dirty.cache |= 1<id; - cache->last_addr = offset; - - return offset; -} - -/* This doesn't really work with aux data. Use search/upload instead - */ -unsigned brw_cache_data_sz(struct brw_cache *cache, - const void *data, - unsigned data_size) -{ - unsigned addr; - - if (!brw_search_cache(cache, data, data_size, NULL, &addr)) { - addr = brw_upload_cache(cache, - data, data_size, - data, data_size, - NULL, NULL); - } - - return addr; -} - -unsigned brw_cache_data(struct brw_cache *cache, - const void *data) -{ - return brw_cache_data_sz(cache, data, cache->key_size); -} - -enum pool_type { - DW_SURFACE_STATE, - DW_GENERAL_STATE -}; - -static void brw_init_cache( struct brw_context *brw, - const char *name, - unsigned id, - unsigned key_size, - unsigned aux_size, - enum pool_type pool_type) -{ - struct brw_cache *cache = &brw->cache[id]; - cache->brw = brw; - cache->id = id; - cache->name = name; - cache->items = NULL; - - cache->size = 7; - cache->n_items = 0; - cache->items = (struct brw_cache_item **) - CALLOC(cache->size, sizeof(struct brw_cache_item)); - - - cache->key_size = key_size; - cache->aux_size = aux_size; - switch (pool_type) { - case DW_GENERAL_STATE: cache->pool = &brw->pool[BRW_GS_POOL]; break; - case DW_SURFACE_STATE: cache->pool = &brw->pool[BRW_SS_POOL]; break; - default: assert(0); break; - } -} - -void brw_init_caches( struct brw_context *brw ) -{ - - brw_init_cache(brw, - "CC_VP", - BRW_CC_VP, - sizeof(struct brw_cc_viewport), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "CC_UNIT", - BRW_CC_UNIT, - sizeof(struct brw_cc_unit_state), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "WM_PROG", - BRW_WM_PROG, - sizeof(struct brw_wm_prog_key), - sizeof(struct brw_wm_prog_data), - DW_GENERAL_STATE); - - brw_init_cache(brw, - "SAMPLER_DEFAULT_COLOR", - BRW_SAMPLER_DEFAULT_COLOR, - sizeof(struct brw_sampler_default_color), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "SAMPLER", - BRW_SAMPLER, - 0, /* variable key/data size */ - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "WM_UNIT", - BRW_WM_UNIT, - sizeof(struct brw_wm_unit_state), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "SF_PROG", - BRW_SF_PROG, - sizeof(struct brw_sf_prog_key), - sizeof(struct brw_sf_prog_data), - DW_GENERAL_STATE); - - brw_init_cache(brw, - "SF_VP", - BRW_SF_VP, - sizeof(struct brw_sf_viewport), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "SF_UNIT", - BRW_SF_UNIT, - sizeof(struct brw_sf_unit_state), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "VS_UNIT", - BRW_VS_UNIT, - sizeof(struct brw_vs_unit_state), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "VS_PROG", - BRW_VS_PROG, - sizeof(struct brw_vs_prog_key), - sizeof(struct brw_vs_prog_data), - DW_GENERAL_STATE); - - brw_init_cache(brw, - "CLIP_UNIT", - BRW_CLIP_UNIT, - sizeof(struct brw_clip_unit_state), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "CLIP_PROG", - BRW_CLIP_PROG, - sizeof(struct brw_clip_prog_key), - sizeof(struct brw_clip_prog_data), - DW_GENERAL_STATE); - - brw_init_cache(brw, - "GS_UNIT", - BRW_GS_UNIT, - sizeof(struct brw_gs_unit_state), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "GS_PROG", - BRW_GS_PROG, - sizeof(struct brw_gs_prog_key), - sizeof(struct brw_gs_prog_data), - DW_GENERAL_STATE); - - brw_init_cache(brw, - "SS_SURFACE", - BRW_SS_SURFACE, - sizeof(struct brw_surface_state), - 0, - DW_SURFACE_STATE); - - brw_init_cache(brw, - "SS_SURF_BIND", - BRW_SS_SURF_BIND, - sizeof(struct brw_surface_binding_table), - 0, - DW_SURFACE_STATE); -} - - -/* When we lose hardware context, need to invalidate the surface cache - * as these structs must be explicitly re-uploaded. They are subject - * to fixup by the memory manager as they contain absolute agp - * offsets, so we need to ensure there is a fresh version of the - * struct available to receive the fixup. - * - * XXX: Need to ensure that there aren't two versions of a surface or - * bufferobj with different backing data active in the same buffer at - * once? Otherwise the cache could confuse them. Maybe better not to - * cache at all? - * - * --> Isn't this the same as saying need to ensure batch is flushed - * before new data is uploaded to an existing buffer? We - * already try to make sure of that. - */ -static void clear_cache( struct brw_cache *cache ) -{ - struct brw_cache_item *c, *next; - unsigned i; - - for (i = 0; i < cache->size; i++) { - for (c = cache->items[i]; c; c = next) { - next = c->next; - free((void *)c->key); - free(c); - } - cache->items[i] = NULL; - } - - cache->n_items = 0; -} - -void brw_clear_all_caches( struct brw_context *brw ) -{ - int i; - - if (BRW_DEBUG & DEBUG_STATE) - debug_printf("%s\n", __FUNCTION__); - - for (i = 0; i < BRW_MAX_CACHE; i++) - clear_cache(&brw->cache[i]); - - if (brw->curbe.last_buf) { - FREE(brw->curbe.last_buf); - brw->curbe.last_buf = NULL; - } - - brw->state.dirty.brw |= ~0; - brw->state.dirty.cache |= ~0; -} - - - - - -void brw_destroy_caches( struct brw_context *brw ) -{ - unsigned i; - - for (i = 0; i < BRW_MAX_CACHE; i++) - clear_cache(&brw->cache[i]); -} diff --git a/src/gallium/drivers/i965simple/brw_state_pool.c b/src/gallium/drivers/i965simple/brw_state_pool.c deleted file mode 100644 index e91263cb1f..0000000000 --- a/src/gallium/drivers/i965simple/brw_state_pool.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -/** @file brw_state_pool.c - * Implements the state pool allocator. - * - * For the 965, we create two state pools for state cache entries. Objects - * will be allocated into the pools depending on which state base address - * their pointer is relative to in other 965 state. - * - * The state pools are relatively simple: As objects are allocated, increment - * the offset to allocate space. When the pool is "full" (rather, close to - * full), we reset the pool and reset the state cache entries that point into - * the pool. - */ - -#include "pipe/internal/p_winsys_screen.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "pipe/p_inlines.h" -#include "brw_context.h" -#include "brw_state.h" - -boolean brw_pool_alloc( struct brw_mem_pool *pool, - unsigned size, - unsigned alignment, - unsigned *offset_return) -{ - unsigned fixup = align(pool->offset, alignment) - pool->offset; - - size = align(size, 4); - - if (pool->offset + fixup + size >= pool->size) { - debug_printf("%s failed\n", __FUNCTION__); - assert(0); - exit(0); - } - - pool->offset += fixup; - *offset_return = pool->offset; - pool->offset += size; - - return TRUE; -} - -static -void brw_invalidate_pool( struct brw_mem_pool *pool ) -{ - if (BRW_DEBUG & DEBUG_STATE) - debug_printf("\n\n\n %s \n\n\n", __FUNCTION__); - - pool->offset = 0; - - brw_clear_all_caches(pool->brw); -} - - -static void brw_init_pool( struct brw_context *brw, - unsigned pool_id, - unsigned size ) -{ - struct brw_mem_pool *pool = &brw->pool[pool_id]; - - pool->size = size; - pool->brw = brw; - - pool->buffer = pipe_buffer_create(brw->pipe.screen, - 4096, - 0 /* DRM_BO_FLAG_MEM_TT */, - size); -} - -static void brw_destroy_pool( struct brw_context *brw, - unsigned pool_id ) -{ - struct brw_mem_pool *pool = &brw->pool[pool_id]; - - pipe_buffer_reference( pool->brw->pipe.screen, - &pool->buffer, - NULL ); -} - - -void brw_pool_check_wrap( struct brw_context *brw, - struct brw_mem_pool *pool ) -{ - if (pool->offset > (pool->size * 3) / 4) { - brw->state.dirty.brw |= BRW_NEW_SCENE; - } - -} - -void brw_init_pools( struct brw_context *brw ) -{ - brw_init_pool(brw, BRW_GS_POOL, 0x80000); - brw_init_pool(brw, BRW_SS_POOL, 0x80000); -} - -void brw_destroy_pools( struct brw_context *brw ) -{ - brw_destroy_pool(brw, BRW_GS_POOL); - brw_destroy_pool(brw, BRW_SS_POOL); -} - - -void brw_invalidate_pools( struct brw_context *brw ) -{ - brw_invalidate_pool(&brw->pool[BRW_GS_POOL]); - brw_invalidate_pool(&brw->pool[BRW_SS_POOL]); -} diff --git a/src/gallium/drivers/i965simple/brw_state_upload.c b/src/gallium/drivers/i965simple/brw_state_upload.c deleted file mode 100644 index bac9161b5f..0000000000 --- a/src/gallium/drivers/i965simple/brw_state_upload.c +++ /dev/null @@ -1,202 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_context.h" -#include "brw_state.h" - -#include "util/u_memory.h" - -/* This is used to initialize brw->state.atoms[]. We could use this - * list directly except for a single atom, brw_constant_buffer, which - * has a .dirty value which changes according to the parameters of the - * current fragment and vertex programs, and so cannot be a static - * value. - */ -const struct brw_tracked_state *atoms[] = -{ - &brw_vs_prog, - &brw_gs_prog, - &brw_clip_prog, - &brw_sf_prog, - &brw_wm_prog, - - /* Once all the programs are done, we know how large urb entry - * sizes need to be and can decide if we need to change the urb - * layout. - */ - &brw_curbe_offsets, - &brw_recalculate_urb_fence, - - - &brw_cc_vp, - &brw_cc_unit, - - &brw_wm_surfaces, /* must do before samplers */ - &brw_wm_samplers, - - &brw_wm_unit, - &brw_sf_vp, - &brw_sf_unit, - &brw_vs_unit, /* always required, enabled or not */ - &brw_clip_unit, - &brw_gs_unit, - - /* Command packets: - */ - &brw_invarient_state, - &brw_state_base_address, - &brw_pipe_control, - - &brw_binding_table_pointers, - &brw_blend_constant_color, - - &brw_drawing_rect, - &brw_depthbuffer, - - &brw_polygon_stipple, - &brw_line_stipple, - - &brw_psp_urb_cbs, - - &brw_constant_buffer -}; - - -void brw_init_state( struct brw_context *brw ) -{ - brw_init_pools(brw); - brw_init_caches(brw); - - brw->state.dirty.brw = ~0; - brw->emit_state_always = 0; -} - - -void brw_destroy_state( struct brw_context *brw ) -{ - brw_destroy_caches(brw); - brw_destroy_batch_cache(brw); - brw_destroy_pools(brw); -} - -/*********************************************************************** - */ - -static boolean check_state( const struct brw_state_flags *a, - const struct brw_state_flags *b ) -{ - return ((a->brw & b->brw) || - (a->cache & b->cache)); -} - -static void accumulate_state( struct brw_state_flags *a, - const struct brw_state_flags *b ) -{ - a->brw |= b->brw; - a->cache |= b->cache; -} - - -static void xor_states( struct brw_state_flags *result, - const struct brw_state_flags *a, - const struct brw_state_flags *b ) -{ - result->brw = a->brw ^ b->brw; - result->cache = a->cache ^ b->cache; -} - - -/*********************************************************************** - * Emit all state: - */ -void brw_validate_state( struct brw_context *brw ) -{ - struct brw_state_flags *state = &brw->state.dirty; - unsigned i; - - if (brw->emit_state_always) - state->brw |= ~0; - - if (state->cache == 0 && - state->brw == 0) - return; - - if (brw->state.dirty.brw & BRW_NEW_SCENE) - brw_clear_batch_cache_flush(brw); - - if (BRW_DEBUG) { - /* Debug version which enforces various sanity checks on the - * state flags which are generated and checked to help ensure - * state atoms are ordered correctly in the list. - */ - struct brw_state_flags examined, prev; - memset(&examined, 0, sizeof(examined)); - prev = *state; - - for (i = 0; i < Elements(atoms); i++) { - const struct brw_tracked_state *atom = atoms[i]; - struct brw_state_flags generated; - - assert(atom->dirty.brw || - atom->dirty.cache); - assert(atom->update); - - if (check_state(state, &atom->dirty)) { - atom->update( brw ); - } - - accumulate_state(&examined, &atom->dirty); - - /* generated = (prev ^ state) - * if (examined & generated) - * fail; - */ - xor_states(&generated, &prev, state); - assert(!check_state(&examined, &generated)); - prev = *state; - } - } - else { - for (i = 0; i < Elements(atoms); i++) { - const struct brw_tracked_state *atom = atoms[i]; - - assert(atom->dirty.brw || - atom->dirty.cache); - assert(atom->update); - - if (check_state(state, &atom->dirty)) - atom->update( brw ); - } - } - - memset(state, 0, sizeof(*state)); -} diff --git a/src/gallium/drivers/i965simple/brw_structs.h b/src/gallium/drivers/i965simple/brw_structs.h deleted file mode 100644 index bbb087e95d..0000000000 --- a/src/gallium/drivers/i965simple/brw_structs.h +++ /dev/null @@ -1,1348 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_STRUCTS_H -#define BRW_STRUCTS_H - -#include "pipe/p_compiler.h" - -/* Command packets: - */ -struct header -{ - unsigned length:16; - unsigned opcode:16; -}; - - -union header_union -{ - struct header bits; - unsigned dword; -}; - -struct brw_3d_control -{ - struct - { - unsigned length:8; - unsigned notify_enable:1; - unsigned pad:3; - unsigned wc_flush_enable:1; - unsigned depth_stall_enable:1; - unsigned operation:2; - unsigned opcode:16; - } header; - - struct - { - unsigned pad:2; - unsigned dest_addr_type:1; - unsigned dest_addr:29; - } dest; - - unsigned dword2; - unsigned dword3; -}; - - -struct brw_3d_primitive -{ - struct - { - unsigned length:8; - unsigned pad:2; - unsigned topology:5; - unsigned indexed:1; - unsigned opcode:16; - } header; - - unsigned verts_per_instance; - unsigned start_vert_location; - unsigned instance_count; - unsigned start_instance_location; - unsigned base_vert_location; -}; - -/* These seem to be passed around as function args, so it works out - * better to keep them as #defines: - */ -#define BRW_FLUSH_READ_CACHE 0x1 -#define BRW_FLUSH_STATE_CACHE 0x2 -#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4 -#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8 - -struct brw_mi_flush -{ - unsigned flags:4; - unsigned pad:12; - unsigned opcode:16; -}; - -struct brw_vf_statistics -{ - unsigned statistics_enable:1; - unsigned pad:15; - unsigned opcode:16; -}; - - - -struct brw_binding_table_pointers -{ - struct header header; - unsigned vs; - unsigned gs; - unsigned clp; - unsigned sf; - unsigned wm; -}; - - -struct brw_blend_constant_color -{ - struct header header; - float blend_constant_color[4]; -}; - - -struct brw_depthbuffer -{ - union header_union header; - - union { - struct { - unsigned pitch:18; - unsigned format:3; - unsigned pad:4; - unsigned depth_offset_disable:1; - unsigned tile_walk:1; - unsigned tiled_surface:1; - unsigned pad2:1; - unsigned surface_type:3; - } bits; - unsigned dword; - } dword1; - - unsigned dword2_base_addr; - - union { - struct { - unsigned pad:1; - unsigned mipmap_layout:1; - unsigned lod:4; - unsigned width:13; - unsigned height:13; - } bits; - unsigned dword; - } dword3; - - union { - struct { - unsigned pad:12; - unsigned min_array_element:9; - unsigned depth:11; - } bits; - unsigned dword; - } dword4; -}; - -struct brw_drawrect -{ - struct header header; - unsigned xmin:16; - unsigned ymin:16; - unsigned xmax:16; - unsigned ymax:16; - unsigned xorg:16; - unsigned yorg:16; -}; - - - - -struct brw_global_depth_offset_clamp -{ - struct header header; - float depth_offset_clamp; -}; - -struct brw_indexbuffer -{ - union { - struct - { - unsigned length:8; - unsigned index_format:2; - unsigned cut_index_enable:1; - unsigned pad:5; - unsigned opcode:16; - } bits; - unsigned dword; - - } header; - - unsigned buffer_start; - unsigned buffer_end; -}; - - -struct brw_line_stipple -{ - struct header header; - - struct - { - unsigned pattern:16; - unsigned pad:16; - } bits0; - - struct - { - unsigned repeat_count:9; - unsigned pad:7; - unsigned inverse_repeat_count:16; - } bits1; -}; - - -struct brw_pipelined_state_pointers -{ - struct header header; - - struct { - unsigned pad:5; - unsigned offset:27; - } vs; - - struct - { - unsigned enable:1; - unsigned pad:4; - unsigned offset:27; - } gs; - - struct - { - unsigned enable:1; - unsigned pad:4; - unsigned offset:27; - } clp; - - struct - { - unsigned pad:5; - unsigned offset:27; - } sf; - - struct - { - unsigned pad:5; - unsigned offset:27; - } wm; - - struct - { - unsigned pad:5; - unsigned offset:27; /* KW: check me! */ - } cc; -}; - - -struct brw_polygon_stipple_offset -{ - struct header header; - - struct { - unsigned y_offset:5; - unsigned pad:3; - unsigned x_offset:5; - unsigned pad0:19; - } bits0; -}; - - - -struct brw_polygon_stipple -{ - struct header header; - unsigned stipple[32]; -}; - - - -struct brw_pipeline_select -{ - struct - { - unsigned pipeline_select:1; - unsigned pad:15; - unsigned opcode:16; - } header; -}; - - -struct brw_pipe_control -{ - struct - { - unsigned length:8; - unsigned notify_enable:1; - unsigned pad:2; - unsigned instruction_state_cache_flush_enable:1; - unsigned write_cache_flush_enable:1; - unsigned depth_stall_enable:1; - unsigned post_sync_operation:2; - - unsigned opcode:16; - } header; - - struct - { - unsigned pad:2; - unsigned dest_addr_type:1; - unsigned dest_addr:29; - } bits1; - - unsigned data0; - unsigned data1; -}; - - -struct brw_urb_fence -{ - struct - { - unsigned length:8; - unsigned vs_realloc:1; - unsigned gs_realloc:1; - unsigned clp_realloc:1; - unsigned sf_realloc:1; - unsigned vfe_realloc:1; - unsigned cs_realloc:1; - unsigned pad:2; - unsigned opcode:16; - } header; - - struct - { - unsigned vs_fence:10; - unsigned gs_fence:10; - unsigned clp_fence:10; - unsigned pad:2; - } bits0; - - struct - { - unsigned sf_fence:10; - unsigned vf_fence:10; - unsigned cs_fence:10; - unsigned pad:2; - } bits1; -}; - -struct brw_constant_buffer_state /* previously brw_command_streamer */ -{ - struct header header; - - struct - { - unsigned nr_urb_entries:3; - unsigned pad:1; - unsigned urb_entry_size:5; - unsigned pad0:23; - } bits0; -}; - -struct brw_constant_buffer -{ - struct - { - unsigned length:8; - unsigned valid:1; - unsigned pad:7; - unsigned opcode:16; - } header; - - struct - { - unsigned buffer_length:6; - unsigned buffer_address:26; - } bits0; -}; - -struct brw_state_base_address -{ - struct header header; - - struct - { - unsigned modify_enable:1; - unsigned pad:4; - unsigned general_state_address:27; - } bits0; - - struct - { - unsigned modify_enable:1; - unsigned pad:4; - unsigned surface_state_address:27; - } bits1; - - struct - { - unsigned modify_enable:1; - unsigned pad:4; - unsigned indirect_object_state_address:27; - } bits2; - - struct - { - unsigned modify_enable:1; - unsigned pad:11; - unsigned general_state_upper_bound:20; - } bits3; - - struct - { - unsigned modify_enable:1; - unsigned pad:11; - unsigned indirect_object_state_upper_bound:20; - } bits4; -}; - -struct brw_state_prefetch -{ - struct header header; - - struct - { - unsigned prefetch_count:3; - unsigned pad:3; - unsigned prefetch_pointer:26; - } bits0; -}; - -struct brw_system_instruction_pointer -{ - struct header header; - - struct - { - unsigned pad:4; - unsigned system_instruction_pointer:28; - } bits0; -}; - - - - -/* State structs for the various fixed function units: - */ - - -struct thread0 -{ - unsigned pad0:1; - unsigned grf_reg_count:3; - unsigned pad1:2; - unsigned kernel_start_pointer:26; -}; - -struct thread1 -{ - unsigned ext_halt_exception_enable:1; - unsigned sw_exception_enable:1; - unsigned mask_stack_exception_enable:1; - unsigned timeout_exception_enable:1; - unsigned illegal_op_exception_enable:1; - unsigned pad0:3; - unsigned depth_coef_urb_read_offset:6; /* WM only */ - unsigned pad1:2; - unsigned floating_point_mode:1; - unsigned thread_priority:1; - unsigned binding_table_entry_count:8; - unsigned pad3:5; - unsigned single_program_flow:1; -}; - -struct thread2 -{ - unsigned per_thread_scratch_space:4; - unsigned pad0:6; - unsigned scratch_space_base_pointer:22; -}; - - -struct thread3 -{ - unsigned dispatch_grf_start_reg:4; - unsigned urb_entry_read_offset:6; - unsigned pad0:1; - unsigned urb_entry_read_length:6; - unsigned pad1:1; - unsigned const_urb_entry_read_offset:6; - unsigned pad2:1; - unsigned const_urb_entry_read_length:6; - unsigned pad3:1; -}; - - - -struct brw_clip_unit_state -{ - struct thread0 thread0; - struct - { - unsigned pad0:7; - unsigned sw_exception_enable:1; - unsigned pad1:3; - unsigned mask_stack_exception_enable:1; - unsigned pad2:1; - unsigned illegal_op_exception_enable:1; - unsigned pad3:2; - unsigned floating_point_mode:1; - unsigned thread_priority:1; - unsigned binding_table_entry_count:8; - unsigned pad4:5; - unsigned single_program_flow:1; - } thread1; - - struct thread2 thread2; - struct thread3 thread3; - - struct - { - unsigned pad0:9; - unsigned gs_output_stats:1; /* not always */ - unsigned stats_enable:1; - unsigned nr_urb_entries:7; - unsigned pad1:1; - unsigned urb_entry_allocation_size:5; - unsigned pad2:1; - unsigned max_threads:1; /* may be less */ - unsigned pad3:6; - } thread4; - - struct - { - unsigned pad0:13; - unsigned clip_mode:3; - unsigned userclip_enable_flags:8; - unsigned userclip_must_clip:1; - unsigned pad1:1; - unsigned guard_band_enable:1; - unsigned viewport_z_clip_enable:1; - unsigned viewport_xy_clip_enable:1; - unsigned vertex_position_space:1; - unsigned api_mode:1; - unsigned pad2:1; - } clip5; - - struct - { - unsigned pad0:5; - unsigned clipper_viewport_state_ptr:27; - } clip6; - - - float viewport_xmin; - float viewport_xmax; - float viewport_ymin; - float viewport_ymax; -}; - - - -struct brw_cc_unit_state -{ - struct - { - unsigned pad0:3; - unsigned bf_stencil_pass_depth_pass_op:3; - unsigned bf_stencil_pass_depth_fail_op:3; - unsigned bf_stencil_fail_op:3; - unsigned bf_stencil_func:3; - unsigned bf_stencil_enable:1; - unsigned pad1:2; - unsigned stencil_write_enable:1; - unsigned stencil_pass_depth_pass_op:3; - unsigned stencil_pass_depth_fail_op:3; - unsigned stencil_fail_op:3; - unsigned stencil_func:3; - unsigned stencil_enable:1; - } cc0; - - - struct - { - unsigned bf_stencil_ref:8; - unsigned stencil_write_mask:8; - unsigned stencil_test_mask:8; - unsigned stencil_ref:8; - } cc1; - - - struct - { - unsigned logicop_enable:1; - unsigned pad0:10; - unsigned depth_write_enable:1; - unsigned depth_test_function:3; - unsigned depth_test:1; - unsigned bf_stencil_write_mask:8; - unsigned bf_stencil_test_mask:8; - } cc2; - - - struct - { - unsigned pad0:8; - unsigned alpha_test_func:3; - unsigned alpha_test:1; - unsigned blend_enable:1; - unsigned ia_blend_enable:1; - unsigned pad1:1; - unsigned alpha_test_format:1; - unsigned pad2:16; - } cc3; - - struct - { - unsigned pad0:5; - unsigned cc_viewport_state_offset:27; - } cc4; - - struct - { - unsigned pad0:2; - unsigned ia_dest_blend_factor:5; - unsigned ia_src_blend_factor:5; - unsigned ia_blend_function:3; - unsigned statistics_enable:1; - unsigned logicop_func:4; - unsigned pad1:11; - unsigned dither_enable:1; - } cc5; - - struct - { - unsigned clamp_post_alpha_blend:1; - unsigned clamp_pre_alpha_blend:1; - unsigned clamp_range:2; - unsigned pad0:11; - unsigned y_dither_offset:2; - unsigned x_dither_offset:2; - unsigned dest_blend_factor:5; - unsigned src_blend_factor:5; - unsigned blend_function:3; - } cc6; - - struct { - union { - float f; - ubyte ub[4]; - } alpha_ref; - } cc7; -}; - - - -struct brw_sf_unit_state -{ - struct thread0 thread0; - struct thread1 thread1; - struct thread2 thread2; - struct thread3 thread3; - - struct - { - unsigned pad0:10; - unsigned stats_enable:1; - unsigned nr_urb_entries:7; - unsigned pad1:1; - unsigned urb_entry_allocation_size:5; - unsigned pad2:1; - unsigned max_threads:6; - unsigned pad3:1; - } thread4; - - struct - { - unsigned front_winding:1; - unsigned viewport_transform:1; - unsigned pad0:3; - unsigned sf_viewport_state_offset:27; - } sf5; - - struct - { - unsigned pad0:9; - unsigned dest_org_vbias:4; - unsigned dest_org_hbias:4; - unsigned scissor:1; - unsigned disable_2x2_trifilter:1; - unsigned disable_zero_pix_trifilter:1; - unsigned point_rast_rule:2; - unsigned line_endcap_aa_region_width:2; - unsigned line_width:4; - unsigned fast_scissor_disable:1; - unsigned cull_mode:2; - unsigned aa_enable:1; - } sf6; - - struct - { - unsigned point_size:11; - unsigned use_point_size_state:1; - unsigned subpixel_precision:1; - unsigned sprite_point:1; - unsigned pad0:11; - unsigned trifan_pv:2; - unsigned linestrip_pv:2; - unsigned tristrip_pv:2; - unsigned line_last_pixel_enable:1; - } sf7; - -}; - - -struct brw_gs_unit_state -{ - struct thread0 thread0; - struct thread1 thread1; - struct thread2 thread2; - struct thread3 thread3; - - struct - { - unsigned pad0:10; - unsigned stats_enable:1; - unsigned nr_urb_entries:7; - unsigned pad1:1; - unsigned urb_entry_allocation_size:5; - unsigned pad2:1; - unsigned max_threads:1; - unsigned pad3:6; - } thread4; - - struct - { - unsigned sampler_count:3; - unsigned pad0:2; - unsigned sampler_state_pointer:27; - } gs5; - - - struct - { - unsigned max_vp_index:4; - unsigned pad0:26; - unsigned reorder_enable:1; - unsigned pad1:1; - } gs6; -}; - - -struct brw_vs_unit_state -{ - struct thread0 thread0; - struct thread1 thread1; - struct thread2 thread2; - struct thread3 thread3; - - struct - { - unsigned pad0:10; - unsigned stats_enable:1; - unsigned nr_urb_entries:7; - unsigned pad1:1; - unsigned urb_entry_allocation_size:5; - unsigned pad2:1; - unsigned max_threads:4; - unsigned pad3:3; - } thread4; - - struct - { - unsigned sampler_count:3; - unsigned pad0:2; - unsigned sampler_state_pointer:27; - } vs5; - - struct - { - unsigned vs_enable:1; - unsigned vert_cache_disable:1; - unsigned pad0:30; - } vs6; -}; - - -struct brw_wm_unit_state -{ - struct thread0 thread0; - struct thread1 thread1; - struct thread2 thread2; - struct thread3 thread3; - - struct { - unsigned stats_enable:1; - unsigned pad0:1; - unsigned sampler_count:3; - unsigned sampler_state_pointer:27; - } wm4; - - struct - { - unsigned enable_8_pix:1; - unsigned enable_16_pix:1; - unsigned enable_32_pix:1; - unsigned pad0:7; - unsigned legacy_global_depth_bias:1; - unsigned line_stipple:1; - unsigned depth_offset:1; - unsigned polygon_stipple:1; - unsigned line_aa_region_width:2; - unsigned line_endcap_aa_region_width:2; - unsigned early_depth_test:1; - unsigned thread_dispatch_enable:1; - unsigned program_uses_depth:1; - unsigned program_computes_depth:1; - unsigned program_uses_killpixel:1; - unsigned legacy_line_rast: 1; - unsigned pad1:1; - unsigned max_threads:6; - unsigned pad2:1; - } wm5; - - float global_depth_offset_constant; - float global_depth_offset_scale; -}; - -struct brw_sampler_default_color { - float color[4]; -}; - -struct brw_sampler_state -{ - - struct - { - unsigned shadow_function:3; - unsigned lod_bias:11; - unsigned min_filter:3; - unsigned mag_filter:3; - unsigned mip_filter:2; - unsigned base_level:5; - unsigned pad:1; - unsigned lod_preclamp:1; - unsigned default_color_mode:1; - unsigned pad0:1; - unsigned disable:1; - } ss0; - - struct - { - unsigned r_wrap_mode:3; - unsigned t_wrap_mode:3; - unsigned s_wrap_mode:3; - unsigned pad:3; - unsigned max_lod:10; - unsigned min_lod:10; - } ss1; - - - struct - { - unsigned pad:5; - unsigned default_color_pointer:27; - } ss2; - - struct - { - unsigned pad:19; - unsigned max_aniso:3; - unsigned chroma_key_mode:1; - unsigned chroma_key_index:2; - unsigned chroma_key_enable:1; - unsigned monochrome_filter_width:3; - unsigned monochrome_filter_height:3; - } ss3; -}; - - -struct brw_clipper_viewport -{ - float xmin; - float xmax; - float ymin; - float ymax; -}; - -struct brw_cc_viewport -{ - float min_depth; - float max_depth; -}; - -struct brw_sf_viewport -{ - struct { - float m00; - float m11; - float m22; - float m30; - float m31; - float m32; - } viewport; - - struct { - short xmin; - short ymin; - short xmax; - short ymax; - } scissor; -}; - -/* Documented in the subsystem/shared-functions/sampler chapter... - */ -struct brw_surface_state -{ - struct { - unsigned cube_pos_z:1; - unsigned cube_neg_z:1; - unsigned cube_pos_y:1; - unsigned cube_neg_y:1; - unsigned cube_pos_x:1; - unsigned cube_neg_x:1; - unsigned pad:4; - unsigned mipmap_layout_mode:1; - unsigned vert_line_stride_ofs:1; - unsigned vert_line_stride:1; - unsigned color_blend:1; - unsigned writedisable_blue:1; - unsigned writedisable_green:1; - unsigned writedisable_red:1; - unsigned writedisable_alpha:1; - unsigned surface_format:9; - unsigned data_return_format:1; - unsigned pad0:1; - unsigned surface_type:3; - } ss0; - - struct { - unsigned base_addr; - } ss1; - - struct { - unsigned pad:2; - unsigned mip_count:4; - unsigned width:13; - unsigned height:13; - } ss2; - - struct { - unsigned tile_walk:1; - unsigned tiled_surface:1; - unsigned pad:1; - unsigned pitch:18; - unsigned depth:11; - } ss3; - - struct { - unsigned pad:19; - unsigned min_array_elt:9; - unsigned min_lod:4; - } ss4; -}; - - - -struct brw_vertex_buffer_state -{ - struct { - unsigned pitch:11; - unsigned pad:15; - unsigned access_type:1; - unsigned vb_index:5; - } vb0; - - unsigned start_addr; - unsigned max_index; -#if 1 - unsigned instance_data_step_rate; /* not included for sequential/random vertices? */ -#endif -}; - -#define BRW_VBP_MAX 17 - -struct brw_vb_array_state { - struct header header; - struct brw_vertex_buffer_state vb[BRW_VBP_MAX]; -}; - - -struct brw_vertex_element_state -{ - struct - { - unsigned src_offset:11; - unsigned pad:5; - unsigned src_format:9; - unsigned pad0:1; - unsigned valid:1; - unsigned vertex_buffer_index:5; - } ve0; - - struct - { - unsigned dst_offset:8; - unsigned pad:8; - unsigned vfcomponent3:4; - unsigned vfcomponent2:4; - unsigned vfcomponent1:4; - unsigned vfcomponent0:4; - } ve1; -}; - -#define BRW_VEP_MAX 18 - -struct brw_vertex_element_packet { - struct header header; - struct brw_vertex_element_state ve[BRW_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */ -}; - - -struct brw_urb_immediate { - unsigned opcode:4; - unsigned offset:6; - unsigned swizzle_control:2; - unsigned pad:1; - unsigned allocate:1; - unsigned used:1; - unsigned complete:1; - unsigned response_length:4; - unsigned msg_length:4; - unsigned msg_target:4; - unsigned pad1:3; - unsigned end_of_thread:1; -}; - -/* Instruction format for the execution units: - */ - -struct brw_instruction -{ - struct - { - unsigned opcode:7; - unsigned pad:1; - unsigned access_mode:1; - unsigned mask_control:1; - unsigned dependency_control:2; - unsigned compression_control:2; - unsigned thread_control:2; - unsigned predicate_control:4; - unsigned predicate_inverse:1; - unsigned execution_size:3; - unsigned destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */ - unsigned pad0:2; - unsigned debug_control:1; - unsigned saturate:1; - } header; - - union { - struct - { - unsigned dest_reg_file:2; - unsigned dest_reg_type:3; - unsigned src0_reg_file:2; - unsigned src0_reg_type:3; - unsigned src1_reg_file:2; - unsigned src1_reg_type:3; - unsigned pad:1; - unsigned dest_subreg_nr:5; - unsigned dest_reg_nr:8; - unsigned dest_horiz_stride:2; - unsigned dest_address_mode:1; - } da1; - - struct - { - unsigned dest_reg_file:2; - unsigned dest_reg_type:3; - unsigned src0_reg_file:2; - unsigned src0_reg_type:3; - unsigned pad:6; - int dest_indirect_offset:10; /* offset against the deref'd address reg */ - unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */ - unsigned dest_horiz_stride:2; - unsigned dest_address_mode:1; - } ia1; - - struct - { - unsigned dest_reg_file:2; - unsigned dest_reg_type:3; - unsigned src0_reg_file:2; - unsigned src0_reg_type:3; - unsigned src1_reg_file:2; - unsigned src1_reg_type:3; - unsigned pad0:1; - unsigned dest_writemask:4; - unsigned dest_subreg_nr:1; - unsigned dest_reg_nr:8; - unsigned pad1:2; - unsigned dest_address_mode:1; - } da16; - - struct - { - unsigned dest_reg_file:2; - unsigned dest_reg_type:3; - unsigned src0_reg_file:2; - unsigned src0_reg_type:3; - unsigned pad0:6; - unsigned dest_writemask:4; - int dest_indirect_offset:6; - unsigned dest_subreg_nr:3; - unsigned pad1:2; - unsigned dest_address_mode:1; - } ia16; - } bits1; - - - union { - struct - { - unsigned src0_subreg_nr:5; - unsigned src0_reg_nr:8; - unsigned src0_abs:1; - unsigned src0_negate:1; - unsigned src0_address_mode:1; - unsigned src0_horiz_stride:2; - unsigned src0_width:3; - unsigned src0_vert_stride:4; - unsigned flag_reg_nr:1; - unsigned pad:6; - } da1; - - struct - { - int src0_indirect_offset:10; - unsigned src0_subreg_nr:3; - unsigned src0_abs:1; - unsigned src0_negate:1; - unsigned src0_address_mode:1; - unsigned src0_horiz_stride:2; - unsigned src0_width:3; - unsigned src0_vert_stride:4; - unsigned flag_reg_nr:1; - unsigned pad:6; - } ia1; - - struct - { - unsigned src0_swz_x:2; - unsigned src0_swz_y:2; - unsigned src0_subreg_nr:1; - unsigned src0_reg_nr:8; - unsigned src0_abs:1; - unsigned src0_negate:1; - unsigned src0_address_mode:1; - unsigned src0_swz_z:2; - unsigned src0_swz_w:2; - unsigned pad0:1; - unsigned src0_vert_stride:4; - unsigned flag_reg_nr:1; - unsigned pad1:6; - } da16; - - struct - { - unsigned src0_swz_x:2; - unsigned src0_swz_y:2; - int src0_indirect_offset:6; - unsigned src0_subreg_nr:3; - unsigned src0_abs:1; - unsigned src0_negate:1; - unsigned src0_address_mode:1; - unsigned src0_swz_z:2; - unsigned src0_swz_w:2; - unsigned pad0:1; - unsigned src0_vert_stride:4; - unsigned flag_reg_nr:1; - unsigned pad1:6; - } ia16; - - } bits2; - - union - { - struct - { - unsigned src1_subreg_nr:5; - unsigned src1_reg_nr:8; - unsigned src1_abs:1; - unsigned src1_negate:1; - unsigned pad:1; - unsigned src1_horiz_stride:2; - unsigned src1_width:3; - unsigned src1_vert_stride:4; - unsigned pad0:7; - } da1; - - struct - { - unsigned src1_swz_x:2; - unsigned src1_swz_y:2; - unsigned src1_subreg_nr:1; - unsigned src1_reg_nr:8; - unsigned src1_abs:1; - unsigned src1_negate:1; - unsigned pad0:1; - unsigned src1_swz_z:2; - unsigned src1_swz_w:2; - unsigned pad1:1; - unsigned src1_vert_stride:4; - unsigned pad2:7; - } da16; - - struct - { - int src1_indirect_offset:10; - unsigned src1_subreg_nr:3; - unsigned src1_abs:1; - unsigned src1_negate:1; - unsigned pad0:1; - unsigned src1_horiz_stride:2; - unsigned src1_width:3; - unsigned src1_vert_stride:4; - unsigned flag_reg_nr:1; - unsigned pad1:6; - } ia1; - - struct - { - unsigned src1_swz_x:2; - unsigned src1_swz_y:2; - int src1_indirect_offset:6; - unsigned src1_subreg_nr:3; - unsigned src1_abs:1; - unsigned src1_negate:1; - unsigned pad0:1; - unsigned src1_swz_z:2; - unsigned src1_swz_w:2; - unsigned pad1:1; - unsigned src1_vert_stride:4; - unsigned flag_reg_nr:1; - unsigned pad2:6; - } ia16; - - - struct - { - int jump_count:16; /* note: signed */ - unsigned pop_count:4; - unsigned pad0:12; - } if_else; - - struct { - unsigned function:4; - unsigned int_type:1; - unsigned precision:1; - unsigned saturate:1; - unsigned data_type:1; - unsigned pad0:8; - unsigned response_length:4; - unsigned msg_length:4; - unsigned msg_target:4; - unsigned pad1:3; - unsigned end_of_thread:1; - } math; - - struct { - unsigned binding_table_index:8; - unsigned sampler:4; - unsigned return_format:2; - unsigned msg_type:2; - unsigned response_length:4; - unsigned msg_length:4; - unsigned msg_target:4; - unsigned pad1:3; - unsigned end_of_thread:1; - } sampler; - - struct brw_urb_immediate urb; - - struct { - unsigned binding_table_index:8; - unsigned msg_control:4; - unsigned msg_type:2; - unsigned target_cache:2; - unsigned response_length:4; - unsigned msg_length:4; - unsigned msg_target:4; - unsigned pad1:3; - unsigned end_of_thread:1; - } dp_read; - - struct { - unsigned binding_table_index:8; - unsigned msg_control:3; - unsigned pixel_scoreboard_clear:1; - unsigned msg_type:3; - unsigned send_commit_msg:1; - unsigned response_length:4; - unsigned msg_length:4; - unsigned msg_target:4; - unsigned pad1:3; - unsigned end_of_thread:1; - } dp_write; - - struct { - unsigned pad:16; - unsigned response_length:4; - unsigned msg_length:4; - unsigned msg_target:4; - unsigned pad1:3; - unsigned end_of_thread:1; - } generic; - - int d; - unsigned ud; - } bits3; -}; - - -#endif diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c deleted file mode 100644 index 724a69b2ee..0000000000 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ /dev/null @@ -1,126 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "brw_blit.h" -#include "brw_context.h" -#include "brw_state.h" -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/internal/p_winsys_screen.h" -#include "util/u_tile.h" -#include "util/u_rect.h" - - - -/* Assumes all values are within bounds -- no checking at this level - - * do it higher up if required. - */ -static void -brw_surface_copy(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - struct pipe_surface *src, - unsigned srcx, unsigned srcy, unsigned width, unsigned height) -{ - assert( dst != src ); - assert( dst->block.size == src->block.size ); - assert( dst->block.width == src->block.height ); - assert( dst->block.height == src->block.height ); - - if (0) { - void *dst_map = pipe->screen->surface_map( pipe->screen, - dst, - PIPE_BUFFER_USAGE_CPU_WRITE ); - - const void *src_map = pipe->screen->surface_map( pipe->screen, - src, - PIPE_BUFFER_USAGE_CPU_READ ); - - util_copy_rect(dst_map, - &dst->block, - dst->stride, - dstx, dsty, - width, height, - src_map, - src->stride, - srcx, srcy); - - pipe->screen->surface_unmap(pipe->screen, src); - pipe->screen->surface_unmap(pipe->screen, dst); - } - else { - struct brw_texture *dst_tex = (struct brw_texture *)dst->texture; - struct brw_texture *src_tex = (struct brw_texture *)src->texture; - assert(dst->block.width == 1); - assert(dst->block.height == 1); - brw_copy_blit(brw_context(pipe), - FALSE, - dst->block.size, - (short) src->stride/src->block.size, src_tex->buffer, src->offset, FALSE, - (short) dst->stride/dst->block.size, dst_tex->buffer, dst->offset, FALSE, - (short) srcx, (short) srcy, (short) dstx, (short) dsty, - (short) width, (short) height, PIPE_LOGICOP_COPY); - } -} - - -static void -brw_surface_fill(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height, unsigned value) -{ - if (0) { - void *dst_map = pipe->screen->surface_map( pipe->screen, - dst, - PIPE_BUFFER_USAGE_CPU_WRITE ); - - util_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value); - - pipe->screen->surface_unmap(pipe->screen, dst); - } - else { - struct brw_texture *tex = (struct brw_texture *)dst->texture; - assert(dst->block.width == 1); - assert(dst->block.height == 1); - brw_fill_blit(brw_context(pipe), - dst->block.size, - (short) dst->stride/dst->block.size, - tex->buffer, dst->offset, FALSE, - (short) dstx, (short) dsty, - (short) width, (short) height, - value); - } -} - - -void -brw_init_surface_functions(struct brw_context *brw) -{ - brw->pipe.surface_copy = brw_surface_copy; - brw->pipe.surface_fill = brw_surface_fill; -} diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c deleted file mode 100644 index 998ffaeac4..0000000000 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ /dev/null @@ -1,380 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -/* Code to layout images in a mipmap tree for i965. - */ - -#include "pipe/p_state.h" -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/internal/p_winsys_screen.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "brw_context.h" -#include "brw_tex_layout.h" - - -#define FILE_DEBUG_FLAG DEBUG_TEXTURE - -#if 0 -unsigned intel_compressed_alignment(unsigned internalFormat) -{ - unsigned alignment = 4; - - switch (internalFormat) { - case GL_COMPRESSED_RGB_FXT1_3DFX: - case GL_COMPRESSED_RGBA_FXT1_3DFX: - alignment = 8; - break; - - default: - break; - } - - return alignment; -} -#endif - - -static void intel_miptree_set_image_offset(struct brw_texture *tex, - unsigned level, - unsigned img, - unsigned x, unsigned y) -{ - struct pipe_texture *pt = &tex->base; - if (img == 0 && level == 0) - assert(x == 0 && y == 0); - assert(img < tex->nr_images[level]); - - tex->image_offset[level][img] = y * tex->stride + x * pt->block.size; -} - -static void intel_miptree_set_level_info(struct brw_texture *tex, - unsigned level, - unsigned nr_images, - unsigned x, unsigned y, - unsigned w, unsigned h, unsigned d) -{ - struct pipe_texture *pt = &tex->base; - - assert(level < PIPE_MAX_TEXTURE_LEVELS); - - pt->width[level] = w; - pt->height[level] = h; - pt->depth[level] = d; - - pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w); - pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h); - - tex->level_offset[level] = y * tex->stride + x * tex->base.block.size; - tex->nr_images[level] = nr_images; - - /* - DBG("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, - level, w, h, d, x, y, tex->level_offset[level]); - */ - - /* Not sure when this would happen, but anyway: - */ - if (tex->image_offset[level]) { - FREE(tex->image_offset[level]); - tex->image_offset[level] = NULL; - } - - assert(nr_images); - assert(!tex->image_offset[level]); - - tex->image_offset[level] = (unsigned *) MALLOC(nr_images * sizeof(unsigned)); - tex->image_offset[level][0] = 0; -} - -static void i945_miptree_layout_2d(struct brw_texture *tex) -{ - struct pipe_texture *pt = &tex->base; - const int align_x = 2, align_y = 4; - unsigned level; - unsigned x = 0; - unsigned y = 0; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned nblocksx = pt->nblocksx[0]; - unsigned nblocksy = pt->nblocksy[0]; - - tex->stride = align(pt->nblocksx[0] * pt->block.size, 4); - - /* May need to adjust pitch to accomodate the placement of - * the 2nd mipmap level. This occurs when the alignment - * constraints of mipmap placement push the right edge of the - * 2nd mipmap level out past the width of its parent. - */ - if (pt->last_level > 0) { - unsigned mip1_nblocksx - = align(pf_get_nblocksx(&pt->block, minify(width)), align_x) - + pf_get_nblocksx(&pt->block, minify(minify(width))); - - if (mip1_nblocksx > nblocksx) - tex->stride = mip1_nblocksx * pt->block.size; - } - - /* Pitch must be a whole number of dwords - */ - tex->stride = align(tex->stride, 64); - tex->total_nblocksy = 0; - - for (level = 0; level <= pt->last_level; level++) { - intel_miptree_set_level_info(tex, level, 1, x, y, width, - height, 1); - - nblocksy = align(nblocksy, align_y); - - /* Because the images are packed better, the final offset - * might not be the maximal one: - */ - tex->total_nblocksy = MAX2(tex->total_nblocksy, y + nblocksy); - - /* Layout_below: step right after second mipmap level. - */ - if (level == 1) { - x += align(nblocksx, align_x); - } - else { - y += nblocksy; - } - - width = minify(width); - height = minify(height); - nblocksx = pf_get_nblocksx(&pt->block, width); - nblocksy = pf_get_nblocksy(&pt->block, height); - } -} - -static boolean brw_miptree_layout(struct brw_texture *tex) -{ - struct pipe_texture *pt = &tex->base; - /* XXX: these vary depending on image format: - */ -/* int align_w = 4; */ - - switch (pt->target) { - case PIPE_TEXTURE_CUBE: - case PIPE_TEXTURE_3D: { - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; - unsigned nblocksx = pt->nblocksx[0]; - unsigned nblocksy = pt->nblocksy[0]; - unsigned pack_x_pitch, pack_x_nr; - unsigned pack_y_pitch; - unsigned level; - unsigned align_h = 2; - unsigned align_w = 4; - - tex->total_nblocksy = 0; - - tex->stride = align(pt->nblocksx[0], 4); - pack_y_pitch = align(pt->nblocksy[0], align_h); - - pack_x_pitch = tex->stride / pt->block.size; - pack_x_nr = 1; - - for (level = 0; level <= pt->last_level; level++) { - unsigned nr_images = pt->target == PIPE_TEXTURE_3D ? depth : 6; - int x = 0; - int y = 0; - uint q, j; - - intel_miptree_set_level_info(tex, level, nr_images, - 0, tex->total_nblocksy, - width, height, depth); - - for (q = 0; q < nr_images;) { - for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { - intel_miptree_set_image_offset(tex, level, q, x, y); - x += pack_x_pitch; - } - - x = 0; - y += pack_y_pitch; - } - - - tex->total_nblocksy += y; - width = minify(width); - height = minify(height); - depth = minify(depth); - nblocksx = pf_get_nblocksx(&pt->block, width); - nblocksy = pf_get_nblocksy(&pt->block, height); - - if (pf_is_compressed(pt->format)) { - pack_y_pitch = (height + 3) / 4; - - if (pack_x_pitch > align(width, align_w)) { - pack_x_pitch = align(width, align_w); - pack_x_nr <<= 1; - } - } else { - if (pack_x_pitch > 4) { - pack_x_pitch >>= 1; - pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr * pt->block.size <= tex->stride); - } - - if (pack_y_pitch > 2) { - pack_y_pitch >>= 1; - pack_y_pitch = align(pack_y_pitch, align_h); - } - } - - } - break; - } - - default: - i945_miptree_layout_2d(tex); - break; - } -#if 0 - PRINT("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, - pt->pitch, - pt->total_nblocksy, - pt->block.size, - pt->stride * pt->total_nblocksy ); -#endif - - return TRUE; -} - - -static struct pipe_texture * -brw_texture_create_screen(struct pipe_screen *screen, - const struct pipe_texture *templat) -{ - struct brw_texture *tex = CALLOC_STRUCT(brw_texture); - - if (tex) { - tex->base = *templat; - pipe_reference_init(&tex->base.reference, 1); - - tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width[0]); - tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]); - - if (brw_miptree_layout(tex)) - tex->buffer = screen->buffer_create(screen, 64, - PIPE_BUFFER_USAGE_PIXEL, - tex->stride * - tex->total_nblocksy); - - if (!tex->buffer) { - FREE(tex); - return NULL; - } - } - - return &tex->base; -} - - -static void -brw_texture_destroy_screen(struct pipe_texture *pt) -{ - struct brw_texture *tex = (struct brw_texture *)pt; - uint i; - - /* - DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); - */ - - pipe_buffer_reference(&tex->buffer, NULL); - - for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) - if (tex->image_offset[i]) - free(tex->image_offset[i]); - - free(tex); -} - - -static struct pipe_surface * -brw_get_tex_surface_screen(struct pipe_screen *screen, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - struct brw_texture *tex = (struct brw_texture *)pt; - struct pipe_surface *ps; - unsigned offset; /* in bytes */ - - offset = tex->level_offset[level]; - - if (pt->target == PIPE_TEXTURE_CUBE) { - offset += tex->image_offset[level][face]; - } - else if (pt->target == PIPE_TEXTURE_3D) { - offset += tex->image_offset[level][zslice]; - } - else { - assert(face == 0); - assert(zslice == 0); - } - - ps = CALLOC_STRUCT(pipe_surface); - if (ps) { - pipe_reference_init(&ps->reference, 1); - pipe_texture_reference(&ps->texture, pt); - ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; - ps->block = pt->block; - ps->nblocksx = pt->nblocksx[level]; - ps->nblocksy = pt->nblocksy[level]; - ps->stride = tex->stride; - ps->offset = offset; - } - return ps; -} - - -void -brw_init_texture_functions(struct brw_context *brw) -{ -// brw->pipe.texture_update = brw_texture_update; -} - - -void -brw_init_screen_texture_funcs(struct pipe_screen *screen) -{ - screen->texture_create = brw_texture_create_screen; - screen->texture_destroy = brw_texture_destroy_screen; - screen->get_tex_surface = brw_get_tex_surface_screen; -} - diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.h b/src/gallium/drivers/i965simple/brw_tex_layout.h deleted file mode 100644 index a6b6ba8146..0000000000 --- a/src/gallium/drivers/i965simple/brw_tex_layout.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - - -#ifndef BRW_TEX_LAYOUT_H -#define BRW_TEX_LAYOUT_H - - -struct brw_context; -struct pipe_screen; - - -extern void -brw_init_texture_functions(struct brw_context *brw); - -extern void -brw_init_screen_texture_funcs(struct pipe_screen *screen); - - -#endif diff --git a/src/gallium/drivers/i965simple/brw_urb.c b/src/gallium/drivers/i965simple/brw_urb.c deleted file mode 100644 index 101a4367b9..0000000000 --- a/src/gallium/drivers/i965simple/brw_urb.c +++ /dev/null @@ -1,186 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_context.h" -//#include "brw_state.h" -#include "brw_batch.h" -#include "brw_defines.h" - -#define VS 0 -#define GS 1 -#define CLP 2 -#define SF 3 -#define CS 4 - -/* XXX: Are the min_entry_size numbers useful? - * XXX: Verify min_nr_entries, esp for VS. - * XXX: Verify SF min_entry_size. - */ -static const struct { - unsigned min_nr_entries; - unsigned preferred_nr_entries; - unsigned min_entry_size; - unsigned max_entry_size; -} limits[CS+1] = { - { 8, 32, 1, 5 }, /* vs */ - { 4, 8, 1, 5 }, /* gs */ - { 6, 8, 1, 5 }, /* clp */ - { 1, 8, 1, 12 }, /* sf */ - { 1, 4, 1, 32 } /* cs */ -}; - - -static boolean check_urb_layout( struct brw_context *brw ) -{ - brw->urb.vs_start = 0; - brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize; - brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize; - brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize; - brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize; - - return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= 256; -} - -/* Most minimal update, forces re-emit of URB fence packet after GS - * unit turned on/off. - */ -static void recalculate_urb_fence( struct brw_context *brw ) -{ - unsigned csize = brw->curbe.total_size; - unsigned vsize = brw->vs.prog_data->urb_entry_size; - unsigned sfsize = brw->sf.prog_data->urb_entry_size; - - if (csize < limits[CS].min_entry_size) - csize = limits[CS].min_entry_size; - - if (vsize < limits[VS].min_entry_size) - vsize = limits[VS].min_entry_size; - - if (sfsize < limits[SF].min_entry_size) - sfsize = limits[SF].min_entry_size; - - if (brw->urb.vsize < vsize || - brw->urb.sfsize < sfsize || - brw->urb.csize < csize || - (brw->urb.constrained && (brw->urb.vsize > brw->urb.vsize || - brw->urb.sfsize > brw->urb.sfsize || - brw->urb.csize > brw->urb.csize))) { - - - brw->urb.csize = csize; - brw->urb.sfsize = sfsize; - brw->urb.vsize = vsize; - - brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; - brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries; - brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries; - brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; - brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries; - - if (!check_urb_layout(brw)) { - brw->urb.nr_vs_entries = limits[VS].min_nr_entries; - brw->urb.nr_gs_entries = limits[GS].min_nr_entries; - brw->urb.nr_clip_entries = limits[CLP].min_nr_entries; - brw->urb.nr_sf_entries = limits[SF].min_nr_entries; - brw->urb.nr_cs_entries = limits[CS].min_nr_entries; - - brw->urb.constrained = 1; - - if (!check_urb_layout(brw)) { - /* This is impossible, given the maximal sizes of urb - * entries and the values for minimum nr of entries - * provided above. - */ - debug_printf("couldn't calculate URB layout!\n"); - exit(1); - } - - if (BRW_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) - debug_printf("URB CONSTRAINED\n"); - } - else - brw->urb.constrained = 0; - - if (BRW_DEBUG & DEBUG_URB) - debug_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", - brw->urb.vs_start, - brw->urb.gs_start, - brw->urb.clip_start, - brw->urb.sf_start, - brw->urb.cs_start, - 256); - - brw->state.dirty.brw |= BRW_NEW_URB_FENCE; - } -} - - -const struct brw_tracked_state brw_recalculate_urb_fence = { - .dirty = { - .brw = BRW_NEW_CURBE_OFFSETS, - .cache = (CACHE_NEW_VS_PROG | - CACHE_NEW_SF_PROG) - }, - .update = recalculate_urb_fence -}; - - - - - -void brw_upload_urb_fence(struct brw_context *brw) -{ - struct brw_urb_fence uf; - memset(&uf, 0, sizeof(uf)); - - uf.header.opcode = CMD_URB_FENCE; - uf.header.length = sizeof(uf)/4-2; - uf.header.vs_realloc = 1; - uf.header.gs_realloc = 1; - uf.header.clp_realloc = 1; - uf.header.sf_realloc = 1; - uf.header.vfe_realloc = 1; - uf.header.cs_realloc = 1; - - /* The ordering below is correct, not the layout in the - * instruction. - * - * There are 256 urb reg pairs in total. - */ - uf.bits0.vs_fence = brw->urb.gs_start; - uf.bits0.gs_fence = brw->urb.clip_start; - uf.bits0.clp_fence = brw->urb.sf_start; - uf.bits1.sf_fence = brw->urb.cs_start; - uf.bits1.cs_fence = 256; - - BRW_BATCH_STRUCT(brw, &uf); -} diff --git a/src/gallium/drivers/i965simple/brw_util.c b/src/gallium/drivers/i965simple/brw_util.c deleted file mode 100644 index 42391d7c8c..0000000000 --- a/src/gallium/drivers/i965simple/brw_util.c +++ /dev/null @@ -1,104 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_util.h" -#include "brw_defines.h" - -#include "pipe/p_defines.h" - -unsigned brw_count_bits( unsigned val ) -{ - unsigned i; - for (i = 0; val ; val >>= 1) - if (val & 1) - i++; - return i; -} - - -unsigned brw_translate_blend_equation( int mode ) -{ - switch (mode) { - case PIPE_BLEND_ADD: - return BRW_BLENDFUNCTION_ADD; - case PIPE_BLEND_MIN: - return BRW_BLENDFUNCTION_MIN; - case PIPE_BLEND_MAX: - return BRW_BLENDFUNCTION_MAX; - case PIPE_BLEND_SUBTRACT: - return BRW_BLENDFUNCTION_SUBTRACT; - case PIPE_BLEND_REVERSE_SUBTRACT: - return BRW_BLENDFUNCTION_REVERSE_SUBTRACT; - default: - assert(0); - return BRW_BLENDFUNCTION_ADD; - } -} - -unsigned brw_translate_blend_factor( int factor ) -{ - switch(factor) { - case PIPE_BLENDFACTOR_ZERO: - return BRW_BLENDFACTOR_ZERO; - case PIPE_BLENDFACTOR_SRC_ALPHA: - return BRW_BLENDFACTOR_SRC_ALPHA; - case PIPE_BLENDFACTOR_ONE: - return BRW_BLENDFACTOR_ONE; - case PIPE_BLENDFACTOR_SRC_COLOR: - return BRW_BLENDFACTOR_SRC_COLOR; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - return BRW_BLENDFACTOR_INV_SRC_COLOR; - case PIPE_BLENDFACTOR_DST_COLOR: - return BRW_BLENDFACTOR_DST_COLOR; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - return BRW_BLENDFACTOR_INV_DST_COLOR; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - return BRW_BLENDFACTOR_INV_SRC_ALPHA; - case PIPE_BLENDFACTOR_DST_ALPHA: - return BRW_BLENDFACTOR_DST_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - return BRW_BLENDFACTOR_INV_DST_ALPHA; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE; - case PIPE_BLENDFACTOR_CONST_COLOR: - return BRW_BLENDFACTOR_CONST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - return BRW_BLENDFACTOR_INV_CONST_COLOR; - case PIPE_BLENDFACTOR_CONST_ALPHA: - return BRW_BLENDFACTOR_CONST_ALPHA; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - return BRW_BLENDFACTOR_INV_CONST_ALPHA; - default: - assert(0); - return BRW_BLENDFACTOR_ZERO; - } -} diff --git a/src/gallium/drivers/i965simple/brw_util.h b/src/gallium/drivers/i965simple/brw_util.h deleted file mode 100644 index d60e5934db..0000000000 --- a/src/gallium/drivers/i965simple/brw_util.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_UTIL_H -#define BRW_UTIL_H - -#include "pipe/p_state.h" - -extern unsigned brw_count_bits( unsigned val ); -extern unsigned brw_translate_blend_factor( int factor ); -extern unsigned brw_translate_blend_equation( int mode ); - - -#endif diff --git a/src/gallium/drivers/i965simple/brw_vs.c b/src/gallium/drivers/i965simple/brw_vs.c deleted file mode 100644 index 92327e896d..0000000000 --- a/src/gallium/drivers/i965simple/brw_vs.c +++ /dev/null @@ -1,120 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_context.h" -#include "brw_vs.h" -#include "brw_util.h" -#include "brw_state.h" - - -static void do_vs_prog( struct brw_context *brw, - const struct brw_vertex_program *vp, - struct brw_vs_prog_key *key ) -{ - unsigned program_size; - const unsigned *program; - struct brw_vs_compile c; - - memset(&c, 0, sizeof(c)); - memcpy(&c.key, key, sizeof(*key)); - - brw_init_compile(&c.func); - c.vp = vp; - - c.prog_data.outputs_written = vp->info.num_outputs; - c.prog_data.inputs_read = vp->info.num_inputs; - -#if 0 - if (c.key.copy_edgeflag) { - c.prog_data.outputs_written |= 1<vs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_VS_PROG], - &c.key, - sizeof(c.key), - program, - program_size, - &c.prog_data, - &brw->vs.prog_data); -} - - -static void brw_upload_vs_prog( struct brw_context *brw ) -{ - struct brw_vs_prog_key key; - const struct brw_vertex_program *vp = brw->attribs.VertexProgram; - - assert(vp); - - memset(&key, 0, sizeof(key)); - - /* Just upload the program verbatim for now. Always send it all - * the inputs it asks for, whether they are varying or not. - */ - key.program_string_id = vp->id; - key.nr_userclip = brw->attribs.Clip.nr; - key.copy_edgeflag = (brw->attribs.Raster->fill_cw != PIPE_POLYGON_MODE_FILL || - brw->attribs.Raster->fill_ccw != PIPE_POLYGON_MODE_FILL); - - /* Make an early check for the key. - */ - if (brw_search_cache(&brw->cache[BRW_VS_PROG], - &key, sizeof(key), - &brw->vs.prog_data, - &brw->vs.prog_gs_offset)) - return; - - do_vs_prog(brw, vp, &key); -} - - -/* See brw_vs.c: - */ -const struct brw_tracked_state brw_vs_prog = { - .dirty = { - .brw = BRW_NEW_VS, - .cache = 0 - }, - .update = brw_upload_vs_prog -}; diff --git a/src/gallium/drivers/i965simple/brw_vs.h b/src/gallium/drivers/i965simple/brw_vs.h deleted file mode 100644 index 070f9dfcae..0000000000 --- a/src/gallium/drivers/i965simple/brw_vs.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_VS_H -#define BRW_VS_H - - -#include "brw_context.h" -#include "brw_eu.h" - - -struct brw_vs_prog_key { - unsigned program_string_id; - unsigned nr_userclip:4; - unsigned copy_edgeflag:1; - unsigned know_w_is_one:1; - unsigned pad:26; -}; - - -struct brw_vs_compile { - struct brw_compile func; - struct brw_vs_prog_key key; - struct brw_vs_prog_data prog_data; - - const struct brw_vertex_program *vp; - - unsigned nr_inputs; - - unsigned first_output; - unsigned nr_outputs; - - unsigned first_tmp; - unsigned last_tmp; - - struct brw_reg r0; - struct brw_reg r1; - struct brw_reg regs[12][128]; - struct brw_reg tmp; - struct brw_reg stack; - - struct { - boolean used_in_src; - struct brw_reg reg; - } output_regs[128]; - - struct brw_reg userplane[6]; - -}; - -void brw_vs_emit( struct brw_vs_compile *c ); - -#endif diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c deleted file mode 100644 index 3ee82d95b3..0000000000 --- a/src/gallium/drivers/i965simple/brw_vs_emit.c +++ /dev/null @@ -1,1330 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "brw_context.h" -#include "brw_vs.h" - -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" - -struct brw_prog_info { - unsigned num_temps; - unsigned num_addrs; - unsigned num_consts; - - unsigned writes_psize; - - unsigned pos_idx; - unsigned result_edge_idx; - unsigned edge_flag_idx; - unsigned psize_idx; -}; - -/* Do things as simply as possible. Allocate and populate all regs - * ahead of time. - */ -static void brw_vs_alloc_regs( struct brw_vs_compile *c, - struct brw_prog_info *info ) -{ - unsigned i, reg = 0, mrf; - unsigned nr_params; - - /* r0 -- reserved as usual - */ - c->r0 = brw_vec8_grf(reg, 0); reg++; - - /* User clip planes from curbe: - */ - if (c->key.nr_userclip) { - for (i = 0; i < c->key.nr_userclip; i++) { - c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1); - } - - /* Deal with curbe alignment: - */ - reg += ((6+c->key.nr_userclip+3)/4)*2; - } - - /* Vertex program parameters from curbe: - */ - nr_params = c->prog_data.max_const; - for (i = 0; i < nr_params; i++) { - c->regs[TGSI_FILE_CONSTANT][i] = stride(brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); - } - reg += (nr_params+1)/2; - c->prog_data.curb_read_length = reg - 1; - - - - /* Allocate input regs: - */ - c->nr_inputs = c->vp->info.num_inputs; - for (i = 0; i < c->nr_inputs; i++) { - c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0); - reg++; - } - - - /* Allocate outputs: TODO: could organize the non-position outputs - * to go straight into message regs. - */ - c->nr_outputs = 0; - c->first_output = reg; - mrf = 4; - for (i = 0; i < c->vp->info.num_outputs; i++) { - c->nr_outputs++; -#if 0 - if (i == VERT_RESULT_HPOS) { - c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); - reg++; - } - else if (i == VERT_RESULT_PSIZ) { - c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); - reg++; - mrf++; /* just a placeholder? XXX fix later stages & remove this */ - } - else { - c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf); - mrf++; - } -#else - /*treat pos differently for now */ - if (i == info->pos_idx) { - c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); - reg++; - } else { - c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf); - mrf++; - } -#endif - } - - /* Allocate program temporaries: - */ - for (i = 0; i < info->num_temps; i++) { - c->regs[TGSI_FILE_TEMPORARY][i] = brw_vec8_grf(reg, 0); - reg++; - } - - /* Address reg(s). Don't try to use the internal address reg until - * deref time. - */ - for (i = 0; i < info->num_addrs; i++) { - c->regs[TGSI_FILE_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE, - reg, - 0, - BRW_REGISTER_TYPE_D, - BRW_VERTICAL_STRIDE_8, - BRW_WIDTH_8, - BRW_HORIZONTAL_STRIDE_1, - BRW_SWIZZLE_XXXX, - TGSI_WRITEMASK_X); - reg++; - } - - for (i = 0; i < 128; i++) { - if (c->output_regs[i].used_in_src) { - c->output_regs[i].reg = brw_vec8_grf(reg, 0); - reg++; - } - } - - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); - reg += 2; - - - /* Some opcodes need an internal temporary: - */ - c->first_tmp = reg; - c->last_tmp = reg; /* for allocation purposes */ - - /* Each input reg holds data from two vertices. The - * urb_read_length is the number of registers read from *each* - * vertex urb, so is half the amount: - */ - c->prog_data.urb_read_length = (c->nr_inputs+1)/2; - - c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4; - c->prog_data.total_grf = reg; -} - - -static struct brw_reg get_tmp( struct brw_vs_compile *c ) -{ - struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); - - if (++c->last_tmp > c->prog_data.total_grf) - c->prog_data.total_grf = c->last_tmp; - - return tmp; -} - -static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) -{ - if (tmp.nr == c->last_tmp-1) - c->last_tmp--; -} - -static void release_tmps( struct brw_vs_compile *c ) -{ - c->last_tmp = c->first_tmp; -} - - -static void unalias1( struct brw_vs_compile *c, - struct brw_reg dst, - struct brw_reg arg0, - void (*func)( struct brw_vs_compile *, - struct brw_reg, - struct brw_reg )) -{ - if (dst.file == arg0.file && dst.nr == arg0.nr) { - struct brw_compile *p = &c->func; - struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask); - func(c, tmp, arg0); - brw_MOV(p, dst, tmp); - } - else { - func(c, dst, arg0); - } -} - -static void unalias2( struct brw_vs_compile *c, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1, - void (*func)( struct brw_vs_compile *, - struct brw_reg, - struct brw_reg, - struct brw_reg )) -{ - if ((dst.file == arg0.file && dst.nr == arg0.nr) || - (dst.file == arg1.file && dst.nr == arg1.nr)) { - struct brw_compile *p = &c->func; - struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask); - func(c, tmp, arg0, arg1); - brw_MOV(p, dst, tmp); - } - else { - func(c, dst, arg0, arg1); - } -} - -static void emit_sop( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1, - unsigned cond) -{ - brw_push_insn_state(p); - brw_CMP(p, brw_null_reg(), cond, arg0, arg1); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_MOV(p, dst, brw_imm_f(1.0f)); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, brw_imm_f(0.0f)); - brw_pop_insn_state(p); -} - -static void emit_seq( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1 ) -{ - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ); -} - -static void emit_sne( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1 ) -{ - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ); -} -static void emit_slt( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1 ) -{ - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L); -} - -static void emit_sle( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1 ) -{ - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE); -} - -static void emit_sgt( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1 ) -{ - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G); -} - -static void emit_sge( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1 ) -{ - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE); -} - -static void emit_max( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1 ) -{ - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); - brw_SEL(p, dst, arg1, arg0); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); -} - -static void emit_min( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1 ) -{ - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); - brw_SEL(p, dst, arg0, arg1); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); -} - - -static void emit_math1( struct brw_vs_compile *c, - unsigned function, - struct brw_reg dst, - struct brw_reg arg0, - unsigned precision) -{ - /* There are various odd behaviours with SEND on the simulator. In - * addition there are documented issues with the fact that the GEN4 - * processor doesn't do dependency control properly on SEND - * results. So, on balance, this kludge to get around failures - * with writemasked math results looks like it might be necessary - * whether that turns out to be a simulator bug or not: - */ - struct brw_compile *p = &c->func; - struct brw_reg tmp = dst; - boolean need_tmp = (dst.dw1.bits.writemask != 0xf || - dst.file != BRW_GENERAL_REGISTER_FILE); - - if (need_tmp) - tmp = get_tmp(c); - - brw_math(p, - tmp, - function, - BRW_MATH_SATURATE_NONE, - 2, - arg0, - BRW_MATH_DATA_SCALAR, - precision); - - if (need_tmp) { - brw_MOV(p, dst, tmp); - release_tmp(c, tmp); - } -} - -static void emit_math2( struct brw_vs_compile *c, - unsigned function, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1, - unsigned precision) -{ - struct brw_compile *p = &c->func; - struct brw_reg tmp = dst; - boolean need_tmp = (dst.dw1.bits.writemask != 0xf || - dst.file != BRW_GENERAL_REGISTER_FILE); - - if (need_tmp) - tmp = get_tmp(c); - - brw_MOV(p, brw_message_reg(3), arg1); - - brw_math(p, - tmp, - function, - BRW_MATH_SATURATE_NONE, - 2, - arg0, - BRW_MATH_DATA_SCALAR, - precision); - - if (need_tmp) { - brw_MOV(p, dst, tmp); - release_tmp(c, tmp); - } -} - - - -static void emit_exp_noalias( struct brw_vs_compile *c, - struct brw_reg dst, - struct brw_reg arg0 ) -{ - struct brw_compile *p = &c->func; - - - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X) { - struct brw_reg tmp = get_tmp(c); - struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D); - - /* tmp_d = floor(arg0.x) */ - brw_RNDD(p, tmp_d, brw_swizzle1(arg0, 0)); - - /* result[0] = 2.0 ^ tmp */ - - /* Adjust exponent for floating point: - * exp += 127 - */ - brw_ADD(p, brw_writemask(tmp_d, TGSI_WRITEMASK_X), tmp_d, brw_imm_d(127)); - - /* Install exponent and sign. - * Excess drops off the edge: - */ - brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), TGSI_WRITEMASK_X), - tmp_d, brw_imm_d(23)); - - release_tmp(c, tmp); - } - - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y) { - /* result[1] = arg0.x - floor(arg0.x) */ - brw_FRC(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0, 0)); - } - - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) { - /* As with the LOG instruction, we might be better off just - * doing a taylor expansion here, seeing as we have to do all - * the prep work. - * - * If mathbox partial precision is too low, consider also: - * result[3] = result[0] * EXP(result[1]) - */ - emit_math1(c, - BRW_MATH_FUNCTION_EXP, - brw_writemask(dst, TGSI_WRITEMASK_Z), - brw_swizzle1(arg0, 0), - BRW_MATH_PRECISION_PARTIAL); - } - - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) { - /* result[3] = 1.0; */ - brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), brw_imm_f(1)); - } -} - - -static void emit_log_noalias( struct brw_vs_compile *c, - struct brw_reg dst, - struct brw_reg arg0 ) -{ - struct brw_compile *p = &c->func; - struct brw_reg tmp = dst; - struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD); - struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD); - boolean need_tmp = (dst.dw1.bits.writemask != 0xf || - dst.file != BRW_GENERAL_REGISTER_FILE); - - if (need_tmp) { - tmp = get_tmp(c); - tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD); - } - - /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt - * according to spec: - * - * These almost look likey they could be joined up, but not really - * practical: - * - * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127 - * result[1].i = (x.i & ((1<<23)-1) + (127<<23) - */ - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_XZ) { - brw_AND(p, - brw_writemask(tmp_ud, TGSI_WRITEMASK_X), - brw_swizzle1(arg0_ud, 0), - brw_imm_ud((1U<<31)-1)); - - brw_SHR(p, - brw_writemask(tmp_ud, TGSI_WRITEMASK_X), - tmp_ud, - brw_imm_ud(23)); - - brw_ADD(p, - brw_writemask(tmp, TGSI_WRITEMASK_X), - retype(tmp_ud, BRW_REGISTER_TYPE_D), /* does it matter? */ - brw_imm_d(-127)); - } - - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_YZ) { - brw_AND(p, - brw_writemask(tmp_ud, TGSI_WRITEMASK_Y), - brw_swizzle1(arg0_ud, 0), - brw_imm_ud((1<<23)-1)); - - brw_OR(p, - brw_writemask(tmp_ud, TGSI_WRITEMASK_Y), - tmp_ud, - brw_imm_ud(127<<23)); - } - - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) { - /* result[2] = result[0] + LOG2(result[1]); */ - - /* Why bother? The above is just a hint how to do this with a - * taylor series. Maybe we *should* use a taylor series as by - * the time all the above has been done it's almost certainly - * quicker than calling the mathbox, even with low precision. - * - * Options are: - * - result[0] + mathbox.LOG2(result[1]) - * - mathbox.LOG2(arg0.x) - * - result[0] + inline_taylor_approx(result[1]) - */ - emit_math1(c, - BRW_MATH_FUNCTION_LOG, - brw_writemask(tmp, TGSI_WRITEMASK_Z), - brw_swizzle1(tmp, 1), - BRW_MATH_PRECISION_FULL); - - brw_ADD(p, - brw_writemask(tmp, TGSI_WRITEMASK_Z), - brw_swizzle1(tmp, 2), - brw_swizzle1(tmp, 0)); - } - - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) { - /* result[3] = 1.0; */ - brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_W), brw_imm_f(1)); - } - - if (need_tmp) { - brw_MOV(p, dst, tmp); - release_tmp(c, tmp); - } -} - - - - -/* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1 - */ -static void emit_dst_noalias( struct brw_vs_compile *c, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1) -{ - struct brw_compile *p = &c->func; - - /* There must be a better way to do this: - */ - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X) - brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_X), brw_imm_f(1.0)); - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y) - brw_MUL(p, brw_writemask(dst, TGSI_WRITEMASK_Y), arg0, arg1); - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) - brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Z), arg0); - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) - brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), arg1); -} - -static void emit_xpd( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg t, - struct brw_reg u) -{ - brw_MUL(p, brw_null_reg(), brw_swizzle(t, 1,2,0,3), brw_swizzle(u,2,0,1,3)); - brw_MAC(p, dst, negate(brw_swizzle(t, 2,0,1,3)), brw_swizzle(u,1,2,0,3)); -} - - - -static void emit_lit_noalias( struct brw_vs_compile *c, - struct brw_reg dst, - struct brw_reg arg0 ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *if_insn; - struct brw_reg tmp = dst; - boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE); - - if (need_tmp) - tmp = get_tmp(c); - - brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_YZ), brw_imm_f(0)); - brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_XW), brw_imm_f(1)); - - /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order - * to get all channels active inside the IF. In the clipping code - * we run with NoMask, so it's not an option and we can use - * BRW_EXECUTE_1 for all comparisions. - */ - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0)); - if_insn = brw_IF(p, BRW_EXECUTE_8); - { - brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0,0)); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0)); - brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_Z), brw_swizzle1(arg0,1)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - emit_math2(c, - BRW_MATH_FUNCTION_POW, - brw_writemask(dst, TGSI_WRITEMASK_Z), - brw_swizzle1(tmp, 2), - brw_swizzle1(arg0, 3), - BRW_MATH_PRECISION_PARTIAL); - } - - brw_ENDIF(p, if_insn); -} - - - - - -/* TODO: relative addressing! - */ -static struct brw_reg get_reg( struct brw_vs_compile *c, - unsigned file, - unsigned index ) -{ - switch (file) { - case TGSI_FILE_TEMPORARY: - case TGSI_FILE_INPUT: - case TGSI_FILE_OUTPUT: - assert(c->regs[file][index].nr != 0); - return c->regs[file][index]; - case TGSI_FILE_CONSTANT: - assert(c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm].nr != 0); - return c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm]; - case TGSI_FILE_IMMEDIATE: - assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0); - return c->regs[TGSI_FILE_CONSTANT][index]; - case TGSI_FILE_ADDRESS: - assert(index == 0); - return c->regs[file][index]; - - case TGSI_FILE_NULL: /* undef values */ - return brw_null_reg(); - - default: - assert(0); - return brw_null_reg(); - } -} - - - -static struct brw_reg deref( struct brw_vs_compile *c, - struct brw_reg arg, - int offset) -{ - struct brw_compile *p = &c->func; - struct brw_reg tmp = vec4(get_tmp(c)); - struct brw_reg vp_address = retype(vec1(get_reg(c, TGSI_FILE_ADDRESS, 0)), BRW_REGISTER_TYPE_UW); - unsigned byte_offset = arg.nr * 32 + arg.subnr + offset * 16; - struct brw_reg indirect = brw_vec4_indirect(0,0); - - { - brw_push_insn_state(p); - brw_set_access_mode(p, BRW_ALIGN_1); - - /* This is pretty clunky - load the address register twice and - * fetch each 4-dword value in turn. There must be a way to do - * this in a single pass, but I couldn't get it to work. - */ - brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset)); - brw_MOV(p, tmp, indirect); - - brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset)); - brw_MOV(p, suboffset(tmp, 4), indirect); - - brw_pop_insn_state(p); - } - - return vec8(tmp); -} - - -static void emit_arl( struct brw_vs_compile *c, - struct brw_reg dst, - struct brw_reg arg0 ) -{ - struct brw_compile *p = &c->func; - struct brw_reg tmp = dst; - boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE); - - if (need_tmp) - tmp = get_tmp(c); - - brw_RNDD(p, tmp, arg0); - brw_MUL(p, dst, tmp, brw_imm_d(16)); - - if (need_tmp) - release_tmp(c, tmp); -} - - -/* Will return mangled results for SWZ op. The emit_swz() function - * ignores this result and recalculates taking extended swizzles into - * account. - */ -static struct brw_reg get_arg( struct brw_vs_compile *c, - struct tgsi_src_register *src ) -{ - struct brw_reg reg; - - if (src->File == TGSI_FILE_NULL) - return brw_null_reg(); - -#if 0 - if (src->RelAddr) - reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index); - else -#endif - reg = get_reg(c, src->File, src->Index); - - /* Convert 3-bit swizzle to 2-bit. - */ - reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->SwizzleX, - src->SwizzleY, - src->SwizzleZ, - src->SwizzleW); - - /* Note this is ok for non-swizzle instructions: - */ - reg.negate = src->Negate ? 1 : 0; - - return reg; -} - - -static struct brw_reg get_dst( struct brw_vs_compile *c, - const struct tgsi_dst_register *dst ) -{ - struct brw_reg reg = get_reg(c, dst->File, dst->Index); - - reg.dw1.bits.writemask = dst->WriteMask; - - return reg; -} - - - - -static void emit_swz( struct brw_vs_compile *c, - struct brw_reg dst, - struct tgsi_src_register src ) -{ - struct brw_compile *p = &c->func; - unsigned zeros_mask = 0; - unsigned ones_mask = 0; - unsigned src_mask = 0; - ubyte src_swz[4]; - boolean need_tmp = (src.Negate && - dst.file != BRW_GENERAL_REGISTER_FILE); - struct brw_reg tmp = dst; - unsigned i; - - if (need_tmp) - tmp = get_tmp(c); - - for (i = 0; i < 4; i++) { - if (dst.dw1.bits.writemask & (1<regs[PROGRAM_STATE_VAR][0], src.Index); - else -#endif - arg0 = get_reg(c, src.File, src.Index); - - arg0 = brw_swizzle(arg0, - src_swz[0], src_swz[1], - src_swz[2], src_swz[3]); - - brw_MOV(p, brw_writemask(tmp, src_mask), arg0); - } - - if (zeros_mask) - brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0)); - - if (ones_mask) - brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1)); - - if (src.Negate) - brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp)); - - if (need_tmp) { - brw_MOV(p, dst, tmp); - release_tmp(c, tmp); - } -} - - - -/* Post-vertex-program processing. Send the results to the URB. - */ -static void emit_vertex_write( struct brw_vs_compile *c, struct brw_prog_info *info) -{ - struct brw_compile *p = &c->func; - struct brw_reg m0 = brw_message_reg(0); - struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][info->pos_idx]; - struct brw_reg ndc; - - if (c->key.copy_edgeflag) { - brw_MOV(p, - get_reg(c, TGSI_FILE_OUTPUT, info->result_edge_idx), - get_reg(c, TGSI_FILE_INPUT, info->edge_flag_idx)); - } - - - /* Build ndc coords? TODO: Shortcircuit when w is known to be one. - */ - if (!c->key.know_w_is_one) { - ndc = get_tmp(c); - emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); - brw_MUL(p, brw_writemask(ndc, TGSI_WRITEMASK_XYZ), pos, ndc); - } - else { - ndc = pos; - } - - /* This includes the workaround for -ve rhw, so is no longer an - * optional step: - */ - if (info->writes_psize || - c->key.nr_userclip || - !c->key.know_w_is_one) - { - struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); - unsigned i; - - brw_MOV(p, header1, brw_imm_ud(0)); - - brw_set_access_mode(p, BRW_ALIGN_16); - - if (info->writes_psize) { - struct brw_reg psiz = c->regs[TGSI_FILE_OUTPUT][info->psize_idx]; - brw_MUL(p, brw_writemask(header1, TGSI_WRITEMASK_W), - brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); - brw_AND(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, - brw_imm_ud(0x7ff<<8)); - } - - - for (i = 0; i < c->key.nr_userclip; i++) { - brw_set_conditionalmod(p, BRW_CONDITIONAL_L); - brw_DP4(p, brw_null_reg(), pos, c->userplane[i]); - brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<key.know_w_is_one) { - brw_CMP(p, - vec8(brw_null_reg()), - BRW_CONDITIONAL_L, - brw_swizzle1(ndc, 3), - brw_imm_f(0)); - - brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<6)); - brw_MOV(p, ndc, brw_imm_f(0)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } - - brw_set_access_mode(p, BRW_ALIGN_1); /* why? */ - brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1); - brw_set_access_mode(p, BRW_ALIGN_16); - - release_tmp(c, header1); - } - else { - brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); - } - - - /* Emit the (interleaved) headers for the two vertices - an 8-reg - * of zeros followed by two sets of NDC coordinates: - */ - brw_set_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, offset(m0, 2), ndc); - brw_MOV(p, offset(m0, 3), pos); - - - brw_urb_WRITE(p, - brw_null_reg(), /* dest */ - 0, /* starting mrf reg nr */ - c->r0, /* src */ - 0, /* allocate */ - 1, /* used */ - c->nr_outputs + 3, /* msg len */ - 0, /* response len */ - 1, /* eot */ - 1, /* writes complete */ - 0, /* urb destination offset */ - BRW_URB_SWIZZLE_INTERLEAVE); - -} - -static void -post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst ) -{ - struct tgsi_parse_context parse; - const struct tgsi_token *tokens = c->vp->program.tokens; - tgsi_parse_init(&parse, tokens); - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { -#if 0 - struct brw_instruction *brw_inst1, *brw_inst2; - const struct tgsi_full_instruction *inst1, *inst2; - int offset; - inst1 = &parse.FullToken.FullInstruction; - brw_inst1 = inst1->Data; - switch (inst1->Opcode) { - case TGSI_OPCODE_CAL: - case TGSI_OPCODE_BRA: - target_insn = inst1->BranchTarget; - inst2 = &c->vp->program.Base.Instructions[target_insn]; - brw_inst2 = inst2->Data; - offset = brw_inst2 - brw_inst1; - brw_set_src1(brw_inst1, brw_imm_d(offset*16)); - break; - case TGSI_OPCODE_END: - offset = end_inst - brw_inst1; - brw_set_src1(brw_inst1, brw_imm_d(offset*16)); - break; - default: - break; - } -#endif - } - } - tgsi_parse_free(&parse); -} - -static void process_declaration(const struct tgsi_full_declaration *decl, - struct brw_prog_info *info) -{ - int first = decl->DeclarationRange.First; - int last = decl->DeclarationRange.Last; - - switch(decl->Declaration.File) { - case TGSI_FILE_CONSTANT: - info->num_consts += last - first + 1; - break; - case TGSI_FILE_INPUT: { - } - break; - case TGSI_FILE_OUTPUT: { - assert(last == first); /* for now */ - if (decl->Declaration.Semantic) { - switch (decl->Semantic.SemanticName) { - case TGSI_SEMANTIC_POSITION: { - info->pos_idx = first; - } - break; - case TGSI_SEMANTIC_COLOR: - break; - case TGSI_SEMANTIC_BCOLOR: - break; - case TGSI_SEMANTIC_FOG: - break; - case TGSI_SEMANTIC_PSIZE: { - info->writes_psize = TRUE; - info->psize_idx = first; - } - break; - case TGSI_SEMANTIC_GENERIC: - break; - } - } - } - break; - case TGSI_FILE_TEMPORARY: { - info->num_temps += (last - first) + 1; - } - break; - case TGSI_FILE_SAMPLER: { - } - break; - case TGSI_FILE_ADDRESS: { - info->num_addrs += (last - first) + 1; - } - break; - case TGSI_FILE_IMMEDIATE: { - } - break; - case TGSI_FILE_NULL: { - } - break; - } -} - -static void process_instruction(struct brw_vs_compile *c, - struct tgsi_full_instruction *inst, - struct brw_prog_info *info) -{ - struct brw_reg args[3], dst; - struct brw_compile *p = &c->func; - /*struct brw_indirect stack_index = brw_indirect(0, 0);*/ - unsigned i; - unsigned index; - unsigned file; - /*FIXME: might not be the only one*/ - const struct tgsi_dst_register *dst_reg = &inst->FullDstRegisters[0].DstRegister; - /* - struct brw_instruction *if_inst[MAX_IFSN]; - unsigned insn, if_insn = 0; - */ - - for (i = 0; i < 3; i++) { - struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; - index = src->SrcRegister.Index; - file = src->SrcRegister.File; - if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src) - args[i] = c->output_regs[index].reg; - else - args[i] = get_arg(c, &src->SrcRegister); - } - - /* Get dest regs. Note that it is possible for a reg to be both - * dst and arg, given the static allocation of registers. So - * care needs to be taken emitting multi-operation instructions. - */ - index = dst_reg->Index; - file = dst_reg->File; - if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src) - dst = c->output_regs[index].reg; - else - dst = get_dst(c, dst_reg); - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - brw_MOV(p, dst, brw_abs(args[0])); - break; - case TGSI_OPCODE_ADD: - brw_ADD(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_DP3: - brw_DP3(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_DP4: - brw_DP4(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_DPH: - brw_DPH(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_DST: - unalias2(c, dst, args[0], args[1], emit_dst_noalias); - break; - case TGSI_OPCODE_EXP: - unalias1(c, dst, args[0], emit_exp_noalias); - break; - case TGSI_OPCODE_EX2: - emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL); - break; - case TGSI_OPCODE_ARL: - emit_arl(c, dst, args[0]); - break; - case TGSI_OPCODE_FLR: - brw_RNDD(p, dst, args[0]); - break; - case TGSI_OPCODE_FRC: - brw_FRC(p, dst, args[0]); - break; - case TGSI_OPCODE_LOG: - unalias1(c, dst, args[0], emit_log_noalias); - break; - case TGSI_OPCODE_LG2: - emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL); - break; - case TGSI_OPCODE_LIT: - unalias1(c, dst, args[0], emit_lit_noalias); - break; - case TGSI_OPCODE_MAD: - brw_MOV(p, brw_acc_reg(), args[2]); - brw_MAC(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_MAX: - emit_max(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_MIN: - emit_min(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: -#if 0 - /* The args[0] value can't be used here as it won't have - * correctly encoded the full swizzle: - */ - emit_swz(c, dst, inst->SrcReg[0] ); -#endif - brw_MOV(p, dst, args[0]); - break; - case TGSI_OPCODE_MUL: - brw_MUL(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_POW: - emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL); - break; - case TGSI_OPCODE_RCP: - emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL); - break; - case TGSI_OPCODE_RSQ: - emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL); - break; - - case TGSI_OPCODE_SEQ: - emit_seq(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_SNE: - emit_sne(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_SGE: - emit_sge(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_SGT: - emit_sgt(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_SLT: - emit_slt(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_SLE: - emit_sle(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_SUB: - brw_ADD(p, dst, args[0], negate(args[1])); - break; - case TGSI_OPCODE_XPD: - emit_xpd(p, dst, args[0], args[1]); - break; -#if 0 - case TGSI_OPCODE_IF: - assert(if_insn < MAX_IFSN); - if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); - break; - case TGSI_OPCODE_ELSE: - if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); - break; - case TGSI_OPCODE_ENDIF: - assert(if_insn > 0); - brw_ENDIF(p, if_inst[--if_insn]); - break; - case TGSI_OPCODE_BRA: - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - brw_set_predicate_control_flag_value(p, 0xff); - break; - case TGSI_OPCODE_CAL: - brw_set_access_mode(p, BRW_ALIGN_1); - brw_ADD(p, deref_1uw(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); - brw_set_access_mode(p, BRW_ALIGN_16); - brw_ADD(p, get_addr_reg(stack_index), - get_addr_reg(stack_index), brw_imm_d(4)); - inst->Data = &p->store[p->nr_insn]; - brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - break; -#endif - case TGSI_OPCODE_RET: -#if 0 - brw_ADD(p, get_addr_reg(stack_index), - get_addr_reg(stack_index), brw_imm_d(-4)); - brw_set_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, brw_ip_reg(), deref_1uw(stack_index, 0)); - brw_set_access_mode(p, BRW_ALIGN_16); -#else - /*brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));*/ -#endif - break; - case TGSI_OPCODE_END: - brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - break; - case TGSI_OPCODE_BGNSUB: - case TGSI_OPCODE_ENDSUB: - break; - default: - debug_printf("Unsupport opcode %d in vertex shader\n", inst->Instruction.Opcode); - break; - } - - if (dst_reg->File == TGSI_FILE_OUTPUT - && dst_reg->Index != info->pos_idx - && c->output_regs[dst_reg->Index].used_in_src) - brw_MOV(p, get_dst(c, dst_reg), dst); - - release_tmps(c); -} - -/* Emit the fragment program instructions here. - */ -void brw_vs_emit(struct brw_vs_compile *c) -{ -#define MAX_IFSN 32 - struct brw_compile *p = &c->func; - struct brw_instruction *end_inst; - struct tgsi_parse_context parse; - struct brw_indirect stack_index = brw_indirect(0, 0); - const struct tgsi_token *tokens = c->vp->program.tokens; - struct brw_prog_info prog_info; - unsigned allocated_registers = 0; - memset(&prog_info, 0, sizeof(struct brw_prog_info)); - - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_set_access_mode(p, BRW_ALIGN_16); - - tgsi_parse_init(&parse, tokens); - /* Message registers can't be read, so copy the output into GRF register - if they are used in source registers */ - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - unsigned i; - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_INSTRUCTION: { - const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; - for (i = 0; i < 3; ++i) { - const struct tgsi_src_register *src = &inst->FullSrcRegisters[i].SrcRegister; - unsigned index = src->Index; - unsigned file = src->File; - if (file == TGSI_FILE_OUTPUT) - c->output_regs[index].used_in_src = TRUE; - } - } - break; - default: - /* nothing */ - break; - } - } - tgsi_parse_free(&parse); - - tgsi_parse_init(&parse, tokens); - - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: { - struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; - process_declaration(decl, &prog_info); - } - break; - case TGSI_TOKEN_TYPE_IMMEDIATE: { - struct tgsi_full_immediate *imm = &parse.FullToken.FullImmediate; - assert(imm->Immediate.NrTokens == 4 + 1); - c->prog_data.imm_buf[c->prog_data.num_imm][0] = imm->u[0].Float; - c->prog_data.imm_buf[c->prog_data.num_imm][1] = imm->u[1].Float; - c->prog_data.imm_buf[c->prog_data.num_imm][2] = imm->u[2].Float; - c->prog_data.imm_buf[c->prog_data.num_imm][3] = imm->u[3].Float; - c->prog_data.num_imm++; - } - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: { - struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; - if (!allocated_registers) { - /* first instruction (declerations finished). - * now that we know what vars are being used allocate - * registers for them.*/ - c->prog_data.num_consts = prog_info.num_consts; - c->prog_data.max_const = prog_info.num_consts + c->prog_data.num_imm; - brw_vs_alloc_regs(c, &prog_info); - - brw_set_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); - brw_set_access_mode(p, BRW_ALIGN_16); - allocated_registers = 1; - } - process_instruction(c, inst, &prog_info); - } - break; - } - } - - end_inst = &p->store[p->nr_insn]; - emit_vertex_write(c, &prog_info); - post_vs_emit(c, end_inst); - tgsi_parse_free(&parse); - -} diff --git a/src/gallium/drivers/i965simple/brw_vs_state.c b/src/gallium/drivers/i965simple/brw_vs_state.c deleted file mode 100644 index 1eaff87892..0000000000 --- a/src/gallium/drivers/i965simple/brw_vs_state.c +++ /dev/null @@ -1,103 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" - -#include "util/u_math.h" -#include "util/u_memory.h" - -static void upload_vs_unit( struct brw_context *brw ) -{ - struct brw_vs_unit_state vs; - - memset(&vs, 0, sizeof(vs)); - - /* CACHE_NEW_VS_PROG */ - vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6; - vs.thread0.grf_reg_count = align(brw->vs.prog_data->total_grf, 16) / 16 - 1; - vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; - vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; - vs.thread3.dispatch_grf_start_reg = 1; - - - /* BRW_NEW_URB_FENCE */ - vs.thread4.nr_urb_entries = brw->urb.nr_vs_entries; - vs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; - vs.thread4.max_threads = MIN2( - MAX2(0, (brw->urb.nr_vs_entries - 6) / 2 - 1), - 15); - - - - if (BRW_DEBUG & DEBUG_SINGLE_THREAD) - vs.thread4.max_threads = 0; - - /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ - if (0 /*brw->attribs.Clip->ClipPlanesEnabled*/) { - /* Note that we read in the userclip planes as well, hence - * clip_start: - */ - vs.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; - } - else { - vs.thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2; - } - - vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - vs.thread3.urb_entry_read_offset = 0; - - /* No samplers for ARB_vp programs: - */ - vs.vs5.sampler_count = 0; - - if (BRW_DEBUG & DEBUG_STATS) - vs.thread4.stats_enable = 1; - - /* Vertex program always enabled: - */ - vs.vs6.vs_enable = 1; - - brw->vs.state_gs_offset = brw_cache_data( &brw->cache[BRW_VS_UNIT], &vs ); -} - - -const struct brw_tracked_state brw_vs_unit = { - .dirty = { - .brw = (BRW_NEW_CLIP | - BRW_NEW_CURBE_OFFSETS | - BRW_NEW_URB_FENCE), - .cache = CACHE_NEW_VS_PROG - }, - .update = upload_vs_unit -}; diff --git a/src/gallium/drivers/i965simple/brw_winsys.h b/src/gallium/drivers/i965simple/brw_winsys.h deleted file mode 100644 index ec1e400418..0000000000 --- a/src/gallium/drivers/i965simple/brw_winsys.h +++ /dev/null @@ -1,209 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \file - * This is the interface that i965simple requires any window system - * hosting it to implement. This is the only include file in i965simple - * which is public. - * - */ - -#ifndef BRW_WINSYS_H -#define BRW_WINSYS_H - - -#include "pipe/p_defines.h" - - -/* Pipe drivers are (meant to be!) independent of both GL and the - * window system. The window system provides a buffer manager and a - * set of additional hooks for things like command buffer submission, - * etc. - * - * There clearly has to be some agreement between the window system - * driver and the hardware driver about the format of command buffers, - * etc. - */ - -struct pipe_buffer; -struct pipe_fence_handle; -struct pipe_winsys; -struct pipe_screen; - - -/* The pipe driver currently understands the following chipsets: - */ -#define PCI_CHIP_I965_G 0x29A2 -#define PCI_CHIP_I965_Q 0x2992 -#define PCI_CHIP_I965_G_1 0x2982 -#define PCI_CHIP_I965_GM 0x2A02 -#define PCI_CHIP_I965_GME 0x2A12 - - -/* These are the names of all the state caches managed by the driver. - * - * When data is uploaded to a buffer with buffer_subdata, we use the - * special version of that function below so that information about - * what type of data this is can be passed to the winsys backend. - * That in turn allows the correct flags to be set in the aub file - * dump to allow human-readable file dumps later on. - */ - -enum brw_cache_id { - BRW_CC_VP, - BRW_CC_UNIT, - BRW_WM_PROG, - BRW_SAMPLER_DEFAULT_COLOR, - BRW_SAMPLER, - BRW_WM_UNIT, - BRW_SF_PROG, - BRW_SF_VP, - BRW_SF_UNIT, - BRW_VS_UNIT, - BRW_VS_PROG, - BRW_GS_UNIT, - BRW_GS_PROG, - BRW_CLIP_VP, - BRW_CLIP_UNIT, - BRW_CLIP_PROG, - BRW_SS_SURFACE, - BRW_SS_SURF_BIND, - - BRW_MAX_CACHE -}; - -#define BRW_CONSTANT_BUFFER BRW_MAX_CACHE - -/** - * Additional winsys interface for i965simple. - * - * It is an over-simple batchbuffer mechanism. Will want to improve the - * performance of this, perhaps based on the cmdstream stuff. It - * would be pretty impossible to implement swz on top of this - * interface. - * - * Will also need additions/changes to implement static/dynamic - * indirect state. - */ -struct brw_winsys { - - void (*destroy)(struct brw_winsys *); - - /** - * Reserve space on batch buffer. - * - * Returns a null pointer if there is insufficient space in the batch buffer - * to hold the requested number of dwords and relocations. - * - * The number of dwords should also include the number of relocations. - */ - unsigned *(*batch_start)(struct brw_winsys *sws, - unsigned dwords, - unsigned relocs); - - void (*batch_dword)(struct brw_winsys *sws, - unsigned dword); - - /** - * Emit a relocation to a buffer. - * - * Used not only when the buffer addresses are not pinned, but also to - * ensure refered buffers will not be destroyed until the current batch - * buffer execution is finished. - * - * The access flags is a combination of I915_BUFFER_ACCESS_WRITE and - * I915_BUFFER_ACCESS_READ macros. - */ - void (*batch_reloc)(struct brw_winsys *sws, - struct pipe_buffer *buf, - unsigned access_flags, - unsigned delta); - - - /* Not used yet, but really want this: - */ - void (*batch_end)( struct brw_winsys *sws ); - - /** - * Flush the batch buffer. - * - * Fence argument must point to NULL or to a previous fence, and the caller - * must call fence_reference when done with the fence. - */ - void (*batch_flush)(struct brw_winsys *sws, - struct pipe_fence_handle **fence); - - - /* A version of buffer_subdata that includes information for the - * simulator: - */ - void (*buffer_subdata_typed)(struct brw_winsys *sws, - struct pipe_buffer *buf, - unsigned long offset, - unsigned long size, - const void *data, - unsigned data_type); - - - /* A cheat so we don't have to think about relocations in a couple - * of places yet: - */ - unsigned (*get_buffer_offset)( struct brw_winsys *sws, - struct pipe_buffer *buf, - unsigned flags ); - -}; - -#define BRW_BUFFER_ACCESS_WRITE 0x1 -#define BRW_BUFFER_ACCESS_READ 0x2 - -#define BRW_BUFFER_USAGE_LIT_VERTEX (PIPE_BUFFER_USAGE_CUSTOM << 0) - - -struct pipe_context *brw_create(struct pipe_screen *, - struct brw_winsys *, - unsigned pci_id); - -static inline boolean brw_batchbuffer_data(struct brw_winsys *winsys, - const void *data, - unsigned bytes) -{ - static const unsigned incr = sizeof(unsigned); - uint i; - const unsigned *udata = (const unsigned*)(data); - unsigned size = bytes/incr; - - winsys->batch_start(winsys, size, 0); - for (i = 0; i < size; ++i) { - winsys->batch_dword(winsys, udata[i]); - } - winsys->batch_end(winsys); - - return (i == size); -} -#endif diff --git a/src/gallium/drivers/i965simple/brw_wm.c b/src/gallium/drivers/i965simple/brw_wm.c deleted file mode 100644 index 10161f2d2f..0000000000 --- a/src/gallium/drivers/i965simple/brw_wm.c +++ /dev/null @@ -1,209 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_context.h" -#include "brw_util.h" -#include "brw_wm.h" -#include "brw_eu.h" -#include "brw_state.h" -#include "util/u_memory.h" - - - -static void do_wm_prog( struct brw_context *brw, - struct brw_fragment_program *fp, - struct brw_wm_prog_key *key) -{ - struct brw_wm_compile *c = CALLOC_STRUCT(brw_wm_compile); - const unsigned *program; - unsigned program_size; - - c->key = *key; - c->fp = fp; - - c->delta_xy[0] = brw_null_reg(); - c->delta_xy[1] = brw_null_reg(); - c->pixel_xy[0] = brw_null_reg(); - c->pixel_xy[1] = brw_null_reg(); - c->pixel_w = brw_null_reg(); - - - debug_printf("XXXXXXXX FP\n"); - - brw_wm_glsl_emit(c); - - /* get the program - */ - program = brw_get_program(&c->func, &program_size); - - /* - */ - brw->wm.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_WM_PROG], - &c->key, - sizeof(c->key), - program, - program_size, - &c->prog_data, - &brw->wm.prog_data ); - - FREE(c); -} - - - -static void brw_wm_populate_key( struct brw_context *brw, - struct brw_wm_prog_key *key ) -{ - /* BRW_NEW_FRAGMENT_PROGRAM */ - struct brw_fragment_program *fp = - (struct brw_fragment_program *)brw->attribs.FragmentProgram; - unsigned lookup = 0; - unsigned line_aa; - - memset(key, 0, sizeof(*key)); - - /* Build the index for table lookup - */ - /* BRW_NEW_DEPTH_STENCIL */ - if (fp->info.uses_kill || - brw->attribs.DepthStencil->alpha.enabled) - lookup |= IZ_PS_KILL_ALPHATEST_BIT; - - if (fp->info.writes_z) - lookup |= IZ_PS_COMPUTES_DEPTH_BIT; - - if (brw->attribs.DepthStencil->depth.enabled) - lookup |= IZ_DEPTH_TEST_ENABLE_BIT; - - if (brw->attribs.DepthStencil->depth.enabled && - brw->attribs.DepthStencil->depth.writemask) /* ?? */ - lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; - - if (brw->attribs.DepthStencil->stencil[0].enabled) { - lookup |= IZ_STENCIL_TEST_ENABLE_BIT; - - if (brw->attribs.DepthStencil->stencil[0].writemask || - brw->attribs.DepthStencil->stencil[1].writemask) - lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; - } - - /* XXX: when should this be disabled? - */ - if (1) - lookup |= IZ_EARLY_DEPTH_TEST_BIT; - - - line_aa = AA_NEVER; - - /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ - if (brw->attribs.Raster->line_smooth) { - if (brw->reduced_primitive == PIPE_PRIM_LINES) { - line_aa = AA_ALWAYS; - } - else if (brw->reduced_primitive == PIPE_PRIM_TRIANGLES) { - if (brw->attribs.Raster->fill_ccw == PIPE_POLYGON_MODE_LINE) { - line_aa = AA_SOMETIMES; - - if (brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_LINE || - (brw->attribs.Raster->cull_mode == PIPE_WINDING_CW)) - line_aa = AA_ALWAYS; - } - else if (brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_LINE) { - line_aa = AA_SOMETIMES; - - if (brw->attribs.Raster->cull_mode == PIPE_WINDING_CCW) - line_aa = AA_ALWAYS; - } - } - } - - brw_wm_lookup_iz(line_aa, - lookup, - key); - - -#if 0 - /* BRW_NEW_SAMPLER - * - * Not doing any of this at the moment: - */ - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - const struct pipe_sampler_state *unit = brw->attribs.Samplers[i]; - - if (unit) { - - if (unit->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - key->shadowtex_mask |= 1<Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA) - key->yuvtex_mask |= 1<program_string_id = fp->id; - -} - - -static void brw_upload_wm_prog( struct brw_context *brw ) -{ - struct brw_wm_prog_key key; - struct brw_fragment_program *fp = (struct brw_fragment_program *) - brw->attribs.FragmentProgram; - - brw_wm_populate_key(brw, &key); - - /* Make an early check for the key. - */ - if (brw_search_cache(&brw->cache[BRW_WM_PROG], - &key, sizeof(key), - &brw->wm.prog_data, - &brw->wm.prog_gs_offset)) - return; - - do_wm_prog(brw, fp, &key); -} - - -const struct brw_tracked_state brw_wm_prog = { - .dirty = { - .brw = (BRW_NEW_FS | - BRW_NEW_REDUCED_PRIMITIVE), - .cache = 0 - }, - .update = brw_upload_wm_prog -}; - diff --git a/src/gallium/drivers/i965simple/brw_wm.h b/src/gallium/drivers/i965simple/brw_wm.h deleted file mode 100644 index b29c4393f0..0000000000 --- a/src/gallium/drivers/i965simple/brw_wm.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_WM_H -#define BRW_WM_H - - -#include "brw_context.h" -#include "brw_eu.h" - -/* A big lookup table is used to figure out which and how many - * additional regs will inserted before the main payload in the WM - * program execution. These mainly relate to depth and stencil - * processing and the early-depth-test optimization. - */ -#define IZ_PS_KILL_ALPHATEST_BIT 0x1 -#define IZ_PS_COMPUTES_DEPTH_BIT 0x2 -#define IZ_DEPTH_WRITE_ENABLE_BIT 0x4 -#define IZ_DEPTH_TEST_ENABLE_BIT 0x8 -#define IZ_STENCIL_WRITE_ENABLE_BIT 0x10 -#define IZ_STENCIL_TEST_ENABLE_BIT 0x20 -#define IZ_EARLY_DEPTH_TEST_BIT 0x40 -#define IZ_BIT_MAX 0x80 - -#define AA_NEVER 0 -#define AA_SOMETIMES 1 -#define AA_ALWAYS 2 - -struct brw_wm_prog_key { - unsigned source_depth_reg:3; - unsigned aa_dest_stencil_reg:3; - unsigned dest_depth_reg:3; - unsigned nr_depth_regs:3; - unsigned shadowtex_mask:8; - unsigned computes_depth:1; /* could be derived from program string */ - unsigned source_depth_to_render_target:1; - unsigned runtime_check_aads_emit:1; - - unsigned yuvtex_mask:8; - - unsigned program_string_id; -}; - - - - - -#define PROGRAM_INTERNAL_PARAM -#define MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS 1024 /* 72 for GL_ARB_f_p */ -#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + PIPE_MAX_ATTRIBS + 3) -#define BRW_WM_MAX_GRF 128 /* hardware limit */ -#define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) -#define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12) -#define BRW_WM_MAX_PARAM 256 -#define BRW_WM_MAX_CONST 256 -#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS - -#define PAYLOAD_DEPTH (PIPE_MAX_ATTRIBS) - -#define MAX_IFSN 32 -#define MAX_LOOP_DEPTH 32 - -struct brw_wm_compile { - struct brw_compile func; - struct brw_wm_prog_key key; - struct brw_wm_prog_data prog_data; /* result */ - - struct brw_fragment_program *fp; - - unsigned grf_limit; - unsigned max_wm_grf; - - - struct brw_reg pixel_xy[2]; - struct brw_reg delta_xy[2]; - struct brw_reg pixel_w; - - - struct brw_reg wm_regs[8][32][4]; - - struct brw_reg payload_depth[4]; - struct brw_reg payload_coef[16]; - - struct brw_reg emit_mask_reg; - - struct brw_instruction *if_inst[MAX_IFSN]; - int if_insn; - - struct brw_instruction *loop_inst[MAX_LOOP_DEPTH]; - int loop_insn; - - struct brw_instruction *inst0; - struct brw_instruction *inst1; - - struct brw_reg stack; - struct brw_indirect stack_index; - - unsigned reg_index; - - unsigned tmp_start; - unsigned tmp_index; -}; - - - -void brw_wm_lookup_iz( unsigned line_aa, - unsigned lookup, - struct brw_wm_prog_key *key ); - -void brw_wm_glsl_emit(struct brw_wm_compile *c); -void brw_wm_emit_decls(struct brw_wm_compile *c); - -#endif diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c deleted file mode 100644 index d50e66f613..0000000000 --- a/src/gallium/drivers/i965simple/brw_wm_decl.c +++ /dev/null @@ -1,392 +0,0 @@ - -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_wm.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" - -static struct brw_reg alloc_tmp(struct brw_wm_compile *c) -{ - c->tmp_index++; - c->reg_index = MAX2(c->reg_index, c->tmp_start + c->tmp_index); - return brw_vec8_grf(c->tmp_start + c->tmp_index, 0); -} - -static void release_tmps(struct brw_wm_compile *c) -{ - c->tmp_index = 0; -} - - - -static int is_null( struct brw_reg reg ) -{ - return (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && - reg.nr == BRW_ARF_NULL); -} - -static void emit_pixel_xy( struct brw_wm_compile *c ) -{ - if (is_null(c->pixel_xy[0])) { - - struct brw_compile *p = &c->func; - struct brw_reg r1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); - - c->pixel_xy[0] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW)); - c->pixel_xy[1] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW)); - - /* Calculate pixel centers by adding 1 or 0 to each of the - * micro-tile coordinates passed in r1. - */ - brw_ADD(p, - c->pixel_xy[0], - stride(suboffset(r1_uw, 4), 2, 4, 0), - brw_imm_v(0x10101010)); - - brw_ADD(p, - c->pixel_xy[1], - stride(suboffset(r1_uw, 5), 2, 4, 0), - brw_imm_v(0x11001100)); - } -} - - - - - - -static void emit_delta_xy( struct brw_wm_compile *c ) -{ - if (is_null(c->delta_xy[0])) { - struct brw_compile *p = &c->func; - struct brw_reg r1 = brw_vec1_grf(1, 0); - - emit_pixel_xy(c); - - c->delta_xy[0] = alloc_tmp(c); - c->delta_xy[1] = alloc_tmp(c); - - /* Calc delta X,Y by subtracting origin in r1 from the pixel - * centers. - */ - brw_ADD(p, - c->delta_xy[0], - retype(c->pixel_xy[0], BRW_REGISTER_TYPE_UW), - negate(r1)); - - brw_ADD(p, - c->delta_xy[1], - retype(c->pixel_xy[1], BRW_REGISTER_TYPE_UW), - negate(suboffset(r1,1))); - } -} - - - -#if 0 -static void emit_pixel_w( struct brw_wm_compile *c ) -{ - if (is_null(c->pixel_w)) { - struct brw_compile *p = &c->func; - - struct brw_reg interp_wpos = c->coef_wpos; - - c->pixel_w = alloc_tmp(c); - - emit_delta_xy(c); - - /* Calc 1/w - just linterp wpos[3] optimized by putting the - * result straight into a message reg. - */ - struct brw_reg interp3 = brw_vec1_grf(interp_wpos.nr+1, 4); - brw_LINE(p, brw_null_reg(), interp3, c->delta_xy[0]); - brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), c->delta_xy[1]); - - /* Calc w */ - brw_math_16( p, - c->pixel_w, - BRW_MATH_FUNCTION_INV, - BRW_MATH_SATURATE_NONE, - 2, - brw_null_reg(), - BRW_MATH_PRECISION_FULL); - } -} -#endif - - -static void emit_cinterp(struct brw_wm_compile *c, - int idx, - int mask ) -{ - struct brw_compile *p = &c->func; - struct brw_reg interp[4]; - struct brw_reg coef = c->payload_coef[idx]; - int i; - - interp[0] = brw_vec1_grf(coef.nr, 0); - interp[1] = brw_vec1_grf(coef.nr, 4); - interp[2] = brw_vec1_grf(coef.nr+1, 0); - interp[3] = brw_vec1_grf(coef.nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<wm_regs[TGSI_FILE_INPUT][idx][i]; - brw_MOV(p, dst, suboffset(interp[i],3)); - } - } -} - -static void emit_linterp(struct brw_wm_compile *c, - int idx, - int mask ) -{ - struct brw_compile *p = &c->func; - struct brw_reg interp[4]; - struct brw_reg coef = c->payload_coef[idx]; - int i; - - emit_delta_xy(c); - - interp[0] = brw_vec1_grf(coef.nr, 0); - interp[1] = brw_vec1_grf(coef.nr, 4); - interp[2] = brw_vec1_grf(coef.nr+1, 0); - interp[3] = brw_vec1_grf(coef.nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<wm_regs[TGSI_FILE_INPUT][idx][i]; - brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]); - brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]); - } - } -} - -#if 0 -static void emit_pinterp(struct brw_wm_compile *c, - int idx, - int mask ) -{ - struct brw_compile *p = &c->func; - struct brw_reg interp[4]; - struct brw_reg coef = c->payload_coef[idx]; - int i; - - get_delta_xy(c); - get_pixel_w(c); - - interp[0] = brw_vec1_grf(coef.nr, 0); - interp[1] = brw_vec1_grf(coef.nr, 4); - interp[2] = brw_vec1_grf(coef.nr+1, 0); - interp[3] = brw_vec1_grf(coef.nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<delta_xy[0]); - brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]); - brw_MUL(p, dst, dst, c->pixel_w); - } - } -} -#endif - - - -#if 0 -static void emit_wpos( ) -{ - struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); - struct tgsi_full_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); - struct tgsi_full_src_register deltas = get_delta_xy(c); - struct tgsi_full_src_register arg2; - unsigned opcode; - - opcode = WM_LINTERP; - arg2 = src_undef(); - - /* Have to treat wpos.xy specially: - */ - emit_op(c, - WM_WPOSXY, - dst_mask(dst, WRITEMASK_XY), - 0, 0, 0, - get_pixel_xy(c), - src_undef(), - src_undef()); - - dst = dst_mask(dst, WRITEMASK_ZW); - - /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw - */ - emit_op(c, - WM_LINTERP, - dst, - 0, 0, 0, - interp, - deltas, - arg2); -} -#endif - - - - -/* Perform register allocation: - * - * -- r0??? - * -- passthrough depth regs (and stencil/aa??) - * -- curbe ?? - * -- inputs (coefficients) - * - * Use a totally static register allocation. This will perform poorly - * but is an easy way to get started (again). - */ -static void prealloc_reg(struct brw_wm_compile *c) -{ - int i, j; - int nr_curbe_regs = 0; - - /* R0, then some depth related regs: - */ - for (i = 0; i < c->key.nr_depth_regs; i++) { - c->payload_depth[i] = brw_vec8_grf(i*2, 0); - c->reg_index += 2; - } - - - /* Then a copy of our part of the CURBE entry: - */ - { - int nr_constants = c->fp->info.file_max[TGSI_FILE_CONSTANT] + 1; - int index = 0; - - /* XXX number of constants, or highest numbered constant? */ - assert(nr_constants == c->fp->info.file_count[TGSI_FILE_CONSTANT]); - - c->prog_data.max_const = 4*nr_constants; - for (i = 0; i < nr_constants; i++) { - for (j = 0; j < 4; j++, index++) - c->wm_regs[TGSI_FILE_CONSTANT][i][j] = brw_vec1_grf(c->reg_index + index/8, - index%8); - } - - nr_curbe_regs = 2*((4*nr_constants+15)/16); - c->reg_index += nr_curbe_regs; - } - - /* Adjust for parameter coefficients for position, which are - * currently always provided. - */ -// c->position_coef[i] = brw_vec8_grf(c->reg_index, 0); - c->reg_index += 2; - - /* Next we receive the plane coefficients for parameter - * interpolation: - */ - assert(c->fp->info.file_max[TGSI_FILE_INPUT] == c->fp->info.num_inputs); - for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++) { - c->payload_coef[i] = brw_vec8_grf(c->reg_index, 0); - c->reg_index += 2; - } - - c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; - c->prog_data.urb_read_length = (c->fp->info.num_inputs + 1) * 2; - c->prog_data.curb_read_length = nr_curbe_regs; - - /* That's the end of the payload, now we can start allocating registers. - */ - c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index++; - - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index += 2; - - /* Now allocate room for the interpolated inputs and staging - * registers for the outputs: - */ - /* XXX do we want to loop over the _number_ of inputs/outputs or loop - * to the highest input/output index that's used? - * Probably the same, actually. - */ - assert(c->fp->info.file_max[TGSI_FILE_INPUT] + 1 == c->fp->info.num_inputs); - assert(c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1 == c->fp->info.num_outputs); - for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++) - for (j = 0; j < 4; j++) - c->wm_regs[TGSI_FILE_INPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); - - for (i = 0; i < c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1; i++) - for (j = 0; j < 4; j++) - c->wm_regs[TGSI_FILE_OUTPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); - - /* Beyond this we should only need registers for internal temporaries: - */ - c->tmp_start = c->reg_index; -} - - - - - -/* Need to interpolate fragment program inputs in as a preamble to the - * shader. A more sophisticated compiler would do this on demand, but - * we'll do it up front: - */ -void brw_wm_emit_decls(struct brw_wm_compile *c) -{ - struct tgsi_parse_context parse; - int done = 0; - - prealloc_reg(c); - - tgsi_parse_init( &parse, c->fp->program.tokens ); - - while( !done && - !tgsi_parse_end_of_tokens( &parse ) ) - { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; - unsigned first = decl->DeclarationRange.First; - unsigned last = decl->DeclarationRange.Last; - unsigned mask = decl->Declaration.UsageMask; /* ? */ - unsigned i; - - if (decl->Declaration.File != TGSI_FILE_INPUT) - break; - - for( i = first; i <= last; i++ ) { - switch (decl->Declaration.Interpolate) { - case TGSI_INTERPOLATE_CONSTANT: - emit_cinterp(c, i, mask); - break; - - case TGSI_INTERPOLATE_LINEAR: - emit_linterp(c, i, mask); - break; - - case TGSI_INTERPOLATE_PERSPECTIVE: - //emit_pinterp(c, i, mask); - emit_linterp(c, i, mask); - break; - } - } - break; - } - case TGSI_TOKEN_TYPE_IMMEDIATE: - case TGSI_TOKEN_TYPE_INSTRUCTION: - default: - done = 1; - break; - } - } - - tgsi_parse_free (&parse); - - release_tmps(c); -} diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c deleted file mode 100644 index db75963932..0000000000 --- a/src/gallium/drivers/i965simple/brw_wm_glsl.c +++ /dev/null @@ -1,1076 +0,0 @@ - -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_wm.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" - - - -static int get_scalar_dst_index(struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register dst = inst->FullDstRegisters[0].DstRegister; - int i; - for (i = 0; i < 4; i++) - if (dst.WriteMask & (1<tmp_index++; - c->reg_index = MAX2(c->reg_index, c->tmp_index); - return brw_vec8_grf(c->tmp_start + c->tmp_index, 0); -} - -static void release_tmps(struct brw_wm_compile *c) -{ - c->tmp_index = 0; -} - - -static struct brw_reg -get_reg(struct brw_wm_compile *c, int file, int index, int component ) -{ - switch (file) { - case TGSI_FILE_NULL: - return brw_null_reg(); - - case TGSI_FILE_SAMPLER: - /* Should never get here: - */ - assert (0); - return brw_null_reg(); - - case TGSI_FILE_IMMEDIATE: - /* These need a different path: - */ - assert(0); - return brw_null_reg(); - - - case TGSI_FILE_CONSTANT: - case TGSI_FILE_INPUT: - case TGSI_FILE_OUTPUT: - case TGSI_FILE_TEMPORARY: - case TGSI_FILE_ADDRESS: - return c->wm_regs[file][index][component]; - - default: - assert(0); - return brw_null_reg(); - } -} - - -static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst, - int component) -{ - return get_reg(c, - inst->FullDstRegisters[0].DstRegister.File, - inst->FullDstRegisters[0].DstRegister.Index, - component); -} - -static int get_swz( struct tgsi_src_register src, int index ) -{ - switch (index & 3) { - case 0: return src.SwizzleX; - case 1: return src.SwizzleY; - case 2: return src.SwizzleZ; - case 3: return src.SwizzleW; - default: return 0; - } -} - -static int get_ext_swz( struct tgsi_src_register_ext_swz src, int index ) -{ - switch (index & 3) { - case 0: return src.ExtSwizzleX; - case 1: return src.ExtSwizzleY; - case 2: return src.ExtSwizzleZ; - case 3: return src.ExtSwizzleW; - default: return 0; - } -} - -static struct brw_reg get_src_reg(struct brw_wm_compile *c, - struct tgsi_full_src_register *src, - int index) -{ - struct brw_reg reg; - int component = index; - int neg = 0; - int abs = 0; - - if (src->SrcRegister.Negate) - neg = 1; - - component = get_swz(src->SrcRegister, component); - - /* Yes, there are multiple negates: - */ - switch (component & 3) { - case 0: neg ^= src->SrcRegisterExtSwz.NegateX; break; - case 1: neg ^= src->SrcRegisterExtSwz.NegateY; break; - case 2: neg ^= src->SrcRegisterExtSwz.NegateZ; break; - case 3: neg ^= src->SrcRegisterExtSwz.NegateW; break; - } - - /* And multiple swizzles, fun isn't it: - */ - component = get_ext_swz(src->SrcRegisterExtSwz, component); - - /* Not handling indirect lookups yet: - */ - assert(src->SrcRegister.Indirect == 0); - - /* Don't know what dimension means: - */ - assert(src->SrcRegister.Dimension == 0); - - /* Will never handle any of this stuff: - */ - assert(src->SrcRegisterExtMod.Complement == 0); - assert(src->SrcRegisterExtMod.Bias == 0); - assert(src->SrcRegisterExtMod.Scale2X == 0); - - if (src->SrcRegisterExtMod.Absolute) - abs = 1; - - /* Another negate! This is a post-absolute negate, which we - * can't do. Need to clean the crap out of tgsi somehow. - */ - assert(src->SrcRegisterExtMod.Negate == 0); - - switch( component ) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: - reg = get_reg(c, - src->SrcRegister.File, - src->SrcRegister.Index, - component ); - - if (neg) - reg = negate(reg); - - if (abs) - reg = brw_abs(reg); - - break; - - /* XXX: this won't really work in the general case, but we know - * that the extended swizzle is only allowed in the SWZ - * instruction (right??), in which case using an immediate - * directly will work. - */ - case TGSI_EXTSWIZZLE_ZERO: - reg = brw_imm_f(0); - break; - - case TGSI_EXTSWIZZLE_ONE: - if (neg && !abs) - reg = brw_imm_f(-1.0); - else - reg = brw_imm_f(1.0); - break; - - default: - assert(0); - break; - } - - - return reg; -} - -static void emit_abs( struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - - int i; - struct brw_compile *p = &c->func; - brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); - for (i = 0; i < 4; i++) { - if (mask & (1<FullSrcRegisters[0], i); - brw_MOV(p, dst, brw_abs(src)); /* NOTE */ - } - } - brw_set_saturate(p, 0); -} - - -static void emit_xpd(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - int i; - struct brw_compile *p = &c->func; - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - for (i = 0; i < 4; i++) { - unsigned i2 = (i+2)%3; - unsigned i1 = (i+1)%3; - if (mask & (1<FullSrcRegisters[0], i2)); - src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i1); - brw_MUL(p, brw_null_reg(), src0, src1); - src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i1); - src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i2); - brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); - brw_MAC(p, dst, src0, src1); - brw_set_saturate(p, 0); - } - } - brw_set_saturate(p, 0); -} - -static void emit_dp3(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - struct brw_reg src0[3], src1[3], dst; - int i; - struct brw_compile *p = &c->func; - for (i = 0; i < 3; i++) { - src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); - src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); - } - - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - brw_MAC(p, dst, src0[2], src1[2]); - brw_set_saturate(p, 0); -} - -static void emit_dp4(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - struct brw_reg src0[4], src1[4], dst; - int i; - struct brw_compile *p = &c->func; - for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); - src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); - } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_MAC(p, brw_null_reg(), src0[2], src1[2]); - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - brw_MAC(p, dst, src0[3], src1[3]); - brw_set_saturate(p, 0); -} - -static void emit_dph(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - struct brw_reg src0[4], src1[4], dst; - int i; - struct brw_compile *p = &c->func; - for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); - src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); - } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_MAC(p, dst, src0[2], src1[2]); - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - brw_ADD(p, dst, src0[3], src1[3]); - brw_set_saturate(p, 0); -} - -static void emit_math1(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst, unsigned func) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, dst; - - src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); - brw_MOV(p, brw_message_reg(2), src0); - brw_math(p, - dst, - func, - ((inst->Instruction.Saturate != TGSI_SAT_NONE) - ? BRW_MATH_SATURATE_SATURATE - : BRW_MATH_SATURATE_NONE), - 2, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); -} - - -static void emit_alu2(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst, - unsigned opcode) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, dst; - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - int i; - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<FullSrcRegisters[0], i); - src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); - brw_alu2(p, opcode, dst, src0, src1); - } - } - brw_set_saturate(p, 0); -} - - -static void emit_alu1(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst, - unsigned opcode) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, dst; - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - int i; - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<FullSrcRegisters[0], i); - brw_alu1(p, opcode, dst, src0); - } - } - if (inst->Instruction.Saturate != TGSI_SAT_NONE) - brw_set_saturate(p, 0); -} - - -static void emit_max(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - struct brw_compile *p = &c->func; - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - struct brw_reg src0, src1, dst; - int i; - brw_push_insn_state(p); - for (i = 0; i < 4; i++) { - if (mask & (1<FullSrcRegisters[0], i); - src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - brw_MOV(p, dst, src0); - brw_set_saturate(p, 0); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1); - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, src1); - brw_set_saturate(p, 0); - brw_set_predicate_control_flag_value(p, 0xff); - } - } - brw_pop_insn_state(p); -} - -static void emit_min(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - struct brw_compile *p = &c->func; - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - struct brw_reg src0, src1, dst; - int i; - brw_push_insn_state(p); - for (i = 0; i < 4; i++) { - if (mask & (1<FullSrcRegisters[0], i); - src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - brw_MOV(p, dst, src0); - brw_set_saturate(p, 0); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, src1); - brw_set_saturate(p, 0); - brw_set_predicate_control_flag_value(p, 0xff); - } - } - brw_pop_insn_state(p); -} - -static void emit_pow(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg dst, src0, src1; - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); - src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); - src1 = get_src_reg(c, &inst->FullSrcRegisters[1], 0); - - brw_MOV(p, brw_message_reg(2), src0); - brw_MOV(p, brw_message_reg(3), src1); - - brw_math(p, - dst, - BRW_MATH_FUNCTION_POW, - (inst->Instruction.Saturate != TGSI_SAT_NONE - ? BRW_MATH_SATURATE_SATURATE - : BRW_MATH_SATURATE_NONE), - 2, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); -} - -static void emit_lrp(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - struct brw_compile *p = &c->func; - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - struct brw_reg dst, tmp1, tmp2, src0, src1, src2; - int i; - for (i = 0; i < 4; i++) { - if (mask & (1<FullSrcRegisters[0], i); - - src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); - - if (src1.nr == dst.nr) { - tmp1 = alloc_tmp(c); - brw_MOV(p, tmp1, src1); - } else - tmp1 = src1; - - src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i); - if (src2.nr == dst.nr) { - tmp2 = alloc_tmp(c); - brw_MOV(p, tmp2, src2); - } else - tmp2 = src2; - - brw_ADD(p, dst, negate(src0), brw_imm_f(1.0)); - brw_MUL(p, brw_null_reg(), dst, tmp2); - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - brw_MAC(p, dst, src0, tmp1); - brw_set_saturate(p, 0); - } - release_tmps(c); - } -} - -static void emit_kil(struct brw_wm_compile *c) -{ - struct brw_compile *p = &c->func; - struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK - brw_AND(p, depth, c->emit_mask_reg, depth); - brw_pop_insn_state(p); -} - -static void emit_mad(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - struct brw_compile *p = &c->func; - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - struct brw_reg dst, src0, src1, src2; - int i; - - for (i = 0; i < 4; i++) { - if (mask & (1<FullSrcRegisters[0], i); - src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); - src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i); - brw_MUL(p, dst, src0, src1); - - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - brw_ADD(p, dst, dst, src2); - brw_set_saturate(p, 0); - } - } -} - -static void emit_sop(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst, unsigned cond) -{ - struct brw_compile *p = &c->func; - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - struct brw_reg dst, src0, src1; - int i; - - brw_push_insn_state(p); - for (i = 0; i < 4; i++) { - if (mask & (1<FullSrcRegisters[0], i); - src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); - brw_CMP(p, brw_null_reg(), cond, src0, src1); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_MOV(p, dst, brw_imm_f(0.0)); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, brw_imm_f(1.0)); - } - } - brw_pop_insn_state(p); -} - - -static void emit_ddx(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - struct brw_compile *p = &c->func; - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst; - struct brw_reg src0, w; - unsigned nr, i; - src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); - w = get_src_reg(c, &inst->FullSrcRegisters[1], 3); - nr = src0.nr; - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); - for(i = 0; i < 4; i++ ) { - if (mask & (1<func; - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst; - struct brw_reg src0, w; - unsigned nr, i; - - src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); - nr = src0.nr; - w = get_src_reg(c, &inst->FullSrcRegisters[1], 3); - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); - for(i = 0; i < 4; i++ ) { - if (mask & (1<func; - struct brw_reg payload_reg = c->payload_depth[0]; - struct brw_reg dst[4], src[4]; - unsigned i; - for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i); - for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); - -#if 0 - switch (inst->TexSrcTarget) { - case TEXTURE_1D_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); - break; - case TEXTURE_2D_INDEX: - case TEXTURE_RECT_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), src[1]); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); - break; - default: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), src[1]); - brw_MOV(p, brw_message_reg(4), src[2]); - break; - } -#else - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), src[1]); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); -#endif - - brw_MOV(p, brw_message_reg(5), src[3]); - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); - brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), - 1, - retype(payload_reg, BRW_REGISTER_TYPE_UW), - inst->TexSrcUnit + 1, /* surface */ - inst->TexSrcUnit, /* sampler */ - inst->FullDstRegisters[0].DstRegister.WriteMask, - BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, - 4, - 4, - 0); -#endif -} - -static void emit_tex(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ -#if 0 - struct brw_compile *p = &c->func; - struct brw_reg payload_reg = c->payload_depth[0]; - struct brw_reg dst[4], src[4]; - unsigned msg_len; - unsigned i, nr; - unsigned emit; - boolean shadow = (c->key.shadowtex_mask & (1<TexSrcUnit)) ? 1 : 0; - - for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i); - for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); - -#if 0 - switch (inst->TexSrcTarget) { - case TEXTURE_1D_INDEX: - emit = WRITEMASK_X; - nr = 1; - break; - case TEXTURE_2D_INDEX: - case TEXTURE_RECT_INDEX: - emit = WRITEMASK_XY; - nr = 2; - break; - default: - emit = WRITEMASK_XYZ; - nr = 3; - break; - } -#else - emit = WRITEMASK_XY; - nr = 2; -#endif - - msg_len = 1; - - for (i = 0; i < nr; i++) { - static const unsigned swz[4] = {0,1,2,2}; - if (emit & (1<TexSrcUnit + 1, /* surface */ - inst->TexSrcUnit, /* sampler */ - inst->FullDstRegisters[0].DstRegister.WriteMask, - BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, - 4, - shadow ? 6 : 4, - 0); - - if (shadow) - brw_MOV(p, dst[3], brw_imm_f(1.0)); -#endif -} - - - - - - - - -static void emit_fb_write(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - struct brw_compile *p = &c->func; - int nr = 2; - int channel; - int base_reg = 0; - - // src0 = output color - // src1 = payload_depth[0] - // src2 = output depth - // dst = ??? - - - - /* Reserve a space for AA - may not be needed: - */ - if (c->key.aa_dest_stencil_reg) - nr += 1; - - { - brw_push_insn_state(p); - for (channel = 0; channel < 4; channel++) { - struct brw_reg src0 = c->wm_regs[TGSI_FILE_OUTPUT][0][channel]; - - /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ - /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ - brw_MOV(p, brw_message_reg(nr + channel), src0); - } - /* skip over the regs populated above: */ - nr += 8; - brw_pop_insn_state(p); - } - - - /* Pass through control information: - */ - /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ - { - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ - brw_MOV(p, - brw_message_reg(base_reg + 1), - brw_vec8_grf(1, 0)); - brw_pop_insn_state(p); - } - - /* Send framebuffer write message: */ - brw_fb_WRITE(p, - retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), - base_reg, - retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), - 0, /* render surface always 0 */ - nr, - 0, - 1); - -} - - -static void brw_wm_emit_instruction( struct brw_wm_compile *c, - struct tgsi_full_instruction *inst ) -{ - struct brw_compile *p = &c->func; - -#if 0 - if (inst->CondUpdate) - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - else - brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); -#else - brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); -#endif - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - emit_abs(c, inst); - break; - case TGSI_OPCODE_ADD: - emit_alu2(c, inst, BRW_OPCODE_ADD); - break; - case TGSI_OPCODE_SUB: - assert(0); -// emit_alu2(c, inst, BRW_OPCODE_SUB); - break; - case TGSI_OPCODE_FRC: - emit_alu1(c, inst, BRW_OPCODE_FRC); - break; - case TGSI_OPCODE_FLR: - assert(0); -// emit_alu1(c, inst, BRW_OPCODE_FLR); - break; - case TGSI_OPCODE_LRP: - emit_lrp(c, inst); - break; - case TGSI_OPCODE_INT: - emit_alu1(c, inst, BRW_OPCODE_RNDD); - break; - case TGSI_OPCODE_MOV: - emit_alu1(c, inst, BRW_OPCODE_MOV); - break; - case TGSI_OPCODE_DP3: - emit_dp3(c, inst); - break; - case TGSI_OPCODE_DP4: - emit_dp4(c, inst); - break; - case TGSI_OPCODE_XPD: - emit_xpd(c, inst); - break; - case TGSI_OPCODE_DPH: - emit_dph(c, inst); - break; - case TGSI_OPCODE_RCP: - emit_math1(c, inst, BRW_MATH_FUNCTION_INV); - break; - case TGSI_OPCODE_RSQ: - emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); - break; - case TGSI_OPCODE_SIN: - emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); - break; - case TGSI_OPCODE_COS: - emit_math1(c, inst, BRW_MATH_FUNCTION_COS); - break; - case TGSI_OPCODE_EX2: - emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); - break; - case TGSI_OPCODE_LG2: - emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); - break; - case TGSI_OPCODE_MAX: - emit_max(c, inst); - break; - case TGSI_OPCODE_MIN: - emit_min(c, inst); - break; - case TGSI_OPCODE_DDX: - emit_ddx(c, inst); - break; - case TGSI_OPCODE_DDY: - emit_ddy(c, inst); - break; - case TGSI_OPCODE_SLT: - emit_sop(c, inst, BRW_CONDITIONAL_L); - break; - case TGSI_OPCODE_SLE: - emit_sop(c, inst, BRW_CONDITIONAL_LE); - break; - case TGSI_OPCODE_SGT: - emit_sop(c, inst, BRW_CONDITIONAL_G); - break; - case TGSI_OPCODE_SGE: - emit_sop(c, inst, BRW_CONDITIONAL_GE); - break; - case TGSI_OPCODE_SEQ: - emit_sop(c, inst, BRW_CONDITIONAL_EQ); - break; - case TGSI_OPCODE_SNE: - emit_sop(c, inst, BRW_CONDITIONAL_NEQ); - break; - case TGSI_OPCODE_MUL: - emit_alu2(c, inst, BRW_OPCODE_MUL); - break; - case TGSI_OPCODE_POW: - emit_pow(c, inst); - break; - case TGSI_OPCODE_MAD: - emit_mad(c, inst); - break; - case TGSI_OPCODE_TEX: - emit_tex(c, inst); - break; - case TGSI_OPCODE_TXB: - emit_txb(c, inst); - break; - case TGSI_OPCODE_TEXKILL: - emit_kil(c); - break; - case TGSI_OPCODE_IF: - assert(c->if_insn < MAX_IFSN); - c->if_inst[c->if_insn++] = brw_IF(p, BRW_EXECUTE_8); - break; - case TGSI_OPCODE_ELSE: - c->if_inst[c->if_insn-1] = brw_ELSE(p, c->if_inst[c->if_insn-1]); - break; - case TGSI_OPCODE_ENDIF: - assert(c->if_insn > 0); - brw_ENDIF(p, c->if_inst[--c->if_insn]); - break; - case TGSI_OPCODE_BGNSUB: - case TGSI_OPCODE_ENDSUB: - break; - case TGSI_OPCODE_CAL: - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_set_access_mode(p, BRW_ALIGN_1); - brw_ADD(p, deref_1ud(c->stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); - brw_set_access_mode(p, BRW_ALIGN_16); - brw_ADD(p, - get_addr_reg(c->stack_index), - get_addr_reg(c->stack_index), brw_imm_d(4)); -// orig_inst = inst->Data; -// orig_inst->Data = &p->store[p->nr_insn]; - assert(0); - brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - brw_pop_insn_state(p); - break; - - case TGSI_OPCODE_RET: -#if 0 - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_ADD(p, - get_addr_reg(c->stack_index), - get_addr_reg(c->stack_index), brw_imm_d(-4)); - brw_set_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, brw_ip_reg(), deref_1ud(c->stack_index, 0)); - brw_set_access_mode(p, BRW_ALIGN_16); - brw_pop_insn_state(p); -#else - emit_fb_write(c, inst); -#endif - - break; - case TGSI_OPCODE_BGNFOR: - c->loop_inst[c->loop_insn++] = brw_DO(p, BRW_EXECUTE_8); - break; - case TGSI_OPCODE_BRK: - brw_BREAK(p); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - break; - case TGSI_OPCODE_CONT: - brw_CONT(p); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - break; - case TGSI_OPCODE_ENDFOR: - c->loop_insn--; - c->inst0 = c->inst1 = brw_WHILE(p, c->loop_inst[c->loop_insn]); - /* patch all the BREAK instructions from - last BGNFOR */ - while (c->inst0 > c->loop_inst[c->loop_insn]) { - c->inst0--; - if (c->inst0->header.opcode == BRW_OPCODE_BREAK) { - c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0 + 1; - c->inst0->bits3.if_else.pop_count = 0; - } else if (c->inst0->header.opcode == BRW_OPCODE_CONTINUE) { - c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0; - c->inst0->bits3.if_else.pop_count = 0; - } - } - break; - case TGSI_OPCODE_END: - emit_fb_write(c, inst); - break; - - default: - debug_printf("unsupported IR in fragment shader %d\n", - inst->Instruction.Opcode); - } -#if 0 - if (inst->CondUpdate) - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - else - brw_set_predicate_control(p, BRW_PREDICATE_NONE); -#endif -} - - - - - - -void brw_wm_glsl_emit(struct brw_wm_compile *c) -{ - struct tgsi_parse_context parse; - struct brw_compile *p = &c->func; - - brw_init_compile(&c->func); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - - c->reg_index = 0; - c->if_insn = 0; - c->loop_insn = 0; - c->stack_index = brw_indirect(0,0); - - /* Do static register allocation and parameter interpolation: - */ - brw_wm_emit_decls( c ); - - /* Emit the actual program. All done with very direct translation, - * hopefully we can improve on this shortly... - */ - brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack)); - - tgsi_parse_init( &parse, c->fp->program.tokens ); - - while( !tgsi_parse_end_of_tokens( &parse ) ) - { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - /* already done */ - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - /* not handled yet */ - assert(0); - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - brw_wm_emit_instruction(c, &parse.FullToken.FullInstruction); - break; - - default: - assert( 0 ); - } - } - - tgsi_parse_free (&parse); - - /* Fix up call targets: - */ -#if 0 - { - unsigned nr_insns = c->fp->program.Base.NumInstructions; - unsigned insn, target_insn; - struct tgsi_full_instruction *inst1, *inst2; - struct brw_instruction *brw_inst1, *brw_inst2; - int offset; - for (insn = 0; insn < nr_insns; insn++) { - inst1 = &c->fp->program.Base.Instructions[insn]; - brw_inst1 = inst1->Data; - switch (inst1->Opcode) { - case TGSI_OPCODE_CAL: - target_insn = inst1->BranchTarget; - inst2 = &c->fp->program.Base.Instructions[target_insn]; - brw_inst2 = inst2->Data; - offset = brw_inst2 - brw_inst1; - brw_set_src1(brw_inst1, brw_imm_d(offset*16)); - break; - default: - break; - } - } - } -#endif - - c->prog_data.total_grf = c->reg_index; - c->prog_data.total_scratch = 0; -} diff --git a/src/gallium/drivers/i965simple/brw_wm_iz.c b/src/gallium/drivers/i965simple/brw_wm_iz.c deleted file mode 100644 index 6c5f25bf39..0000000000 --- a/src/gallium/drivers/i965simple/brw_wm_iz.c +++ /dev/null @@ -1,214 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "brw_wm.h" - - -#undef P /* prompted depth */ -#undef C /* computed */ -#undef N /* non-promoted? */ - -#define P 0 -#define C 1 -#define N 2 - -const struct { - unsigned mode:2; - unsigned sd_present:1; - unsigned sd_to_rt:1; - unsigned dd_present:1; - unsigned ds_present:1; -} wm_iz_table[IZ_BIT_MAX] = -{ - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { C, 0, 1, 0, 0 }, - { C, 0, 1, 0, 0 }, - { C, 1, 1, 0, 0 }, - { C, 1, 1, 0, 0 }, - { C, 0, 1, 0, 0 }, - { C, 0, 1, 0, 0 }, - { C, 1, 1, 1, 0 }, - { C, 1, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { C, 1, 1, 1, 0 }, - { C, 1, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { C, 0, 1, 0, 0 }, - { C, 0, 1, 0, 0 }, - { C, 1, 1, 0, 0 }, - { C, 1, 1, 0, 0 }, - { C, 0, 1, 0, 0 }, - { C, 0, 1, 0, 0 }, - { C, 1, 1, 1, 0 }, - { C, 1, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { C, 1, 1, 1, 0 }, - { C, 1, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 0, 0, 1 }, - { C, 0, 0, 0, 1 }, - { C, 0, 1, 0, 1 }, - { C, 0, 1, 0, 1 }, - { C, 1, 1, 0, 1 }, - { C, 1, 1, 0, 1 }, - { C, 0, 1, 0, 1 }, - { C, 0, 1, 0, 1 }, - { C, 1, 1, 1, 1 }, - { C, 1, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { C, 1, 1, 1, 1 }, - { C, 1, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { C, 0, 0, 0, 1 }, - { C, 0, 0, 0, 1 }, - { C, 0, 1, 0, 1 }, - { C, 0, 1, 0, 1 }, - { C, 1, 1, 0, 1 }, - { C, 1, 1, 0, 1 }, - { C, 0, 1, 0, 1 }, - { C, 0, 1, 0, 1 }, - { C, 1, 1, 1, 1 }, - { C, 1, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { C, 1, 1, 1, 1 }, - { C, 1, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { N, 1, 1, 0, 0 }, - { N, 0, 1, 0, 0 }, - { N, 0, 1, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { P, 0, 0, 0, 0 }, - { N, 1, 1, 0, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { N, 1, 1, 0, 0 }, - { N, 0, 1, 0, 0 }, - { N, 0, 1, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { P, 0, 0, 0, 0 }, - { N, 1, 1, 0, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { N, 1, 1, 0, 1 }, - { N, 0, 1, 0, 1 }, - { N, 0, 1, 0, 1 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { C, 0, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { P, 0, 0, 0, 0 }, - { N, 1, 1, 0, 1 }, - { C, 0, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { P, 0, 0, 0, 0 }, - { C, 0, 0, 0, 1 }, - { P, 0, 0, 0, 0 }, - { C, 0, 1, 0, 1 }, - { P, 0, 0, 0, 0 }, - { C, 1, 1, 0, 1 }, - { C, 0, 1, 0, 1 }, - { C, 0, 1, 0, 1 }, - { P, 0, 0, 0, 0 }, - { C, 1, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { P, 0, 0, 0, 0 }, - { C, 1, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { C, 0, 1, 1, 1 } -}; - -void brw_wm_lookup_iz( unsigned line_aa, - unsigned lookup, - struct brw_wm_prog_key *key ) -{ - unsigned reg = 2; - - assert (lookup < IZ_BIT_MAX); - - if (lookup & IZ_PS_COMPUTES_DEPTH_BIT) - key->computes_depth = 1; - - if (wm_iz_table[lookup].sd_present) { - key->source_depth_reg = reg; - reg += 2; - } - - if (wm_iz_table[lookup].sd_to_rt) - key->source_depth_to_render_target = 1; - - if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) { - key->aa_dest_stencil_reg = reg; - key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present && - line_aa == AA_SOMETIMES); - reg++; - } - - if (wm_iz_table[lookup].dd_present) { - key->dest_depth_reg = reg; - reg+=2; - } - - key->nr_depth_regs = (reg+1)/2; -} - diff --git a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c deleted file mode 100644 index 52b2909a65..0000000000 --- a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c +++ /dev/null @@ -1,275 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" - -#include "util/u_math.h" -#include "util/u_memory.h" - - -#define COMPAREFUNC_ALWAYS 0 -#define COMPAREFUNC_NEVER 0x1 -#define COMPAREFUNC_LESS 0x2 -#define COMPAREFUNC_EQUAL 0x3 -#define COMPAREFUNC_LEQUAL 0x4 -#define COMPAREFUNC_GREATER 0x5 -#define COMPAREFUNC_NOTEQUAL 0x6 -#define COMPAREFUNC_GEQUAL 0x7 - -/* Samplers aren't strictly wm state from the hardware's perspective, - * but that is the only situation in which we use them in this driver. - */ - -static int intel_translate_shadow_compare_func(unsigned func) -{ - switch(func) { - case PIPE_FUNC_NEVER: - return COMPAREFUNC_ALWAYS; - case PIPE_FUNC_LESS: - return COMPAREFUNC_LEQUAL; - case PIPE_FUNC_LEQUAL: - return COMPAREFUNC_LESS; - case PIPE_FUNC_GREATER: - return COMPAREFUNC_GEQUAL; - case PIPE_FUNC_GEQUAL: - return COMPAREFUNC_GREATER; - case PIPE_FUNC_NOTEQUAL: - return COMPAREFUNC_EQUAL; - case PIPE_FUNC_EQUAL: - return COMPAREFUNC_NOTEQUAL; - case PIPE_FUNC_ALWAYS: - return COMPAREFUNC_NEVER; - } - - debug_printf("Unknown value in %s: %x\n", __FUNCTION__, func); - return COMPAREFUNC_NEVER; -} - -/* The brw (and related graphics cores) do not support GL_CLAMP. The - * Intel drivers for "other operating systems" implement GL_CLAMP as - * GL_CLAMP_TO_EDGE, so the same is done here. - */ -static unsigned translate_wrap_mode( int wrap ) -{ - switch( wrap ) { - case PIPE_TEX_WRAP_REPEAT: - return BRW_TEXCOORDMODE_WRAP; - case PIPE_TEX_WRAP_CLAMP: - return BRW_TEXCOORDMODE_CLAMP; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */ - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - return BRW_TEXCOORDMODE_CLAMP_BORDER; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - return BRW_TEXCOORDMODE_MIRROR; - default: - return BRW_TEXCOORDMODE_WRAP; - } -} - - -static unsigned U_FIXED(float value, unsigned frac_bits) -{ - value *= (1<cache[BRW_SAMPLER_DEFAULT_COLOR], &sdc ); -} - - -/* - */ -static void brw_update_sampler_state( const struct pipe_sampler_state *pipe_sampler, - unsigned sdc_gs_offset, - struct brw_sampler_state *sampler) -{ - memset(sampler, 0, sizeof(*sampler)); - - switch (pipe_sampler->min_mip_filter) { - case PIPE_TEX_FILTER_NEAREST: - sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; - break; - case PIPE_TEX_FILTER_LINEAR: - sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; - break; - case PIPE_TEX_FILTER_ANISO: - sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; - break; - default: - break; - } - - switch (pipe_sampler->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; - break; - default: - break; - } - /* Set Anisotropy: - */ - switch (pipe_sampler->mag_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; - break; - case PIPE_TEX_FILTER_LINEAR: - sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; - break; - case PIPE_TEX_FILTER_ANISO: - sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; - break; - default: - break; - } - - if (pipe_sampler->max_anisotropy > 2.0) { - sampler->ss3.max_aniso = MAX2((pipe_sampler->max_anisotropy - 2) / 2, - BRW_ANISORATIO_16); - } - - sampler->ss1.s_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_s); - sampler->ss1.r_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_r); - sampler->ss1.t_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_t); - - /* Fulsim complains if I don't do this. Hardware doesn't mind: - */ -#if 0 - if (texObj->Target == GL_TEXTURE_CUBE_MAP_ARB) { - sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; - sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; - sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; - } -#endif - - /* Set shadow function: - */ - if (pipe_sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - /* Shadowing is "enabled" by emitting a particular sampler - * message (sample_c). So need to recompile WM program when - * shadow comparison is enabled on each/any texture unit. - */ - sampler->ss0.shadow_function = intel_translate_shadow_compare_func(pipe_sampler->compare_func); - } - - /* Set LOD bias: - */ - sampler->ss0.lod_bias = S_FIXED(CLAMP(pipe_sampler->lod_bias, -16, 15), 6); - - sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ - sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ - - /* Set BaseMipLevel, MaxLOD, MinLOD: - * - * XXX: I don't think that using firstLevel, lastLevel works, - * because we always setup the surface state as if firstLevel == - * level zero. Probably have to subtract firstLevel from each of - * these: - */ - sampler->ss0.base_level = U_FIXED(0, 1); - - sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(pipe_sampler->max_lod, 0), 13), 6); - sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(pipe_sampler->min_lod, 0), 13), 6); - - sampler->ss2.default_color_pointer = sdc_gs_offset >> 5; -} - - - -/* All samplers must be uploaded in a single contiguous array, which - * complicates various things. However, this is still too confusing - - * FIXME: simplify all the different new texture state flags. - */ -static void upload_wm_samplers(struct brw_context *brw) -{ - unsigned unit; - unsigned sampler_count = 0; - - /* BRW_NEW_SAMPLER */ - for (unit = 0; unit < brw->num_textures && unit < brw->num_samplers; - unit++) { - /* determine unit enable/disable by looking for a bound texture */ - if (brw->attribs.Texture[unit]) { - const struct pipe_sampler_state *sampler = brw->attribs.Samplers[unit]; - unsigned sdc_gs_offset = upload_default_color(brw, sampler->border_color); - - brw_update_sampler_state(sampler, - sdc_gs_offset, - &brw->wm.sampler[unit]); - - sampler_count = unit + 1; - } - } - - if (brw->wm.sampler_count != sampler_count) { - brw->wm.sampler_count = sampler_count; - brw->state.dirty.cache |= CACHE_NEW_SAMPLER; - } - - brw->wm.sampler_gs_offset = 0; - - if (brw->wm.sampler_count) - brw->wm.sampler_gs_offset = - brw_cache_data_sz(&brw->cache[BRW_SAMPLER], - brw->wm.sampler, - sizeof(struct brw_sampler_state) * brw->wm.sampler_count); -} - -const struct brw_tracked_state brw_wm_samplers = { - .dirty = { - .brw = BRW_NEW_SAMPLER, - .cache = 0 - }, - .update = upload_wm_samplers -}; - diff --git a/src/gallium/drivers/i965simple/brw_wm_state.c b/src/gallium/drivers/i965simple/brw_wm_state.c deleted file mode 100644 index 37a9bf919c..0000000000 --- a/src/gallium/drivers/i965simple/brw_wm_state.c +++ /dev/null @@ -1,195 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "brw_wm.h" -#include "util/u_math.h" -#include "util/u_memory.h" - -/*********************************************************************** - * WM unit - fragment programs and rasterization - */ -static void upload_wm_unit(struct brw_context *brw ) -{ - struct brw_wm_unit_state wm; - unsigned max_threads; - unsigned per_thread; - - if (BRW_DEBUG & DEBUG_SINGLE_THREAD) - max_threads = 0; - else - max_threads = 31; - - - memset(&wm, 0, sizeof(wm)); - - /* CACHE_NEW_WM_PROG */ - wm.thread0.grf_reg_count = align(brw->wm.prog_data->total_grf, 16) / 16 - 1; - wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6; - wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; - wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length; - wm.thread3.const_urb_entry_read_length = brw->wm.prog_data->curb_read_length; - - wm.wm5.max_threads = max_threads; - - per_thread = align(brw->wm.prog_data->total_scratch, 1024); - assert(per_thread <= 12 * 1024); - -#if 0 - if (brw->wm.prog_data->total_scratch) { - unsigned total = per_thread * (max_threads + 1); - - /* Scratch space -- just have to make sure there is sufficient - * allocated for the active program and current number of threads. - */ - brw->wm.scratch_buffer_size = total; - if (brw->wm.scratch_buffer && - brw->wm.scratch_buffer_size > brw->wm.scratch_buffer->size) { - dri_bo_unreference(brw->wm.scratch_buffer); - brw->wm.scratch_buffer = NULL; - } - if (!brw->wm.scratch_buffer) { - brw->wm.scratch_buffer = dri_bo_alloc(intel->intelScreen->bufmgr, - "wm scratch", - brw->wm.scratch_buffer_size, - 4096, DRM_BO_FLAG_MEM_TT); - } - } - /* XXX: Scratch buffers are not implemented correectly. - * - * The scratch offset to be programmed into wm is relative to the general - * state base address. However, using dri_bo_alloc/dri_bo_emit_reloc (or - * the previous bmGenBuffers scheme), we get an offset relative to the - * start of framebuffer. Even before then, it was broken in other ways, - * so just fail for now if we hit that path. - */ - assert(brw->wm.prog_data->total_scratch == 0); -#endif - - /* CACHE_NEW_SURFACE */ - wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces; - - /* BRW_NEW_CURBE_OFFSETS */ - wm.thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2; - - wm.thread3.urb_entry_read_offset = 0; - wm.thread1.depth_coef_urb_read_offset = 1; - wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - - /* CACHE_NEW_SAMPLER */ - wm.wm4.sampler_count = (brw->wm.sampler_count + 1) / 4; - wm.wm4.sampler_state_pointer = brw->wm.sampler_gs_offset >> 5; - - /* BRW_NEW_FRAGMENT_PROGRAM */ - { - const struct brw_fragment_program *fp = brw->attribs.FragmentProgram; - - if (fp->UsesDepth) - wm.wm5.program_uses_depth = 1; /* as far as we can tell */ - - if (fp->info.writes_z) - wm.wm5.program_computes_depth = 1; - - /* BRW_NEW_ALPHA_TEST */ - if (fp->info.uses_kill || - brw->attribs.DepthStencil->alpha.enabled) - wm.wm5.program_uses_killpixel = 1; - - wm.wm5.enable_8_pix = 1; - } - - wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */ - wm.wm5.legacy_line_rast = 0; - wm.wm5.legacy_global_depth_bias = 0; - wm.wm5.early_depth_test = 1; /* never need to disable */ - wm.wm5.line_aa_region_width = 0; - wm.wm5.line_endcap_aa_region_width = 1; - - /* BRW_NEW_RASTERIZER */ - if (brw->attribs.Raster->poly_stipple_enable) - wm.wm5.polygon_stipple = 1; - -#if 0 - if (brw->attribs.Polygon->OffsetFill) { - wm.wm5.depth_offset = 1; - /* Something wierd going on with legacy_global_depth_bias, - * offset_constant, scaling and MRD. This value passes glean - * but gives some odd results elsewere (eg. the - * quad-offset-units test). - */ - wm.global_depth_offset_constant = brw->attribs.Polygon->OffsetUnits * 2; - - /* This is the only value that passes glean: - */ - wm.global_depth_offset_scale = brw->attribs.Polygon->OffsetFactor; - } -#endif - - if (brw->attribs.Raster->line_stipple_enable) { - wm.wm5.line_stipple = 1; - } - - if (BRW_DEBUG & DEBUG_STATS) - wm.wm4.stats_enable = 1; - - brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm ); - - if (brw->wm.prog_data->total_scratch) { - /* - dri_emit_reloc(brw->cache[BRW_WM_UNIT].pool->buffer, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, - (per_thread / 1024) - 1, - brw->wm.state_gs_offset + - ((char *)&wm.thread2 - (char *)&wm), - brw->wm.scratch_buffer); - */ - } else { - wm.thread2.scratch_space_base_pointer = 0; - } -} - -const struct brw_tracked_state brw_wm_unit = { - .dirty = { - .brw = (BRW_NEW_RASTERIZER | - BRW_NEW_ALPHA_TEST | - BRW_NEW_FS | - BRW_NEW_CURBE_OFFSETS), - - .cache = (CACHE_NEW_SURFACE | - CACHE_NEW_WM_PROG | - CACHE_NEW_SAMPLER) - }, - .update = upload_wm_unit -}; - diff --git a/src/gallium/drivers/i965simple/brw_wm_surface_state.c b/src/gallium/drivers/i965simple/brw_wm_surface_state.c deleted file mode 100644 index b5b9e0e702..0000000000 --- a/src/gallium/drivers/i965simple/brw_wm_surface_state.c +++ /dev/null @@ -1,305 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" - -static unsigned translate_tex_target( enum pipe_texture_target target ) -{ - switch (target) { - case PIPE_TEXTURE_1D: - return BRW_SURFACE_1D; - - case PIPE_TEXTURE_2D: - return BRW_SURFACE_2D; - - case PIPE_TEXTURE_3D: - return BRW_SURFACE_3D; - - case PIPE_TEXTURE_CUBE: - return BRW_SURFACE_CUBE; - - default: - assert(0); - return 0; - } -} - -static unsigned translate_tex_format( enum pipe_format pipe_format ) -{ - switch( pipe_format ) { - case PIPE_FORMAT_L8_UNORM: - return BRW_SURFACEFORMAT_L8_UNORM; - - case PIPE_FORMAT_I8_UNORM: - return BRW_SURFACEFORMAT_I8_UNORM; - - case PIPE_FORMAT_A8_UNORM: - return BRW_SURFACEFORMAT_A8_UNORM; - - case PIPE_FORMAT_A8L8_UNORM: - return BRW_SURFACEFORMAT_L8A8_UNORM; - - case PIPE_FORMAT_R8G8B8_UNORM: - assert(0); /* not supported for sampling */ - return BRW_SURFACEFORMAT_R8G8B8_UNORM; - - case PIPE_FORMAT_B8G8R8A8_UNORM: - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - - case PIPE_FORMAT_R8G8B8A8_UNORM: - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; - - case PIPE_FORMAT_R5G6B5_UNORM: - return BRW_SURFACEFORMAT_B5G6R5_UNORM; - - case PIPE_FORMAT_A1R5G5B5_UNORM: - return BRW_SURFACEFORMAT_B5G5R5A1_UNORM; - - case PIPE_FORMAT_A4R4G4B4_UNORM: - return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; - - case PIPE_FORMAT_YCBCR_REV: - return BRW_SURFACEFORMAT_YCRCB_NORMAL; - - case PIPE_FORMAT_YCBCR: - return BRW_SURFACEFORMAT_YCRCB_SWAPUVY; -#if 0 - case PIPE_FORMAT_RGB_FXT1: - case PIPE_FORMAT_RGBA_FXT1: - return BRW_SURFACEFORMAT_FXT1; -#endif - - case PIPE_FORMAT_Z16_UNORM: - return BRW_SURFACEFORMAT_I16_UNORM; -#if 0 - case PIPE_FORMAT_RGB_DXT1: - return BRW_SURFACEFORMAT_DXT1_RGB; - - case PIPE_FORMAT_RGBA_DXT1: - return BRW_SURFACEFORMAT_BC1_UNORM; - - case PIPE_FORMAT_RGBA_DXT3: - return BRW_SURFACEFORMAT_BC2_UNORM; - - case PIPE_FORMAT_RGBA_DXT5: - return BRW_SURFACEFORMAT_BC3_UNORM; - - case PIPE_FORMAT_SRGBA8: - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB; - case PIPE_FORMAT_SRGB_DXT1: - return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; -#endif - - default: - assert(0); - return 0; - } -} - -static unsigned brw_buffer_offset(struct brw_context *brw, - struct pipe_buffer *buffer) -{ - return brw->winsys->get_buffer_offset(brw->winsys, - buffer, - 0); -} - -static -void brw_update_texture_surface( struct brw_context *brw, - unsigned unit ) -{ - const struct brw_texture *tObj = brw->attribs.Texture[unit]; - struct brw_surface_state surf; - - memset(&surf, 0, sizeof(surf)); - - surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; - surf.ss0.surface_type = translate_tex_target(tObj->base.target); - surf.ss0.surface_format = translate_tex_format(tObj->base.format); - - /* This is ok for all textures with channel width 8bit or less: - */ -/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ - - /* Updated in emit_reloc */ - surf.ss1.base_addr = brw_buffer_offset( brw, tObj->buffer ); - - surf.ss2.mip_count = tObj->base.last_level; - surf.ss2.width = tObj->base.width[0] - 1; - surf.ss2.height = tObj->base.height[0] - 1; - - surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; - surf.ss3.tiled_surface = 0; /* always zero */ - surf.ss3.pitch = tObj->stride - 1; - surf.ss3.depth = tObj->base.depth[0] - 1; - - surf.ss4.min_lod = 0; - - if (tObj->base.target == PIPE_TEXTURE_CUBE) { - surf.ss0.cube_pos_x = 1; - surf.ss0.cube_pos_y = 1; - surf.ss0.cube_pos_z = 1; - surf.ss0.cube_neg_x = 1; - surf.ss0.cube_neg_y = 1; - surf.ss0.cube_neg_z = 1; - } - - brw->wm.bind.surf_ss_offset[unit + 1] = - brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); -} - - - -#define OFFSET(TYPE, FIELD) ( (unsigned)&(((TYPE *)0)->FIELD) ) - - -static void upload_wm_surfaces(struct brw_context *brw ) -{ - unsigned i; - - { - struct brw_surface_state surf; - - /* BRW_NEW_FRAMEBUFFER - */ - struct pipe_surface *pipe_surface = brw->attribs.FrameBuffer.cbufs[0];/*fixme*/ - struct brw_texture *tex = (struct brw_texture *)pipe_surface->texture; - - memset(&surf, 0, sizeof(surf)); - - if (pipe_surface != NULL) { - if (pipe_surface->block.size == 4) - surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - else - surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; - - surf.ss0.surface_type = BRW_SURFACE_2D; - - surf.ss1.base_addr = brw_buffer_offset( brw, tex->buffer ); - - surf.ss2.width = pipe_surface->width - 1; - surf.ss2.height = pipe_surface->height - 1; - surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; - surf.ss3.tiled_surface = 0; - surf.ss3.pitch = pipe_surface->stride - 1; - } else { - surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - surf.ss0.surface_type = BRW_SURFACE_NULL; - } - - /* BRW_NEW_BLEND */ - surf.ss0.color_blend = (!brw->attribs.Blend->logicop_enable && - brw->attribs.Blend->blend_enable); - - - surf.ss0.writedisable_red = !(brw->attribs.Blend->colormask & PIPE_MASK_R); - surf.ss0.writedisable_green = !(brw->attribs.Blend->colormask & PIPE_MASK_G); - surf.ss0.writedisable_blue = !(brw->attribs.Blend->colormask & PIPE_MASK_B); - surf.ss0.writedisable_alpha = !(brw->attribs.Blend->colormask & PIPE_MASK_A); - - - - - brw->wm.bind.surf_ss_offset[0] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); - - brw->wm.nr_surfaces = 1; - } - - - /* BRW_NEW_TEXTURE - */ - for (i = 0; i < brw->num_textures && i < brw->num_samplers; i++) { - const struct brw_texture *texUnit = brw->attribs.Texture[i]; - - if (texUnit && - texUnit->base.reference.count/*(texUnit->reference.count > 0) == really used */) { - - brw_update_texture_surface(brw, i); - - brw->wm.nr_surfaces = i+2; - } - else { - brw->wm.bind.surf_ss_offset[i+1] = 0; - } - } - - brw->wm.bind_ss_offset = brw_cache_data( &brw->cache[BRW_SS_SURF_BIND], - &brw->wm.bind ); -} - - -/* KW: Will find a different way to acheive this, see for example the - * state caches with relocs in the i915 swz driver. - */ -#if 0 -static void emit_reloc_wm_surfaces(struct brw_context *brw) -{ - int unit; - - if (brw->state.draw_region != NULL) { - /* Emit framebuffer relocation */ - dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE), - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, - 0, - brw->wm.bind.surf_ss_offset[0] + - offsetof(struct brw_surface_state, ss1), - brw->state.draw_region->buffer); - } - - /* Emit relocations for texture buffers */ - for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { - struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - struct intel_texture_object *intelObj = intel_texture_object(tObj); - - if (texUnit->_ReallyEnabled && intelObj->mt != NULL) { - dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE), - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, - 0, - brw->wm.bind.surf_ss_offset[unit + 1] + - offsetof(struct brw_surface_state, ss1), - intelObj->mt->region->buffer); - } - } -} -#endif - -const struct brw_tracked_state brw_wm_surfaces = { - .dirty = { - .brw = (BRW_NEW_FRAMEBUFFER | - BRW_NEW_BLEND | - BRW_NEW_TEXTURE), - .cache = 0 - }, - .update = upload_wm_surfaces, -}; diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript index 467d595d33..14d4ca7c33 100644 --- a/src/gallium/winsys/xlib/SConscript +++ b/src/gallium/winsys/xlib/SConscript @@ -36,15 +36,6 @@ if env['platform'] == 'linux' \ env.Tool('udis86') sources += ['xlib_llvmpipe.c'] drivers += [llvmpipe] - - if 'i965simple' in env['drivers']: - env.Append(CPPDEFINES = 'GALLIUM_I965SIMPLE') - sources += [ - 'xlib_brw_aub.c', - 'xlib_brw_context.c', - 'xlib_brw_screen.c', - ] - drivers += [i965simple] if 'cell' in env['drivers']: env.Append(CPPDEFINES = 'GALLIUM_CELL') diff --git a/src/gallium/winsys/xlib/xlib.c b/src/gallium/winsys/xlib/xlib.c index 4b71cf7ec3..163cc8863c 100644 --- a/src/gallium/winsys/xlib/xlib.c +++ b/src/gallium/winsys/xlib/xlib.c @@ -43,7 +43,6 @@ enum mode { MODE_TRACE, - MODE_BRW, MODE_CELL, MODE_LLVMPIPE, MODE_SOFTPIPE @@ -55,9 +54,6 @@ static enum mode get_mode() if (getenv("XMESA_TRACE")) return MODE_TRACE; - if (getenv("XMESA_BRW")) - return MODE_BRW; - #ifdef GALLIUM_CELL if (!getenv("GALLIUM_NOCELL")) return MODE_CELL; @@ -80,11 +76,6 @@ static void _init( void ) case MODE_TRACE: #if defined(GALLIUM_TRACE) && defined(GALLIUM_SOFTPIPE) xmesa_set_driver( &xlib_trace_driver ); -#endif - break; - case MODE_BRW: -#if defined(GALLIUM_BRW) - xmesa_set_driver( &xlib_brw_driver ); #endif break; case MODE_CELL: diff --git a/src/gallium/winsys/xlib/xlib.h b/src/gallium/winsys/xlib/xlib.h index 347d45f4d6..f0855035f7 100644 --- a/src/gallium/winsys/xlib/xlib.h +++ b/src/gallium/winsys/xlib/xlib.h @@ -9,7 +9,6 @@ extern struct xm_driver xlib_trace_driver; extern struct xm_driver xlib_softpipe_driver; extern struct xm_driver xlib_llvmpipe_driver; extern struct xm_driver xlib_cell_driver; -extern struct xm_driver xlib_brw_driver; #endif diff --git a/src/gallium/winsys/xlib/xlib_brw.h b/src/gallium/winsys/xlib/xlib_brw.h deleted file mode 100644 index be2dd147db..0000000000 --- a/src/gallium/winsys/xlib/xlib_brw.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef XLIB_BRW_H -#define XLIB_BRW_H - -struct pipe_winsys; -struct pipe_buffer; -struct pipe_surface; -struct xmesa_buffer; - -unsigned xlib_brw_get_buffer_offset( struct pipe_winsys *pws, - struct pipe_buffer *buf, - unsigned access_flags ); - -void xlib_brw_buffer_subdata_typed( struct pipe_winsys *pws, - struct pipe_buffer *buf, - unsigned long offset, - unsigned long size, - const void *data, - unsigned data_type ); - - - -void xlib_brw_commands_aub(struct pipe_winsys *winsys, - unsigned *cmds, - unsigned nr_dwords); - -struct pipe_context * -xlib_create_brw_context( struct pipe_screen *screen, - void *unused ); - -#endif diff --git a/src/gallium/winsys/xlib/xlib_brw_aub.c b/src/gallium/winsys/xlib/xlib_brw_aub.c deleted file mode 100644 index b6bd849ef2..0000000000 --- a/src/gallium/winsys/xlib/xlib_brw_aub.c +++ /dev/null @@ -1,399 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include -#include -#include "xlib_brw_aub.h" -#include "pipe/p_context.h" -#include "pipe/p_state.h" -#include "util/u_debug.h" -#include "util/u_memory.h" -#include "softpipe/sp_texture.h" - - -struct brw_aubfile { - FILE *file; - unsigned next_free_page; -}; - - -extern char *__progname; - - -struct aub_file_header { - unsigned int instruction_type; - unsigned int pad0:16; - unsigned int minor:8; - unsigned int major:8; - unsigned char application[8*4]; - unsigned int day:8; - unsigned int month:8; - unsigned int year:16; - unsigned int timezone:8; - unsigned int second:8; - unsigned int minute:8; - unsigned int hour:8; - unsigned int comment_length:16; - unsigned int pad1:16; -}; - -struct aub_block_header { - unsigned int instruction_type; - unsigned int operation:8; - unsigned int type:8; - unsigned int address_space:8; - unsigned int pad0:8; - unsigned int general_state_type:8; - unsigned int surface_state_type:8; - unsigned int pad1:16; - unsigned int address; - unsigned int length; -}; - -struct aub_dump_bmp { - unsigned int instruction_type; - unsigned int xmin:16; - unsigned int ymin:16; - unsigned int pitch:16; - unsigned int bpp:8; - unsigned int format:8; - unsigned int xsize:16; - unsigned int ysize:16; - unsigned int addr; - unsigned int unknown; -}; - -enum bh_operation { - BH_COMMENT, - BH_DATA_WRITE, - BH_COMMAND_WRITE, - BH_MMI0_WRITE32, - BH_END_SCENE, - BH_CONFIG_MEMORY_MAP, - BH_MAX_OPERATION -}; - -enum command_write_type { - CW_HWB_RING = 1, - CW_PRIMARY_RING_A, - CW_PRIMARY_RING_B, /* XXX - disagreement with listaub! */ - CW_PRIMARY_RING_C, - CW_MAX_TYPE -}; - -enum memory_map_type { - MM_DEFAULT, - MM_DYNAMIC, - MM_MAX_TYPE -}; - -enum address_space { - ADDR_GTT, - ADDR_LOCAL, - ADDR_MAIN, - ADDR_MAX -}; - - -#define AUB_FILE_HEADER 0xe085000b -#define AUB_BLOCK_HEADER 0xe0c10003 -#define AUB_DUMP_BMP 0xe09e0004 - -/* Registers to control page table - */ -#define PGETBL_CTL 0x2020 -#define PGETBL_ENABLED 0x1 - -#define NR_GTT_ENTRIES 65536 /* 256 mb */ - -#define FAIL \ -do { \ - fprintf(stderr, "failed to write aub data at %s/%d\n", __FUNCTION__, __LINE__); \ - exit(1); \ -} while (0) - - -/* Emit the headers at the top of each aubfile. Initialize the GTT. - */ -static void init_aubfile( FILE *aub_file ) -{ - struct aub_file_header fh; - struct aub_block_header bh; - unsigned int data; - - static int nr; - - nr++; - - /* Emit the aub header: - */ - memset(&fh, 0, sizeof(fh)); - - fh.instruction_type = AUB_FILE_HEADER; - fh.minor = 0x0; - fh.major = 0x7; - memcpy(fh.application, __progname, sizeof(fh.application)); - fh.day = (nr>>24) & 0xff; - fh.month = 0x0; - fh.year = 0x0; - fh.timezone = 0x0; - fh.second = nr & 0xff; - fh.minute = (nr>>8) & 0xff; - fh.hour = (nr>>16) & 0xff; - fh.comment_length = 0x0; - - if (fwrite(&fh, sizeof(fh), 1, aub_file) < 0) - FAIL; - - /* Setup the GTT starting at main memory address zero (!): - */ - memset(&bh, 0, sizeof(bh)); - - bh.instruction_type = AUB_BLOCK_HEADER; - bh.operation = BH_MMI0_WRITE32; - bh.type = 0x0; - bh.address_space = ADDR_GTT; /* ??? */ - bh.general_state_type = 0x0; - bh.surface_state_type = 0x0; - bh.address = PGETBL_CTL; - bh.length = 0x4; - - if (fwrite(&bh, sizeof(bh), 1, aub_file) < 0) - FAIL; - - data = 0x0 | PGETBL_ENABLED; - - if (fwrite(&data, sizeof(data), 1, aub_file) < 0) - FAIL; -} - - -static void init_aub_gtt( struct brw_aubfile *aubfile, - unsigned start_offset, - unsigned size ) -{ - FILE *aub_file = aubfile->file; - struct aub_block_header bh; - unsigned int i; - - assert(start_offset + size < NR_GTT_ENTRIES * 4096); - - - memset(&bh, 0, sizeof(bh)); - - bh.instruction_type = AUB_BLOCK_HEADER; - bh.operation = BH_DATA_WRITE; - bh.type = 0x0; - bh.address_space = ADDR_MAIN; - bh.general_state_type = 0x0; - bh.surface_state_type = 0x0; - bh.address = start_offset / 4096 * 4; - bh.length = size / 4096 * 4; - - if (fwrite(&bh, sizeof(bh), 1, aub_file) < 0) - FAIL; - - for (i = 0; i < size / 4096; i++) { - unsigned data = aubfile->next_free_page | 1; - - aubfile->next_free_page += 4096; - - if (fwrite(&data, sizeof(data), 1, aub_file) < 0) - FAIL; - } - -} - -static void write_block_header( FILE *aub_file, - struct aub_block_header *bh, - const unsigned *data, - unsigned sz ) -{ - sz = (sz + 3) & ~3; - - if (fwrite(bh, sizeof(*bh), 1, aub_file) < 0) - FAIL; - - if (fwrite(data, sz, 1, aub_file) < 0) - FAIL; - - fflush(aub_file); -} - - -static void write_dump_bmp( FILE *aub_file, - struct aub_dump_bmp *db ) -{ - if (fwrite(db, sizeof(*db), 1, aub_file) < 0) - FAIL; - - fflush(aub_file); -} - - - -void brw_aub_gtt_data( struct brw_aubfile *aubfile, - unsigned offset, - const void *data, - unsigned sz, - unsigned type, - unsigned state_type ) -{ - struct aub_block_header bh; - - bh.instruction_type = AUB_BLOCK_HEADER; - bh.operation = BH_DATA_WRITE; - bh.type = type; - bh.address_space = ADDR_GTT; - bh.pad0 = 0; - - if (type == DW_GENERAL_STATE) { - bh.general_state_type = state_type; - bh.surface_state_type = 0; - } - else { - bh.general_state_type = 0; - bh.surface_state_type = state_type; - } - - bh.pad1 = 0; - bh.address = offset; - bh.length = sz; - - write_block_header(aubfile->file, &bh, data, sz); -} - - - -void brw_aub_gtt_cmds( struct brw_aubfile *aubfile, - unsigned offset, - const void *data, - unsigned sz ) -{ - struct aub_block_header bh; - unsigned type = CW_PRIMARY_RING_A; - - - bh.instruction_type = AUB_BLOCK_HEADER; - bh.operation = BH_COMMAND_WRITE; - bh.type = type; - bh.address_space = ADDR_GTT; - bh.pad0 = 0; - bh.general_state_type = 0; - bh.surface_state_type = 0; - bh.pad1 = 0; - bh.address = offset; - bh.length = sz; - - write_block_header(aubfile->file, &bh, data, sz); -} - -void brw_aub_dump_bmp( struct brw_aubfile *aubfile, - struct pipe_surface *surface, - unsigned gtt_offset ) -{ - struct aub_dump_bmp db; - unsigned format; - - assert(surface->texture->block.width == 1); - assert(surface->texture->block.height == 1); - - if (surface->texture->block.size == 4) - format = 0x7; - else - format = 0x3; - - db.instruction_type = AUB_DUMP_BMP; - db.xmin = 0; - db.ymin = 0; - db.format = format; - db.bpp = surface->texture->block.size * 8; - db.pitch = softpipe_texture(surface->texture)->stride[surface->level] / - surface->texture->block.size; - db.xsize = surface->width; - db.ysize = surface->height; - db.addr = gtt_offset; - db.unknown = /* surface->tiled ? 0x4 : */ 0x0; - - write_dump_bmp(aubfile->file, &db); -} - - - -struct brw_aubfile *brw_aubfile_create( void ) -{ - struct brw_aubfile *aubfile = CALLOC_STRUCT(brw_aubfile); - char filename[80]; - int val; - static int i = 0; - - i++; - - if (getenv("INTEL_AUBFILE")) { - val = snprintf(filename, sizeof(filename), "%s%d.aub", getenv("INTEL_AUBFILE"), i%4); - debug_printf("--> Aub file: %s\n", filename); - aubfile->file = fopen(filename, "w"); - } - else { - val = snprintf(filename, sizeof(filename), "%s.aub", __progname); - if (val < 0 || val > sizeof(filename)) - strcpy(filename, "default.aub"); - - debug_printf("--> Aub file: %s\n", filename); - aubfile->file = fopen(filename, "w"); - } - - if (!aubfile->file) { - debug_printf("couldn't open aubfile\n"); - exit(1); - } - - init_aubfile(aubfile->file); - - /* The GTT is located starting address zero in main memory. Pages - * to populate the gtt start after this point. - */ - aubfile->next_free_page = (NR_GTT_ENTRIES * 4 + 4095) & ~4095; - - /* More or less correspond with all the agp regions mapped by the - * driver: - */ - init_aub_gtt(aubfile, 0, 4096*4); - init_aub_gtt(aubfile, AUB_BUF_START, AUB_BUF_SIZE); - - return aubfile; -} - -void brw_aub_destroy( struct brw_aubfile *aubfile ) -{ - fclose(aubfile->file); - FREE(aubfile); -} diff --git a/src/gallium/winsys/xlib/xlib_brw_aub.h b/src/gallium/winsys/xlib/xlib_brw_aub.h deleted file mode 100644 index f5c60c7be2..0000000000 --- a/src/gallium/winsys/xlib/xlib_brw_aub.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#ifndef BRW_AUB_H -#define BRW_AUB_H - -/* We set up this region, buffers may be allocated here: - */ -#define AUB_BUF_START (4096*4) -#define AUB_BUF_SIZE (8*1024*1024) - -struct intel_context; -struct pipe_surface; - -struct brw_aubfile *brw_aubfile_create( void ); - -void brw_aub_destroy( struct brw_aubfile *aubfile ); - -void brw_aub_gtt_data( struct brw_aubfile *aubfile, - unsigned offset, - const void *data, - unsigned sz, - unsigned type, - unsigned state_type ); - -void brw_aub_gtt_cmds( struct brw_aubfile *aubfile, - unsigned offset, - const void *data, - unsigned sz ); - -void brw_aub_dump_bmp( struct brw_aubfile *aubfile, - struct pipe_surface *surface, - unsigned gtt_offset ); - - -enum data_write_type { - DW_NOTYPE, - DW_BATCH_BUFFER, - DW_BIN_BUFFER, - DW_BIN_POINTER_LIST, - DW_SLOW_STATE_BUFFER, - DW_VERTEX_BUFFER, - DW_2D_MAP, - DW_CUBE_MAP, - DW_INDIRECT_STATE_BUFFER, - DW_VOLUME_MAP, - DW_1D_MAP, - DW_CONSTANT_BUFFER, - DW_CONSTANT_URB_ENTRY, - DW_INDEX_BUFFER, - DW_GENERAL_STATE, - DW_SURFACE_STATE, - DW_MEDIA_OBJECT_INDIRECT_DATA, - DW_MAX_TYPE -}; - -enum data_write_general_state_type { - DWGS_NOTYPE, - DWGS_VERTEX_SHADER_STATE, - DWGS_GEOMETRY_SHADER_STATE , - DWGS_CLIPPER_STATE, - DWGS_STRIPS_FANS_STATE, - DWGS_WINDOWER_IZ_STATE, - DWGS_COLOR_CALC_STATE, - DWGS_CLIPPER_VIEWPORT_STATE, /* was 0x7 */ - DWGS_STRIPS_FANS_VIEWPORT_STATE, - DWGS_COLOR_CALC_VIEWPORT_STATE, /* was 0x9 */ - DWGS_SAMPLER_STATE, - DWGS_KERNEL_INSTRUCTIONS, - DWGS_SCRATCH_SPACE, - DWGS_SAMPLER_DEFAULT_COLOR, - DWGS_INTERFACE_DESCRIPTOR, - DWGS_VLD_STATE, - DWGS_VFE_STATE, - DWGS_MAX_TYPE -}; - -enum data_write_surface_state_type { - DWSS_NOTYPE, - DWSS_BINDING_TABLE_STATE, - DWSS_SURFACE_STATE, - DWSS_MAX_TYPE -}; - - -#endif diff --git a/src/gallium/winsys/xlib/xlib_brw_context.c b/src/gallium/winsys/xlib/xlib_brw_context.c deleted file mode 100644 index 09599507f4..0000000000 --- a/src/gallium/winsys/xlib/xlib_brw_context.c +++ /dev/null @@ -1,209 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * - **************************************************************************/ - -/* - * Authors: - * Keith Whitwell - * Brian Paul - */ - - -//#include "glxheader.h" -//#include "xmesaP.h" - -#include "pipe/internal/p_winsys_screen.h" -#include "pipe/p_inlines.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "i965simple/brw_winsys.h" -#include "xlib_brw_aub.h" -#include "xlib_brw.h" - - - - -#define XBCWS_BATCHBUFFER_SIZE 1024 - - -/* The backend to the brw driver (ie struct brw_winsys) is actually a - * per-context entity. - */ -struct xlib_brw_context_winsys { - struct brw_winsys brw_context_winsys; /**< batch buffer funcs */ - struct aub_context *aub; - - struct pipe_winsys *pipe_winsys; - - unsigned batch_data[XBCWS_BATCHBUFFER_SIZE]; - unsigned batch_nr; - unsigned batch_size; - unsigned batch_alloc; -}; - - -/* Turn a brw_winsys into an xlib_brw_context_winsys: - */ -static inline struct xlib_brw_context_winsys * -xlib_brw_context_winsys( struct brw_winsys *sws ) -{ - return (struct xlib_brw_context_winsys *)sws; -} - - -/* Simple batchbuffer interface: - */ - -static unsigned *xbcws_batch_start( struct brw_winsys *sws, - unsigned dwords, - unsigned relocs ) -{ - struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); - - if (xbcws->batch_size < xbcws->batch_nr + dwords) - return NULL; - - xbcws->batch_alloc = xbcws->batch_nr + dwords; - return (void *)1; /* not a valid pointer! */ -} - -static void xbcws_batch_dword( struct brw_winsys *sws, - unsigned dword ) -{ - struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); - - assert(xbcws->batch_nr < xbcws->batch_alloc); - xbcws->batch_data[xbcws->batch_nr++] = dword; -} - -static void xbcws_batch_reloc( struct brw_winsys *sws, - struct pipe_buffer *buf, - unsigned access_flags, - unsigned delta ) -{ - struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); - - assert(xbcws->batch_nr < xbcws->batch_alloc); - xbcws->batch_data[xbcws->batch_nr++] = - ( xlib_brw_get_buffer_offset( NULL, buf, access_flags ) + - delta ); -} - -static void xbcws_batch_end( struct brw_winsys *sws ) -{ - struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); - - assert(xbcws->batch_nr <= xbcws->batch_alloc); - xbcws->batch_alloc = 0; -} - -static void xbcws_batch_flush( struct brw_winsys *sws, - struct pipe_fence_handle **fence ) -{ - struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); - assert(xbcws->batch_nr <= xbcws->batch_size); - - if (xbcws->batch_nr) { - xlib_brw_commands_aub( xbcws->pipe_winsys, - xbcws->batch_data, - xbcws->batch_nr ); - } - - xbcws->batch_nr = 0; -} - - - -/* Really a per-device function, just pass through: - */ -static unsigned xbcws_get_buffer_offset( struct brw_winsys *sws, - struct pipe_buffer *buf, - unsigned access_flags ) -{ - struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); - - return xlib_brw_get_buffer_offset( xbcws->pipe_winsys, - buf, - access_flags ); -} - - -/* Really a per-device function, just pass through: - */ -static void xbcws_buffer_subdata_typed( struct brw_winsys *sws, - struct pipe_buffer *buf, - unsigned long offset, - unsigned long size, - const void *data, - unsigned data_type ) -{ - struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); - - xlib_brw_buffer_subdata_typed( xbcws->pipe_winsys, - buf, - offset, - size, - data, - data_type ); -} - - -/** - * Create i965 hardware rendering context, but plugged into a - * dump-to-aubfile backend. - */ -struct pipe_context * -xlib_create_brw_context( struct pipe_screen *screen, - void *unused ) -{ - struct xlib_brw_context_winsys *xbcws = CALLOC_STRUCT( xlib_brw_context_winsys ); - - /* Fill in this struct with callbacks that i965simple will need to - * communicate with the window system, buffer manager, etc. - */ - xbcws->brw_context_winsys.batch_start = xbcws_batch_start; - xbcws->brw_context_winsys.batch_dword = xbcws_batch_dword; - xbcws->brw_context_winsys.batch_reloc = xbcws_batch_reloc; - xbcws->brw_context_winsys.batch_end = xbcws_batch_end; - xbcws->brw_context_winsys.batch_flush = xbcws_batch_flush; - xbcws->brw_context_winsys.buffer_subdata_typed = xbcws_buffer_subdata_typed; - xbcws->brw_context_winsys.get_buffer_offset = xbcws_get_buffer_offset; - - xbcws->pipe_winsys = screen->winsys; /* redundant */ - - xbcws->batch_size = XBCWS_BATCHBUFFER_SIZE; - - /* Create the i965simple context: - */ -#ifdef GALLIUM_CELL - return NULL; -#else - return brw_create( screen, - &xbcws->brw_context_winsys, - 0 ); -#endif -} diff --git a/src/gallium/winsys/xlib/xlib_brw_screen.c b/src/gallium/winsys/xlib/xlib_brw_screen.c deleted file mode 100644 index ef545796f3..0000000000 --- a/src/gallium/winsys/xlib/xlib_brw_screen.c +++ /dev/null @@ -1,469 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * - **************************************************************************/ - -/* - * Authors: - * Keith Whitwell - * Brian Paul - */ - - -//#include "state_trackers/xlib/glxheader.h" -//#include "state_trackers/xlib/xmesaP.h" - -#include "pipe/internal/p_winsys_screen.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "i965simple/brw_winsys.h" -#include "i965simple/brw_screen.h" -#include "i965simple/brw_context.h" - - -#include "xlib_brw_aub.h" -#include "xlib_brw.h" -#include "xlib.h" - -static struct pipe_buffer * -buffer_from_surface(struct pipe_surface *surface) -{ - struct brw_texture *texture = (struct brw_texture *)surface; - return texture->buffer; -} - -struct aub_buffer { - struct pipe_reference reference; - char *data; - unsigned offset; - unsigned size; - unsigned map_count; - boolean dump_on_unmap; -}; - - - -struct aub_pipe_winsys { - struct pipe_winsys winsys; - - struct brw_aubfile *aubfile; - - /* This is simple, isn't it: - */ - char *pool; - unsigned size; - unsigned used; -}; - - -/* Turn a pipe winsys into an aub/pipe winsys: - */ -static inline struct aub_pipe_winsys * -aub_pipe_winsys( struct pipe_winsys *winsys ) -{ - return (struct aub_pipe_winsys *)winsys; -} - - - -static INLINE struct aub_buffer * -aub_bo( struct pipe_buffer *bo ) -{ - return (struct aub_buffer *)bo; -} - -static INLINE struct pipe_buffer * -pipe_bo( struct aub_buffer *bo ) -{ - return (struct pipe_buffer *)bo; -} - - - - -static void *aub_buffer_map(struct pipe_winsys *winsys, - struct pipe_buffer *buf, - unsigned flags ) -{ - struct aub_buffer *sbo = aub_bo(buf); - - assert(sbo->data); - - if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) - sbo->dump_on_unmap = 1; - - sbo->map_count++; - return sbo->data; -} - -static void aub_buffer_unmap(struct pipe_winsys *winsys, - struct pipe_buffer *buf) -{ - struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys); - struct aub_buffer *sbo = aub_bo(buf); - - sbo->map_count--; - - if (sbo->map_count == 0 && - sbo->dump_on_unmap) { - - sbo->dump_on_unmap = 0; - - brw_aub_gtt_data( iws->aubfile, - sbo->offset, - sbo->data, - sbo->size, - 0, - 0); - } -} - - -static void -aub_buffer_destroy(struct pipe_buffer *buf) -{ - free(buf); -} - - - -void xlib_brw_commands_aub(struct pipe_winsys *winsys, - unsigned *cmds, - unsigned nr_dwords) -{ - struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys); - unsigned size = nr_dwords * 4; - - assert(iws->used + size < iws->size); - - brw_aub_gtt_cmds( iws->aubfile, - AUB_BUF_START + iws->used, - cmds, - nr_dwords * sizeof(int) ); - - iws->used += align(size, 4096); -} - - -/* XXX: fix me: - */ -static struct aub_pipe_winsys *global_winsys = NULL; - - - - -/* Pipe has no concept of pools. We choose the tex/region pool - * for all buffers. - */ -static struct pipe_buffer * -aub_buffer_create(struct pipe_winsys *winsys, - unsigned alignment, - unsigned usage, - unsigned size) -{ - struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys); - struct aub_buffer *sbo = CALLOC_STRUCT(aub_buffer); - - pipe_reference_init(&sbo->reference, 1); - - /* Could reuse buffers that are not referenced in current - * batchbuffer. Can't do that atm, so always reallocate: - */ - assert(iws->used + size < iws->size); - sbo->data = iws->pool + iws->used; - sbo->offset = AUB_BUF_START + iws->used; - iws->used += align(size, 4096); - - sbo->size = size; - - return pipe_bo(sbo); -} - - -static struct pipe_buffer * -aub_user_buffer_create(struct pipe_winsys *winsys, void *ptr, unsigned bytes) -{ - struct aub_buffer *sbo; - - /* Lets hope this is meant for upload, not as a result! - */ - sbo = aub_bo(aub_buffer_create( winsys, 0, 0, 0 )); - - sbo->data = ptr; - sbo->size = bytes; - - return pipe_bo(sbo); -} - - -/* The state tracker (should!) keep track of whether the fake - * frontbuffer has been touched by any rendering since the last time - * we copied its contents to the real frontbuffer. Our task is easy: - */ -static void -aub_flush_frontbuffer( struct pipe_winsys *winsys, - struct pipe_surface *surface, - void *context_private) -{ -// struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys); - brw_aub_dump_bmp( global_winsys->aubfile, - surface, - aub_bo(buffer_from_surface(surface))->offset ); -} - - -/** - * Round n up to next multiple. - */ -static INLINE unsigned -round_up(unsigned n, unsigned multiple) -{ - return (n + multiple - 1) & ~(multiple - 1); -} - -static struct pipe_buffer * -aub_i915_surface_buffer_create(struct pipe_winsys *winsys, - unsigned width, unsigned height, - enum pipe_format format, - unsigned usage, - unsigned tex_usage, - unsigned *stride) -{ - const unsigned alignment = 64; - struct pipe_format_block block; - unsigned nblocksx, nblocksy; - - pf_get_block(format, &block); - nblocksx = pf_get_nblocksx(&block, width); - nblocksy = pf_get_nblocksy(&block, height); - *stride = round_up(nblocksx * block.size, alignment); - - return winsys->buffer_create(winsys, alignment, - usage, - *stride * nblocksy); -} - - -static const char * -aub_get_name( struct pipe_winsys *winsys ) -{ - return "Aub/xlib"; -} - -static void -xlib_brw_destroy_pipe_winsys_aub( struct pipe_winsys *winsys ) - -{ - struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys); - brw_aub_destroy(iws->aubfile); - free(iws->pool); - free(iws); -} - - - -static struct pipe_winsys * -xlib_create_brw_winsys( void ) -{ - struct aub_pipe_winsys *iws = CALLOC_STRUCT( aub_pipe_winsys ); - - /* Fill in this struct with callbacks that pipe will need to - * communicate with the window system, buffer manager, etc. - * - * Pipe would be happy with a malloc based memory manager, but - * the SwapBuffers implementation in this winsys driver requires - * that rendering be done to an appropriate _DriBufferObject. - */ - iws->winsys.buffer_create = aub_buffer_create; - iws->winsys.user_buffer_create = aub_user_buffer_create; - iws->winsys.buffer_map = aub_buffer_map; - iws->winsys.buffer_unmap = aub_buffer_unmap; - iws->winsys.buffer_destroy = aub_buffer_destroy; - iws->winsys.flush_frontbuffer = aub_flush_frontbuffer; - iws->winsys.get_name = aub_get_name; - iws->winsys.destroy = xlib_brw_destroy_pipe_winsys_aub; - - iws->winsys.surface_buffer_create = aub_i915_surface_buffer_create; - - iws->aubfile = brw_aubfile_create(); - iws->size = AUB_BUF_SIZE; - iws->pool = malloc(AUB_BUF_SIZE); - - /* HACK: static copy of this pointer: - */ - assert(global_winsys == NULL); - global_winsys = iws; - - return &iws->winsys; -} - - -static struct pipe_screen * -xlib_create_brw_screen( void ) -{ -#ifndef GALLIUM_CELL - struct pipe_winsys *winsys; - struct pipe_screen *screen; - - winsys = xlib_create_brw_winsys(); - if (winsys == NULL) - return NULL; - - screen = brw_create_screen(winsys, 0/* XXX pci_id */); - if (screen == NULL) - goto fail; - - return screen; - -fail: - if (winsys) - winsys->destroy( winsys ); - -#endif - return NULL; -} - - -/* These per-screen functions are acually made available to the driver - * through the brw_winsys (per-context) entity. - */ -unsigned xlib_brw_get_buffer_offset( struct pipe_winsys *pws, - struct pipe_buffer *buf, - unsigned access_flags ) -{ - return aub_bo(buf)->offset; -} - -void xlib_brw_buffer_subdata_typed( struct pipe_winsys *pws, - struct pipe_buffer *buf, - unsigned long offset, - unsigned long size, - const void *data, - unsigned data_type ) -{ - unsigned aub_type = DW_GENERAL_STATE; - unsigned aub_sub_type = 0; - - switch (data_type) { - case BRW_CC_VP: - aub_sub_type = DWGS_COLOR_CALC_VIEWPORT_STATE; - break; - case BRW_CC_UNIT: - aub_sub_type = DWGS_COLOR_CALC_STATE; - break; - case BRW_WM_PROG: - aub_sub_type = DWGS_KERNEL_INSTRUCTIONS; - break; - case BRW_SAMPLER_DEFAULT_COLOR: - aub_sub_type = DWGS_SAMPLER_DEFAULT_COLOR; - break; - case BRW_SAMPLER: - aub_sub_type = DWGS_SAMPLER_STATE; - break; - case BRW_WM_UNIT: - aub_sub_type = DWGS_WINDOWER_IZ_STATE; - break; - case BRW_SF_PROG: - aub_sub_type = DWGS_KERNEL_INSTRUCTIONS; - break; - case BRW_SF_VP: - aub_sub_type = DWGS_STRIPS_FANS_VIEWPORT_STATE; - break; - case BRW_SF_UNIT: - aub_sub_type = DWGS_STRIPS_FANS_STATE; - break; - case BRW_VS_UNIT: - aub_sub_type = DWGS_VERTEX_SHADER_STATE; - break; - case BRW_VS_PROG: - aub_sub_type = DWGS_KERNEL_INSTRUCTIONS; - break; - case BRW_GS_UNIT: - aub_sub_type = DWGS_GEOMETRY_SHADER_STATE; - break; - case BRW_GS_PROG: - aub_sub_type = DWGS_KERNEL_INSTRUCTIONS; - break; - case BRW_CLIP_VP: - aub_sub_type = DWGS_CLIPPER_VIEWPORT_STATE; - break; - case BRW_CLIP_UNIT: - aub_sub_type = DWGS_CLIPPER_STATE; - break; - case BRW_CLIP_PROG: - aub_sub_type = DWGS_KERNEL_INSTRUCTIONS; - break; - case BRW_SS_SURFACE: - aub_type = DW_SURFACE_STATE; - aub_sub_type = DWSS_SURFACE_STATE; - break; - case BRW_SS_SURF_BIND: - aub_type = DW_SURFACE_STATE; - aub_sub_type = DWSS_BINDING_TABLE_STATE; - break; - case BRW_CONSTANT_BUFFER: - aub_type = DW_CONSTANT_URB_ENTRY; - aub_sub_type = 0; - break; - - default: - assert(0); - break; - } - - { - struct aub_pipe_winsys *iws = aub_pipe_winsys(pws); - struct aub_buffer *sbo = aub_bo(buf); - - assert(sbo->size > offset + size); - memcpy(sbo->data + offset, data, size); - - brw_aub_gtt_data( iws->aubfile, - sbo->offset + offset, - sbo->data + offset, - size, - aub_type, - aub_sub_type ); - } -} - - -static void -xlib_brw_display_surface(struct xmesa_buffer *b, - struct pipe_surface *surf) -{ - brw_aub_dump_bmp( global_winsys->aubfile, - surf, - aub_bo(buffer_from_surface(surf))->offset ); -} - - -struct xm_driver xlib_brw_driver = -{ - .create_pipe_screen = xlib_create_brw_screen, - .create_pipe_context = xlib_create_brw_context, - .display_surface = xlib_brw_display_surface, -}; -- cgit v1.2.3 From f00da2a3ff59c1a7104ac25a1c6eba5a6050ad68 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Tue, 29 Sep 2009 16:07:11 -0700 Subject: i915g: Drop the simple sufix None of the other driver have a silly sufix, so just drop it. Nothing new added in this commit or any other commit but this is better marketing. --- SConstruct | 6 +- configs/default | 2 +- configure.ac | 2 +- src/gallium/drivers/i915/Makefile | 28 + src/gallium/drivers/i915/SConscript | 30 + src/gallium/drivers/i915/i915_batch.h | 47 + src/gallium/drivers/i915/i915_blit.c | 151 +++ src/gallium/drivers/i915/i915_blit.h | 55 + src/gallium/drivers/i915/i915_buffer.c | 136 +++ src/gallium/drivers/i915/i915_buffer.h | 31 + src/gallium/drivers/i915/i915_clear.c | 48 + src/gallium/drivers/i915/i915_context.c | 244 ++++ src/gallium/drivers/i915/i915_context.h | 350 ++++++ src/gallium/drivers/i915/i915_debug.c | 898 +++++++++++++++ src/gallium/drivers/i915/i915_debug.h | 114 ++ src/gallium/drivers/i915/i915_debug_fp.c | 363 ++++++ src/gallium/drivers/i915/i915_flush.c | 86 ++ src/gallium/drivers/i915/i915_fpc.h | 207 ++++ src/gallium/drivers/i915/i915_fpc_emit.c | 375 ++++++ src/gallium/drivers/i915/i915_fpc_translate.c | 1202 ++++++++++++++++++++ src/gallium/drivers/i915/i915_prim_emit.c | 219 ++++ src/gallium/drivers/i915/i915_prim_vbuf.c | 645 +++++++++++ src/gallium/drivers/i915/i915_reg.h | 978 ++++++++++++++++ src/gallium/drivers/i915/i915_screen.c | 298 +++++ src/gallium/drivers/i915/i915_screen.h | 80 ++ src/gallium/drivers/i915/i915_state.c | 796 +++++++++++++ src/gallium/drivers/i915/i915_state.h | 50 + src/gallium/drivers/i915/i915_state_derived.c | 183 +++ src/gallium/drivers/i915/i915_state_dynamic.c | 310 +++++ src/gallium/drivers/i915/i915_state_emit.c | 402 +++++++ src/gallium/drivers/i915/i915_state_immediate.c | 225 ++++ src/gallium/drivers/i915/i915_state_inlines.h | 230 ++++ src/gallium/drivers/i915/i915_state_sampler.c | 299 +++++ src/gallium/drivers/i915/i915_surface.c | 94 ++ src/gallium/drivers/i915/i915_texture.c | 958 ++++++++++++++++ src/gallium/drivers/i915/i915_texture.h | 36 + src/gallium/drivers/i915/intel_batchbuffer.h | 87 ++ src/gallium/drivers/i915/intel_winsys.h | 230 ++++ src/gallium/drivers/i915simple/Makefile | 28 - src/gallium/drivers/i915simple/SConscript | 30 - src/gallium/drivers/i915simple/i915_batch.h | 47 - src/gallium/drivers/i915simple/i915_blit.c | 151 --- src/gallium/drivers/i915simple/i915_blit.h | 55 - src/gallium/drivers/i915simple/i915_buffer.c | 136 --- src/gallium/drivers/i915simple/i915_buffer.h | 31 - src/gallium/drivers/i915simple/i915_clear.c | 48 - src/gallium/drivers/i915simple/i915_context.c | 244 ---- src/gallium/drivers/i915simple/i915_context.h | 350 ------ src/gallium/drivers/i915simple/i915_debug.c | 898 --------------- src/gallium/drivers/i915simple/i915_debug.h | 114 -- src/gallium/drivers/i915simple/i915_debug_fp.c | 363 ------ src/gallium/drivers/i915simple/i915_flush.c | 86 -- src/gallium/drivers/i915simple/i915_fpc.h | 207 ---- src/gallium/drivers/i915simple/i915_fpc_emit.c | 375 ------ .../drivers/i915simple/i915_fpc_translate.c | 1202 -------------------- src/gallium/drivers/i915simple/i915_prim_emit.c | 219 ---- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 645 ----------- src/gallium/drivers/i915simple/i915_reg.h | 978 ---------------- src/gallium/drivers/i915simple/i915_screen.c | 298 ----- src/gallium/drivers/i915simple/i915_screen.h | 80 -- src/gallium/drivers/i915simple/i915_state.c | 796 ------------- src/gallium/drivers/i915simple/i915_state.h | 50 - .../drivers/i915simple/i915_state_derived.c | 183 --- .../drivers/i915simple/i915_state_dynamic.c | 310 ----- src/gallium/drivers/i915simple/i915_state_emit.c | 402 ------- .../drivers/i915simple/i915_state_immediate.c | 225 ---- .../drivers/i915simple/i915_state_inlines.h | 230 ---- .../drivers/i915simple/i915_state_sampler.c | 299 ----- src/gallium/drivers/i915simple/i915_surface.c | 94 -- src/gallium/drivers/i915simple/i915_texture.c | 958 ---------------- src/gallium/drivers/i915simple/i915_texture.h | 36 - src/gallium/drivers/i915simple/intel_batchbuffer.h | 87 -- src/gallium/drivers/i915simple/intel_winsys.h | 230 ---- src/gallium/winsys/drm/intel/dri/Makefile | 2 +- src/gallium/winsys/drm/intel/dri/SConscript | 2 +- src/gallium/winsys/drm/intel/egl/Makefile | 2 +- src/gallium/winsys/drm/intel/gem/intel_drm_api.c | 4 +- .../winsys/drm/intel/gem/intel_drm_winsys.h | 2 +- src/gallium/winsys/drm/intel/xorg/Makefile | 2 +- 79 files changed, 10497 insertions(+), 10497 deletions(-) create mode 100644 src/gallium/drivers/i915/Makefile create mode 100644 src/gallium/drivers/i915/SConscript create mode 100644 src/gallium/drivers/i915/i915_batch.h create mode 100644 src/gallium/drivers/i915/i915_blit.c create mode 100644 src/gallium/drivers/i915/i915_blit.h create mode 100644 src/gallium/drivers/i915/i915_buffer.c create mode 100644 src/gallium/drivers/i915/i915_buffer.h create mode 100644 src/gallium/drivers/i915/i915_clear.c create mode 100644 src/gallium/drivers/i915/i915_context.c create mode 100644 src/gallium/drivers/i915/i915_context.h create mode 100644 src/gallium/drivers/i915/i915_debug.c create mode 100644 src/gallium/drivers/i915/i915_debug.h create mode 100644 src/gallium/drivers/i915/i915_debug_fp.c create mode 100644 src/gallium/drivers/i915/i915_flush.c create mode 100644 src/gallium/drivers/i915/i915_fpc.h create mode 100644 src/gallium/drivers/i915/i915_fpc_emit.c create mode 100644 src/gallium/drivers/i915/i915_fpc_translate.c create mode 100644 src/gallium/drivers/i915/i915_prim_emit.c create mode 100644 src/gallium/drivers/i915/i915_prim_vbuf.c create mode 100644 src/gallium/drivers/i915/i915_reg.h create mode 100644 src/gallium/drivers/i915/i915_screen.c create mode 100644 src/gallium/drivers/i915/i915_screen.h create mode 100644 src/gallium/drivers/i915/i915_state.c create mode 100644 src/gallium/drivers/i915/i915_state.h create mode 100644 src/gallium/drivers/i915/i915_state_derived.c create mode 100644 src/gallium/drivers/i915/i915_state_dynamic.c create mode 100644 src/gallium/drivers/i915/i915_state_emit.c create mode 100644 src/gallium/drivers/i915/i915_state_immediate.c create mode 100644 src/gallium/drivers/i915/i915_state_inlines.h create mode 100644 src/gallium/drivers/i915/i915_state_sampler.c create mode 100644 src/gallium/drivers/i915/i915_surface.c create mode 100644 src/gallium/drivers/i915/i915_texture.c create mode 100644 src/gallium/drivers/i915/i915_texture.h create mode 100644 src/gallium/drivers/i915/intel_batchbuffer.h create mode 100644 src/gallium/drivers/i915/intel_winsys.h delete mode 100644 src/gallium/drivers/i915simple/Makefile delete mode 100644 src/gallium/drivers/i915simple/SConscript delete mode 100644 src/gallium/drivers/i915simple/i915_batch.h delete mode 100644 src/gallium/drivers/i915simple/i915_blit.c delete mode 100644 src/gallium/drivers/i915simple/i915_blit.h delete mode 100644 src/gallium/drivers/i915simple/i915_buffer.c delete mode 100644 src/gallium/drivers/i915simple/i915_buffer.h delete mode 100644 src/gallium/drivers/i915simple/i915_clear.c delete mode 100644 src/gallium/drivers/i915simple/i915_context.c delete mode 100644 src/gallium/drivers/i915simple/i915_context.h delete mode 100644 src/gallium/drivers/i915simple/i915_debug.c delete mode 100644 src/gallium/drivers/i915simple/i915_debug.h delete mode 100644 src/gallium/drivers/i915simple/i915_debug_fp.c delete mode 100644 src/gallium/drivers/i915simple/i915_flush.c delete mode 100644 src/gallium/drivers/i915simple/i915_fpc.h delete mode 100644 src/gallium/drivers/i915simple/i915_fpc_emit.c delete mode 100644 src/gallium/drivers/i915simple/i915_fpc_translate.c delete mode 100644 src/gallium/drivers/i915simple/i915_prim_emit.c delete mode 100644 src/gallium/drivers/i915simple/i915_prim_vbuf.c delete mode 100644 src/gallium/drivers/i915simple/i915_reg.h delete mode 100644 src/gallium/drivers/i915simple/i915_screen.c delete mode 100644 src/gallium/drivers/i915simple/i915_screen.h delete mode 100644 src/gallium/drivers/i915simple/i915_state.c delete mode 100644 src/gallium/drivers/i915simple/i915_state.h delete mode 100644 src/gallium/drivers/i915simple/i915_state_derived.c delete mode 100644 src/gallium/drivers/i915simple/i915_state_dynamic.c delete mode 100644 src/gallium/drivers/i915simple/i915_state_emit.c delete mode 100644 src/gallium/drivers/i915simple/i915_state_immediate.c delete mode 100644 src/gallium/drivers/i915simple/i915_state_inlines.h delete mode 100644 src/gallium/drivers/i915simple/i915_state_sampler.c delete mode 100644 src/gallium/drivers/i915simple/i915_surface.c delete mode 100644 src/gallium/drivers/i915simple/i915_texture.c delete mode 100644 src/gallium/drivers/i915simple/i915_texture.h delete mode 100644 src/gallium/drivers/i915simple/intel_batchbuffer.h delete mode 100644 src/gallium/drivers/i915simple/intel_winsys.h (limited to 'src/gallium/drivers') diff --git a/SConstruct b/SConstruct index 9ef7c01f0b..d53f4401e5 100644 --- a/SConstruct +++ b/SConstruct @@ -32,10 +32,10 @@ import common default_statetrackers = 'mesa' if common.default_platform in ('linux', 'freebsd', 'darwin'): - default_drivers = 'softpipe,failover,i915simple,trace,identity,llvmpipe' + default_drivers = 'softpipe,failover,i915,trace,identity,llvmpipe' default_winsys = 'xlib' elif common.default_platform in ('winddk',): - default_drivers = 'softpipe,i915simple,trace,identity' + default_drivers = 'softpipe,i915,trace,identity' default_winsys = 'all' else: default_drivers = 'all' @@ -46,7 +46,7 @@ common.AddOptions(opts) opts.Add(ListVariable('statetrackers', 'state trackers to build', default_statetrackers, ['mesa', 'python', 'xorg'])) opts.Add(ListVariable('drivers', 'pipe drivers to build', default_drivers, - ['softpipe', 'failover', 'i915simple', 'cell', 'trace', 'r300', 'identity', 'llvmpipe'])) + ['softpipe', 'failover', 'i915', 'cell', 'trace', 'r300', 'identity', 'llvmpipe'])) opts.Add(ListVariable('winsys', 'winsys drivers to build', default_winsys, ['xlib', 'intel', 'gdi', 'radeon'])) diff --git a/configs/default b/configs/default index 2ce725050d..c3bb47e70d 100644 --- a/configs/default +++ b/configs/default @@ -96,7 +96,7 @@ EGL_DRIVERS_DIRS = demo GALLIUM_DIRS = auxiliary drivers state_trackers GALLIUM_AUXILIARY_DIRS = rbug draw translate cso_cache pipebuffer tgsi sct rtasm util indices vl GALLIUM_AUXILIARIES = $(foreach DIR,$(GALLIUM_AUXILIARY_DIRS),$(TOP)/src/gallium/auxiliary/$(DIR)/lib$(DIR).a) -GALLIUM_DRIVERS_DIRS = softpipe i915simple failover trace identity +GALLIUM_DRIVERS_DIRS = softpipe i915 failover trace identity GALLIUM_DRIVERS = $(foreach DIR,$(GALLIUM_DRIVERS_DIRS),$(TOP)/src/gallium/drivers/$(DIR)/lib$(DIR).a) GALLIUM_WINSYS_DIRS = xlib egl_xlib GALLIUM_WINSYS_DRM_DIRS = diff --git a/configure.ac b/configure.ac index 143fd31a02..95497af15f 100644 --- a/configure.ac +++ b/configure.ac @@ -1190,7 +1190,7 @@ AC_ARG_ENABLE([gallium-intel], [enable_gallium_intel=yes]) if test "x$enable_gallium_intel" = xyes; then GALLIUM_WINSYS_DRM_DIRS="$GALLIUM_WINSYS_DRM_DIRS intel" - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS i915simple" + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS i915" fi dnl diff --git a/src/gallium/drivers/i915/Makefile b/src/gallium/drivers/i915/Makefile new file mode 100644 index 0000000000..e33c74d02f --- /dev/null +++ b/src/gallium/drivers/i915/Makefile @@ -0,0 +1,28 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = i915 + +C_SOURCES = \ + i915_blit.c \ + i915_buffer.c \ + i915_clear.c \ + i915_flush.c \ + i915_context.c \ + i915_debug.c \ + i915_debug_fp.c \ + i915_state.c \ + i915_state_immediate.c \ + i915_state_dynamic.c \ + i915_state_derived.c \ + i915_state_emit.c \ + i915_state_sampler.c \ + i915_screen.c \ + i915_prim_emit.c \ + i915_prim_vbuf.c \ + i915_texture.c \ + i915_fpc_emit.c \ + i915_fpc_translate.c \ + i915_surface.c + +include ../../Makefile.template diff --git a/src/gallium/drivers/i915/SConscript b/src/gallium/drivers/i915/SConscript new file mode 100644 index 0000000000..5a1c47c88d --- /dev/null +++ b/src/gallium/drivers/i915/SConscript @@ -0,0 +1,30 @@ +Import('*') + +env = env.Clone() + +i915 = env.ConvenienceLibrary( + target = 'i915', + source = [ + 'i915_blit.c', + 'i915_buffer.c', + 'i915_clear.c', + 'i915_context.c', + 'i915_debug.c', + 'i915_debug_fp.c', + 'i915_flush.c', + 'i915_fpc_emit.c', + 'i915_fpc_translate.c', + 'i915_prim_emit.c', + 'i915_prim_vbuf.c', + 'i915_screen.c', + 'i915_state.c', + 'i915_state_derived.c', + 'i915_state_dynamic.c', + 'i915_state_emit.c', + 'i915_state_immediate.c', + 'i915_state_sampler.c', + 'i915_surface.c', + 'i915_texture.c', + ]) + +Export('i915') diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h new file mode 100644 index 0000000000..b813784723 --- /dev/null +++ b/src/gallium/drivers/i915/i915_batch.h @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_BATCH_H +#define I915_BATCH_H + +#include "intel_batchbuffer.h" + +#define BEGIN_BATCH(dwords, relocs) \ + (intel_batchbuffer_check(i915->batch, dwords, relocs)) + +#define OUT_BATCH(dword) \ + intel_batchbuffer_dword(i915->batch, dword) + +#define OUT_RELOC(buf, usage, offset) \ + intel_batchbuffer_reloc(i915->batch, buf, usage, offset) + +#define FLUSH_BATCH(fence) do { \ + intel_batchbuffer_flush(i915->batch, fence); \ + i915->hardware_dirty = ~0; \ +} while (0) + +#endif diff --git a/src/gallium/drivers/i915/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c new file mode 100644 index 0000000000..83dfc33528 --- /dev/null +++ b/src/gallium/drivers/i915/i915_blit.c @@ -0,0 +1,151 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "i915_blit.h" +#include "i915_reg.h" +#include "i915_batch.h" +#include "i915_debug.h" + +#define FILE_DEBUG_FLAG DEBUG_BLIT + +void +i915_fill_blit(struct i915_context *i915, + unsigned cpp, + unsigned short dst_pitch, + struct intel_buffer *dst_buffer, + unsigned dst_offset, + short x, short y, + short w, short h, + unsigned color) +{ + unsigned BR13, CMD; + + + I915_DBG(i915, + "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + __FUNCTION__, + dst_buffer, dst_pitch, dst_offset, x, y, w, h); + + switch (cpp) { + case 1: + case 2: + case 3: + BR13 = (((int) dst_pitch) & 0xffff) | + (0xF0 << 16) | (1 << 24); + CMD = XY_COLOR_BLT_CMD; + break; + case 4: + BR13 = (((int) dst_pitch) & 0xffff) | + (0xF0 << 16) | (1 << 24) | (1 << 25); + CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA | + XY_COLOR_BLT_WRITE_RGB); + break; + default: + return; + } + + if (!BEGIN_BATCH(6, 1)) { + FLUSH_BATCH(NULL); + assert(BEGIN_BATCH(6, 1)); + } + OUT_BATCH(CMD); + OUT_BATCH(BR13); + OUT_BATCH((y << 16) | x); + OUT_BATCH(((y + h) << 16) | (x + w)); + OUT_RELOC(dst_buffer, INTEL_USAGE_2D_TARGET, dst_offset); + OUT_BATCH(color); + FLUSH_BATCH(NULL); +} + +void +i915_copy_blit(struct i915_context *i915, + unsigned do_flip, + unsigned cpp, + unsigned short src_pitch, + struct intel_buffer *src_buffer, + unsigned src_offset, + unsigned short dst_pitch, + struct intel_buffer *dst_buffer, + unsigned dst_offset, + short src_x, short src_y, + short dst_x, short dst_y, + short w, short h) +{ + unsigned CMD, BR13; + int dst_y2 = dst_y + h; + int dst_x2 = dst_x + w; + + + I915_DBG(i915, + "%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + __FUNCTION__, + src_buffer, src_pitch, src_offset, src_x, src_y, + dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); + + switch (cpp) { + case 1: + case 2: + case 3: + BR13 = (((int) dst_pitch) & 0xffff) | + (0xCC << 16) | (1 << 24); + CMD = XY_SRC_COPY_BLT_CMD; + break; + case 4: + BR13 = (((int) dst_pitch) & 0xffff) | + (0xCC << 16) | (1 << 24) | (1 << 25); + CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | + XY_SRC_COPY_BLT_WRITE_RGB); + break; + default: + return; + } + + if (dst_y2 < dst_y || dst_x2 < dst_x) { + return; + } + + /* Hardware can handle negative pitches but loses the ability to do + * proper overlapping blits in that case. We don't really have a + * need for either at this stage. + */ + assert (dst_pitch > 0 && src_pitch > 0); + + if (!BEGIN_BATCH(8, 2)) { + FLUSH_BATCH(NULL); + assert(BEGIN_BATCH(8, 2)); + } + OUT_BATCH(CMD); + OUT_BATCH(BR13); + OUT_BATCH((dst_y << 16) | dst_x); + OUT_BATCH((dst_y2 << 16) | dst_x2); + OUT_RELOC(dst_buffer, INTEL_USAGE_2D_TARGET, dst_offset); + OUT_BATCH((src_y << 16) | src_x); + OUT_BATCH(((int) src_pitch & 0xffff)); + OUT_RELOC(src_buffer, INTEL_USAGE_2D_SOURCE, src_offset); + FLUSH_BATCH(NULL); +} diff --git a/src/gallium/drivers/i915/i915_blit.h b/src/gallium/drivers/i915/i915_blit.h new file mode 100644 index 0000000000..8ce3220cfd --- /dev/null +++ b/src/gallium/drivers/i915/i915_blit.h @@ -0,0 +1,55 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_BLIT_H +#define I915_BLIT_H + +#include "i915_context.h" + +extern void i915_copy_blit(struct i915_context *i915, + unsigned do_flip, + unsigned cpp, + unsigned short src_pitch, + struct intel_buffer *src_buffer, + unsigned src_offset, + unsigned short dst_pitch, + struct intel_buffer *dst_buffer, + unsigned dst_offset, + short srcx, short srcy, + short dstx, short dsty, + short w, short h); + +extern void i915_fill_blit(struct i915_context *i915, + unsigned cpp, + unsigned short dst_pitch, + struct intel_buffer *dst_buffer, + unsigned dst_offset, + short x, short y, + short w, short h, unsigned color); + + +#endif diff --git a/src/gallium/drivers/i915/i915_buffer.c b/src/gallium/drivers/i915/i915_buffer.c new file mode 100644 index 0000000000..effeba1297 --- /dev/null +++ b/src/gallium/drivers/i915/i915_buffer.c @@ -0,0 +1,136 @@ +/************************************************************************** + * + * Copyright © 2009 Jakob Bornecrantz + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_memory.h" +#include "i915_screen.h" +#include "i915_buffer.h" + +struct intel_buffer; + +struct i915_buffer +{ + struct pipe_buffer base; + + struct intel_buffer *ibuf; /** hw buffer */ + + void *data; /**< user and malloc data */ + boolean own; /**< we own the data incase of malloc */ +}; + +static INLINE struct i915_buffer * +i915_buffer(struct pipe_buffer *buffer) +{ + return (struct i915_buffer *)buffer; +} + +static struct pipe_buffer * +i915_buffer_create(struct pipe_screen *screen, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct i915_buffer *buf = CALLOC_STRUCT(i915_buffer); + + if (!buf) + return NULL; + + pipe_reference_init(&buf->base.reference, 1); + buf->base.alignment = alignment; + buf->base.screen = screen; + buf->base.usage = usage; + buf->base.size = size; + buf->data = MALLOC(size); + buf->own = TRUE; + + if (!buf->data) + goto err; + + return &buf->base; + +err: + FREE(buf); + return NULL; +} + +static struct pipe_buffer * +i915_user_buffer_create(struct pipe_screen *screen, + void *ptr, + unsigned bytes) +{ + struct i915_buffer *buf = CALLOC_STRUCT(i915_buffer); + + if (!buf) + return NULL; + + pipe_reference_init(&buf->base.reference, 1); + buf->base.alignment = 0; + buf->base.screen = screen; + buf->base.usage = 0; + buf->base.size = bytes; + buf->data = ptr; + buf->own = FALSE; + + return &buf->base; +} + +static void * +i915_buffer_map(struct pipe_screen *screen, + struct pipe_buffer *buffer, + unsigned usage) +{ + struct i915_buffer *buf = i915_buffer(buffer); + assert(!buf->ibuf); + return buf->data; +} + +static void +i915_buffer_unmap(struct pipe_screen *screen, + struct pipe_buffer *buffer) +{ + struct i915_buffer *buf = i915_buffer(buffer); + assert(!buf->ibuf); +} + +static void +i915_buffer_destroy(struct pipe_buffer *buffer) +{ + struct i915_buffer *buf = i915_buffer(buffer); + assert(!buf->ibuf); + + if (buf->own) + FREE(buf->data); + FREE(buf); +} + +void i915_init_screen_buffer_functions(struct i915_screen *screen) +{ + screen->base.buffer_create = i915_buffer_create; + screen->base.user_buffer_create = i915_user_buffer_create; + screen->base.buffer_map = i915_buffer_map; + screen->base.buffer_map_range = NULL; + screen->base.buffer_flush_mapped_range = NULL; + screen->base.buffer_unmap = i915_buffer_unmap; + screen->base.buffer_destroy = i915_buffer_destroy; +} diff --git a/src/gallium/drivers/i915/i915_buffer.h b/src/gallium/drivers/i915/i915_buffer.h new file mode 100644 index 0000000000..80fda7c62f --- /dev/null +++ b/src/gallium/drivers/i915/i915_buffer.h @@ -0,0 +1,31 @@ +/************************************************************************** + * + * Copyright © 2009 Jakob Bornecrantz + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_BUFFER_H +#define I915_BUFFER_H + +void i915_init_screen_buffer_functions(struct i915_screen *screen); + +#endif diff --git a/src/gallium/drivers/i915/i915_clear.c b/src/gallium/drivers/i915/i915_clear.c new file mode 100644 index 0000000000..90530f2826 --- /dev/null +++ b/src/gallium/drivers/i915/i915_clear.c @@ -0,0 +1,48 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: + * Brian Paul + */ + + +#include "util/u_clear.h" +#include "i915_context.h" +#include "i915_state.h" + + +/** + * Clear the given buffers to the specified values. + * No masking, no scissor (clear entire buffer). + */ +void +i915_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, + double depth, unsigned stencil) +{ + util_clear(pipe, &i915_context(pipe)->framebuffer, buffers, rgba, depth, + stencil); +} diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c new file mode 100644 index 0000000000..e745f3342d --- /dev/null +++ b/src/gallium/drivers/i915/i915_context.c @@ -0,0 +1,244 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_context.h" +#include "i915_state.h" +#include "i915_screen.h" +#include "i915_batch.h" +#include "i915_texture.h" +#include "i915_reg.h" + +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "pipe/p_screen.h" + + +/* + * Draw functions + */ + + +static boolean +i915_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned prim, unsigned start, unsigned count) +{ + struct i915_context *i915 = i915_context(pipe); + struct draw_context *draw = i915->draw; + unsigned i; + + if (i915->dirty) + i915_update_derived(i915); + + /* + * Map vertex buffers + */ + for (i = 0; i < i915->num_vertex_buffers; i++) { + void *buf = pipe_buffer_map(pipe->screen, i915->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + + /* + * Map index buffer, if present + */ + if (indexBuffer) { + void *mapped_indexes = pipe_buffer_map(pipe->screen, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer_range(draw, indexSize, + min_index, + max_index, + mapped_indexes); + } else { + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + + draw_set_mapped_constant_buffer(draw, + i915->current.constants[PIPE_SHADER_VERTEX], + (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * + 4 * sizeof(float))); + + /* + * Do the drawing + */ + draw_arrays(i915->draw, prim, start, count); + + /* + * unmap vertex/index buffers + */ + for (i = 0; i < i915->num_vertex_buffers; i++) { + pipe_buffer_unmap(pipe->screen, i915->vertex_buffer[i].buffer); + draw_set_mapped_vertex_buffer(draw, i, NULL); + } + + if (indexBuffer) { + pipe_buffer_unmap(pipe->screen, indexBuffer); + draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL); + } + + return TRUE; +} + +static boolean +i915_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count) +{ + return i915_draw_range_elements(pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + prim, start, count); +} + +static boolean +i915_draw_arrays(struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) +{ + return i915_draw_elements(pipe, NULL, 0, prim, start, count); +} + + +/* + * Is referenced functions + */ + + +static unsigned int +i915_is_texture_referenced(struct pipe_context *pipe, + struct pipe_texture *texture, + unsigned face, unsigned level) +{ + /** + * FIXME: Return the corrent result. We can't alays return referenced + * since it causes a double flush within the vbo module. + */ +#if 0 + return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; +#else + return 0; +#endif +} + +static unsigned int +i915_is_buffer_referenced(struct pipe_context *pipe, + struct pipe_buffer *buf) +{ + /** + * FIXME: Return the corrent result. We can't alays return referenced + * since it causes a double flush within the vbo module. + */ +#if 0 + return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; +#else + return 0; +#endif +} + + +/* + * Generic context functions + */ + + +static void i915_destroy(struct pipe_context *pipe) +{ + struct i915_context *i915 = i915_context(pipe); + int i; + + draw_destroy(i915->draw); + + if(i915->batch) + i915->iws->batchbuffer_destroy(i915->batch); + + /* unbind framebuffer */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + pipe_surface_reference(&i915->framebuffer.cbufs[i], NULL); + } + pipe_surface_reference(&i915->framebuffer.zsbuf, NULL); + + FREE(i915); +} + +struct pipe_context * +i915_create_context(struct pipe_screen *screen) +{ + struct i915_context *i915; + + i915 = CALLOC_STRUCT(i915_context); + if (i915 == NULL) + return NULL; + + i915->iws = i915_screen(screen)->iws; + i915->base.winsys = NULL; + i915->base.screen = screen; + + i915->base.destroy = i915_destroy; + + i915->base.clear = i915_clear; + + i915->base.draw_arrays = i915_draw_arrays; + i915->base.draw_elements = i915_draw_elements; + i915->base.draw_range_elements = i915_draw_range_elements; + + i915->base.is_texture_referenced = i915_is_texture_referenced; + i915->base.is_buffer_referenced = i915_is_buffer_referenced; + + /* + * Create drawing context and plug our rendering stage into it. + */ + i915->draw = draw_create(); + assert(i915->draw); + if (!debug_get_bool_option("I915_NO_VBUF", FALSE)) { + draw_set_rasterize_stage(i915->draw, i915_draw_vbuf_stage(i915)); + } else { + draw_set_rasterize_stage(i915->draw, i915_draw_render_stage(i915)); + } + + i915_init_surface_functions(i915); + i915_init_state_functions(i915); + i915_init_flush_functions(i915); + + draw_install_aaline_stage(i915->draw, &i915->base); + draw_install_aapoint_stage(i915->draw, &i915->base); + + i915->dirty = ~0; + i915->hardware_dirty = ~0; + + /* Batch stream debugging is a bit hacked up at the moment: + */ + i915->batch = i915->iws->batchbuffer_create(i915->iws); + + return &i915->base; +} diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h new file mode 100644 index 0000000000..234b441ce6 --- /dev/null +++ b/src/gallium/drivers/i915/i915_context.h @@ -0,0 +1,350 @@ + /************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_CONTEXT_H +#define I915_CONTEXT_H + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "draw/draw_vertex.h" + +#include "tgsi/tgsi_scan.h" + + +struct intel_winsys; +struct intel_buffer; +struct intel_batchbuffer; + + +#define I915_TEX_UNITS 8 + +#define I915_DYNAMIC_MODES4 0 +#define I915_DYNAMIC_DEPTHSCALE_0 1 /* just the header */ +#define I915_DYNAMIC_DEPTHSCALE_1 2 +#define I915_DYNAMIC_IAB 3 +#define I915_DYNAMIC_BC_0 4 /* just the header */ +#define I915_DYNAMIC_BC_1 5 +#define I915_DYNAMIC_BFO_0 6 +#define I915_DYNAMIC_BFO_1 7 +#define I915_DYNAMIC_STP_0 8 +#define I915_DYNAMIC_STP_1 9 +#define I915_DYNAMIC_SC_ENA_0 10 +#define I915_DYNAMIC_SC_RECT_0 11 +#define I915_DYNAMIC_SC_RECT_1 12 +#define I915_DYNAMIC_SC_RECT_2 13 +#define I915_MAX_DYNAMIC 14 + + +#define I915_IMMEDIATE_S0 0 +#define I915_IMMEDIATE_S1 1 +#define I915_IMMEDIATE_S2 2 +#define I915_IMMEDIATE_S3 3 +#define I915_IMMEDIATE_S4 4 +#define I915_IMMEDIATE_S5 5 +#define I915_IMMEDIATE_S6 6 +#define I915_IMMEDIATE_S7 7 +#define I915_MAX_IMMEDIATE 8 + +/* These must mach the order of LI0_STATE_* bits, as they will be used + * to generate hardware packets: + */ +#define I915_CACHE_STATIC 0 +#define I915_CACHE_DYNAMIC 1 /* handled specially */ +#define I915_CACHE_SAMPLER 2 +#define I915_CACHE_MAP 3 +#define I915_CACHE_PROGRAM 4 +#define I915_CACHE_CONSTANTS 5 +#define I915_MAX_CACHE 6 + +#define I915_MAX_CONSTANT 32 + + +/** See constant_flags[] below */ +#define I915_CONSTFLAG_USER 0x1f + + +/** + * Subclass of pipe_shader_state + */ +struct i915_fragment_shader +{ + struct pipe_shader_state state; + + struct tgsi_shader_info info; + + uint *program; + uint program_len; + + /** + * constants introduced during translation. + * These are placed at the end of the constant buffer and grow toward + * the beginning (eg: slot 31, 30 29, ...) + * User-provided constants start at 0. + * This allows both types of constants to co-exist (until there's too many) + * and doesn't require regenerating/changing the fragment program to + * shuffle constants around. + */ + uint num_constants; + float constants[I915_MAX_CONSTANT][4]; + + /** + * Status of each constant + * if I915_CONSTFLAG_PARAM, the value must be taken from the corresponding + * slot of the user's constant buffer. (set by pipe->set_constant_buffer()) + * Else, the bitmask indicates which components are occupied by immediates. + */ + ubyte constant_flags[I915_MAX_CONSTANT]; +}; + + +struct i915_cache_context; + +/* Use to calculate differences between state emitted to hardware and + * current driver-calculated state. + */ +struct i915_state +{ + unsigned immediate[I915_MAX_IMMEDIATE]; + unsigned dynamic[I915_MAX_DYNAMIC]; + + float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4]; + /** number of constants passed in through a constant buffer */ + uint num_user_constants[PIPE_SHADER_TYPES]; + + /* texture sampler state */ + unsigned sampler[I915_TEX_UNITS][3]; + unsigned sampler_enable_flags; + unsigned sampler_enable_nr; + + /* texture image buffers */ + unsigned texbuffer[I915_TEX_UNITS][2]; + + /** Describes the current hardware vertex layout */ + struct vertex_info vertex_info; + + unsigned id; /* track lost context events */ +}; + +struct i915_blend_state { + unsigned iab; + unsigned modes4; + unsigned LIS5; + unsigned LIS6; +}; + +struct i915_depth_stencil_state { + unsigned stencil_modes4; + unsigned bfo[2]; + unsigned stencil_LIS5; + unsigned depth_LIS6; +}; + +struct i915_rasterizer_state { + int light_twoside : 1; + unsigned st; + enum interp_mode color_interp; + + unsigned LIS4; + unsigned LIS7; + unsigned sc[1]; + + const struct pipe_rasterizer_state *templ; + + union { float f; unsigned u; } ds[2]; +}; + +struct i915_sampler_state { + unsigned state[3]; + const struct pipe_sampler_state *templ; + unsigned minlod; + unsigned maxlod; +}; + +struct i915_texture { + struct pipe_texture base; + + /* Derived from the above: + */ + unsigned stride; + unsigned depth_stride; /* per-image on i945? */ + unsigned total_nblocksy; + + unsigned sw_tiled; /**< tiled with software flags */ + unsigned hw_tiled; /**< tiled with hardware fences */ + + unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + + /* Explicitly store the offset of each image for each cube face or + * depth value. Pretty much have to accept that hardware formats + * are going to be so diverse that there is no unified way to + * compute the offsets of depth/cube images within a mipmap level, + * so have to store them as a lookup table: + */ + unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ + + /* The data is held here: + */ + struct intel_buffer *buffer; +}; + +struct i915_context +{ + struct pipe_context base; + + struct intel_winsys *iws; + + struct draw_context *draw; + + /* The most recent drawing state as set by the driver: + */ + const struct i915_blend_state *blend; + const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + const struct i915_depth_stencil_state *depth_stencil; + const struct i915_rasterizer_state *rasterizer; + + struct i915_fragment_shader *fs; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct i915_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + + unsigned dirty; + + unsigned num_samplers; + unsigned num_textures; + unsigned num_vertex_elements; + unsigned num_vertex_buffers; + + struct intel_batchbuffer *batch; + + /** Vertex buffer */ + struct intel_buffer *vbo; + size_t vbo_offset; + unsigned vbo_flushed; + + struct i915_state current; + unsigned hardware_dirty; + + unsigned debug; +}; + +/* A flag for each state_tracker state object: + */ +#define I915_NEW_VIEWPORT 0x1 +#define I915_NEW_RASTERIZER 0x2 +#define I915_NEW_FS 0x4 +#define I915_NEW_BLEND 0x8 +#define I915_NEW_CLIP 0x10 +#define I915_NEW_SCISSOR 0x20 +#define I915_NEW_STIPPLE 0x40 +#define I915_NEW_FRAMEBUFFER 0x80 +#define I915_NEW_ALPHA_TEST 0x100 +#define I915_NEW_DEPTH_STENCIL 0x200 +#define I915_NEW_SAMPLER 0x400 +#define I915_NEW_TEXTURE 0x800 +#define I915_NEW_CONSTANTS 0x1000 +#define I915_NEW_VBO 0x2000 +#define I915_NEW_VS 0x4000 + + +/* Driver's internally generated state flags: + */ +#define I915_NEW_VERTEX_FORMAT 0x10000 + + +/* Dirty flags for hardware emit + */ +#define I915_HW_STATIC (1<ptr + stream->offset); + + if (len == 0) { + PRINTF(stream, "Error - zero length packet (0x%08x)\n", stream->ptr[0]); + assert(0); + return FALSE; + } + + if (stream->print_addresses) + PRINTF(stream, "%08x: ", stream->offset); + + + PRINTF(stream, "%s (%d dwords):\n", name, len); + for (i = 0; i < len; i++) + PRINTF(stream, "\t0x%08x\n", ptr[i]); + PRINTF(stream, "\n"); + + stream->offset += len * sizeof(unsigned); + + return TRUE; +} + + +static const char *get_prim_name( unsigned val ) +{ + switch (val & PRIM3D_MASK) { + case PRIM3D_TRILIST: return "TRILIST"; break; + case PRIM3D_TRISTRIP: return "TRISTRIP"; break; + case PRIM3D_TRISTRIP_RVRSE: return "TRISTRIP_RVRSE"; break; + case PRIM3D_TRIFAN: return "TRIFAN"; break; + case PRIM3D_POLY: return "POLY"; break; + case PRIM3D_LINELIST: return "LINELIST"; break; + case PRIM3D_LINESTRIP: return "LINESTRIP"; break; + case PRIM3D_RECTLIST: return "RECTLIST"; break; + case PRIM3D_POINTLIST: return "POINTLIST"; break; + case PRIM3D_DIB: return "DIB"; break; + case PRIM3D_CLEAR_RECT: return "CLEAR_RECT"; break; + case PRIM3D_ZONE_INIT: return "ZONE_INIT"; break; + default: return "????"; break; + } +} + +static boolean debug_prim( struct debug_stream *stream, const char *name, + boolean dump_floats, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + const char *prim = get_prim_name( ptr[0] ); + unsigned i; + + + + PRINTF(stream, "%s %s (%d dwords):\n", name, prim, len); + PRINTF(stream, "\t0x%08x\n", ptr[0]); + for (i = 1; i < len; i++) { + if (dump_floats) + PRINTF(stream, "\t0x%08x // %f\n", ptr[i], *(float *)&ptr[i]); + else + PRINTF(stream, "\t0x%08x\n", ptr[i]); + } + + + PRINTF(stream, "\n"); + + stream->offset += len * sizeof(unsigned); + + return TRUE; +} + + + + +static boolean debug_program( struct debug_stream *stream, const char *name, unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + + if (len == 0) { + PRINTF(stream, "Error - zero length packet (0x%08x)\n", stream->ptr[0]); + assert(0); + return FALSE; + } + + if (stream->print_addresses) + PRINTF(stream, "%08x: ", stream->offset); + + PRINTF(stream, "%s (%d dwords):\n", name, len); + i915_disassemble_program( stream, ptr, len ); + + stream->offset += len * sizeof(unsigned); + return TRUE; +} + + +static boolean debug_chain( struct debug_stream *stream, const char *name, unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned old_offset = stream->offset + len * sizeof(unsigned); + unsigned i; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + for (i = 0; i < len; i++) + PRINTF(stream, "\t0x%08x\n", ptr[i]); + + stream->offset = ptr[1] & ~0x3; + + if (stream->offset < old_offset) + PRINTF(stream, "\n... skipping backwards from 0x%x --> 0x%x ...\n\n", + old_offset, stream->offset ); + else + PRINTF(stream, "\n... skipping from 0x%x --> 0x%x ...\n\n", + old_offset, stream->offset ); + + + return TRUE; +} + + +static boolean debug_variable_length_prim( struct debug_stream *stream ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + const char *prim = get_prim_name( ptr[0] ); + unsigned i, len; + + ushort *idx = (ushort *)(ptr+1); + for (i = 0; idx[i] != 0xffff; i++) + ; + + len = 1+(i+2)/2; + + PRINTF(stream, "3DPRIM, %s variable length %d indicies (%d dwords):\n", prim, i, len); + for (i = 0; i < len; i++) + PRINTF(stream, "\t0x%08x\n", ptr[i]); + PRINTF(stream, "\n"); + + stream->offset += len * sizeof(unsigned); + return TRUE; +} + + +static void +BITS( + struct debug_stream *stream, + unsigned dw, + unsigned hi, + unsigned lo, + const char *fmt, + ... ) +{ + va_list args; + unsigned himask = ~0UL >> (31 - (hi)); + + PRINTF(stream, "\t\t "); + + va_start( args, fmt ); + debug_vprintf( fmt, args ); + va_end( args ); + + PRINTF(stream, ": 0x%x\n", ((dw) & himask) >> (lo)); +} + +#ifdef DEBUG +#define MBZ( dw, hi, lo) do { \ + unsigned x = (dw) >> (lo); \ + unsigned lomask = (1 << (lo)) - 1; \ + unsigned himask; \ + himask = (1UL << (hi)) - 1; \ + assert ((x & himask & ~lomask) == 0); \ +} while (0) +#else +#define MBZ( dw, hi, lo) do { \ +} while (0) +#endif + +static void +FLAG( + struct debug_stream *stream, + unsigned dw, + unsigned bit, + const char *fmt, + ... ) +{ + if (((dw) >> (bit)) & 1) { + va_list args; + + PRINTF(stream, "\t\t "); + + va_start( args, fmt ); + debug_vprintf( fmt, args ); + va_end( args ); + + PRINTF(stream, "\n"); + } +} + +static boolean debug_load_immediate( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned bits = (ptr[0] >> 4) & 0xff; + unsigned j = 0; + + PRINTF(stream, "%s (%d dwords, flags: %x):\n", name, len, bits); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + if (bits & (1<<0)) { + PRINTF(stream, "\t LIS0: 0x%08x\n", ptr[j]); + PRINTF(stream, "\t vb address: 0x%08x\n", (ptr[j] & ~0x3)); + BITS(stream, ptr[j], 0, 0, "vb invalidate disable"); + j++; + } + if (bits & (1<<1)) { + PRINTF(stream, "\t LIS1: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 29, 24, "vb dword width"); + BITS(stream, ptr[j], 21, 16, "vb dword pitch"); + BITS(stream, ptr[j], 15, 0, "vb max index"); + j++; + } + if (bits & (1<<2)) { + int i; + PRINTF(stream, "\t LIS2: 0x%08x\n", ptr[j]); + for (i = 0; i < 8; i++) { + unsigned tc = (ptr[j] >> (i * 4)) & 0xf; + if (tc != 0xf) + BITS(stream, tc, 3, 0, "tex coord %d", i); + } + j++; + } + if (bits & (1<<3)) { + PRINTF(stream, "\t LIS3: 0x%08x\n", ptr[j]); + j++; + } + if (bits & (1<<4)) { + PRINTF(stream, "\t LIS4: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 23, "point width"); + BITS(stream, ptr[j], 22, 19, "line width"); + FLAG(stream, ptr[j], 18, "alpha flatshade"); + FLAG(stream, ptr[j], 17, "fog flatshade"); + FLAG(stream, ptr[j], 16, "spec flatshade"); + FLAG(stream, ptr[j], 15, "rgb flatshade"); + BITS(stream, ptr[j], 14, 13, "cull mode"); + FLAG(stream, ptr[j], 12, "vfmt: point width"); + FLAG(stream, ptr[j], 11, "vfmt: specular/fog"); + FLAG(stream, ptr[j], 10, "vfmt: rgba"); + FLAG(stream, ptr[j], 9, "vfmt: depth offset"); + BITS(stream, ptr[j], 8, 6, "vfmt: position (2==xyzw)"); + FLAG(stream, ptr[j], 5, "force dflt diffuse"); + FLAG(stream, ptr[j], 4, "force dflt specular"); + FLAG(stream, ptr[j], 3, "local depth offset enable"); + FLAG(stream, ptr[j], 2, "vfmt: fp32 fog coord"); + FLAG(stream, ptr[j], 1, "sprite point"); + FLAG(stream, ptr[j], 0, "antialiasing"); + j++; + } + if (bits & (1<<5)) { + PRINTF(stream, "\t LIS5: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 28, "rgba write disables"); + FLAG(stream, ptr[j], 27, "force dflt point width"); + FLAG(stream, ptr[j], 26, "last pixel enable"); + FLAG(stream, ptr[j], 25, "global z offset enable"); + FLAG(stream, ptr[j], 24, "fog enable"); + BITS(stream, ptr[j], 23, 16, "stencil ref"); + BITS(stream, ptr[j], 15, 13, "stencil test"); + BITS(stream, ptr[j], 12, 10, "stencil fail op"); + BITS(stream, ptr[j], 9, 7, "stencil pass z fail op"); + BITS(stream, ptr[j], 6, 4, "stencil pass z pass op"); + FLAG(stream, ptr[j], 3, "stencil write enable"); + FLAG(stream, ptr[j], 2, "stencil test enable"); + FLAG(stream, ptr[j], 1, "color dither enable"); + FLAG(stream, ptr[j], 0, "logiop enable"); + j++; + } + if (bits & (1<<6)) { + PRINTF(stream, "\t LIS6: 0x%08x\n", ptr[j]); + FLAG(stream, ptr[j], 31, "alpha test enable"); + BITS(stream, ptr[j], 30, 28, "alpha func"); + BITS(stream, ptr[j], 27, 20, "alpha ref"); + FLAG(stream, ptr[j], 19, "depth test enable"); + BITS(stream, ptr[j], 18, 16, "depth func"); + FLAG(stream, ptr[j], 15, "blend enable"); + BITS(stream, ptr[j], 14, 12, "blend func"); + BITS(stream, ptr[j], 11, 8, "blend src factor"); + BITS(stream, ptr[j], 7, 4, "blend dst factor"); + FLAG(stream, ptr[j], 3, "depth write enable"); + FLAG(stream, ptr[j], 2, "color write enable"); + BITS(stream, ptr[j], 1, 0, "provoking vertex"); + j++; + } + + + PRINTF(stream, "\n"); + + assert(j == len); + + stream->offset += len * sizeof(unsigned); + + return TRUE; +} + + + +static boolean debug_load_indirect( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned bits = (ptr[0] >> 8) & 0x3f; + unsigned i, j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + for (i = 0; i < 6; i++) { + if (bits & (1<offset += len * sizeof(unsigned); + + return TRUE; +} + +static void BR13( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + FLAG(stream, val, 30, "clipping enable"); + BITS(stream, val, 25, 24, "color depth (3==32bpp)"); + BITS(stream, val, 23, 16, "raster op"); + BITS(stream, val, 15, 0, "dest pitch"); +} + + +static void BR22( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + BITS(stream, val, 31, 16, "dest y1"); + BITS(stream, val, 15, 0, "dest x1"); +} + +static void BR23( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + BITS(stream, val, 31, 16, "dest y2"); + BITS(stream, val, 15, 0, "dest x2"); +} + +static void BR09( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x -- dest address\n", val); +} + +static void BR26( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + BITS(stream, val, 31, 16, "src y1"); + BITS(stream, val, 15, 0, "src x1"); +} + +static void BR11( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + BITS(stream, val, 15, 0, "src pitch"); +} + +static void BR12( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x -- src address\n", val); +} + +static void BR16( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x -- color\n", val); +} + +static boolean debug_copy_blit( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + BR13(stream, ptr[j++]); + BR22(stream, ptr[j++]); + BR23(stream, ptr[j++]); + BR09(stream, ptr[j++]); + BR26(stream, ptr[j++]); + BR11(stream, ptr[j++]); + BR12(stream, ptr[j++]); + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_color_blit( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + BR13(stream, ptr[j++]); + BR22(stream, ptr[j++]); + BR23(stream, ptr[j++]); + BR09(stream, ptr[j++]); + BR16(stream, ptr[j++]); + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_modes4( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 21, 18, "logicop func"); + FLAG(stream, ptr[j], 17, "stencil test mask modify-enable"); + FLAG(stream, ptr[j], 16, "stencil write mask modify-enable"); + BITS(stream, ptr[j], 15, 8, "stencil test mask"); + BITS(stream, ptr[j], 7, 0, "stencil write mask"); + j++; + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_map_state( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + { + PRINTF(stream, "\t0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 15, 0, "map mask"); + j++; + } + + while (j < len) { + { + PRINTF(stream, "\t TMn.0: 0x%08x\n", ptr[j]); + PRINTF(stream, "\t map address: 0x%08x\n", (ptr[j] & ~0x3)); + FLAG(stream, ptr[j], 1, "vertical line stride"); + FLAG(stream, ptr[j], 0, "vertical line stride offset"); + j++; + } + + { + PRINTF(stream, "\t TMn.1: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 21, "height"); + BITS(stream, ptr[j], 20, 10, "width"); + BITS(stream, ptr[j], 9, 7, "surface format"); + BITS(stream, ptr[j], 6, 3, "texel format"); + FLAG(stream, ptr[j], 2, "use fence regs"); + FLAG(stream, ptr[j], 1, "tiled surface"); + FLAG(stream, ptr[j], 0, "tile walk ymajor"); + j++; + } + { + PRINTF(stream, "\t TMn.2: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 21, "dword pitch"); + BITS(stream, ptr[j], 20, 15, "cube face enables"); + BITS(stream, ptr[j], 14, 9, "max lod"); + FLAG(stream, ptr[j], 8, "mip layout right"); + BITS(stream, ptr[j], 7, 0, "depth"); + j++; + } + } + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_sampler_state( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + { + PRINTF(stream, "\t0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 15, 0, "sampler mask"); + j++; + } + + while (j < len) { + { + PRINTF(stream, "\t TSn.0: 0x%08x\n", ptr[j]); + FLAG(stream, ptr[j], 31, "reverse gamma"); + FLAG(stream, ptr[j], 30, "planar to packed"); + FLAG(stream, ptr[j], 29, "yuv->rgb"); + BITS(stream, ptr[j], 28, 27, "chromakey index"); + BITS(stream, ptr[j], 26, 22, "base mip level"); + BITS(stream, ptr[j], 21, 20, "mip mode filter"); + BITS(stream, ptr[j], 19, 17, "mag mode filter"); + BITS(stream, ptr[j], 16, 14, "min mode filter"); + BITS(stream, ptr[j], 13, 5, "lod bias (s4.4)"); + FLAG(stream, ptr[j], 4, "shadow enable"); + FLAG(stream, ptr[j], 3, "max-aniso-4"); + BITS(stream, ptr[j], 2, 0, "shadow func"); + j++; + } + + { + PRINTF(stream, "\t TSn.1: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 24, "min lod"); + MBZ( ptr[j], 23, 18 ); + FLAG(stream, ptr[j], 17, "kill pixel enable"); + FLAG(stream, ptr[j], 16, "keyed tex filter mode"); + FLAG(stream, ptr[j], 15, "chromakey enable"); + BITS(stream, ptr[j], 14, 12, "tcx wrap mode"); + BITS(stream, ptr[j], 11, 9, "tcy wrap mode"); + BITS(stream, ptr[j], 8, 6, "tcz wrap mode"); + FLAG(stream, ptr[j], 5, "normalized coords"); + BITS(stream, ptr[j], 4, 1, "map (surface) index"); + FLAG(stream, ptr[j], 0, "EAST deinterlacer enable"); + j++; + } + { + PRINTF(stream, "\t TSn.2: 0x%08x (default color)\n", ptr[j]); + j++; + } + } + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_dest_vars( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + { + PRINTF(stream, "\t0x%08x\n", ptr[j]); + FLAG(stream, ptr[j], 31, "early classic ztest"); + FLAG(stream, ptr[j], 30, "opengl tex default color"); + FLAG(stream, ptr[j], 29, "bypass iz"); + FLAG(stream, ptr[j], 28, "lod preclamp"); + BITS(stream, ptr[j], 27, 26, "dither pattern"); + FLAG(stream, ptr[j], 25, "linear gamma blend"); + FLAG(stream, ptr[j], 24, "debug dither"); + BITS(stream, ptr[j], 23, 20, "dstorg x"); + BITS(stream, ptr[j], 19, 16, "dstorg y"); + MBZ (ptr[j], 15, 15 ); + BITS(stream, ptr[j], 14, 12, "422 write select"); + BITS(stream, ptr[j], 11, 8, "cbuf format"); + BITS(stream, ptr[j], 3, 2, "zbuf format"); + FLAG(stream, ptr[j], 1, "vert line stride"); + FLAG(stream, ptr[j], 1, "vert line stride offset"); + j++; + } + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_buf_info( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + { + PRINTF(stream, "\t0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 28, 28, "aux buffer id"); + BITS(stream, ptr[j], 27, 24, "buffer id (7=depth, 3=back)"); + FLAG(stream, ptr[j], 23, "use fence regs"); + FLAG(stream, ptr[j], 22, "tiled surface"); + FLAG(stream, ptr[j], 21, "tile walk ymajor"); + MBZ (ptr[j], 20, 14); + BITS(stream, ptr[j], 13, 2, "dword pitch"); + MBZ (ptr[j], 2, 0); + j++; + } + + PRINTF(stream, "\t0x%08x -- buffer base address\n", ptr[j++]); + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean i915_debug_packet( struct debug_stream *stream ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned cmd = *ptr; + + switch (((cmd >> 29) & 0x7)) { + case 0x0: + switch ((cmd >> 23) & 0x3f) { + case 0x0: + return debug(stream, "MI_NOOP", 1); + case 0x3: + return debug(stream, "MI_WAIT_FOR_EVENT", 1); + case 0x4: + return debug(stream, "MI_FLUSH", 1); + case 0xA: + debug(stream, "MI_BATCH_BUFFER_END", 1); + return FALSE; + case 0x22: + return debug(stream, "MI_LOAD_REGISTER_IMM", 3); + case 0x31: + return debug_chain(stream, "MI_BATCH_BUFFER_START", 2); + default: + (void)debug(stream, "UNKNOWN 0x0 case!", 1); + assert(0); + break; + } + break; + case 0x1: + (void) debug(stream, "UNKNOWN 0x1 case!", 1); + assert(0); + break; + case 0x2: + switch ((cmd >> 22) & 0xff) { + case 0x50: + return debug_color_blit(stream, "XY_COLOR_BLT", (cmd & 0xff) + 2); + case 0x53: + return debug_copy_blit(stream, "XY_SRC_COPY_BLT", (cmd & 0xff) + 2); + default: + return debug(stream, "blit command", (cmd & 0xff) + 2); + } + break; + case 0x3: + switch ((cmd >> 24) & 0x1f) { + case 0x6: + return debug(stream, "3DSTATE_ANTI_ALIASING", 1); + case 0x7: + return debug(stream, "3DSTATE_RASTERIZATION_RULES", 1); + case 0x8: + return debug(stream, "3DSTATE_BACKFACE_STENCIL_OPS", 2); + case 0x9: + return debug(stream, "3DSTATE_BACKFACE_STENCIL_MASKS", 1); + case 0xb: + return debug(stream, "3DSTATE_INDEPENDENT_ALPHA_BLEND", 1); + case 0xc: + return debug(stream, "3DSTATE_MODES5", 1); + case 0xd: + return debug_modes4(stream, "3DSTATE_MODES4", 1); + case 0x15: + return debug(stream, "3DSTATE_FOG_COLOR", 1); + case 0x16: + return debug(stream, "3DSTATE_COORD_SET_BINDINGS", 1); + case 0x1c: + /* 3DState16NP */ + switch((cmd >> 19) & 0x1f) { + case 0x10: + return debug(stream, "3DSTATE_SCISSOR_ENABLE", 1); + case 0x11: + return debug(stream, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE", 1); + default: + (void) debug(stream, "UNKNOWN 0x1c case!", 1); + assert(0); + break; + } + break; + case 0x1d: + /* 3DStateMW */ + switch ((cmd >> 16) & 0xff) { + case 0x0: + return debug_map_state(stream, "3DSTATE_MAP_STATE", (cmd & 0x1f) + 2); + case 0x1: + return debug_sampler_state(stream, "3DSTATE_SAMPLER_STATE", (cmd & 0x1f) + 2); + case 0x4: + return debug_load_immediate(stream, "3DSTATE_LOAD_STATE_IMMEDIATE", (cmd & 0xf) + 2); + case 0x5: + return debug_program(stream, "3DSTATE_PIXEL_SHADER_PROGRAM", (cmd & 0x1ff) + 2); + case 0x6: + return debug(stream, "3DSTATE_PIXEL_SHADER_CONSTANTS", (cmd & 0xff) + 2); + case 0x7: + return debug_load_indirect(stream, "3DSTATE_LOAD_INDIRECT", (cmd & 0xff) + 2); + case 0x80: + return debug(stream, "3DSTATE_DRAWING_RECTANGLE", (cmd & 0xffff) + 2); + case 0x81: + return debug(stream, "3DSTATE_SCISSOR_RECTANGLE", (cmd & 0xffff) + 2); + case 0x83: + return debug(stream, "3DSTATE_SPAN_STIPPLE", (cmd & 0xffff) + 2); + case 0x85: + return debug_dest_vars(stream, "3DSTATE_DEST_BUFFER_VARS", (cmd & 0xffff) + 2); + case 0x88: + return debug(stream, "3DSTATE_CONSTANT_BLEND_COLOR", (cmd & 0xffff) + 2); + case 0x89: + return debug(stream, "3DSTATE_FOG_MODE", (cmd & 0xffff) + 2); + case 0x8e: + return debug_buf_info(stream, "3DSTATE_BUFFER_INFO", (cmd & 0xffff) + 2); + case 0x97: + return debug(stream, "3DSTATE_DEPTH_OFFSET_SCALE", (cmd & 0xffff) + 2); + case 0x98: + return debug(stream, "3DSTATE_DEFAULT_Z", (cmd & 0xffff) + 2); + case 0x99: + return debug(stream, "3DSTATE_DEFAULT_DIFFUSE", (cmd & 0xffff) + 2); + case 0x9a: + return debug(stream, "3DSTATE_DEFAULT_SPECULAR", (cmd & 0xffff) + 2); + case 0x9c: + return debug(stream, "3DSTATE_CLEAR_PARAMETERS", (cmd & 0xffff) + 2); + default: + assert(0); + return 0; + } + break; + case 0x1e: + if (cmd & (1 << 23)) + return debug(stream, "???", (cmd & 0xffff) + 1); + else + return debug(stream, "", 1); + break; + case 0x1f: + if ((cmd & (1 << 23)) == 0) + return debug_prim(stream, "3DPRIM (inline)", 1, (cmd & 0x1ffff) + 2); + else if (cmd & (1 << 17)) + { + if ((cmd & 0xffff) == 0) + return debug_variable_length_prim(stream); + else + return debug_prim(stream, "3DPRIM (indexed)", 0, (((cmd & 0xffff) + 1) / 2) + 1); + } + else + return debug_prim(stream, "3DPRIM (indirect sequential)", 0, 2); + break; + default: + return debug(stream, "", 0); + } + default: + assert(0); + return 0; + } + + assert(0); + return 0; +} + + + +void +i915_dump_batchbuffer( struct intel_batchbuffer *batch ) +{ + struct debug_stream stream; + unsigned *start = (unsigned*)batch->map; + unsigned *end = (unsigned*)batch->ptr; + unsigned long bytes = (unsigned long) (end - start) * 4; + boolean done = FALSE; + + stream.offset = 0; + stream.ptr = (char *)start; + stream.print_addresses = 0; + + if (!start || !end) { + debug_printf( "\n\nBATCH: ???\n"); + return; + } + + debug_printf( "\n\nBATCH: (%d)\n", bytes / 4); + + while (!done && + stream.offset < bytes) + { + if (!i915_debug_packet( &stream )) + break; + + assert(stream.offset <= bytes && + stream.offset >= 0); + } + + debug_printf( "END-BATCH\n\n\n"); +} + + diff --git a/src/gallium/drivers/i915/i915_debug.h b/src/gallium/drivers/i915/i915_debug.h new file mode 100644 index 0000000000..dd9b86e17b --- /dev/null +++ b/src/gallium/drivers/i915/i915_debug.h @@ -0,0 +1,114 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#ifndef I915_DEBUG_H +#define I915_DEBUG_H + +#include + +struct i915_context; + +struct debug_stream +{ + unsigned offset; /* current gtt offset */ + char *ptr; /* pointer to gtt offset zero */ + char *end; /* pointer to gtt offset zero */ + unsigned print_addresses; +}; + + +/* Internal functions + */ +void i915_disassemble_program(struct debug_stream *stream, + const unsigned *program, unsigned sz); + +void i915_print_ureg(const char *msg, unsigned ureg); + + +#define DEBUG_BATCH 0x1 +#define DEBUG_BLIT 0x2 +#define DEBUG_BUFFER 0x4 +#define DEBUG_CONSTANTS 0x8 +#define DEBUG_CONTEXT 0x10 +#define DEBUG_DRAW 0x20 +#define DEBUG_DYNAMIC 0x40 +#define DEBUG_FLUSH 0x80 +#define DEBUG_MAP 0x100 +#define DEBUG_PROGRAM 0x200 +#define DEBUG_REGIONS 0x400 +#define DEBUG_SAMPLER 0x800 +#define DEBUG_STATIC 0x1000 +#define DEBUG_SURFACE 0x2000 +#define DEBUG_WINSYS 0x4000 + +#include "pipe/p_compiler.h" + +#if defined(DEBUG) && defined(FILE_DEBUG_FLAG) + +#include "pipe/internal/p_winsys_screen.h" + +static INLINE void +I915_DBG( + struct i915_context *i915, + const char *fmt, + ... ) +{ + if ((i915)->debug & FILE_DEBUG_FLAG) { + va_list args; + + va_start( args, fmt ); + debug_vprintf( fmt, args ); + va_end( args ); + } +} + +#else + +static INLINE void +I915_DBG( + struct i915_context *i915, + const char *fmt, + ... ) +{ + (void) i915; + (void) fmt; +} + +#endif + + +struct intel_batchbuffer; + +void i915_dump_batchbuffer( struct intel_batchbuffer *i915 ); + +void i915_debug_init( struct i915_context *i915 ); + + +#endif diff --git a/src/gallium/drivers/i915/i915_debug_fp.c b/src/gallium/drivers/i915/i915_debug_fp.c new file mode 100644 index 0000000000..9c5b117b6d --- /dev/null +++ b/src/gallium/drivers/i915/i915_debug_fp.c @@ -0,0 +1,363 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "i915_reg.h" +#include "i915_debug.h" +#include "pipe/internal/p_winsys_screen.h" +#include "util/u_memory.h" + + +static void +PRINTF( + struct debug_stream *stream, + const char *fmt, + ... ) +{ + va_list args; + + va_start( args, fmt ); + debug_vprintf( fmt, args ); + va_end( args ); +} + + +static const char *opcodes[0x20] = { + "NOP", + "ADD", + "MOV", + "MUL", + "MAD", + "DP2ADD", + "DP3", + "DP4", + "FRC", + "RCP", + "RSQ", + "EXP", + "LOG", + "CMP", + "MIN", + "MAX", + "FLR", + "MOD", + "TRC", + "SGE", + "SLT", + "TEXLD", + "TEXLDP", + "TEXLDB", + "TEXKILL", + "DCL", + "0x1a", + "0x1b", + "0x1c", + "0x1d", + "0x1e", + "0x1f", +}; + + +static const int args[0x20] = { + 0, /* 0 nop */ + 2, /* 1 add */ + 1, /* 2 mov */ + 2, /* 3 m ul */ + 3, /* 4 mad */ + 3, /* 5 dp2add */ + 2, /* 6 dp3 */ + 2, /* 7 dp4 */ + 1, /* 8 frc */ + 1, /* 9 rcp */ + 1, /* a rsq */ + 1, /* b exp */ + 1, /* c log */ + 3, /* d cmp */ + 2, /* e min */ + 2, /* f max */ + 1, /* 10 flr */ + 1, /* 11 mod */ + 1, /* 12 trc */ + 2, /* 13 sge */ + 2, /* 14 slt */ + 1, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, +}; + + +static const char *regname[0x8] = { + "R", + "T", + "CONST", + "S", + "OC", + "OD", + "U", + "UNKNOWN", +}; + +static void +print_reg_type_nr(struct debug_stream *stream, unsigned type, unsigned nr) +{ + switch (type) { + case REG_TYPE_T: + switch (nr) { + case T_DIFFUSE: + PRINTF(stream, "T_DIFFUSE"); + return; + case T_SPECULAR: + PRINTF(stream, "T_SPECULAR"); + return; + case T_FOG_W: + PRINTF(stream, "T_FOG_W"); + return; + default: + PRINTF(stream, "T_TEX%d", nr); + return; + } + case REG_TYPE_OC: + if (nr == 0) { + PRINTF(stream, "oC"); + return; + } + break; + case REG_TYPE_OD: + if (nr == 0) { + PRINTF(stream, "oD"); + return; + } + break; + default: + break; + } + + PRINTF(stream, "%s[%d]", regname[type], nr); +} + +#define REG_SWIZZLE_MASK 0x7777 +#define REG_NEGATE_MASK 0x8888 + +#define REG_SWIZZLE_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) | \ + (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) | \ + (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) | \ + (SRC_W << A2_SRC2_CHANNEL_W_SHIFT)) + + +static void +print_reg_neg_swizzle(struct debug_stream *stream, unsigned reg) +{ + int i; + + if ((reg & REG_SWIZZLE_MASK) == REG_SWIZZLE_XYZW && + (reg & REG_NEGATE_MASK) == 0) + return; + + PRINTF(stream, "."); + + for (i = 3; i >= 0; i--) { + if (reg & (1 << ((i * 4) + 3))) + PRINTF(stream, "-"); + + switch ((reg >> (i * 4)) & 0x7) { + case 0: + PRINTF(stream, "x"); + break; + case 1: + PRINTF(stream, "y"); + break; + case 2: + PRINTF(stream, "z"); + break; + case 3: + PRINTF(stream, "w"); + break; + case 4: + PRINTF(stream, "0"); + break; + case 5: + PRINTF(stream, "1"); + break; + default: + PRINTF(stream, "?"); + break; + } + } +} + + +static void +print_src_reg(struct debug_stream *stream, unsigned dword) +{ + unsigned nr = (dword >> A2_SRC2_NR_SHIFT) & REG_NR_MASK; + unsigned type = (dword >> A2_SRC2_TYPE_SHIFT) & REG_TYPE_MASK; + print_reg_type_nr(stream, type, nr); + print_reg_neg_swizzle(stream, dword); +} + + +static void +print_dest_reg(struct debug_stream *stream, unsigned dword) +{ + unsigned nr = (dword >> A0_DEST_NR_SHIFT) & REG_NR_MASK; + unsigned type = (dword >> A0_DEST_TYPE_SHIFT) & REG_TYPE_MASK; + print_reg_type_nr(stream, type, nr); + if ((dword & A0_DEST_CHANNEL_ALL) == A0_DEST_CHANNEL_ALL) + return; + PRINTF(stream, "."); + if (dword & A0_DEST_CHANNEL_X) + PRINTF(stream, "x"); + if (dword & A0_DEST_CHANNEL_Y) + PRINTF(stream, "y"); + if (dword & A0_DEST_CHANNEL_Z) + PRINTF(stream, "z"); + if (dword & A0_DEST_CHANNEL_W) + PRINTF(stream, "w"); +} + + +#define GET_SRC0_REG(r0, r1) ((r0<<14)|(r1>>A1_SRC0_CHANNEL_W_SHIFT)) +#define GET_SRC1_REG(r0, r1) ((r0<<8)|(r1>>A2_SRC1_CHANNEL_W_SHIFT)) +#define GET_SRC2_REG(r) (r) + + +static void +print_arith_op(struct debug_stream *stream, + unsigned opcode, const unsigned * program) +{ + if (opcode != A0_NOP) { + print_dest_reg(stream, program[0]); + if (program[0] & A0_DEST_SATURATE) + PRINTF(stream, " = SATURATE "); + else + PRINTF(stream, " = "); + } + + PRINTF(stream, "%s ", opcodes[opcode]); + + print_src_reg(stream, GET_SRC0_REG(program[0], program[1])); + if (args[opcode] == 1) { + PRINTF(stream, "\n"); + return; + } + + PRINTF(stream, ", "); + print_src_reg(stream, GET_SRC1_REG(program[1], program[2])); + if (args[opcode] == 2) { + PRINTF(stream, "\n"); + return; + } + + PRINTF(stream, ", "); + print_src_reg(stream, GET_SRC2_REG(program[2])); + PRINTF(stream, "\n"); + return; +} + + +static void +print_tex_op(struct debug_stream *stream, + unsigned opcode, const unsigned * program) +{ + print_dest_reg(stream, program[0] | A0_DEST_CHANNEL_ALL); + PRINTF(stream, " = "); + + PRINTF(stream, "%s ", opcodes[opcode]); + + PRINTF(stream, "S[%d],", program[0] & T0_SAMPLER_NR_MASK); + + print_reg_type_nr(stream, + (program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) & + REG_TYPE_MASK, + (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK); + PRINTF(stream, "\n"); +} + +static void +print_texkil_op(struct debug_stream *stream, + unsigned opcode, const unsigned * program) +{ + PRINTF(stream, "TEXKIL "); + + print_reg_type_nr(stream, + (program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) & + REG_TYPE_MASK, + (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK); + PRINTF(stream, "\n"); +} + +static void +print_dcl_op(struct debug_stream *stream, + unsigned opcode, const unsigned * program) +{ + PRINTF(stream, "%s ", opcodes[opcode]); + print_dest_reg(stream, + program[0] | A0_DEST_CHANNEL_ALL); + PRINTF(stream, "\n"); +} + + +void +i915_disassemble_program(struct debug_stream *stream, + const unsigned * program, unsigned sz) +{ + unsigned i; + + PRINTF(stream, "\t\tBEGIN\n"); + + assert((program[0] & 0x1ff) + 2 == sz); + + program++; + for (i = 1; i < sz; i += 3, program += 3) { + unsigned opcode = program[0] & (0x1f << 24); + + PRINTF(stream, "\t\t"); + + if ((int) opcode >= A0_NOP && opcode <= A0_SLT) + print_arith_op(stream, opcode >> 24, program); + else if (opcode >= T0_TEXLD && opcode < T0_TEXKILL) + print_tex_op(stream, opcode >> 24, program); + else if (opcode == T0_TEXKILL) + print_texkil_op(stream, opcode >> 24, program); + else if (opcode == D0_DCL) + print_dcl_op(stream, opcode >> 24, program); + else + PRINTF(stream, "Unknown opcode 0x%x\n", opcode); + } + + PRINTF(stream, "\t\tEND\n\n"); +} + + diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c new file mode 100644 index 0000000000..1582168eba --- /dev/null +++ b/src/gallium/drivers/i915/i915_flush.c @@ -0,0 +1,86 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Keith Whitwell + */ + + +#include "pipe/p_defines.h" +#include "draw/draw_context.h" +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_batch.h" + + +static void i915_flush( struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence ) +{ + struct i915_context *i915 = i915_context(pipe); + + draw_flush(i915->draw); + +#if 0 + /* Do we need to emit an MI_FLUSH command to flush the hardware + * caches? + */ + if (flags & (PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE)) { + unsigned flush = MI_FLUSH; + + if (!(flags & PIPE_FLUSH_RENDER_CACHE)) + flush |= INHIBIT_FLUSH_RENDER_CACHE; + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) + flush |= FLUSH_MAP_CACHE; + + if (!BEGIN_BATCH(1, 0)) { + FLUSH_BATCH(NULL); + assert(BEGIN_BATCH(1, 0)); + } + OUT_BATCH( flush ); + } +#endif + +#if 0 + if (i915->batch->map == i915->batch->ptr) { + return; + } +#endif + + /* If there are no flags, just flush pending commands to hardware: + */ + FLUSH_BATCH(fence); + i915->vbo_flushed = 1; +} + + + +void i915_init_flush_functions( struct i915_context *i915 ) +{ + i915->base.flush = i915_flush; +} diff --git a/src/gallium/drivers/i915/i915_fpc.h b/src/gallium/drivers/i915/i915_fpc.h new file mode 100644 index 0000000000..2f0f99d046 --- /dev/null +++ b/src/gallium/drivers/i915/i915_fpc.h @@ -0,0 +1,207 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef I915_FPC_H +#define I915_FPC_H + + +#include "i915_context.h" +#include "i915_reg.h" + + + +#define I915_PROGRAM_SIZE 192 + + + +/** + * Program translation state + */ +struct i915_fp_compile { + struct i915_fragment_shader *shader; /* the shader we're compiling */ + + boolean used_constants[I915_MAX_CONSTANT]; + + /** maps TGSI immediate index to constant slot */ + uint num_immediates; + uint immediates_map[I915_MAX_CONSTANT]; + float immediates[I915_MAX_CONSTANT][4]; + + boolean first_instruction; + + uint declarations[I915_PROGRAM_SIZE]; + uint program[I915_PROGRAM_SIZE]; + + uint *csr; /**< Cursor, points into program. */ + + uint *decl; /**< Cursor, points into declarations. */ + + uint decl_s; /**< flags for which s regs need to be decl'd */ + uint decl_t; /**< flags for which t regs need to be decl'd */ + + uint temp_flag; /**< Tracks temporary regs which are in use */ + uint utemp_flag; /**< Tracks TYPE_U temporary regs which are in use */ + + uint nr_tex_indirect; + uint nr_tex_insn; + uint nr_alu_insn; + uint nr_decl_insn; + + boolean error; /**< Set if i915_program_error() is called */ + uint wpos_tex; + uint NumNativeInstructions; + uint NumNativeAluInstructions; + uint NumNativeTexInstructions; + uint NumNativeTexIndirections; +}; + + +/* Having zero and one in here makes the definition of swizzle a lot + * easier. + */ +#define UREG_TYPE_SHIFT 29 +#define UREG_NR_SHIFT 24 +#define UREG_CHANNEL_X_NEGATE_SHIFT 23 +#define UREG_CHANNEL_X_SHIFT 20 +#define UREG_CHANNEL_Y_NEGATE_SHIFT 19 +#define UREG_CHANNEL_Y_SHIFT 16 +#define UREG_CHANNEL_Z_NEGATE_SHIFT 15 +#define UREG_CHANNEL_Z_SHIFT 12 +#define UREG_CHANNEL_W_NEGATE_SHIFT 11 +#define UREG_CHANNEL_W_SHIFT 8 +#define UREG_CHANNEL_ZERO_NEGATE_MBZ 5 +#define UREG_CHANNEL_ZERO_SHIFT 4 +#define UREG_CHANNEL_ONE_NEGATE_MBZ 1 +#define UREG_CHANNEL_ONE_SHIFT 0 + +#define UREG_BAD 0xffffffff /* not a valid ureg */ + +#define X SRC_X +#define Y SRC_Y +#define Z SRC_Z +#define W SRC_W +#define ZERO SRC_ZERO +#define ONE SRC_ONE + +/* Construct a ureg: + */ +#define UREG( type, nr ) (((type)<< UREG_TYPE_SHIFT) | \ + ((nr) << UREG_NR_SHIFT) | \ + (X << UREG_CHANNEL_X_SHIFT) | \ + (Y << UREG_CHANNEL_Y_SHIFT) | \ + (Z << UREG_CHANNEL_Z_SHIFT) | \ + (W << UREG_CHANNEL_W_SHIFT) | \ + (ZERO << UREG_CHANNEL_ZERO_SHIFT) | \ + (ONE << UREG_CHANNEL_ONE_SHIFT)) + +#define GET_CHANNEL_SRC( reg, channel ) ((reg<<(channel*4)) & (0xf<<20)) +#define CHANNEL_SRC( src, channel ) (src>>(channel*4)) + +#define GET_UREG_TYPE(reg) (((reg)>>UREG_TYPE_SHIFT)®_TYPE_MASK) +#define GET_UREG_NR(reg) (((reg)>>UREG_NR_SHIFT)®_NR_MASK) + + + +#define UREG_XYZW_CHANNEL_MASK 0x00ffff00 + +/* One neat thing about the UREG representation: + */ +static INLINE int +swizzle(int reg, uint x, uint y, uint z, uint w) +{ + assert(x <= SRC_ONE); + assert(y <= SRC_ONE); + assert(z <= SRC_ONE); + assert(w <= SRC_ONE); + return ((reg & ~UREG_XYZW_CHANNEL_MASK) | + CHANNEL_SRC(GET_CHANNEL_SRC(reg, x), 0) | + CHANNEL_SRC(GET_CHANNEL_SRC(reg, y), 1) | + CHANNEL_SRC(GET_CHANNEL_SRC(reg, z), 2) | + CHANNEL_SRC(GET_CHANNEL_SRC(reg, w), 3)); +} + + + +/*********************************************************************** + * Public interface for the compiler + */ +extern void +i915_translate_fragment_program( struct i915_context *i915, + struct i915_fragment_shader *fs); + + + +extern uint i915_get_temp(struct i915_fp_compile *p); +extern uint i915_get_utemp(struct i915_fp_compile *p); +extern void i915_release_utemps(struct i915_fp_compile *p); + + +extern uint i915_emit_texld(struct i915_fp_compile *p, + uint dest, + uint destmask, + uint sampler, uint coord, uint op); + +extern uint i915_emit_arith(struct i915_fp_compile *p, + uint op, + uint dest, + uint mask, + uint saturate, + uint src0, uint src1, uint src2); + +extern uint i915_emit_decl(struct i915_fp_compile *p, + uint type, uint nr, uint d0_flags); + + +extern uint i915_emit_const1f(struct i915_fp_compile *p, float c0); + +extern uint i915_emit_const2f(struct i915_fp_compile *p, + float c0, float c1); + +extern uint i915_emit_const4fv(struct i915_fp_compile *p, + const float * c); + +extern uint i915_emit_const4f(struct i915_fp_compile *p, + float c0, float c1, + float c2, float c3); + + +/*====================================================================== + * i915_fpc_debug.c + */ +extern void i915_disassemble_program(const uint * program, uint sz); + + +/*====================================================================== + * i915_fpc_translate.c + */ + +extern void +i915_program_error(struct i915_fp_compile *p, const char *msg, ...); + + +#endif diff --git a/src/gallium/drivers/i915/i915_fpc_emit.c b/src/gallium/drivers/i915/i915_fpc_emit.c new file mode 100644 index 0000000000..b054ce41d3 --- /dev/null +++ b/src/gallium/drivers/i915/i915_fpc_emit.c @@ -0,0 +1,375 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_fpc.h" +#include "util/u_math.h" + + +#define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define D0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define T0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define A0_SRC0( reg ) (((reg)&UREG_MASK)>>UREG_A0_SRC0_SHIFT_LEFT) +#define A1_SRC0( reg ) (((reg)&UREG_MASK)<>UREG_A1_SRC1_SHIFT_LEFT) +#define A2_SRC1( reg ) (((reg)&UREG_MASK)<>UREG_A2_SRC2_SHIFT_LEFT) + +/* These are special, and don't have swizzle/negate bits. + */ +#define T0_SAMPLER( reg ) (GET_UREG_NR(reg)<temp_flag); + if (!bit) { + i915_program_error(p, "i915_get_temp: out of temporaries\n"); + return 0; + } + + p->temp_flag |= 1 << (bit - 1); + return bit - 1; +} + + +static void +i915_release_temp(struct i915_fp_compile *p, int reg) +{ + p->temp_flag &= ~(1 << reg); +} + + +/** + * Get unpreserved temporary, a temp whose value is not preserved between + * PS program phases. + */ +uint +i915_get_utemp(struct i915_fp_compile * p) +{ + int bit = ffs(~p->utemp_flag); + if (!bit) { + i915_program_error(p, "i915_get_utemp: out of temporaries\n"); + return 0; + } + + p->utemp_flag |= 1 << (bit - 1); + return UREG(REG_TYPE_U, (bit - 1)); +} + +void +i915_release_utemps(struct i915_fp_compile *p) +{ + p->utemp_flag = ~0x7; +} + + +uint +i915_emit_decl(struct i915_fp_compile *p, + uint type, uint nr, uint d0_flags) +{ + uint reg = UREG(type, nr); + + if (type == REG_TYPE_T) { + if (p->decl_t & (1 << nr)) + return reg; + + p->decl_t |= (1 << nr); + } + else if (type == REG_TYPE_S) { + if (p->decl_s & (1 << nr)) + return reg; + + p->decl_s |= (1 << nr); + } + else + return reg; + + *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags); + *(p->decl++) = D1_MBZ; + *(p->decl++) = D2_MBZ; + + p->nr_decl_insn++; + return reg; +} + +uint +i915_emit_arith(struct i915_fp_compile * p, + uint op, + uint dest, + uint mask, + uint saturate, uint src0, uint src1, uint src2) +{ + uint c[3]; + uint nr_const = 0; + + assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); + dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)); + assert(dest); + + if (GET_UREG_TYPE(src0) == REG_TYPE_CONST) + c[nr_const++] = 0; + if (GET_UREG_TYPE(src1) == REG_TYPE_CONST) + c[nr_const++] = 1; + if (GET_UREG_TYPE(src2) == REG_TYPE_CONST) + c[nr_const++] = 2; + + /* Recursively call this function to MOV additional const values + * into temporary registers. Use utemp registers for this - + * currently shouldn't be possible to run out, but keep an eye on + * this. + */ + if (nr_const > 1) { + uint s[3], first, i, old_utemp_flag; + + s[0] = src0; + s[1] = src1; + s[2] = src2; + old_utemp_flag = p->utemp_flag; + + first = GET_UREG_NR(s[c[0]]); + for (i = 1; i < nr_const; i++) { + if (GET_UREG_NR(s[c[i]]) != first) { + uint tmp = i915_get_utemp(p); + + i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, + s[c[i]], 0, 0); + s[c[i]] = tmp; + } + } + + src0 = s[0]; + src1 = s[1]; + src2 = s[2]; + p->utemp_flag = old_utemp_flag; /* restore */ + } + + *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0)); + *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1)); + *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2)); + + p->nr_alu_insn++; + return dest; +} + + +/** + * Emit a texture load or texkill instruction. + * \param dest the dest i915 register + * \param destmask the dest register writemask + * \param sampler the i915 sampler register + * \param coord the i915 source texcoord operand + * \param opcode the instruction opcode + */ +uint i915_emit_texld( struct i915_fp_compile *p, + uint dest, + uint destmask, + uint sampler, + uint coord, + uint opcode ) +{ + const uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord)); + int temp = -1; + + if (coord != k) { + /* texcoord is swizzled or negated. Need to allocate a new temporary + * register (a utemp / unpreserved temp) won't do. + */ + uint tempReg; + + temp = i915_get_temp(p); /* get temp reg index */ + tempReg = UREG(REG_TYPE_R, temp); /* make i915 register */ + + i915_emit_arith( p, A0_MOV, + tempReg, A0_DEST_CHANNEL_ALL, /* dest reg, writemask */ + 0, /* saturate */ + coord, 0, 0 ); /* src0, src1, src2 */ + + /* new src texcoord is tempReg */ + coord = tempReg; + } + + /* Don't worry about saturate as we only support + */ + if (destmask != A0_DEST_CHANNEL_ALL) { + /* if not writing to XYZW... */ + uint tmp = i915_get_utemp(p); + i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode ); + i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 ); + /* XXX release utemp here? */ + } + else { + assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); + assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); + + /* is the sampler coord a texcoord input reg? */ + if (GET_UREG_TYPE(coord) != REG_TYPE_T) { + p->nr_tex_indirect++; + } + + *(p->csr++) = (opcode | + T0_DEST( dest ) | + T0_SAMPLER( sampler )); + + *(p->csr++) = T1_ADDRESS_REG( coord ); + *(p->csr++) = T2_MBZ; + + p->nr_tex_insn++; + } + + if (temp >= 0) + i915_release_temp(p, temp); + + return dest; +} + + +uint +i915_emit_const1f(struct i915_fp_compile * p, float c0) +{ + struct i915_fragment_shader *ifs = p->shader; + unsigned reg, idx; + + if (c0 == 0.0) + return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO); + if (c0 == 1.0) + return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE); + + for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { + if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER) + continue; + for (idx = 0; idx < 4; idx++) { + if (!(ifs->constant_flags[reg] & (1 << idx)) || + ifs->constants[reg][idx] == c0) { + ifs->constants[reg][idx] = c0; + ifs->constant_flags[reg] |= 1 << idx; + if (reg + 1 > ifs->num_constants) + ifs->num_constants = reg + 1; + return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE); + } + } + } + + i915_program_error(p, "i915_emit_const1f: out of constants\n"); + return 0; +} + +uint +i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) +{ + struct i915_fragment_shader *ifs = p->shader; + unsigned reg, idx; + + if (c0 == 0.0) + return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W); + if (c0 == 1.0) + return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W); + + if (c1 == 0.0) + return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W); + if (c1 == 1.0) + return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); + + for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { + if (ifs->constant_flags[reg] == 0xf || + ifs->constant_flags[reg] == I915_CONSTFLAG_USER) + continue; + for (idx = 0; idx < 3; idx++) { + if (!(ifs->constant_flags[reg] & (3 << idx))) { + ifs->constants[reg][idx + 0] = c0; + ifs->constants[reg][idx + 1] = c1; + ifs->constant_flags[reg] |= 3 << idx; + if (reg + 1 > ifs->num_constants) + ifs->num_constants = reg + 1; + return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE); + } + } + } + + i915_program_error(p, "i915_emit_const2f: out of constants\n"); + return 0; +} + + + +uint +i915_emit_const4f(struct i915_fp_compile * p, + float c0, float c1, float c2, float c3) +{ + struct i915_fragment_shader *ifs = p->shader; + unsigned reg; + + for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { + if (ifs->constant_flags[reg] == 0xf && + ifs->constants[reg][0] == c0 && + ifs->constants[reg][1] == c1 && + ifs->constants[reg][2] == c2 && + ifs->constants[reg][3] == c3) { + return UREG(REG_TYPE_CONST, reg); + } + else if (ifs->constant_flags[reg] == 0) { + + ifs->constants[reg][0] = c0; + ifs->constants[reg][1] = c1; + ifs->constants[reg][2] = c2; + ifs->constants[reg][3] = c3; + ifs->constant_flags[reg] = 0xf; + if (reg + 1 > ifs->num_constants) + ifs->num_constants = reg + 1; + return UREG(REG_TYPE_CONST, reg); + } + } + + i915_program_error(p, "i915_emit_const4f: out of constants\n"); + return 0; +} + + +uint +i915_emit_const4fv(struct i915_fp_compile * p, const float * c) +{ + return i915_emit_const4f(p, c[0], c[1], c[2], c[3]); +} diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c new file mode 100644 index 0000000000..89504ced27 --- /dev/null +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -0,0 +1,1202 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_fpc.h" + +#include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_string.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" + +#include "draw/draw_vertex.h" + + +/** + * Simple pass-through fragment shader to use when we don't have + * a real shader (or it fails to compile for some reason). + */ +static unsigned passthrough[] = +{ + _3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1), + + /* declare input color: + */ + (D0_DCL | + (REG_TYPE_T << D0_TYPE_SHIFT) | + (T_DIFFUSE << D0_NR_SHIFT) | + D0_CHANNEL_ALL), + 0, + 0, + + /* move to output color: + */ + (A0_MOV | + (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | + A0_DEST_CHANNEL_ALL | + (REG_TYPE_T << A0_SRC0_TYPE_SHIFT) | + (T_DIFFUSE << A0_SRC0_NR_SHIFT)), + 0x01230000, /* .xyzw */ + 0 +}; + + +/* 1, -1/3!, 1/5!, -1/7! */ +static const float sin_constants[4] = { 1.0, + -1.0f / (3 * 2 * 1), + 1.0f / (5 * 4 * 3 * 2 * 1), + -1.0f / (7 * 6 * 5 * 4 * 3 * 2 * 1) +}; + +/* 1, -1/2!, 1/4!, -1/6! */ +static const float cos_constants[4] = { 1.0, + -1.0f / (2 * 1), + 1.0f / (4 * 3 * 2 * 1), + -1.0f / (6 * 5 * 4 * 3 * 2 * 1) +}; + + + +/** + * component-wise negation of ureg + */ +static INLINE int +negate(int reg, int x, int y, int z, int w) +{ + /* Another neat thing about the UREG representation */ + return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) | + ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) | + ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) | + ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT)); +} + + +/** + * In the event of a translation failure, we'll generate a simple color + * pass-through program. + */ +static void +i915_use_passthrough_shader(struct i915_fragment_shader *fs) +{ + fs->program = (uint *) MALLOC(sizeof(passthrough)); + if (fs->program) { + memcpy(fs->program, passthrough, sizeof(passthrough)); + fs->program_len = Elements(passthrough); + } + fs->num_constants = 0; +} + + +void +i915_program_error(struct i915_fp_compile *p, const char *msg, ...) +{ + va_list args; + char buffer[1024]; + + debug_printf("i915_program_error: "); + va_start( args, msg ); + util_vsnprintf( buffer, sizeof(buffer), msg, args ); + va_end( args ); + debug_printf(buffer); + debug_printf("\n"); + + p->error = 1; +} + + + +/** + * Construct a ureg for the given source register. Will emit + * constants, apply swizzling and negation as needed. + */ +static uint +src_vector(struct i915_fp_compile *p, + const struct tgsi_full_src_register *source) +{ + uint index = source->SrcRegister.Index; + uint src = 0, sem_name, sem_ind; + + switch (source->SrcRegister.File) { + case TGSI_FILE_TEMPORARY: + if (source->SrcRegister.Index >= I915_MAX_TEMPORARY) { + i915_program_error(p, "Exceeded max temporary reg"); + return 0; + } + src = UREG(REG_TYPE_R, index); + break; + case TGSI_FILE_INPUT: + /* XXX: Packing COL1, FOGC into a single attribute works for + * texenv programs, but will fail for real fragment programs + * that use these attributes and expect them to be a full 4 + * components wide. Could use a texcoord to pass these + * attributes if necessary, but that won't work in the general + * case. + * + * We also use a texture coordinate to pass wpos when possible. + */ + + sem_name = p->shader->info.input_semantic_name[index]; + sem_ind = p->shader->info.input_semantic_index[index]; + + switch (sem_name) { + case TGSI_SEMANTIC_POSITION: + debug_printf("SKIP SEM POS\n"); + /* + assert(p->wpos_tex != -1); + src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL); + */ + break; + case TGSI_SEMANTIC_COLOR: + if (sem_ind == 0) { + src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); + } + else { + /* secondary color */ + assert(sem_ind == 1); + src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); + src = swizzle(src, X, Y, Z, ONE); + } + break; + case TGSI_SEMANTIC_FOG: + src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); + src = swizzle(src, W, W, W, W); + break; + case TGSI_SEMANTIC_GENERIC: + /* usually a texcoord */ + src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + sem_ind, D0_CHANNEL_ALL); + break; + default: + i915_program_error(p, "Bad source->Index"); + return 0; + } + break; + + case TGSI_FILE_IMMEDIATE: + assert(index < p->num_immediates); + index = p->immediates_map[index]; + /* fall-through */ + case TGSI_FILE_CONSTANT: + src = UREG(REG_TYPE_CONST, index); + break; + + default: + i915_program_error(p, "Bad source->File"); + return 0; + } + + if (source->SrcRegister.Extended) { + src = swizzle(src, + source->SrcRegisterExtSwz.ExtSwizzleX, + source->SrcRegisterExtSwz.ExtSwizzleY, + source->SrcRegisterExtSwz.ExtSwizzleZ, + source->SrcRegisterExtSwz.ExtSwizzleW); + } + else { + src = swizzle(src, + source->SrcRegister.SwizzleX, + source->SrcRegister.SwizzleY, + source->SrcRegister.SwizzleZ, + source->SrcRegister.SwizzleW); + } + + + /* There's both negate-all-components and per-component negation. + * Try to handle both here. + */ + { + int nx = source->SrcRegisterExtSwz.NegateX; + int ny = source->SrcRegisterExtSwz.NegateY; + int nz = source->SrcRegisterExtSwz.NegateZ; + int nw = source->SrcRegisterExtSwz.NegateW; + if (source->SrcRegister.Negate) { + nx = !nx; + ny = !ny; + nz = !nz; + nw = !nw; + } + src = negate(src, nx, ny, nz, nw); + } + + /* no abs() or post-abs negation */ +#if 0 + /* XXX assertions disabled to allow arbfplight.c to run */ + /* XXX enable these assertions, or fix things */ + assert(!source->SrcRegisterExtMod.Absolute); + assert(!source->SrcRegisterExtMod.Negate); +#endif + return src; +} + + +/** + * Construct a ureg for a destination register. + */ +static uint +get_result_vector(struct i915_fp_compile *p, + const struct tgsi_full_dst_register *dest) +{ + switch (dest->DstRegister.File) { + case TGSI_FILE_OUTPUT: + { + uint sem_name = p->shader->info.output_semantic_name[dest->DstRegister.Index]; + switch (sem_name) { + case TGSI_SEMANTIC_POSITION: + return UREG(REG_TYPE_OD, 0); + case TGSI_SEMANTIC_COLOR: + return UREG(REG_TYPE_OC, 0); + default: + i915_program_error(p, "Bad inst->DstReg.Index/semantics"); + return 0; + } + } + case TGSI_FILE_TEMPORARY: + return UREG(REG_TYPE_R, dest->DstRegister.Index); + default: + i915_program_error(p, "Bad inst->DstReg.File"); + return 0; + } +} + + +/** + * Compute flags for saturation and writemask. + */ +static uint +get_result_flags(const struct tgsi_full_instruction *inst) +{ + const uint writeMask + = inst->FullDstRegisters[0].DstRegister.WriteMask; + uint flags = 0x0; + + if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) + flags |= A0_DEST_SATURATE; + + if (writeMask & TGSI_WRITEMASK_X) + flags |= A0_DEST_CHANNEL_X; + if (writeMask & TGSI_WRITEMASK_Y) + flags |= A0_DEST_CHANNEL_Y; + if (writeMask & TGSI_WRITEMASK_Z) + flags |= A0_DEST_CHANNEL_Z; + if (writeMask & TGSI_WRITEMASK_W) + flags |= A0_DEST_CHANNEL_W; + + return flags; +} + + +/** + * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token + */ +static uint +translate_tex_src_target(struct i915_fp_compile *p, uint tex) +{ + switch (tex) { + case TGSI_TEXTURE_SHADOW1D: + /* fall-through */ + case TGSI_TEXTURE_1D: + return D0_SAMPLE_TYPE_2D; + + case TGSI_TEXTURE_SHADOW2D: + /* fall-through */ + case TGSI_TEXTURE_2D: + return D0_SAMPLE_TYPE_2D; + + case TGSI_TEXTURE_SHADOWRECT: + /* fall-through */ + case TGSI_TEXTURE_RECT: + return D0_SAMPLE_TYPE_2D; + + case TGSI_TEXTURE_3D: + return D0_SAMPLE_TYPE_VOLUME; + + case TGSI_TEXTURE_CUBE: + return D0_SAMPLE_TYPE_CUBE; + + default: + i915_program_error(p, "TexSrc type"); + return 0; + } +} + + +/** + * Generate texel lookup instruction. + */ +static void +emit_tex(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst, + uint opcode) +{ + uint texture = inst->InstructionExtTexture.Texture; + uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + uint tex = translate_tex_src_target( p, texture ); + uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); + uint coord = src_vector( p, &inst->FullSrcRegisters[0]); + + i915_emit_texld( p, + get_result_vector( p, &inst->FullDstRegisters[0] ), + get_result_flags( inst ), + sampler, + coord, + opcode); +} + + +/** + * Generate a simple arithmetic instruction + * \param opcode the i915 opcode + * \param numArgs the number of input/src arguments + */ +static void +emit_simple_arith(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst, + uint opcode, uint numArgs) +{ + uint arg1, arg2, arg3; + + assert(numArgs <= 3); + + arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->FullSrcRegisters[0] ); + arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->FullSrcRegisters[1] ); + arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->FullSrcRegisters[2] ); + + i915_emit_arith( p, + opcode, + get_result_vector( p, &inst->FullDstRegisters[0]), + get_result_flags( inst ), 0, + arg1, + arg2, + arg3 ); +} + + +/** As above, but swap the first two src regs */ +static void +emit_simple_arith_swap2(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst, + uint opcode, uint numArgs) +{ + struct tgsi_full_instruction inst2; + + assert(numArgs == 2); + + /* transpose first two registers */ + inst2 = *inst; + inst2.FullSrcRegisters[0] = inst->FullSrcRegisters[1]; + inst2.FullSrcRegisters[1] = inst->FullSrcRegisters[0]; + + emit_simple_arith(p, &inst2, opcode, numArgs); +} + + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +/* + * Translate TGSI instruction to i915 instruction. + * + * Possible concerns: + * + * SIN, COS -- could use another taylor step? + * LIT -- results seem a little different to sw mesa + * LOG -- different to mesa on negative numbers, but this is conformant. + */ +static void +i915_translate_instruction(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst) +{ + uint writemask; + uint src0, src1, src2, flags; + uint tmp = 0; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + i915_emit_arith(p, + A0_MAX, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + src0, negate(src0, 1, 1, 1, 1), 0); + break; + + case TGSI_OPCODE_ADD: + emit_simple_arith(p, inst, A0_ADD, 2); + break; + + case TGSI_OPCODE_CMP: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src2 = src_vector(p, &inst->FullSrcRegisters[2]); + i915_emit_arith(p, A0_CMP, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), + 0, src0, src2, src1); /* NOTE: order of src2, src1 */ + break; + + case TGSI_OPCODE_COS: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_X, 0, + src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); + + i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); + + /* By choosing different taylor constants, could get rid of this mul: + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_X, 0, + tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0); + + /* + * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 + * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1 + * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 + * result = DP4 t0, cos_constants + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XY, 0, + swizzle(tmp, X, X, ONE, ONE), + swizzle(tmp, X, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XYZ, 0, + swizzle(tmp, X, Y, X, ONE), + swizzle(tmp, X, X, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XYZ, 0, + swizzle(tmp, X, X, Z, ONE), + swizzle(tmp, Z, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(tmp, ONE, Z, Y, X), + i915_emit_const4fv(p, cos_constants), 0); + break; + + case TGSI_OPCODE_DP3: + emit_simple_arith(p, inst, A0_DP3, 2); + break; + + case TGSI_OPCODE_DP4: + emit_simple_arith(p, inst, A0_DP4, 2); + break; + + case TGSI_OPCODE_DPH: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, Y, Z, ONE), src1, 0); + break; + + case TGSI_OPCODE_DST: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + + /* result[0] = 1 * 1; + * result[1] = a[1] * b[1]; + * result[2] = a[2] * 1; + * result[3] = 1 * b[3]; + */ + i915_emit_arith(p, + A0_MUL, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, ONE, Y, Z, ONE), + swizzle(src1, ONE, Y, ONE, W), 0); + break; + + case TGSI_OPCODE_END: + /* no-op */ + break; + + case TGSI_OPCODE_EX2: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + + i915_emit_arith(p, + A0_EXP, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_FLR: + emit_simple_arith(p, inst, A0_FLR, 1); + break; + + case TGSI_OPCODE_FRC: + emit_simple_arith(p, inst, A0_FRC, 1); + break; + + case TGSI_OPCODE_KIL: + /* kill if src[0].x < 0 || src[0].y < 0 ... */ + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + i915_emit_texld(p, + tmp, /* dest reg: a dummy reg */ + A0_DEST_CHANNEL_ALL, /* dest writemask */ + 0, /* sampler */ + src0, /* coord*/ + T0_TEXKILL); /* opcode */ + break; + + case TGSI_OPCODE_KILP: + assert(0); /* not tested yet */ + break; + + case TGSI_OPCODE_LG2: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + + i915_emit_arith(p, + A0_LOG, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_LIT: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + /* tmp = max( a.xyzw, a.00zw ) + * XXX: Clamp tmp.w to -128..128 + * tmp.y = log(tmp.y) + * tmp.y = tmp.w * tmp.y + * tmp.y = exp(tmp.y) + * result = cmp (a.11-x1, a.1x01, a.1xy1 ) + */ + i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, + src0, swizzle(src0, ZERO, ZERO, Z, W), 0); + + i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, Y, Y, Y, Y), 0, 0); + + i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, Y, ZERO, ZERO), + swizzle(tmp, ZERO, W, ZERO, ZERO), 0); + + i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, Y, Y, Y, Y), 0, 0); + + i915_emit_arith(p, A0_CMP, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), + swizzle(tmp, ONE, X, ZERO, ONE), + swizzle(tmp, ONE, X, Y, ONE)); + + break; + + case TGSI_OPCODE_LRP: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src2 = src_vector(p, &inst->FullSrcRegisters[2]); + flags = get_result_flags(inst); + tmp = i915_get_utemp(p); + + /* b*a + c*(1-a) + * + * b*a + c - ca + * + * tmp = b*a + c, + * result = (-c)*a + tmp + */ + i915_emit_arith(p, A0_MAD, tmp, + flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); + + i915_emit_arith(p, A0_MAD, + get_result_vector(p, &inst->FullDstRegisters[0]), + flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); + break; + + case TGSI_OPCODE_MAD: + emit_simple_arith(p, inst, A0_MAD, 3); + break; + + case TGSI_OPCODE_MAX: + emit_simple_arith(p, inst, A0_MAX, 2); + break; + + case TGSI_OPCODE_MIN: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + tmp = i915_get_utemp(p); + flags = get_result_flags(inst); + + i915_emit_arith(p, + A0_MAX, + tmp, flags & A0_DEST_CHANNEL_ALL, 0, + negate(src0, 1, 1, 1, 1), + negate(src1, 1, 1, 1, 1), 0); + + i915_emit_arith(p, + A0_MOV, + get_result_vector(p, &inst->FullDstRegisters[0]), + flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0); + break; + + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: + emit_simple_arith(p, inst, A0_MOV, 1); + break; + + case TGSI_OPCODE_MUL: + emit_simple_arith(p, inst, A0_MUL, 2); + break; + + case TGSI_OPCODE_POW: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + tmp = i915_get_utemp(p); + flags = get_result_flags(inst); + + /* XXX: masking on intermediate values, here and elsewhere. + */ + i915_emit_arith(p, + A0_LOG, + tmp, A0_DEST_CHANNEL_X, 0, + swizzle(src0, X, X, X, X), 0, 0); + + i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); + + i915_emit_arith(p, + A0_EXP, + get_result_vector(p, &inst->FullDstRegisters[0]), + flags, 0, swizzle(tmp, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_RET: + /* XXX: no-op? */ + break; + + case TGSI_OPCODE_RCP: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + + i915_emit_arith(p, + A0_RCP, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_RSQ: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + + i915_emit_arith(p, + A0_RSQ, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_SCS: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + /* + * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 + * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x + * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x + * scs.x = DP4 t1, sin_constants + * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 + * scs.y = DP4 t1, cos_constants + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XY, 0, + swizzle(src0, X, X, ONE, ONE), + swizzle(src0, X, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, X, Y), + swizzle(tmp, X, X, ONE, ONE), 0); + + writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; + + if (writemask & TGSI_WRITEMASK_Y) { + uint tmp1; + + if (writemask & TGSI_WRITEMASK_X) + tmp1 = i915_get_utemp(p); + else + tmp1 = tmp; + + i915_emit_arith(p, + A0_MUL, + tmp1, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, Y, W), + swizzle(tmp, X, Z, ONE, ONE), 0); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + A0_DEST_CHANNEL_Y, 0, + swizzle(tmp1, W, Z, Y, X), + i915_emit_const4fv(p, sin_constants), 0); + } + + if (writemask & TGSI_WRITEMASK_X) { + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XYZ, 0, + swizzle(tmp, X, X, Z, ONE), + swizzle(tmp, Z, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + A0_DEST_CHANNEL_X, 0, + swizzle(tmp, ONE, Z, Y, X), + i915_emit_const4fv(p, cos_constants), 0); + } + break; + + case TGSI_OPCODE_SGE: + emit_simple_arith(p, inst, A0_SGE, 2); + break; + + case TGSI_OPCODE_SLE: + /* like SGE, but swap reg0, reg1 */ + emit_simple_arith_swap2(p, inst, A0_SGE, 2); + break; + + case TGSI_OPCODE_SIN: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_X, 0, + src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); + + i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); + + /* By choosing different taylor constants, could get rid of this mul: + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_X, 0, + tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0); + + /* + * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 + * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x + * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x + * result = DP4 t1.wzyx, sin_constants + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XY, 0, + swizzle(tmp, X, X, ONE, ONE), + swizzle(tmp, X, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, X, Y), + swizzle(tmp, X, X, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, Y, W), + swizzle(tmp, X, Z, ONE, ONE), 0); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(tmp, W, Z, Y, X), + i915_emit_const4fv(p, sin_constants), 0); + break; + + case TGSI_OPCODE_SLT: + emit_simple_arith(p, inst, A0_SLT, 2); + break; + + case TGSI_OPCODE_SGT: + /* like SLT, but swap reg0, reg1 */ + emit_simple_arith_swap2(p, inst, A0_SLT, 2); + break; + + case TGSI_OPCODE_SUB: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + + i915_emit_arith(p, + A0_ADD, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + src0, negate(src1, 1, 1, 1, 1), 0); + break; + + case TGSI_OPCODE_TEX: + emit_tex(p, inst, T0_TEXLD); + break; + + case TGSI_OPCODE_TXB: + emit_tex(p, inst, T0_TEXLDB); + break; + + case TGSI_OPCODE_TXP: + emit_tex(p, inst, T0_TEXLDP); + break; + + case TGSI_OPCODE_XPD: + /* Cross product: + * result.x = src0.y * src1.z - src0.z * src1.y; + * result.y = src0.z * src1.x - src0.x * src1.z; + * result.z = src0.x * src1.y - src0.y * src1.x; + * result.w = undef; + */ + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + tmp = i915_get_utemp(p); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(src0, Z, X, Y, ONE), + swizzle(src1, Y, Z, X, ONE), 0); + + i915_emit_arith(p, + A0_MAD, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, Y, Z, X, ONE), + swizzle(src1, Z, X, Y, ONE), + negate(tmp, 1, 1, 1, 0)); + break; + + default: + i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode); + p->error = 1; + return; + } + + i915_release_utemps(p); +} + + +/** + * Translate TGSI fragment shader into i915 hardware instructions. + * \param p the translation state + * \param tokens the TGSI token array + */ +static void +i915_translate_instructions(struct i915_fp_compile *p, + const struct tgsi_token *tokens) +{ + struct i915_fragment_shader *ifs = p->shader; + struct tgsi_parse_context parse; + + tgsi_parse_init( &parse, tokens ); + + while( !tgsi_parse_end_of_tokens( &parse ) ) { + + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullToken.FullDeclaration.Declaration.File + == TGSI_FILE_CONSTANT) { + uint i; + for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; + i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; + i++) { + assert(ifs->constant_flags[i] == 0x0); + ifs->constant_flags[i] = I915_CONSTFLAG_USER; + ifs->num_constants = MAX2(ifs->num_constants, i + 1); + } + } + else if (parse.FullToken.FullDeclaration.Declaration.File + == TGSI_FILE_TEMPORARY) { + uint i; + for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; + i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; + i++) { + assert(i < I915_MAX_TEMPORARY); + /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ + p->temp_flag |= (1 << i); /* mark temp as used */ + } + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm + = &parse.FullToken.FullImmediate; + const uint pos = p->num_immediates++; + uint j; + assert( imm->Immediate.NrTokens <= 4 + 1 ); + for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { + p->immediates[pos][j] = imm->u[j].Float; + } + } + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (p->first_instruction) { + /* resolve location of immediates */ + uint i, j; + for (i = 0; i < p->num_immediates; i++) { + /* find constant slot for this immediate */ + for (j = 0; j < I915_MAX_CONSTANT; j++) { + if (ifs->constant_flags[j] == 0x0) { + memcpy(ifs->constants[j], + p->immediates[i], + 4 * sizeof(float)); + /*printf("immediate %d maps to const %d\n", i, j);*/ + ifs->constant_flags[j] = 0xf; /* all four comps used */ + p->immediates_map[i] = j; + ifs->num_constants = MAX2(ifs->num_constants, j + 1); + break; + } + } + } + + p->first_instruction = FALSE; + } + + i915_translate_instruction(p, &parse.FullToken.FullInstruction); + break; + + default: + assert( 0 ); + } + + } /* while */ + + tgsi_parse_free (&parse); +} + + +static struct i915_fp_compile * +i915_init_compile(struct i915_context *i915, + struct i915_fragment_shader *ifs) +{ + struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); + + p->shader = ifs; + + /* Put new constants at end of const buffer, growing downward. + * The problem is we don't know how many user-defined constants might + * be specified with pipe->set_constant_buffer(). + * Should pre-scan the user's program to determine the highest-numbered + * constant referenced. + */ + ifs->num_constants = 0; + memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); + + p->first_instruction = TRUE; + + p->nr_tex_indirect = 1; /* correct? */ + p->nr_tex_insn = 0; + p->nr_alu_insn = 0; + p->nr_decl_insn = 0; + + p->csr = p->program; + p->decl = p->declarations; + p->decl_s = 0; + p->decl_t = 0; + p->temp_flag = ~0x0 << I915_MAX_TEMPORARY; + p->utemp_flag = ~0x7; + + p->wpos_tex = -1; + + /* initialize the first program word */ + *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM; + + return p; +} + + +/* Copy compile results to the fragment program struct and destroy the + * compilation context. + */ +static void +i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) +{ + struct i915_fragment_shader *ifs = p->shader; + unsigned long program_size = (unsigned long) (p->csr - p->program); + unsigned long decl_size = (unsigned long) (p->decl - p->declarations); + + if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) + i915_program_error(p, "Exceeded max nr indirect texture lookups"); + + if (p->nr_tex_insn > I915_MAX_TEX_INSN) + i915_program_error(p, "Exceeded max TEX instructions"); + + if (p->nr_alu_insn > I915_MAX_ALU_INSN) + i915_program_error(p, "Exceeded max ALU instructions"); + + if (p->nr_decl_insn > I915_MAX_DECL_INSN) + i915_program_error(p, "Exceeded max DECL instructions"); + + if (p->error) { + p->NumNativeInstructions = 0; + p->NumNativeAluInstructions = 0; + p->NumNativeTexInstructions = 0; + p->NumNativeTexIndirections = 0; + + i915_use_passthrough_shader(ifs); + } + else { + p->NumNativeInstructions + = p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn; + p->NumNativeAluInstructions = p->nr_alu_insn; + p->NumNativeTexInstructions = p->nr_tex_insn; + p->NumNativeTexIndirections = p->nr_tex_indirect; + + /* patch in the program length */ + p->declarations[0] |= program_size + decl_size - 2; + + /* Copy compilation results to fragment program struct: + */ + assert(!ifs->program); + ifs->program + = (uint *) MALLOC((program_size + decl_size) * sizeof(uint)); + if (ifs->program) { + ifs->program_len = program_size + decl_size; + + memcpy(ifs->program, + p->declarations, + decl_size * sizeof(uint)); + + memcpy(ifs->program + decl_size, + p->program, + program_size * sizeof(uint)); + } + } + + /* Release the compilation struct: + */ + FREE(p); +} + + +/** + * Find an unused texture coordinate slot to use for fragment WPOS. + * Update p->fp->wpos_tex with the result (-1 if no used texcoord slot is found). + */ +static void +i915_find_wpos_space(struct i915_fp_compile *p) +{ +#if 0 + const uint inputs + = p->shader->inputs_read | (1 << TGSI_ATTRIB_POS); /*XXX hack*/ + uint i; + + p->wpos_tex = -1; + + if (inputs & (1 << TGSI_ATTRIB_POS)) { + for (i = 0; i < I915_TEX_UNITS; i++) { + if ((inputs & (1 << (TGSI_ATTRIB_TEX0 + i))) == 0) { + p->wpos_tex = i; + return; + } + } + + i915_program_error(p, "No free texcoord for wpos value"); + } +#else + if (p->shader->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) { + /* frag shader using the fragment position input */ +#if 0 + assert(0); +#endif + } +#endif +} + + + + +/** + * Rather than trying to intercept and jiggle depth writes during + * emit, just move the value into its correct position at the end of + * the program: + */ +static void +i915_fixup_depth_write(struct i915_fp_compile *p) +{ + /* XXX assuming pos/depth is always in output[0] */ + if (p->shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { + const uint depth = UREG(REG_TYPE_OD, 0); + + i915_emit_arith(p, + A0_MOV, /* opcode */ + depth, /* dest reg */ + A0_DEST_CHANNEL_W, /* write mask */ + 0, /* saturate? */ + swizzle(depth, X, Y, Z, Z), /* src0 */ + 0, 0 /* src1, src2 */); + } +} + + +void +i915_translate_fragment_program( struct i915_context *i915, + struct i915_fragment_shader *fs) +{ + struct i915_fp_compile *p = i915_init_compile(i915, fs); + const struct tgsi_token *tokens = fs->state.tokens; + + i915_find_wpos_space(p); + +#if 0 + tgsi_dump(tokens, 0); +#endif + + i915_translate_instructions(p, tokens); + i915_fixup_depth_write(p); + + i915_fini_compile(i915, p); +} diff --git a/src/gallium/drivers/i915/i915_prim_emit.c b/src/gallium/drivers/i915/i915_prim_emit.c new file mode 100644 index 0000000000..d9a5c40ab9 --- /dev/null +++ b/src/gallium/drivers/i915/i915_prim_emit.c @@ -0,0 +1,219 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "draw/draw_pipe.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_pack_color.h" + +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_state.h" +#include "i915_batch.h" + + + +/** + * Primitive emit to hardware. No support for vertex buffers or any + * nice fast paths. + */ +struct setup_stage { + struct draw_stage stage; /**< This must be first (base class) */ + + struct i915_context *i915; +}; + + + +/** + * Basically a cast wrapper. + */ +static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) +{ + return (struct setup_stage *)stage; +} + + +/** + * Extract the needed fields from vertex_header and emit i915 dwords. + * Recall that the vertices are constructed by the 'draw' module and + * have a couple of slots at the beginning (1-dword header, 4-dword + * clip pos) that we ignore here. + */ +static INLINE void +emit_hw_vertex( struct i915_context *i915, + const struct vertex_header *vertex) +{ + const struct vertex_info *vinfo = &i915->current.vertex_info; + uint i; + uint count = 0; /* for debug/sanity */ + + assert(!i915->dirty); + + for (i = 0; i < vinfo->num_attribs; i++) { + const uint j = vinfo->attrib[i].src_index; + const float *attrib = vertex->data[j]; + switch (vinfo->attrib[i].emit) { + case EMIT_1F: + OUT_BATCH( fui(attrib[0]) ); + count++; + break; + case EMIT_2F: + OUT_BATCH( fui(attrib[0]) ); + OUT_BATCH( fui(attrib[1]) ); + count += 2; + break; + case EMIT_3F: + OUT_BATCH( fui(attrib[0]) ); + OUT_BATCH( fui(attrib[1]) ); + OUT_BATCH( fui(attrib[2]) ); + count += 3; + break; + case EMIT_4F: + OUT_BATCH( fui(attrib[0]) ); + OUT_BATCH( fui(attrib[1]) ); + OUT_BATCH( fui(attrib[2]) ); + OUT_BATCH( fui(attrib[3]) ); + count += 4; + break; + case EMIT_4UB: + OUT_BATCH( pack_ub4(float_to_ubyte( attrib[2] ), + float_to_ubyte( attrib[1] ), + float_to_ubyte( attrib[0] ), + float_to_ubyte( attrib[3] )) ); + count += 1; + break; + default: + assert(0); + } + } + assert(count == vinfo->size); +} + + + +static INLINE void +emit_prim( struct draw_stage *stage, + struct prim_header *prim, + unsigned hwprim, + unsigned nr ) +{ + struct i915_context *i915 = setup_stage(stage)->i915; + unsigned vertex_size; + unsigned i; + + if (i915->dirty) + i915_update_derived( i915 ); + + if (i915->hardware_dirty) + i915_emit_hardware_state( i915 ); + + /* need to do this after validation! */ + vertex_size = i915->current.vertex_info.size * 4; /* in bytes */ + assert(vertex_size >= 12); /* never smaller than 12 bytes */ + + if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { + FLUSH_BATCH(NULL); + + /* Make sure state is re-emitted after a flush: + */ + i915_update_derived( i915 ); + i915_emit_hardware_state( i915 ); + + if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { + assert(0); + return; + } + } + + /* Emit each triangle as a single primitive. I told you this was + * simple. + */ + OUT_BATCH(_3DPRIMITIVE | + hwprim | + ((4 + vertex_size * nr)/4 - 2)); + + for (i = 0; i < nr; i++) + emit_hw_vertex(i915, prim->v[i]); +} + + +static void +setup_tri( struct draw_stage *stage, struct prim_header *prim ) +{ + emit_prim( stage, prim, PRIM3D_TRILIST, 3 ); +} + + +static void +setup_line(struct draw_stage *stage, struct prim_header *prim) +{ + emit_prim( stage, prim, PRIM3D_LINELIST, 2 ); +} + + +static void +setup_point(struct draw_stage *stage, struct prim_header *prim) +{ + emit_prim( stage, prim, PRIM3D_POINTLIST, 1 ); +} + + +static void setup_flush( struct draw_stage *stage, unsigned flags ) +{ +} + +static void reset_stipple_counter( struct draw_stage *stage ) +{ +} + +static void render_destroy( struct draw_stage *stage ) +{ + FREE( stage ); +} + + +/** + * Create a new primitive setup/render stage. This gets plugged into + * the 'draw' module's pipeline. + */ +struct draw_stage *i915_draw_render_stage( struct i915_context *i915 ) +{ + struct setup_stage *setup = CALLOC_STRUCT(setup_stage); + + setup->i915 = i915; + setup->stage.draw = i915->draw; + setup->stage.point = setup_point; + setup->stage.line = setup_line; + setup->stage.tri = setup_tri; + setup->stage.flush = setup_flush; + setup->stage.reset_stipple_counter = reset_stipple_counter; + setup->stage.destroy = render_destroy; + + return &setup->stage; +} diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c new file mode 100644 index 0000000000..8a3e466c84 --- /dev/null +++ b/src/gallium/drivers/i915/i915_prim_vbuf.c @@ -0,0 +1,645 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Build post-transformation, post-clipping vertex buffers and element + * lists by hooking into the end of the primitive pipeline and + * manipulating the vertex_id field in the vertex headers. + * + * XXX: work in progress + * + * \author José Fonseca + * \author Keith Whitwell + */ + + +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" +#include "util/u_debug.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_fifo.h" + +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_batch.h" +#include "i915_state.h" + + +/** + * Primitive renderer for i915. + */ +struct i915_vbuf_render { + struct vbuf_render base; + + struct i915_context *i915; + + /** Vertex size in bytes */ + size_t vertex_size; + + /** Software primitive */ + unsigned prim; + + /** Hardware primitive */ + unsigned hwprim; + + /** Genereate a vertex list */ + unsigned fallback; + + /* Stuff for the vbo */ + struct intel_buffer *vbo; + size_t vbo_size; + size_t vbo_offset; + void *vbo_ptr; + size_t vbo_max_used; + + /* stuff for the pool */ + struct util_fifo *pool_fifo; + unsigned pool_used; + unsigned pool_buffer_size; + boolean pool_not_used; +}; + + +/** + * Basically a cast wrapper. + */ +static INLINE struct i915_vbuf_render * +i915_vbuf_render(struct vbuf_render *render) +{ + assert(render); + return (struct i915_vbuf_render *)render; +} + +static const struct vertex_info * +i915_vbuf_render_get_vertex_info(struct vbuf_render *render) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + + if (i915->dirty) { + /* make sure we have up to date vertex layout */ + i915_update_derived(i915); + } + + return &i915->current.vertex_info; +} + +static boolean +i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size) +{ + struct i915_context *i915 = i915_render->i915; + + if (i915_render->vbo_size < size + i915_render->vbo_offset) + return FALSE; + + if (i915->vbo_flushed) + return FALSE; + + return TRUE; +} + +static void +i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) +{ + struct i915_context *i915 = i915_render->i915; + struct intel_winsys *iws = i915->iws; + + if (i915_render->vbo) { + if (i915_render->pool_not_used) + iws->buffer_destroy(iws, i915_render->vbo); + else + u_fifo_add(i915_render->pool_fifo, i915_render->vbo); + i915_render->vbo = NULL; + } + + i915->vbo_flushed = 0; + + i915_render->vbo_size = MAX2(size, i915_render->pool_buffer_size); + i915_render->vbo_offset = 0; + + if (i915_render->vbo_size != i915_render->pool_buffer_size) { + i915_render->pool_not_used = TRUE; + i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, + INTEL_NEW_VERTEX); + } else { + i915_render->pool_not_used = FALSE; + + if (i915_render->pool_used >= 2) { + FLUSH_BATCH(NULL); + i915->vbo_flushed = 0; + i915_render->pool_used = 0; + } + u_fifo_pop(i915_render->pool_fifo, (void**)&i915_render->vbo); + } +} + +static boolean +i915_vbuf_render_allocate_vertices(struct vbuf_render *render, + ushort vertex_size, + ushort nr_vertices) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + size_t size = (size_t)vertex_size * (size_t)nr_vertices; + + /* FIXME: handle failure */ + assert(!i915->vbo); + + if (!i915_vbuf_render_reserve(i915_render, size)) { + + if (i915->vbo_flushed) + i915_render->pool_used = 0; + + i915_vbuf_render_new_buf(i915_render, size); + } + + i915_render->vertex_size = vertex_size; + i915->vbo = i915_render->vbo; + i915->vbo_offset = i915_render->vbo_offset; + i915->dirty |= I915_NEW_VBO; + + if (!i915_render->vbo) + return FALSE; + return TRUE; +} + +static void * +i915_vbuf_render_map_vertices(struct vbuf_render *render) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + struct intel_winsys *iws = i915->iws; + + if (i915->vbo_flushed) + debug_printf("%s bad vbo flush occured stalling on hw\n", __FUNCTION__); + + i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE); + + return (unsigned char *)i915_render->vbo_ptr + i915->vbo_offset; +} + +static void +i915_vbuf_render_unmap_vertices(struct vbuf_render *render, + ushort min_index, + ushort max_index) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + struct intel_winsys *iws = i915->iws; + + i915_render->vbo_max_used = MAX2(i915_render->vbo_max_used, i915_render->vertex_size * (max_index + 1)); + iws->buffer_unmap(iws, i915_render->vbo); +} + +static boolean +i915_vbuf_render_set_primitive(struct vbuf_render *render, + unsigned prim) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + i915_render->prim = prim; + + switch(prim) { + case PIPE_PRIM_POINTS: + i915_render->hwprim = PRIM3D_POINTLIST; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_LINES: + i915_render->hwprim = PRIM3D_LINELIST; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_LINE_LOOP: + i915_render->hwprim = PRIM3D_LINELIST; + i915_render->fallback = PIPE_PRIM_LINE_LOOP; + return TRUE; + case PIPE_PRIM_LINE_STRIP: + i915_render->hwprim = PRIM3D_LINESTRIP; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_TRIANGLES: + i915_render->hwprim = PRIM3D_TRILIST; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_TRIANGLE_STRIP: + i915_render->hwprim = PRIM3D_TRISTRIP; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_TRIANGLE_FAN: + i915_render->hwprim = PRIM3D_TRIFAN; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_QUADS: + i915_render->hwprim = PRIM3D_TRILIST; + i915_render->fallback = PIPE_PRIM_QUADS; + return TRUE; + case PIPE_PRIM_QUAD_STRIP: + i915_render->hwprim = PRIM3D_TRILIST; + i915_render->fallback = PIPE_PRIM_QUAD_STRIP; + return TRUE; + case PIPE_PRIM_POLYGON: + i915_render->hwprim = PRIM3D_POLY; + i915_render->fallback = 0; + return TRUE; + default: + /* FIXME: Actually, can handle a lot more just fine... */ + return FALSE; + } +} + +/** + * Used for fallbacks in draw_arrays + */ +static void +draw_arrays_generate_indices(struct vbuf_render *render, + unsigned start, uint nr, + unsigned type) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + unsigned i; + unsigned end = start + nr; + switch(type) { + case 0: + for (i = start; i+1 < end; i += 2) + OUT_BATCH((i+0) | (i+1) << 16); + if (i < end) + OUT_BATCH(i); + break; + case PIPE_PRIM_LINE_LOOP: + if (nr >= 2) { + for (i = start + 1; i < end; i++) + OUT_BATCH((i-0) | (i+0) << 16); + OUT_BATCH((i-0) | ( start) << 16); + } + break; + case PIPE_PRIM_QUADS: + for (i = start; i + 3 < end; i += 4) { + OUT_BATCH((i+0) | (i+1) << 16); + OUT_BATCH((i+3) | (i+1) << 16); + OUT_BATCH((i+2) | (i+3) << 16); + } + break; + case PIPE_PRIM_QUAD_STRIP: + for (i = start; i + 3 < end; i += 2) { + OUT_BATCH((i+0) | (i+1) << 16); + OUT_BATCH((i+3) | (i+2) << 16); + OUT_BATCH((i+0) | (i+3) << 16); + } + break; + default: + assert(0); + } +} + +static unsigned +draw_arrays_calc_nr_indices(uint nr, unsigned type) +{ + switch (type) { + case 0: + return nr; + case PIPE_PRIM_LINE_LOOP: + if (nr >= 2) + return nr * 2; + else + return 0; + case PIPE_PRIM_QUADS: + return (nr / 4) * 6; + case PIPE_PRIM_QUAD_STRIP: + return ((nr - 2) / 2) * 6; + default: + assert(0); + return 0; + } +} + +static void +draw_arrays_fallback(struct vbuf_render *render, + unsigned start, + uint nr) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + unsigned nr_indices; + + if (i915->dirty) + i915_update_derived(i915); + + if (i915->hardware_dirty) + i915_emit_hardware_state(i915); + + nr_indices = draw_arrays_calc_nr_indices(nr, i915_render->fallback); + if (!nr_indices) + return; + + if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { + FLUSH_BATCH(NULL); + + /* Make sure state is re-emitted after a flush: + */ + i915_update_derived(i915); + i915_emit_hardware_state(i915); + i915->vbo_flushed = 1; + + if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { + assert(0); + goto out; + } + } + OUT_BATCH(_3DPRIMITIVE | + PRIM_INDIRECT | + i915_render->hwprim | + PRIM_INDIRECT_ELTS | + nr_indices); + + draw_arrays_generate_indices(render, start, nr, i915_render->fallback); + +out: + return; +} + +static void +i915_vbuf_render_draw_arrays(struct vbuf_render *render, + unsigned start, + uint nr) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + + if (i915_render->fallback) { + draw_arrays_fallback(render, start, nr); + return; + } + + if (i915->dirty) + i915_update_derived(i915); + + if (i915->hardware_dirty) + i915_emit_hardware_state(i915); + + if (!BEGIN_BATCH(2, 0)) { + FLUSH_BATCH(NULL); + + /* Make sure state is re-emitted after a flush: + */ + i915_update_derived(i915); + i915_emit_hardware_state(i915); + i915->vbo_flushed = 1; + + if (!BEGIN_BATCH(2, 0)) { + assert(0); + goto out; + } + } + + OUT_BATCH(_3DPRIMITIVE | + PRIM_INDIRECT | + PRIM_INDIRECT_SEQUENTIAL | + i915_render->hwprim | + nr); + OUT_BATCH(start); /* Beginning vertex index */ + +out: + return; +} + +/** + * Used for normal and fallback emitting of indices + * If type is zero normal operation assumed. + */ +static void +draw_generate_indices(struct vbuf_render *render, + const ushort *indices, + uint nr_indices, + unsigned type) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + unsigned i; + + switch(type) { + case 0: + for (i = 0; i + 1 < nr_indices; i += 2) { + OUT_BATCH(indices[i] | indices[i+1] << 16); + } + if (i < nr_indices) { + OUT_BATCH(indices[i]); + } + break; + case PIPE_PRIM_LINE_LOOP: + if (nr_indices >= 2) { + for (i = 1; i < nr_indices; i++) + OUT_BATCH(indices[i-1] | indices[i] << 16); + OUT_BATCH(indices[i-1] | indices[0] << 16); + } + break; + case PIPE_PRIM_QUADS: + for (i = 0; i + 3 < nr_indices; i += 4) { + OUT_BATCH(indices[i+0] | indices[i+1] << 16); + OUT_BATCH(indices[i+3] | indices[i+1] << 16); + OUT_BATCH(indices[i+2] | indices[i+3] << 16); + } + break; + case PIPE_PRIM_QUAD_STRIP: + for (i = 0; i + 3 < nr_indices; i += 2) { + OUT_BATCH(indices[i+0] | indices[i+1] << 16); + OUT_BATCH(indices[i+3] | indices[i+2] << 16); + OUT_BATCH(indices[i+0] | indices[i+3] << 16); + } + break; + default: + assert(0); + break; + } +} + +static unsigned +draw_calc_nr_indices(uint nr_indices, unsigned type) +{ + switch (type) { + case 0: + return nr_indices; + case PIPE_PRIM_LINE_LOOP: + if (nr_indices >= 2) + return nr_indices * 2; + else + return 0; + case PIPE_PRIM_QUADS: + return (nr_indices / 4) * 6; + case PIPE_PRIM_QUAD_STRIP: + return ((nr_indices - 2) / 2) * 6; + default: + assert(0); + return 0; + } +} + +static void +i915_vbuf_render_draw(struct vbuf_render *render, + const ushort *indices, + uint nr_indices) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + unsigned save_nr_indices; + + save_nr_indices = nr_indices; + + nr_indices = draw_calc_nr_indices(nr_indices, i915_render->fallback); + if (!nr_indices) + return; + + if (i915->dirty) + i915_update_derived(i915); + + if (i915->hardware_dirty) + i915_emit_hardware_state(i915); + + if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { + FLUSH_BATCH(NULL); + + /* Make sure state is re-emitted after a flush: + */ + i915_update_derived(i915); + i915_emit_hardware_state(i915); + i915->vbo_flushed = 1; + + if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { + assert(0); + goto out; + } + } + + OUT_BATCH(_3DPRIMITIVE | + PRIM_INDIRECT | + i915_render->hwprim | + PRIM_INDIRECT_ELTS | + nr_indices); + draw_generate_indices(render, + indices, + save_nr_indices, + i915_render->fallback); + +out: + return; +} + +static void +i915_vbuf_render_release_vertices(struct vbuf_render *render) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + + assert(i915->vbo); + + i915_render->vbo_offset += i915_render->vbo_max_used; + i915_render->vbo_max_used = 0; + i915->vbo = NULL; + i915->dirty |= I915_NEW_VBO; +} + +static void +i915_vbuf_render_destroy(struct vbuf_render *render) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + FREE(i915_render); +} + +/** + * Create a new primitive render. + */ +static struct vbuf_render * +i915_vbuf_render_create(struct i915_context *i915) +{ + struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render); + struct intel_winsys *iws = i915->iws; + int i; + + i915_render->i915 = i915; + + i915_render->base.max_vertex_buffer_bytes = 128*1024; + + /* NOTE: it must be such that state and vertices indices fit in a single + * batch buffer. + */ + i915_render->base.max_indices = 16*1024; + + i915_render->base.get_vertex_info = i915_vbuf_render_get_vertex_info; + i915_render->base.allocate_vertices = i915_vbuf_render_allocate_vertices; + i915_render->base.map_vertices = i915_vbuf_render_map_vertices; + i915_render->base.unmap_vertices = i915_vbuf_render_unmap_vertices; + i915_render->base.set_primitive = i915_vbuf_render_set_primitive; + i915_render->base.draw = i915_vbuf_render_draw; + i915_render->base.draw_arrays = i915_vbuf_render_draw_arrays; + i915_render->base.release_vertices = i915_vbuf_render_release_vertices; + i915_render->base.destroy = i915_vbuf_render_destroy; + + + i915_render->vbo = NULL; + i915_render->vbo_size = 0; + i915_render->vbo_offset = 0; + + i915_render->pool_used = FALSE; + i915_render->pool_buffer_size = 128 * 4096; + i915_render->pool_fifo = u_fifo_create(6); + for (i = 0; i < 6; i++) + u_fifo_add(i915_render->pool_fifo, + iws->buffer_create(iws, i915_render->pool_buffer_size, 64, + INTEL_NEW_VERTEX)); + +#if 0 + /* TODO JB: is this realy needed? */ + i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE); + iws->buffer_unmap(iws, i915_render->vbo); +#endif + + return &i915_render->base; +} + +/** + * Create a new primitive vbuf/render stage. + */ +struct draw_stage *i915_draw_vbuf_stage(struct i915_context *i915) +{ + struct vbuf_render *render; + struct draw_stage *stage; + + render = i915_vbuf_render_create(i915); + if(!render) + return NULL; + + stage = draw_vbuf_stage(i915->draw, render); + if(!stage) { + render->destroy(render); + return NULL; + } + /** TODO JB: this shouldn't be here */ + draw_set_render(i915->draw, render); + + return stage; +} diff --git a/src/gallium/drivers/i915/i915_reg.h b/src/gallium/drivers/i915/i915_reg.h new file mode 100644 index 0000000000..04620fec68 --- /dev/null +++ b/src/gallium/drivers/i915/i915_reg.h @@ -0,0 +1,978 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef I915_REG_H +#define I915_REG_H + + +#define I915_SET_FIELD( var, mask, value ) (var &= ~(mask), var |= value) + +#define CMD_3D (0x3<<29) + +#define PRIM3D_INLINE (CMD_3D | (0x1f<<24)) +#define PRIM3D_TRILIST (0x0<<18) +#define PRIM3D_TRISTRIP (0x1<<18) +#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) +#define PRIM3D_TRIFAN (0x3<<18) +#define PRIM3D_POLY (0x4<<18) +#define PRIM3D_LINELIST (0x5<<18) +#define PRIM3D_LINESTRIP (0x6<<18) +#define PRIM3D_RECTLIST (0x7<<18) +#define PRIM3D_POINTLIST (0x8<<18) +#define PRIM3D_DIB (0x9<<18) +#define PRIM3D_CLEAR_RECT (0xa<<18) +#define PRIM3D_ZONE_INIT (0xd<<18) +#define PRIM3D_MASK (0x1f<<18) + +/* p137 */ +#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24)) +#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16) +#define AA_LINE_ECAAR_WIDTH_0_5 0 +#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14) +#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14) +#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14) +#define AA_LINE_REGION_WIDTH_ENABLE (1<<8) +#define AA_LINE_REGION_WIDTH_0_5 0 +#define AA_LINE_REGION_WIDTH_1_0 (1<<6) +#define AA_LINE_REGION_WIDTH_2_0 (2<<6) +#define AA_LINE_REGION_WIDTH_4_0 (3<<6) + +/* 3DSTATE_BACKFACE_STENCIL_OPS, p138*/ +#define _3DSTATE_BACKFACE_STENCIL_OPS (CMD_3D | (0x8<<24)) +#define BFO_ENABLE_STENCIL_REF (1<<23) +#define BFO_STENCIL_REF_SHIFT 15 +#define BFO_STENCIL_REF_MASK (0xff<<15) +#define BFO_ENABLE_STENCIL_FUNCS (1<<14) +#define BFO_STENCIL_TEST_SHIFT 11 +#define BFO_STENCIL_TEST_MASK (0x7<<11) +#define BFO_STENCIL_FAIL_SHIFT 8 +#define BFO_STENCIL_FAIL_MASK (0x7<<8) +#define BFO_STENCIL_PASS_Z_FAIL_SHIFT 5 +#define BFO_STENCIL_PASS_Z_FAIL_MASK (0x7<<5) +#define BFO_STENCIL_PASS_Z_PASS_SHIFT 2 +#define BFO_STENCIL_PASS_Z_PASS_MASK (0x7<<2) +#define BFO_ENABLE_STENCIL_TWO_SIDE (1<<1) +#define BFO_STENCIL_TWO_SIDE (1<<0) + + +/* 3DSTATE_BACKFACE_STENCIL_MASKS, p140 */ +#define _3DSTATE_BACKFACE_STENCIL_MASKS (CMD_3D | (0x9<<24)) +#define BFM_ENABLE_STENCIL_TEST_MASK (1<<17) +#define BFM_ENABLE_STENCIL_WRITE_MASK (1<<16) +#define BFM_STENCIL_TEST_MASK_SHIFT 8 +#define BFM_STENCIL_TEST_MASK_MASK (0xff<<8) +#define BFM_STENCIL_WRITE_MASK_SHIFT 0 +#define BFM_STENCIL_WRITE_MASK_MASK (0xff<<0) + + + +/* 3DSTATE_BIN_CONTROL p141 */ + +/* p143 */ +#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) +/* Dword 1 */ +#define BUF_3D_ID_COLOR_BACK (0x3<<24) +#define BUF_3D_ID_DEPTH (0x7<<24) +#define BUF_3D_USE_FENCE (1<<23) +#define BUF_3D_TILED_SURFACE (1<<22) +#define BUF_3D_TILE_WALK_X 0 +#define BUF_3D_TILE_WALK_Y (1<<21) +#define BUF_3D_PITCH(x) (((x)/4)<<2) +/* Dword 2 */ +#define BUF_3D_ADDR(x) ((x) & ~0x3) + + +/* 3DSTATE_CHROMA_KEY */ + +/* 3DSTATE_CLEAR_PARAMETERS, p150 */ +#define _3DSTATE_CLEAR_PARAMETERS (CMD_3D | (0x1d<<24) | (0x9c<<16) | 5) +/* Dword 1 */ +#define CLEARPARAM_CLEAR_RECT (1 << 16) +#define CLEARPARAM_ZONE_INIT (0 << 16) +#define CLEARPARAM_WRITE_COLOR (1 << 2) +#define CLEARPARAM_WRITE_DEPTH (1 << 1) +#define CLEARPARAM_WRITE_STENCIL (1 << 0) + +/* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */ +#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16)) + + + +/* 3DSTATE_COORD_SET_BINDINGS, p154 */ +#define _3DSTATE_COORD_SET_BINDINGS (CMD_3D | (0x16<<24)) +#define CSB_TCB(iunit, eunit) ((eunit)<<(iunit*3)) + +/* p156 */ +#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16)) + +/* p157 */ +#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16)) + +/* p158 */ +#define _3DSTATE_DFLT_Z_CMD (CMD_3D | (0x1d<<24) | (0x98<<16)) + + +/* 3DSTATE_DEPTH_OFFSET_SCALE, p159 */ +#define _3DSTATE_DEPTH_OFFSET_SCALE (CMD_3D | (0x1d<<24) | (0x97<<16)) +/* scale in dword 1 */ + + +/* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */ +#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<<19) | 0x2) + +/* p161 */ +#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) +/* Dword 1 */ +#define TEX_DEFAULT_COLOR_OGL (0<<30) +#define TEX_DEFAULT_COLOR_D3D (1<<30) +#define ZR_EARLY_DEPTH (1<<29) +#define LOD_PRECLAMP_OGL (1<<28) +#define LOD_PRECLAMP_D3D (0<<28) +#define DITHER_FULL_ALWAYS (0<<26) +#define DITHER_FULL_ON_FB_BLEND (1<<26) +#define DITHER_CLAMPED_ALWAYS (2<<26) +#define LINEAR_GAMMA_BLEND_32BPP (1<<25) +#define DEBUG_DISABLE_ENH_DITHER (1<<24) +#define DSTORG_HORT_BIAS(x) ((x)<<20) +#define DSTORG_VERT_BIAS(x) ((x)<<16) +#define COLOR_4_2_2_CHNL_WRT_ALL 0 +#define COLOR_4_2_2_CHNL_WRT_Y (1<<12) +#define COLOR_4_2_2_CHNL_WRT_CR (2<<12) +#define COLOR_4_2_2_CHNL_WRT_CB (3<<12) +#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12) +#define COLOR_BUF_8BIT 0 +#define COLOR_BUF_RGB555 (1<<8) +#define COLOR_BUF_RGB565 (2<<8) +#define COLOR_BUF_ARGB8888 (3<<8) +#define DEPTH_FRMT_16_FIXED 0 +#define DEPTH_FRMT_16_FLOAT (1<<2) +#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2) +#define VERT_LINE_STRIDE_1 (1<<1) +#define VERT_LINE_STRIDE_0 (0<<1) +#define VERT_LINE_STRIDE_OFS_1 1 +#define VERT_LINE_STRIDE_OFS_0 0 + +/* p166 */ +#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3) +/* Dword 1 */ +#define DRAW_RECT_DIS_DEPTH_OFS (1<<30) +#define DRAW_DITHER_OFS_X(x) ((x)<<26) +#define DRAW_DITHER_OFS_Y(x) ((x)<<24) +/* Dword 2 */ +#define DRAW_YMIN(x) ((x)<<16) +#define DRAW_XMIN(x) (x) +/* Dword 3 */ +#define DRAW_YMAX(x) ((x)<<16) +#define DRAW_XMAX(x) (x) +/* Dword 4 */ +#define DRAW_YORG(x) ((x)<<16) +#define DRAW_XORG(x) (x) + + +/* 3DSTATE_FILTER_COEFFICIENTS_4X4, p170 */ + +/* 3DSTATE_FILTER_COEFFICIENTS_6X5, p172 */ + + +/* _3DSTATE_FOG_COLOR, p173 */ +#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24)) +#define FOG_COLOR_RED(x) ((x)<<16) +#define FOG_COLOR_GREEN(x) ((x)<<8) +#define FOG_COLOR_BLUE(x) (x) + +/* _3DSTATE_FOG_MODE, p174 */ +#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2) +/* Dword 1 */ +#define FMC1_FOGFUNC_MODIFY_ENABLE (1<<31) +#define FMC1_FOGFUNC_VERTEX (0<<28) +#define FMC1_FOGFUNC_PIXEL_EXP (1<<28) +#define FMC1_FOGFUNC_PIXEL_EXP2 (2<<28) +#define FMC1_FOGFUNC_PIXEL_LINEAR (3<<28) +#define FMC1_FOGFUNC_MASK (3<<28) +#define FMC1_FOGINDEX_MODIFY_ENABLE (1<<27) +#define FMC1_FOGINDEX_Z (0<<25) +#define FMC1_FOGINDEX_W (1<<25) +#define FMC1_C1_C2_MODIFY_ENABLE (1<<24) +#define FMC1_DENSITY_MODIFY_ENABLE (1<<23) +#define FMC1_C1_ONE (1<<13) +#define FMC1_C1_MASK (0xffff<<4) +/* Dword 2 */ +#define FMC2_C2_ONE (1<<16) +/* Dword 3 */ +#define FMC3_D_ONE (1<<16) + + + +/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p177 */ +#define _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24)) +#define IAB_MODIFY_ENABLE (1<<23) +#define IAB_ENABLE (1<<22) +#define IAB_MODIFY_FUNC (1<<21) +#define IAB_FUNC_SHIFT 16 +#define IAB_MODIFY_SRC_FACTOR (1<<11) +#define IAB_SRC_FACTOR_SHIFT 6 +#define IAB_SRC_FACTOR_MASK (BLENDFACT_MASK<<6) +#define IAB_MODIFY_DST_FACTOR (1<<5) +#define IAB_DST_FACTOR_SHIFT 0 +#define IAB_DST_FACTOR_MASK (BLENDFACT_MASK<<0) + + +#define BLENDFUNC_ADD 0x0 +#define BLENDFUNC_SUBTRACT 0x1 +#define BLENDFUNC_REVERSE_SUBTRACT 0x2 +#define BLENDFUNC_MIN 0x3 +#define BLENDFUNC_MAX 0x4 +#define BLENDFUNC_MASK 0x7 + +/* 3DSTATE_LOAD_INDIRECT, p180 */ + +#define _3DSTATE_LOAD_INDIRECT (CMD_3D|(0x1d<<24)|(0x7<<16)) +#define LI0_STATE_STATIC_INDIRECT (0x01<<8) +#define LI0_STATE_DYNAMIC_INDIRECT (0x02<<8) +#define LI0_STATE_SAMPLER (0x04<<8) +#define LI0_STATE_MAP (0x08<<8) +#define LI0_STATE_PROGRAM (0x10<<8) +#define LI0_STATE_CONSTANTS (0x20<<8) + +#define SIS0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define SIS0_FORCE_LOAD (1<<1) +#define SIS0_BUFFER_VALID (1<<0) +#define SIS1_BUFFER_LENGTH(x) ((x)&0xff) + +#define DIS0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define DIS0_BUFFER_RESET (1<<1) +#define DIS0_BUFFER_VALID (1<<0) + +#define SSB0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define SSB0_FORCE_LOAD (1<<1) +#define SSB0_BUFFER_VALID (1<<0) +#define SSB1_BUFFER_LENGTH(x) ((x)&0xff) + +#define MSB0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define MSB0_FORCE_LOAD (1<<1) +#define MSB0_BUFFER_VALID (1<<0) +#define MSB1_BUFFER_LENGTH(x) ((x)&0xff) + +#define PSP0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define PSP0_FORCE_LOAD (1<<1) +#define PSP0_BUFFER_VALID (1<<0) +#define PSP1_BUFFER_LENGTH(x) ((x)&0xff) + +#define PSC0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define PSC0_FORCE_LOAD (1<<1) +#define PSC0_BUFFER_VALID (1<<0) +#define PSC1_BUFFER_LENGTH(x) ((x)&0xff) + + + + + +/* _3DSTATE_RASTERIZATION_RULES */ +#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24)) +#define ENABLE_POINT_RASTER_RULE (1<<15) +#define OGL_POINT_RASTER_RULE (1<<13) +#define ENABLE_TEXKILL_3D_4D (1<<10) +#define TEXKILL_3D (0<<9) +#define TEXKILL_4D (1<<9) +#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8) +#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5) +#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6) +#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3) + +/* _3DSTATE_SCISSOR_ENABLE, p256 */ +#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19)) +#define ENABLE_SCISSOR_RECT ((1<<1) | 1) +#define DISABLE_SCISSOR_RECT (1<<1) + +/* _3DSTATE_SCISSOR_RECTANGLE_0, p257 */ +#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1) +/* Dword 1 */ +#define SCISSOR_RECT_0_YMIN(x) ((x)<<16) +#define SCISSOR_RECT_0_XMIN(x) (x) +/* Dword 2 */ +#define SCISSOR_RECT_0_YMAX(x) ((x)<<16) +#define SCISSOR_RECT_0_XMAX(x) (x) + +/* p189 */ +#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 ((0x3<<29)|(0x1d<<24)|(0x04<<16)) +#define I1_LOAD_S(n) (1<<(4+n)) + +#define S0_VB_OFFSET_MASK 0xffffffc +#define S0_AUTO_CACHE_INV_DISABLE (1<<0) + +#define S1_VERTEX_WIDTH_SHIFT 24 +#define S1_VERTEX_WIDTH_MASK (0x3f<<24) +#define S1_VERTEX_PITCH_SHIFT 16 +#define S1_VERTEX_PITCH_MASK (0x3f<<16) + +#define TEXCOORDFMT_2D 0x0 +#define TEXCOORDFMT_3D 0x1 +#define TEXCOORDFMT_4D 0x2 +#define TEXCOORDFMT_1D 0x3 +#define TEXCOORDFMT_2D_16 0x4 +#define TEXCOORDFMT_4D_16 0x5 +#define TEXCOORDFMT_NOT_PRESENT 0xf +#define S2_TEXCOORD_FMT0_MASK 0xf +#define S2_TEXCOORD_FMT1_SHIFT 4 +#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4)) +#define S2_TEXCOORD_NONE (~0) + +/* S3 not interesting */ + +#define S4_POINT_WIDTH_SHIFT 23 +#define S4_POINT_WIDTH_MASK (0x1ff<<23) +#define S4_LINE_WIDTH_SHIFT 19 +#define S4_LINE_WIDTH_ONE (0x2<<19) +#define S4_LINE_WIDTH_MASK (0xf<<19) +#define S4_FLATSHADE_ALPHA (1<<18) +#define S4_FLATSHADE_FOG (1<<17) +#define S4_FLATSHADE_SPECULAR (1<<16) +#define S4_FLATSHADE_COLOR (1<<15) +#define S4_CULLMODE_BOTH (0<<13) +#define S4_CULLMODE_NONE (1<<13) +#define S4_CULLMODE_CW (2<<13) +#define S4_CULLMODE_CCW (3<<13) +#define S4_CULLMODE_MASK (3<<13) +#define S4_VFMT_POINT_WIDTH (1<<12) +#define S4_VFMT_SPEC_FOG (1<<11) +#define S4_VFMT_COLOR (1<<10) +#define S4_VFMT_DEPTH_OFFSET (1<<9) +#define S4_VFMT_XYZ (1<<6) +#define S4_VFMT_XYZW (2<<6) +#define S4_VFMT_XY (3<<6) +#define S4_VFMT_XYW (4<<6) +#define S4_VFMT_XYZW_MASK (7<<6) +#define S4_FORCE_DEFAULT_DIFFUSE (1<<5) +#define S4_FORCE_DEFAULT_SPECULAR (1<<4) +#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3) +#define S4_VFMT_FOG_PARAM (1<<2) +#define S4_SPRITE_POINT_ENABLE (1<<1) +#define S4_LINE_ANTIALIAS_ENABLE (1<<0) + +#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \ + S4_VFMT_SPEC_FOG | \ + S4_VFMT_COLOR | \ + S4_VFMT_DEPTH_OFFSET | \ + S4_VFMT_XYZW_MASK | \ + S4_VFMT_FOG_PARAM) + + +#define S5_WRITEDISABLE_ALPHA (1<<31) +#define S5_WRITEDISABLE_RED (1<<30) +#define S5_WRITEDISABLE_GREEN (1<<29) +#define S5_WRITEDISABLE_BLUE (1<<28) +#define S5_WRITEDISABLE_MASK (0xf<<28) +#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27) +#define S5_LAST_PIXEL_ENABLE (1<<26) +#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25) +#define S5_FOG_ENABLE (1<<24) +#define S5_STENCIL_REF_SHIFT 16 +#define S5_STENCIL_REF_MASK (0xff<<16) +#define S5_STENCIL_TEST_FUNC_SHIFT 13 +#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13) +#define S5_STENCIL_FAIL_SHIFT 10 +#define S5_STENCIL_FAIL_MASK (0x7<<10) +#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7 +#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7) +#define S5_STENCIL_PASS_Z_PASS_SHIFT 4 +#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4) +#define S5_STENCIL_WRITE_ENABLE (1<<3) +#define S5_STENCIL_TEST_ENABLE (1<<2) +#define S5_COLOR_DITHER_ENABLE (1<<1) +#define S5_LOGICOP_ENABLE (1<<0) + + +#define S6_ALPHA_TEST_ENABLE (1<<31) +#define S6_ALPHA_TEST_FUNC_SHIFT 28 +#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28) +#define S6_ALPHA_REF_SHIFT 20 +#define S6_ALPHA_REF_MASK (0xff<<20) +#define S6_DEPTH_TEST_ENABLE (1<<19) +#define S6_DEPTH_TEST_FUNC_SHIFT 16 +#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16) +#define S6_CBUF_BLEND_ENABLE (1<<15) +#define S6_CBUF_BLEND_FUNC_SHIFT 12 +#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12) +#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8 +#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8) +#define S6_CBUF_DST_BLEND_FACT_SHIFT 4 +#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4) +#define S6_DEPTH_WRITE_ENABLE (1<<3) +#define S6_COLOR_WRITE_ENABLE (1<<2) +#define S6_TRISTRIP_PV_SHIFT 0 +#define S6_TRISTRIP_PV_MASK (0x3<<0) + +#define S7_DEPTH_OFFSET_CONST_MASK ~0 + + + +#define DST_BLND_FACT(f) ((f)<= 0.0) ? src1 : src2 */ +#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */ +#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */ +#define A0_FLR (0x10<<24) /* dst = floor(src0) */ +#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */ +#define A0_TRC (0x12<<24) /* dst = int(src0) */ +#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */ +#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */ +#define A0_DEST_SATURATE (1<<22) +#define A0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +#define A0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define A0_DEST_CHANNEL_X (1<<10) +#define A0_DEST_CHANNEL_Y (2<<10) +#define A0_DEST_CHANNEL_Z (4<<10) +#define A0_DEST_CHANNEL_W (8<<10) +#define A0_DEST_CHANNEL_ALL (0xf<<10) +#define A0_DEST_CHANNEL_SHIFT 10 +#define A0_SRC0_TYPE_SHIFT 7 +#define A0_SRC0_NR_SHIFT 2 + +#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y) +#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z) + + +#define SRC_X 0 +#define SRC_Y 1 +#define SRC_Z 2 +#define SRC_W 3 +#define SRC_ZERO 4 +#define SRC_ONE 5 + +#define A1_SRC0_CHANNEL_X_NEGATE (1<<31) +#define A1_SRC0_CHANNEL_X_SHIFT 28 +#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27) +#define A1_SRC0_CHANNEL_Y_SHIFT 24 +#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23) +#define A1_SRC0_CHANNEL_Z_SHIFT 20 +#define A1_SRC0_CHANNEL_W_NEGATE (1<<19) +#define A1_SRC0_CHANNEL_W_SHIFT 16 +#define A1_SRC1_TYPE_SHIFT 13 +#define A1_SRC1_NR_SHIFT 8 +#define A1_SRC1_CHANNEL_X_NEGATE (1<<7) +#define A1_SRC1_CHANNEL_X_SHIFT 4 +#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3) +#define A1_SRC1_CHANNEL_Y_SHIFT 0 + +#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31) +#define A2_SRC1_CHANNEL_Z_SHIFT 28 +#define A2_SRC1_CHANNEL_W_NEGATE (1<<27) +#define A2_SRC1_CHANNEL_W_SHIFT 24 +#define A2_SRC2_TYPE_SHIFT 21 +#define A2_SRC2_NR_SHIFT 16 +#define A2_SRC2_CHANNEL_X_NEGATE (1<<15) +#define A2_SRC2_CHANNEL_X_SHIFT 12 +#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11) +#define A2_SRC2_CHANNEL_Y_SHIFT 8 +#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7) +#define A2_SRC2_CHANNEL_Z_SHIFT 4 +#define A2_SRC2_CHANNEL_W_NEGATE (1<<3) +#define A2_SRC2_CHANNEL_W_SHIFT 0 + + + +/* Texture instructions */ +#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared + * sampler and address, and output + * filtered texel data to destination + * register */ +#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a + * perspective divide of the texture + * coordinate .xyz values by .w before + * sampling. */ +#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the + * computed LOD by w. Only S4.6 two's + * comp is used. This implies that a + * float to fixed conversion is + * done. */ +#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling + * operation. Simply kills the pixel + * if any channel of the address + * register is < 0.0. */ +#define T0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +/* Note: U (unpreserved) regs do not retain their values between + * phases (cannot be used for feedback) + * + * Note: oC and OD registers can only be used as the destination of a + * texture instruction once per phase (this is an implementation + * restriction). + */ +#define T0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */ +#define T0_SAMPLER_NR_MASK (0xf<<0) + +#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */ +/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */ +#define T1_ADDRESS_REG_NR_SHIFT 17 +#define T2_MBZ 0 + +/* Declaration instructions */ +#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib) + * register or an s (sampler) + * register. */ +#define D0_SAMPLE_TYPE_SHIFT 22 +#define D0_SAMPLE_TYPE_2D (0x0<<22) +#define D0_SAMPLE_TYPE_CUBE (0x1<<22) +#define D0_SAMPLE_TYPE_VOLUME (0x2<<22) +#define D0_SAMPLE_TYPE_MASK (0x3<<22) + +#define D0_TYPE_SHIFT 19 +/* Allow: T, S */ +#define D0_NR_SHIFT 14 +/* Allow T: 0..10, S: 0..15 */ +#define D0_CHANNEL_X (1<<10) +#define D0_CHANNEL_Y (2<<10) +#define D0_CHANNEL_Z (4<<10) +#define D0_CHANNEL_W (8<<10) +#define D0_CHANNEL_ALL (0xf<<10) +#define D0_CHANNEL_NONE (0<<10) + +#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y) +#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z) + +/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse + * or specular declarations. + * + * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw) + * + * Must be zero for S (sampler) dcls + */ +#define D1_MBZ 0 +#define D2_MBZ 0 + + + +/* p207 */ +#define _3DSTATE_MAP_STATE (CMD_3D|(0x1d<<24)|(0x0<<16)) + +#define MS1_MAPMASK_SHIFT 0 +#define MS1_MAPMASK_MASK (0x8fff<<0) + +#define MS2_UNTRUSTED_SURFACE (1<<31) +#define MS2_ADDRESS_MASK 0xfffffffc +#define MS2_VERTICAL_LINE_STRIDE (1<<1) +#define MS2_VERTICAL_OFFSET (1<<1) + +#define MS3_HEIGHT_SHIFT 21 +#define MS3_WIDTH_SHIFT 10 +#define MS3_PALETTE_SELECT (1<<9) +#define MS3_MAPSURF_FORMAT_SHIFT 7 +#define MS3_MAPSURF_FORMAT_MASK (0x7<<7) +#define MAPSURF_8BIT (1<<7) +#define MAPSURF_16BIT (2<<7) +#define MAPSURF_32BIT (3<<7) +#define MAPSURF_422 (5<<7) +#define MAPSURF_COMPRESSED (6<<7) +#define MAPSURF_4BIT_INDEXED (7<<7) +#define MS3_MT_FORMAT_MASK (0x7 << 3) +#define MS3_MT_FORMAT_SHIFT 3 +#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */ +#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */ +#define MT_8BIT_L8 (1<<3) +#define MT_8BIT_A8 (4<<3) +#define MT_8BIT_MONO8 (5<<3) +#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */ +#define MT_16BIT_ARGB1555 (1<<3) +#define MT_16BIT_ARGB4444 (2<<3) +#define MT_16BIT_AY88 (3<<3) +#define MT_16BIT_88DVDU (5<<3) +#define MT_16BIT_BUMP_655LDVDU (6<<3) +#define MT_16BIT_I16 (7<<3) +#define MT_16BIT_L16 (8<<3) +#define MT_16BIT_A16 (9<<3) +#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */ +#define MT_32BIT_ABGR8888 (1<<3) +#define MT_32BIT_XRGB8888 (2<<3) +#define MT_32BIT_XBGR8888 (3<<3) +#define MT_32BIT_QWVU8888 (4<<3) +#define MT_32BIT_AXVU8888 (5<<3) +#define MT_32BIT_LXVU8888 (6<<3) +#define MT_32BIT_XLVU8888 (7<<3) +#define MT_32BIT_ARGB2101010 (8<<3) +#define MT_32BIT_ABGR2101010 (9<<3) +#define MT_32BIT_AWVU2101010 (0xA<<3) +#define MT_32BIT_GR1616 (0xB<<3) +#define MT_32BIT_VU1616 (0xC<<3) +#define MT_32BIT_xI824 (0xD<<3) +#define MT_32BIT_xA824 (0xE<<3) +#define MT_32BIT_xL824 (0xF<<3) +#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */ +#define MT_422_YCRCB_NORMAL (1<<3) +#define MT_422_YCRCB_SWAPUV (2<<3) +#define MT_422_YCRCB_SWAPUVY (3<<3) +#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */ +#define MT_COMPRESS_DXT2_3 (1<<3) +#define MT_COMPRESS_DXT4_5 (2<<3) +#define MT_COMPRESS_FXT1 (3<<3) +#define MT_COMPRESS_DXT1_RGB (4<<3) +#define MS3_USE_FENCE_REGS (1<<2) +#define MS3_TILED_SURFACE (1<<1) +#define MS3_TILE_WALK (1<<0) + +#define MS4_PITCH_SHIFT 21 +#define MS4_CUBE_FACE_ENA_NEGX (1<<20) +#define MS4_CUBE_FACE_ENA_POSX (1<<19) +#define MS4_CUBE_FACE_ENA_NEGY (1<<18) +#define MS4_CUBE_FACE_ENA_POSY (1<<17) +#define MS4_CUBE_FACE_ENA_NEGZ (1<<16) +#define MS4_CUBE_FACE_ENA_POSZ (1<<15) +#define MS4_CUBE_FACE_ENA_MASK (0x3f<<15) +#define MS4_MAX_LOD_SHIFT 9 +#define MS4_MAX_LOD_MASK (0x3f<<9) +#define MS4_MIP_LAYOUT_LEGACY (0<<8) +#define MS4_MIP_LAYOUT_BELOW_LPT (0<<8) +#define MS4_MIP_LAYOUT_RIGHT_LPT (1<<8) +#define MS4_VOLUME_DEPTH_SHIFT 0 +#define MS4_VOLUME_DEPTH_MASK (0xff<<0) + +/* p244 */ +#define _3DSTATE_SAMPLER_STATE (CMD_3D|(0x1d<<24)|(0x1<<16)) + +#define SS1_MAPMASK_SHIFT 0 +#define SS1_MAPMASK_MASK (0x8fff<<0) + +#define SS2_REVERSE_GAMMA_ENABLE (1<<31) +#define SS2_PACKED_TO_PLANAR_ENABLE (1<<30) +#define SS2_COLORSPACE_CONVERSION (1<<29) +#define SS2_CHROMAKEY_SHIFT 27 +#define SS2_BASE_MIP_LEVEL_SHIFT 22 +#define SS2_BASE_MIP_LEVEL_MASK (0x1f<<22) +#define SS2_MIP_FILTER_SHIFT 20 +#define SS2_MIP_FILTER_MASK (0x3<<20) +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 +#define SS2_MAG_FILTER_SHIFT 17 +#define SS2_MAG_FILTER_MASK (0x7<<17) +#define FILTER_NEAREST 0 +#define FILTER_LINEAR 1 +#define FILTER_ANISOTROPIC 2 +#define FILTER_4X4_1 3 +#define FILTER_4X4_2 4 +#define FILTER_4X4_FLAT 5 +#define FILTER_6X5_MONO 6 /* XXX - check */ +#define SS2_MIN_FILTER_SHIFT 14 +#define SS2_MIN_FILTER_MASK (0x7<<14) +#define SS2_LOD_BIAS_SHIFT 5 +#define SS2_LOD_BIAS_ONE (0x10<<5) +#define SS2_LOD_BIAS_MASK (0x1ff<<5) +/* Shadow requires: + * MT_X8{I,L,A}24 or MT_{I,L,A}16 texture format + * FILTER_4X4_x MIN and MAG filters + */ +#define SS2_SHADOW_ENABLE (1<<4) +#define SS2_MAX_ANISO_MASK (1<<3) +#define SS2_MAX_ANISO_2 (0<<3) +#define SS2_MAX_ANISO_4 (1<<3) +#define SS2_SHADOW_FUNC_SHIFT 0 +#define SS2_SHADOW_FUNC_MASK (0x7<<0) +/* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */ + +#define SS3_MIN_LOD_SHIFT 24 +#define SS3_MIN_LOD_ONE (0x10<<24) +#define SS3_MIN_LOD_MASK (0xff<<24) +#define SS3_KILL_PIXEL_ENABLE (1<<17) +#define SS3_TCX_ADDR_MODE_SHIFT 12 +#define SS3_TCX_ADDR_MODE_MASK (0x7<<12) +#define TEXCOORDMODE_WRAP 0 +#define TEXCOORDMODE_MIRROR 1 +#define TEXCOORDMODE_CLAMP_EDGE 2 +#define TEXCOORDMODE_CUBE 3 +#define TEXCOORDMODE_CLAMP_BORDER 4 +#define TEXCOORDMODE_MIRROR_ONCE 5 +#define SS3_TCY_ADDR_MODE_SHIFT 9 +#define SS3_TCY_ADDR_MODE_MASK (0x7<<9) +#define SS3_TCZ_ADDR_MODE_SHIFT 6 +#define SS3_TCZ_ADDR_MODE_MASK (0x7<<6) +#define SS3_NORMALIZED_COORDS (1<<5) +#define SS3_TEXTUREMAP_INDEX_SHIFT 1 +#define SS3_TEXTUREMAP_INDEX_MASK (0xf<<1) +#define SS3_DEINTERLACER_ENABLE (1<<0) + +#define SS4_BORDER_COLOR_MASK (~0) + +/* 3DSTATE_SPAN_STIPPLE, p258 + */ +#define _3DSTATE_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16)) +#define ST1_ENABLE (1<<16) +#define ST1_MASK (0xffff) + +#define _3DSTATE_DEFAULT_Z ((0x3<<29)|(0x1d<<24)|(0x98<<16)) +#define _3DSTATE_DEFAULT_DIFFUSE ((0x3<<29)|(0x1d<<24)|(0x99<<16)) +#define _3DSTATE_DEFAULT_SPECULAR ((0x3<<29)|(0x1d<<24)|(0x9a<<16)) + + +#define MI_FLUSH ((0<<29)|(4<<23)) +#define FLUSH_MAP_CACHE (1<<0) +#define INHIBIT_FLUSH_RENDER_CACHE (1<<2) + + +#define CMD_3D (0x3<<29) + + +#define _3DPRIMITIVE ((0x3<<29)|(0x1f<<24)) +#define PRIM_INDIRECT (1<<23) +#define PRIM_INLINE (0<<23) +#define PRIM_INDIRECT_SEQUENTIAL (0<<17) +#define PRIM_INDIRECT_ELTS (1<<17) + +#define PRIM3D_TRILIST (0x0<<18) +#define PRIM3D_TRISTRIP (0x1<<18) +#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) +#define PRIM3D_TRIFAN (0x3<<18) +#define PRIM3D_POLY (0x4<<18) +#define PRIM3D_LINELIST (0x5<<18) +#define PRIM3D_LINESTRIP (0x6<<18) +#define PRIM3D_RECTLIST (0x7<<18) +#define PRIM3D_POINTLIST (0x8<<18) +#define PRIM3D_DIB (0x9<<18) +#define PRIM3D_MASK (0x1f<<18) + +#define I915PACKCOLOR4444(r,g,b,a) \ + ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4)) + +#define I915PACKCOLOR1555(r,g,b,a) \ + ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \ + ((a) ? 0x8000 : 0)) + +#define I915PACKCOLOR565(r,g,b) \ + ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3)) + +#define I915PACKCOLOR8888(r,g,b,a) \ + ((a<<24) | (r<<16) | (g<<8) | b) + + + + +#define BR00_BITBLT_CLIENT 0x40000000 +#define BR00_OP_COLOR_BLT 0x10000000 +#define BR00_OP_SRC_COPY_BLT 0x10C00000 +#define BR13_SOLID_PATTERN 0x80000000 + +#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|0x4) +#define XY_COLOR_BLT_WRITE_ALPHA (1<<21) +#define XY_COLOR_BLT_WRITE_RGB (1<<20) + +#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) +#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21) +#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20) + +#define MI_WAIT_FOR_EVENT ((0x3<<23)) +#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) +#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) + +#define MI_BATCH_BUFFER (0x30<<23) +#define MI_BATCH_BUFFER_START (0x31<<23) +#define MI_BATCH_BUFFER_END (0xa<<23) + + + +#define COMPAREFUNC_ALWAYS 0 +#define COMPAREFUNC_NEVER 0x1 +#define COMPAREFUNC_LESS 0x2 +#define COMPAREFUNC_EQUAL 0x3 +#define COMPAREFUNC_LEQUAL 0x4 +#define COMPAREFUNC_GREATER 0x5 +#define COMPAREFUNC_NOTEQUAL 0x6 +#define COMPAREFUNC_GEQUAL 0x7 + +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 0x1 +#define STENCILOP_REPLACE 0x2 +#define STENCILOP_INCRSAT 0x3 +#define STENCILOP_DECRSAT 0x4 +#define STENCILOP_INCR 0x5 +#define STENCILOP_DECR 0x6 +#define STENCILOP_INVERT 0x7 + +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 0x1 +#define LOGICOP_AND_INV 0x2 +#define LOGICOP_COPY_INV 0x3 +#define LOGICOP_AND_RVRSE 0x4 +#define LOGICOP_INV 0x5 +#define LOGICOP_XOR 0x6 +#define LOGICOP_NAND 0x7 +#define LOGICOP_AND 0x8 +#define LOGICOP_EQUIV 0x9 +#define LOGICOP_NOOP 0xa +#define LOGICOP_OR_INV 0xb +#define LOGICOP_COPY 0xc +#define LOGICOP_OR_RVRSE 0xd +#define LOGICOP_OR 0xe +#define LOGICOP_SET 0xf + +#define BLENDFACT_ZERO 0x01 +#define BLENDFACT_ONE 0x02 +#define BLENDFACT_SRC_COLR 0x03 +#define BLENDFACT_INV_SRC_COLR 0x04 +#define BLENDFACT_SRC_ALPHA 0x05 +#define BLENDFACT_INV_SRC_ALPHA 0x06 +#define BLENDFACT_DST_ALPHA 0x07 +#define BLENDFACT_INV_DST_ALPHA 0x08 +#define BLENDFACT_DST_COLR 0x09 +#define BLENDFACT_INV_DST_COLR 0x0a +#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b +#define BLENDFACT_CONST_COLOR 0x0c +#define BLENDFACT_INV_CONST_COLOR 0x0d +#define BLENDFACT_CONST_ALPHA 0x0e +#define BLENDFACT_INV_CONST_ALPHA 0x0f +#define BLENDFACT_MASK 0x0f + +#define PCI_CHIP_I915_G 0x2582 +#define PCI_CHIP_I915_GM 0x2592 +#define PCI_CHIP_I945_G 0x2772 +#define PCI_CHIP_I945_GM 0x27A2 +#define PCI_CHIP_I945_GME 0x27AE +#define PCI_CHIP_G33_G 0x29C2 +#define PCI_CHIP_Q35_G 0x29B2 +#define PCI_CHIP_Q33_G 0x29D2 + + +#endif diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c new file mode 100644 index 0000000000..c66558c320 --- /dev/null +++ b/src/gallium/drivers/i915/i915_screen.c @@ -0,0 +1,298 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "util/u_string.h" + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_screen.h" +#include "i915_buffer.h" +#include "i915_texture.h" +#include "intel_winsys.h" + + +/* + * Probe functions + */ + + +static const char * +i915_get_vendor(struct pipe_screen *screen) +{ + return "VMware, Inc."; +} + +static const char * +i915_get_name(struct pipe_screen *screen) +{ + static char buffer[128]; + const char *chipset; + + switch (i915_screen(screen)->pci_id) { + case PCI_CHIP_I915_G: + chipset = "915G"; + break; + case PCI_CHIP_I915_GM: + chipset = "915GM"; + break; + case PCI_CHIP_I945_G: + chipset = "945G"; + break; + case PCI_CHIP_I945_GM: + chipset = "945GM"; + break; + case PCI_CHIP_I945_GME: + chipset = "945GME"; + break; + case PCI_CHIP_G33_G: + chipset = "G33"; + break; + case PCI_CHIP_Q35_G: + chipset = "Q35"; + break; + case PCI_CHIP_Q33_G: + chipset = "Q33"; + break; + default: + chipset = "unknown"; + break; + } + + util_snprintf(buffer, sizeof(buffer), "i915 (chipset: %s)", chipset); + return buffer; +} + +static int +i915_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 11; /* max 1024x1024 */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 11; /* max 1024x1024 */ + default: + return 0; + } +} + +static float +i915_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 7.5; + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 4.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + + default: + return 0; + } +} + +static boolean +i915_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags) +{ + static const enum pipe_format tex_supported[] = { + PIPE_FORMAT_R8G8B8A8_UNORM, + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_L8_UNORM, + PIPE_FORMAT_A8_UNORM, + PIPE_FORMAT_I8_UNORM, + PIPE_FORMAT_A8L8_UNORM, + PIPE_FORMAT_YCBCR, + PIPE_FORMAT_YCBCR_REV, + PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_NONE /* list terminator */ + }; + static const enum pipe_format surface_supported[] = { + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_NONE /* list terminator */ + }; + const enum pipe_format *list; + uint i; + + if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) + list = surface_supported; + else + list = tex_supported; + + for (i = 0; list[i] != PIPE_FORMAT_NONE; i++) { + if (list[i] == format) + return TRUE; + } + + return FALSE; +} + + +/* + * Fence functions + */ + + +static void +i915_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + struct i915_screen *is = i915_screen(screen); + + is->iws->fence_reference(is->iws, ptr, fence); +} + +static int +i915_fence_signalled(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flags) +{ + struct i915_screen *is = i915_screen(screen); + + return is->iws->fence_signalled(is->iws, fence); +} + +static int +i915_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flags) +{ + struct i915_screen *is = i915_screen(screen); + + return is->iws->fence_finish(is->iws, fence); +} + + +/* + * Generic functions + */ + + +static void +i915_destroy_screen(struct pipe_screen *screen) +{ + struct i915_screen *is = i915_screen(screen); + + if (is->iws) + is->iws->destroy(is->iws); + + FREE(is); +} + +/** + * Create a new i915_screen object + */ +struct pipe_screen * +i915_create_screen(struct intel_winsys *iws, uint pci_id) +{ + struct i915_screen *is = CALLOC_STRUCT(i915_screen); + + if (!is) + return NULL; + + switch (pci_id) { + case PCI_CHIP_I915_G: + case PCI_CHIP_I915_GM: + is->is_i945 = FALSE; + break; + + case PCI_CHIP_I945_G: + case PCI_CHIP_I945_GM: + case PCI_CHIP_I945_GME: + case PCI_CHIP_G33_G: + case PCI_CHIP_Q33_G: + case PCI_CHIP_Q35_G: + is->is_i945 = TRUE; + break; + + default: + debug_printf("%s: unknown pci id 0x%x, cannot create screen\n", + __FUNCTION__, pci_id); + return NULL; + } + + is->pci_id = pci_id; + is->iws = iws; + + is->base.winsys = NULL; + + is->base.destroy = i915_destroy_screen; + + is->base.get_name = i915_get_name; + is->base.get_vendor = i915_get_vendor; + is->base.get_param = i915_get_param; + is->base.get_paramf = i915_get_paramf; + is->base.is_format_supported = i915_is_format_supported; + + is->base.fence_reference = i915_fence_reference; + is->base.fence_signalled = i915_fence_signalled; + is->base.fence_finish = i915_fence_finish; + + i915_init_screen_texture_functions(is); + i915_init_screen_buffer_functions(is); + + return &is->base; +} diff --git a/src/gallium/drivers/i915/i915_screen.h b/src/gallium/drivers/i915/i915_screen.h new file mode 100644 index 0000000000..5126485caa --- /dev/null +++ b/src/gallium/drivers/i915/i915_screen.h @@ -0,0 +1,80 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_SCREEN_H +#define I915_SCREEN_H + +#include "pipe/p_state.h" +#include "pipe/p_screen.h" + + +struct intel_winsys; + + +/** + * Subclass of pipe_screen + */ +struct i915_screen +{ + struct pipe_screen base; + + struct intel_winsys *iws; + + boolean is_i945; + uint pci_id; +}; + +/** + * Subclass of pipe_transfer + */ +struct i915_transfer +{ + struct pipe_transfer base; + + unsigned offset; +}; + + +/* + * Cast wrappers + */ + + +static INLINE struct i915_screen * +i915_screen(struct pipe_screen *pscreen) +{ + return (struct i915_screen *) pscreen; +} + +static INLINE struct i915_transfer * +i915_transfer(struct pipe_transfer *transfer) +{ + return (struct i915_transfer *)transfer; +} + + +#endif /* I915_SCREEN_H */ diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c new file mode 100644 index 0000000000..7d48e6e84d --- /dev/null +++ b/src/gallium/drivers/i915/i915_state.c @@ -0,0 +1,796 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + + +#include "draw/draw_context.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_parse.h" + +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_state.h" +#include "i915_state_inlines.h" +#include "i915_fpc.h" + +/* The i915 (and related graphics cores) do not support GL_CLAMP. The + * Intel drivers for "other operating systems" implement GL_CLAMP as + * GL_CLAMP_TO_EDGE, so the same is done here. + */ +static unsigned +translate_wrap_mode(unsigned wrap) +{ + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + return TEXCOORDMODE_WRAP; + case PIPE_TEX_WRAP_CLAMP: + return TEXCOORDMODE_CLAMP_EDGE; /* not quite correct */ + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return TEXCOORDMODE_CLAMP_EDGE; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return TEXCOORDMODE_CLAMP_BORDER; +// case PIPE_TEX_WRAP_MIRRORED_REPEAT: +// return TEXCOORDMODE_MIRROR; + default: + return TEXCOORDMODE_WRAP; + } +} + +static unsigned translate_img_filter( unsigned filter ) +{ + switch (filter) { + case PIPE_TEX_FILTER_NEAREST: + return FILTER_NEAREST; + case PIPE_TEX_FILTER_LINEAR: + return FILTER_LINEAR; + case PIPE_TEX_FILTER_ANISO: + return FILTER_ANISOTROPIC; + default: + assert(0); + return FILTER_NEAREST; + } +} + +static unsigned translate_mip_filter( unsigned filter ) +{ + switch (filter) { + case PIPE_TEX_MIPFILTER_NONE: + return MIPFILTER_NONE; + case PIPE_TEX_MIPFILTER_NEAREST: + return MIPFILTER_NEAREST; + case PIPE_TEX_MIPFILTER_LINEAR: + return MIPFILTER_LINEAR; + default: + assert(0); + return MIPFILTER_NONE; + } +} + + +/* None of this state is actually used for anything yet. + */ +static void * +i915_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + struct i915_blend_state *cso_data = CALLOC_STRUCT( i915_blend_state ); + + { + unsigned eqRGB = blend->rgb_func; + unsigned srcRGB = blend->rgb_src_factor; + unsigned dstRGB = blend->rgb_dst_factor; + + unsigned eqA = blend->alpha_func; + unsigned srcA = blend->alpha_src_factor; + unsigned dstA = blend->alpha_dst_factor; + + /* Special handling for MIN/MAX filter modes handled at + * state_tracker level. + */ + + if (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB) { + + cso_data->iab = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | + IAB_MODIFY_ENABLE | + IAB_ENABLE | + IAB_MODIFY_FUNC | + IAB_MODIFY_SRC_FACTOR | + IAB_MODIFY_DST_FACTOR | + SRC_ABLND_FACT(i915_translate_blend_factor(srcA)) | + DST_ABLND_FACT(i915_translate_blend_factor(dstA)) | + (i915_translate_blend_func(eqA) << IAB_FUNC_SHIFT)); + } + else { + cso_data->iab = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | + IAB_MODIFY_ENABLE | + 0); + } + } + + cso_data->modes4 |= (_3DSTATE_MODES_4_CMD | + ENABLE_LOGIC_OP_FUNC | + LOGIC_OP_FUNC(i915_translate_logic_op(blend->logicop_func))); + + if (blend->logicop_enable) + cso_data->LIS5 |= S5_LOGICOP_ENABLE; + + if (blend->dither) + cso_data->LIS5 |= S5_COLOR_DITHER_ENABLE; + + if ((blend->colormask & PIPE_MASK_R) == 0) + cso_data->LIS5 |= S5_WRITEDISABLE_RED; + + if ((blend->colormask & PIPE_MASK_G) == 0) + cso_data->LIS5 |= S5_WRITEDISABLE_GREEN; + + if ((blend->colormask & PIPE_MASK_B) == 0) + cso_data->LIS5 |= S5_WRITEDISABLE_BLUE; + + if ((blend->colormask & PIPE_MASK_A) == 0) + cso_data->LIS5 |= S5_WRITEDISABLE_ALPHA; + + if (blend->blend_enable) { + unsigned funcRGB = blend->rgb_func; + unsigned srcRGB = blend->rgb_src_factor; + unsigned dstRGB = blend->rgb_dst_factor; + + cso_data->LIS6 |= (S6_CBUF_BLEND_ENABLE | + SRC_BLND_FACT(i915_translate_blend_factor(srcRGB)) | + DST_BLND_FACT(i915_translate_blend_factor(dstRGB)) | + (i915_translate_blend_func(funcRGB) << S6_CBUF_BLEND_FUNC_SHIFT)); + } + + return cso_data; +} + +static void i915_bind_blend_state(struct pipe_context *pipe, + void *blend) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + i915->blend = (struct i915_blend_state*)blend; + + i915->dirty |= I915_NEW_BLEND; +} + + +static void i915_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + FREE(blend); +} + +static void i915_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + i915->blend_color = *blend_color; + + i915->dirty |= I915_NEW_BLEND; +} + +static void * +i915_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + struct i915_sampler_state *cso = CALLOC_STRUCT( i915_sampler_state ); + const unsigned ws = sampler->wrap_s; + const unsigned wt = sampler->wrap_t; + const unsigned wr = sampler->wrap_r; + unsigned minFilt, magFilt; + unsigned mipFilt; + + cso->templ = sampler; + + mipFilt = translate_mip_filter(sampler->min_mip_filter); + minFilt = translate_img_filter( sampler->min_img_filter ); + magFilt = translate_img_filter( sampler->mag_img_filter ); + + if (sampler->max_anisotropy > 2.0) { + cso->state[0] |= SS2_MAX_ANISO_4; + } + + { + int b = (int) (sampler->lod_bias * 16.0); + b = CLAMP(b, -256, 255); + cso->state[0] |= ((b << SS2_LOD_BIAS_SHIFT) & SS2_LOD_BIAS_MASK); + } + + /* Shadow: + */ + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) + { + cso->state[0] |= (SS2_SHADOW_ENABLE | + i915_translate_compare_func(sampler->compare_func)); + + minFilt = FILTER_4X4_FLAT; + magFilt = FILTER_4X4_FLAT; + } + + cso->state[0] |= ((minFilt << SS2_MIN_FILTER_SHIFT) | + (mipFilt << SS2_MIP_FILTER_SHIFT) | + (magFilt << SS2_MAG_FILTER_SHIFT)); + + cso->state[1] |= + ((translate_wrap_mode(ws) << SS3_TCX_ADDR_MODE_SHIFT) | + (translate_wrap_mode(wt) << SS3_TCY_ADDR_MODE_SHIFT) | + (translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT)); + + if (sampler->normalized_coords) + cso->state[1] |= SS3_NORMALIZED_COORDS; + + { + int minlod = (int) (16.0 * sampler->min_lod); + int maxlod = (int) (16.0 * sampler->max_lod); + minlod = CLAMP(minlod, 0, 16 * 11); + maxlod = CLAMP(maxlod, 0, 16 * 11); + + if (minlod > maxlod) + maxlod = minlod; + + cso->minlod = minlod; + cso->maxlod = maxlod; + } + + { + ubyte r = float_to_ubyte(sampler->border_color[0]); + ubyte g = float_to_ubyte(sampler->border_color[1]); + ubyte b = float_to_ubyte(sampler->border_color[2]); + ubyte a = float_to_ubyte(sampler->border_color[3]); + cso->state[2] = I915PACKCOLOR8888(r, g, b, a); + } + return cso; +} + +static void i915_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct i915_context *i915 = i915_context(pipe); + unsigned i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == i915->num_samplers && + !memcmp(i915->sampler, sampler, num * sizeof(void *))) + return; + + draw_flush(i915->draw); + + for (i = 0; i < num; ++i) + i915->sampler[i] = sampler[i]; + for (i = num; i < PIPE_MAX_SAMPLERS; ++i) + i915->sampler[i] = NULL; + + i915->num_samplers = num; + + i915->dirty |= I915_NEW_SAMPLER; +} + +static void i915_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + FREE(sampler); +} + + +/** XXX move someday? Or consolidate all these simple state setters + * into one file. + */ + +static void * +i915_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + struct i915_depth_stencil_state *cso = CALLOC_STRUCT( i915_depth_stencil_state ); + + { + int testmask = depth_stencil->stencil[0].valuemask & 0xff; + int writemask = depth_stencil->stencil[0].writemask & 0xff; + + cso->stencil_modes4 |= (_3DSTATE_MODES_4_CMD | + ENABLE_STENCIL_TEST_MASK | + STENCIL_TEST_MASK(testmask) | + ENABLE_STENCIL_WRITE_MASK | + STENCIL_WRITE_MASK(writemask)); + } + + if (depth_stencil->stencil[0].enabled) { + int test = i915_translate_compare_func(depth_stencil->stencil[0].func); + int fop = i915_translate_stencil_op(depth_stencil->stencil[0].fail_op); + int dfop = i915_translate_stencil_op(depth_stencil->stencil[0].zfail_op); + int dpop = i915_translate_stencil_op(depth_stencil->stencil[0].zpass_op); + int ref = depth_stencil->stencil[0].ref_value & 0xff; + + cso->stencil_LIS5 |= (S5_STENCIL_TEST_ENABLE | + S5_STENCIL_WRITE_ENABLE | + (ref << S5_STENCIL_REF_SHIFT) | + (test << S5_STENCIL_TEST_FUNC_SHIFT) | + (fop << S5_STENCIL_FAIL_SHIFT) | + (dfop << S5_STENCIL_PASS_Z_FAIL_SHIFT) | + (dpop << S5_STENCIL_PASS_Z_PASS_SHIFT)); + } + + if (depth_stencil->stencil[1].enabled) { + int test = i915_translate_compare_func(depth_stencil->stencil[1].func); + int fop = i915_translate_stencil_op(depth_stencil->stencil[1].fail_op); + int dfop = i915_translate_stencil_op(depth_stencil->stencil[1].zfail_op); + int dpop = i915_translate_stencil_op(depth_stencil->stencil[1].zpass_op); + int ref = depth_stencil->stencil[1].ref_value & 0xff; + int tmask = depth_stencil->stencil[1].valuemask & 0xff; + int wmask = depth_stencil->stencil[1].writemask & 0xff; + + cso->bfo[0] = (_3DSTATE_BACKFACE_STENCIL_OPS | + BFO_ENABLE_STENCIL_FUNCS | + BFO_ENABLE_STENCIL_TWO_SIDE | + BFO_ENABLE_STENCIL_REF | + BFO_STENCIL_TWO_SIDE | + (ref << BFO_STENCIL_REF_SHIFT) | + (test << BFO_STENCIL_TEST_SHIFT) | + (fop << BFO_STENCIL_FAIL_SHIFT) | + (dfop << BFO_STENCIL_PASS_Z_FAIL_SHIFT) | + (dpop << BFO_STENCIL_PASS_Z_PASS_SHIFT)); + + cso->bfo[1] = (_3DSTATE_BACKFACE_STENCIL_MASKS | + BFM_ENABLE_STENCIL_TEST_MASK | + BFM_ENABLE_STENCIL_WRITE_MASK | + (tmask << BFM_STENCIL_TEST_MASK_SHIFT) | + (wmask << BFM_STENCIL_WRITE_MASK_SHIFT)); + } + else { + /* This actually disables two-side stencil: The bit set is a + * modify-enable bit to indicate we are changing the two-side + * setting. Then there is a symbolic zero to show that we are + * setting the flag to zero/off. + */ + cso->bfo[0] = (_3DSTATE_BACKFACE_STENCIL_OPS | + BFO_ENABLE_STENCIL_TWO_SIDE | + 0); + cso->bfo[1] = 0; + } + + if (depth_stencil->depth.enabled) { + int func = i915_translate_compare_func(depth_stencil->depth.func); + + cso->depth_LIS6 |= (S6_DEPTH_TEST_ENABLE | + (func << S6_DEPTH_TEST_FUNC_SHIFT)); + + if (depth_stencil->depth.writemask) + cso->depth_LIS6 |= S6_DEPTH_WRITE_ENABLE; + } + + if (depth_stencil->alpha.enabled) { + int test = i915_translate_compare_func(depth_stencil->alpha.func); + ubyte refByte = float_to_ubyte(depth_stencil->alpha.ref_value); + + cso->depth_LIS6 |= (S6_ALPHA_TEST_ENABLE | + (test << S6_ALPHA_TEST_FUNC_SHIFT) | + (((unsigned) refByte) << S6_ALPHA_REF_SHIFT)); + } + + return cso; +} + +static void i915_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + i915->depth_stencil = (const struct i915_depth_stencil_state *)depth_stencil; + + i915->dirty |= I915_NEW_DEPTH_STENCIL; +} + +static void i915_delete_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + FREE(depth_stencil); +} + + +static void i915_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + memcpy( &i915->scissor, scissor, sizeof(*scissor) ); + i915->dirty |= I915_NEW_SCISSOR; +} + + +static void i915_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ +} + + + +static void * +i915_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct i915_context *i915 = i915_context(pipe); + struct i915_fragment_shader *ifs = CALLOC_STRUCT(i915_fragment_shader); + if (!ifs) + return NULL; + + ifs->state.tokens = tgsi_dup_tokens(templ->tokens); + + tgsi_scan_shader(templ->tokens, &ifs->info); + + /* The shader's compiled to i915 instructions here */ + i915_translate_fragment_program(i915, ifs); + + return ifs; +} + +static void +i915_bind_fs_state(struct pipe_context *pipe, void *shader) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + i915->fs = (struct i915_fragment_shader*) shader; + + i915->dirty |= I915_NEW_FS; +} + +static +void i915_delete_fs_state(struct pipe_context *pipe, void *shader) +{ + struct i915_fragment_shader *ifs = (struct i915_fragment_shader *) shader; + + if (ifs->program) + FREE(ifs->program); + ifs->program_len = 0; + + FREE((struct tgsi_token *)ifs->state.tokens); + + FREE(ifs); +} + + +static void * +i915_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct i915_context *i915 = i915_context(pipe); + + /* just pass-through to draw module */ + return draw_create_vertex_shader(i915->draw, templ); +} + +static void i915_bind_vs_state(struct pipe_context *pipe, void *shader) +{ + struct i915_context *i915 = i915_context(pipe); + + /* just pass-through to draw module */ + draw_bind_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader); + + i915->dirty |= I915_NEW_VS; +} + +static void i915_delete_vs_state(struct pipe_context *pipe, void *shader) +{ + struct i915_context *i915 = i915_context(pipe); + + /* just pass-through to draw module */ + draw_delete_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader); +} + +static void i915_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct i915_context *i915 = i915_context(pipe); + struct pipe_screen *screen = pipe->screen; + draw_flush(i915->draw); + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + /* Make a copy of shader constants. + * During fragment program translation we may add additional + * constants to the array. + * + * We want to consider the situation where some user constants + * (ex: a material color) may change frequently but the shader program + * stays the same. In that case we should only be updating the first + * N constants, leaving any extras from shader translation alone. + */ + if (buf) { + void *mapped; + if (buf->buffer && buf->buffer->size && + (mapped = pipe_buffer_map(screen, buf->buffer, + PIPE_BUFFER_USAGE_CPU_READ))) { + memcpy(i915->current.constants[shader], mapped, buf->buffer->size); + pipe_buffer_unmap(screen, buf->buffer); + i915->current.num_user_constants[shader] + = buf->buffer->size / (4 * sizeof(float)); + } + else { + i915->current.num_user_constants[shader] = 0; + } + } + + i915->dirty |= I915_NEW_CONSTANTS; +} + + +static void i915_set_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) +{ + struct i915_context *i915 = i915_context(pipe); + uint i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == i915->num_textures && + !memcmp(i915->texture, texture, num * sizeof(struct pipe_texture *))) + return; + + /* Fixes wrong texture in texobj with VBUF */ + draw_flush(i915->draw); + + for (i = 0; i < num; i++) + pipe_texture_reference((struct pipe_texture **) &i915->texture[i], + texture[i]); + + for (i = num; i < i915->num_textures; i++) + pipe_texture_reference((struct pipe_texture **) &i915->texture[i], + NULL); + + i915->num_textures = num; + + i915->dirty |= I915_NEW_TEXTURE; +} + + + +static void i915_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct i915_context *i915 = i915_context(pipe); + int i; + + draw_flush(i915->draw); + + i915->framebuffer.width = fb->width; + i915->framebuffer.height = fb->height; + i915->framebuffer.nr_cbufs = fb->nr_cbufs; + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + pipe_surface_reference(&i915->framebuffer.cbufs[i], fb->cbufs[i]); + } + pipe_surface_reference(&i915->framebuffer.zsbuf, fb->zsbuf); + + i915->dirty |= I915_NEW_FRAMEBUFFER; +} + + + +static void i915_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + draw_set_clip_state(i915->draw, clip); + + i915->dirty |= I915_NEW_CLIP; +} + + + +/* Called when driver state tracker notices changes to the viewport + * matrix: + */ +static void i915_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->viewport = *viewport; /* struct copy */ + + /* pass the viewport info to the draw module */ + draw_set_viewport_state(i915->draw, &i915->viewport); + + i915->dirty |= I915_NEW_VIEWPORT; +} + + +static void * +i915_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *rasterizer) +{ + struct i915_rasterizer_state *cso = CALLOC_STRUCT( i915_rasterizer_state ); + + cso->templ = rasterizer; + cso->color_interp = rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; + cso->light_twoside = rasterizer->light_twoside; + cso->ds[0].u = _3DSTATE_DEPTH_OFFSET_SCALE; + cso->ds[1].f = rasterizer->offset_scale; + if (rasterizer->poly_stipple_enable) { + cso->st |= ST1_ENABLE; + } + + if (rasterizer->scissor) + cso->sc[0] = _3DSTATE_SCISSOR_ENABLE_CMD | ENABLE_SCISSOR_RECT; + else + cso->sc[0] = _3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT; + + switch (rasterizer->cull_mode) { + case PIPE_WINDING_NONE: + cso->LIS4 |= S4_CULLMODE_NONE; + break; + case PIPE_WINDING_CW: + cso->LIS4 |= S4_CULLMODE_CW; + break; + case PIPE_WINDING_CCW: + cso->LIS4 |= S4_CULLMODE_CCW; + break; + case PIPE_WINDING_BOTH: + cso->LIS4 |= S4_CULLMODE_BOTH; + break; + } + + { + int line_width = CLAMP((int)(rasterizer->line_width * 2), 1, 0xf); + + cso->LIS4 |= line_width << S4_LINE_WIDTH_SHIFT; + + if (rasterizer->line_smooth) + cso->LIS4 |= S4_LINE_ANTIALIAS_ENABLE; + } + + { + int point_size = CLAMP((int) rasterizer->point_size, 1, 0xff); + + cso->LIS4 |= point_size << S4_POINT_WIDTH_SHIFT; + } + + if (rasterizer->flatshade) { + cso->LIS4 |= (S4_FLATSHADE_ALPHA | + S4_FLATSHADE_COLOR | + S4_FLATSHADE_SPECULAR); + } + + cso->LIS7 = fui( rasterizer->offset_units ); + + + return cso; +} + +static void i915_bind_rasterizer_state( struct pipe_context *pipe, + void *raster ) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->rasterizer = (struct i915_rasterizer_state *)raster; + + /* pass-through to draw module */ + draw_set_rasterizer_state(i915->draw, + (i915->rasterizer ? i915->rasterizer->templ : NULL)); + + i915->dirty |= I915_NEW_RASTERIZER; +} + +static void i915_delete_rasterizer_state(struct pipe_context *pipe, + void *raster) +{ + FREE(raster); +} + +static void i915_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct i915_context *i915 = i915_context(pipe); + /* Because we change state before the draw_set_vertex_buffers call + * we need a flush here, just to be sure. + */ + draw_flush(i915->draw); + + memcpy(i915->vertex_buffer, buffers, count * sizeof(buffers[0])); + i915->num_vertex_buffers = count; + + /* pass-through to draw module */ + draw_set_vertex_buffers(i915->draw, count, buffers); +} + +static void i915_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *elements) +{ + struct i915_context *i915 = i915_context(pipe); + /* Because we change state before the draw_set_vertex_buffers call + * we need a flush here, just to be sure. + */ + draw_flush(i915->draw); + + i915->num_vertex_elements = count; + /* pass-through to draw module */ + draw_set_vertex_elements(i915->draw, count, elements); +} + + +static void i915_set_edgeflags(struct pipe_context *pipe, + const unsigned *bitfield) +{ + /* TODO do something here */ +} + +void +i915_init_state_functions( struct i915_context *i915 ) +{ + i915->base.set_edgeflags = i915_set_edgeflags; + i915->base.create_blend_state = i915_create_blend_state; + i915->base.bind_blend_state = i915_bind_blend_state; + i915->base.delete_blend_state = i915_delete_blend_state; + + i915->base.create_sampler_state = i915_create_sampler_state; + i915->base.bind_sampler_states = i915_bind_sampler_states; + i915->base.delete_sampler_state = i915_delete_sampler_state; + + i915->base.create_depth_stencil_alpha_state = i915_create_depth_stencil_state; + i915->base.bind_depth_stencil_alpha_state = i915_bind_depth_stencil_state; + i915->base.delete_depth_stencil_alpha_state = i915_delete_depth_stencil_state; + + i915->base.create_rasterizer_state = i915_create_rasterizer_state; + i915->base.bind_rasterizer_state = i915_bind_rasterizer_state; + i915->base.delete_rasterizer_state = i915_delete_rasterizer_state; + i915->base.create_fs_state = i915_create_fs_state; + i915->base.bind_fs_state = i915_bind_fs_state; + i915->base.delete_fs_state = i915_delete_fs_state; + i915->base.create_vs_state = i915_create_vs_state; + i915->base.bind_vs_state = i915_bind_vs_state; + i915->base.delete_vs_state = i915_delete_vs_state; + + i915->base.set_blend_color = i915_set_blend_color; + i915->base.set_clip_state = i915_set_clip_state; + i915->base.set_constant_buffer = i915_set_constant_buffer; + i915->base.set_framebuffer_state = i915_set_framebuffer_state; + + i915->base.set_polygon_stipple = i915_set_polygon_stipple; + i915->base.set_scissor_state = i915_set_scissor_state; + i915->base.set_sampler_textures = i915_set_sampler_textures; + i915->base.set_viewport_state = i915_set_viewport_state; + i915->base.set_vertex_buffers = i915_set_vertex_buffers; + i915->base.set_vertex_elements = i915_set_vertex_elements; +} diff --git a/src/gallium/drivers/i915/i915_state.h b/src/gallium/drivers/i915/i915_state.h new file mode 100644 index 0000000000..86c6b0027d --- /dev/null +++ b/src/gallium/drivers/i915/i915_state.h @@ -0,0 +1,50 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#ifndef I915_STATE_H +#define I915_STATE_H + +struct i915_context; + + +struct i915_tracked_state { + unsigned dirty; + void (*update)( struct i915_context * ); +}; + +void i915_update_immediate( struct i915_context *i915 ); +void i915_update_dynamic( struct i915_context *i915 ); +void i915_update_derived( struct i915_context *i915 ); +void i915_update_samplers( struct i915_context *i915 ); +void i915_update_textures(struct i915_context *i915); + +void i915_emit_hardware_state( struct i915_context *i915 ); + +#endif diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c new file mode 100644 index 0000000000..178d4e8781 --- /dev/null +++ b/src/gallium/drivers/i915/i915_state_derived.c @@ -0,0 +1,183 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "i915_context.h" +#include "i915_state.h" +#include "i915_reg.h" +#include "i915_fpc.h" + + + +/** + * Determine the hardware vertex layout. + * Depends on vertex/fragment shader state. + */ +static void calculate_vertex_layout( struct i915_context *i915 ) +{ + const struct i915_fragment_shader *fs = i915->fs; + const enum interp_mode colorInterp = i915->rasterizer->color_interp; + struct vertex_info vinfo; + boolean texCoords[8], colors[2], fog, needW; + uint i; + int src; + + memset(texCoords, 0, sizeof(texCoords)); + colors[0] = colors[1] = fog = needW = FALSE; + memset(&vinfo, 0, sizeof(vinfo)); + + /* Determine which fragment program inputs are needed. Setup HW vertex + * layout below, in the HW-specific attribute order. + */ + for (i = 0; i < fs->info.num_inputs; i++) { + switch (fs->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + break; + case TGSI_SEMANTIC_COLOR: + assert(fs->info.input_semantic_index[i] < 2); + colors[fs->info.input_semantic_index[i]] = TRUE; + break; + case TGSI_SEMANTIC_GENERIC: + /* usually a texcoord */ + { + const uint unit = fs->info.input_semantic_index[i]; + assert(unit < 8); + texCoords[unit] = TRUE; + needW = TRUE; + } + break; + case TGSI_SEMANTIC_FOG: + fog = TRUE; + break; + default: + assert(0); + } + } + + + /* pos */ + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_POSITION, 0); + if (needW) { + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src); + vinfo.hwfmt[0] |= S4_VFMT_XYZW; + vinfo.attrib[0].emit = EMIT_4F; + } + else { + draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src); + vinfo.hwfmt[0] |= S4_VFMT_XYZ; + vinfo.attrib[0].emit = EMIT_3F; + } + + /* hardware point size */ + /* XXX todo */ + + /* primary color */ + if (colors[0]) { + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 0); + draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); + vinfo.hwfmt[0] |= S4_VFMT_COLOR; + } + + /* secondary color */ + if (colors[1]) { + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 1); + draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); + vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG; + } + + /* fog coord, not fog blend factor */ + if (fog) { + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_FOG, 0); + draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); + vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM; + } + + /* texcoords */ + for (i = 0; i < 8; i++) { + uint hwtc; + if (texCoords[i]) { + hwtc = TEXCOORDFMT_4D; + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_GENERIC, i); + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + } + else { + hwtc = TEXCOORDFMT_NOT_PRESENT; + } + vinfo.hwfmt[1] |= hwtc << (i * 4); + } + + draw_compute_vertex_size(&vinfo); + + if (memcmp(&i915->current.vertex_info, &vinfo, sizeof(vinfo))) { + /* Need to set this flag so that the LIS2/4 registers get set. + * It also means the i915_update_immediate() function must be called + * after this one, in i915_update_derived(). + */ + i915->dirty |= I915_NEW_VERTEX_FORMAT; + + memcpy(&i915->current.vertex_info, &vinfo, sizeof(vinfo)); + } +} + + + + +/* Hopefully this will remain quite simple, otherwise need to pull in + * something like the state tracker mechanism. + */ +void i915_update_derived( struct i915_context *i915 ) +{ + if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS)) + calculate_vertex_layout( i915 ); + + if (i915->dirty & (I915_NEW_SAMPLER | I915_NEW_TEXTURE)) + i915_update_samplers(i915); + + if (i915->dirty & I915_NEW_TEXTURE) + i915_update_textures(i915); + + if (i915->dirty) + i915_update_immediate( i915 ); + + if (i915->dirty) + i915_update_dynamic( i915 ); + + if (i915->dirty & I915_NEW_FS) { + i915->hardware_dirty |= I915_HW_PROGRAM; /* XXX right? */ + } + + /* HW emit currently references framebuffer state directly: + */ + if (i915->dirty & I915_NEW_FRAMEBUFFER) + i915->hardware_dirty |= I915_HW_STATIC; + + i915->dirty = 0; +} diff --git a/src/gallium/drivers/i915/i915_state_dynamic.c b/src/gallium/drivers/i915/i915_state_dynamic.c new file mode 100644 index 0000000000..86126a5a15 --- /dev/null +++ b/src/gallium/drivers/i915/i915_state_dynamic.c @@ -0,0 +1,310 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_batch.h" +#include "i915_state_inlines.h" +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_state.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_pack_color.h" + +#define FILE_DEBUG_FLAG DEBUG_STATE + +/* State that we have chosen to store in the DYNAMIC segment of the + * i915 indirect state mechanism. + * + * Can't cache these in the way we do the static state, as there is no + * start/size in the command packet, instead an 'end' value that gets + * incremented. + * + * Additionally, there seems to be a requirement to re-issue the full + * (active) state every time a 4kb boundary is crossed. + */ + +static INLINE void set_dynamic_indirect( struct i915_context *i915, + unsigned offset, + const unsigned *src, + unsigned dwords ) +{ + unsigned i; + + for (i = 0; i < dwords; i++) + i915->current.dynamic[offset + i] = src[i]; + + i915->hardware_dirty |= I915_HW_DYNAMIC; +} + + +/*********************************************************************** + * Modes4: stencil masks and logicop + */ +static void upload_MODES4( struct i915_context *i915 ) +{ + unsigned modes4 = 0; + + /* I915_NEW_STENCIL */ + modes4 |= i915->depth_stencil->stencil_modes4; + /* I915_NEW_BLEND */ + modes4 |= i915->blend->modes4; + + /* Always, so that we know when state is in-active: + */ + set_dynamic_indirect( i915, + I915_DYNAMIC_MODES4, + &modes4, + 1 ); +} + +const struct i915_tracked_state i915_upload_MODES4 = { + I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL, + upload_MODES4 +}; + + + + +/*********************************************************************** + */ + +static void upload_BFO( struct i915_context *i915 ) +{ + set_dynamic_indirect( i915, + I915_DYNAMIC_BFO_0, + &(i915->depth_stencil->bfo[0]), + 2 ); +} + +const struct i915_tracked_state i915_upload_BFO = { + I915_NEW_DEPTH_STENCIL, + upload_BFO +}; + + +/*********************************************************************** + */ + + +static void upload_BLENDCOLOR( struct i915_context *i915 ) +{ + unsigned bc[2]; + + memset( bc, 0, sizeof(bc) ); + + /* I915_NEW_BLEND {_COLOR} + */ + { + const float *color = i915->blend_color.color; + + bc[0] = _3DSTATE_CONST_BLEND_COLOR_CMD; + bc[1] = pack_ui32_float4( color[0], + color[1], + color[2], + color[3] ); + } + + set_dynamic_indirect( i915, + I915_DYNAMIC_BC_0, + bc, + 2 ); +} + +const struct i915_tracked_state i915_upload_BLENDCOLOR = { + I915_NEW_BLEND, + upload_BLENDCOLOR +}; + +/*********************************************************************** + */ + + +static void upload_IAB( struct i915_context *i915 ) +{ + unsigned iab = i915->blend->iab; + + + set_dynamic_indirect( i915, + I915_DYNAMIC_IAB, + &iab, + 1 ); +} + +const struct i915_tracked_state i915_upload_IAB = { + I915_NEW_BLEND, + upload_IAB +}; + + +/*********************************************************************** + */ + + + +static void upload_DEPTHSCALE( struct i915_context *i915 ) +{ + set_dynamic_indirect( i915, + I915_DYNAMIC_DEPTHSCALE_0, + &(i915->rasterizer->ds[0].u), + 2 ); +} + +const struct i915_tracked_state i915_upload_DEPTHSCALE = { + I915_NEW_RASTERIZER, + upload_DEPTHSCALE +}; + + + +/*********************************************************************** + * Polygon stipple + * + * The i915 supports a 4x4 stipple natively, GL wants 32x32. + * Fortunately stipple is usually a repeating pattern. + * + * XXX: does stipple pattern need to be adjusted according to + * the window position? + * + * XXX: possibly need workaround for conform paths test. + */ + +static void upload_STIPPLE( struct i915_context *i915 ) +{ + unsigned st[2]; + + st[0] = _3DSTATE_STIPPLE; + st[1] = 0; + + /* I915_NEW_RASTERIZER + */ + st[1] |= i915->rasterizer->st; + + + /* I915_NEW_STIPPLE + */ + { + const ubyte *mask = (const ubyte *)i915->poly_stipple.stipple; + ubyte p[4]; + + p[0] = mask[12] & 0xf; + p[1] = mask[8] & 0xf; + p[2] = mask[4] & 0xf; + p[3] = mask[0] & 0xf; + + /* Not sure what to do about fallbacks, so for now just dont: + */ + st[1] |= ((p[0] << 0) | + (p[1] << 4) | + (p[2] << 8) | + (p[3] << 12)); + } + + + set_dynamic_indirect( i915, + I915_DYNAMIC_STP_0, + &st[0], + 2 ); +} + + +const struct i915_tracked_state i915_upload_STIPPLE = { + I915_NEW_RASTERIZER | I915_NEW_STIPPLE, + upload_STIPPLE +}; + + + +/*********************************************************************** + * Scissor. + */ +static void upload_SCISSOR_ENABLE( struct i915_context *i915 ) +{ + set_dynamic_indirect( i915, + I915_DYNAMIC_SC_ENA_0, + &(i915->rasterizer->sc[0]), + 1 ); +} + +const struct i915_tracked_state i915_upload_SCISSOR_ENABLE = { + I915_NEW_RASTERIZER, + upload_SCISSOR_ENABLE +}; + + + +static void upload_SCISSOR_RECT( struct i915_context *i915 ) +{ + unsigned x1 = i915->scissor.minx; + unsigned y1 = i915->scissor.miny; + unsigned x2 = i915->scissor.maxx; + unsigned y2 = i915->scissor.maxy; + unsigned sc[3]; + + sc[0] = _3DSTATE_SCISSOR_RECT_0_CMD; + sc[1] = (y1 << 16) | (x1 & 0xffff); + sc[2] = (y2 << 16) | (x2 & 0xffff); + + set_dynamic_indirect( i915, + I915_DYNAMIC_SC_RECT_0, + &sc[0], + 3 ); +} + + +const struct i915_tracked_state i915_upload_SCISSOR_RECT = { + I915_NEW_SCISSOR, + upload_SCISSOR_RECT +}; + + + + + + +static const struct i915_tracked_state *atoms[] = { + &i915_upload_MODES4, + &i915_upload_BFO, + &i915_upload_BLENDCOLOR, + &i915_upload_IAB, + &i915_upload_DEPTHSCALE, + &i915_upload_STIPPLE, + &i915_upload_SCISSOR_ENABLE, + &i915_upload_SCISSOR_RECT +}; + +/* These will be dynamic indirect state commands, but for now just end + * up on the batch buffer with everything else. + */ +void i915_update_dynamic( struct i915_context *i915 ) +{ + int i; + + for (i = 0; i < Elements(atoms); i++) + if (i915->dirty & atoms[i]->dirty) + atoms[i]->update( i915 ); +} + diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c new file mode 100644 index 0000000000..a3d4e3b04e --- /dev/null +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -0,0 +1,402 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_batch.h" +#include "i915_reg.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" + +static unsigned translate_format( enum pipe_format format ) +{ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + return COLOR_BUF_ARGB8888; + case PIPE_FORMAT_R5G6B5_UNORM: + return COLOR_BUF_RGB565; + default: + assert(0); + return 0; + } +} + +static unsigned translate_depth_format( enum pipe_format zformat ) +{ + switch (zformat) { + case PIPE_FORMAT_S8Z24_UNORM: + return DEPTH_FRMT_24_FIXED_8_OTHER; + case PIPE_FORMAT_Z16_UNORM: + return DEPTH_FRMT_16_FIXED; + default: + assert(0); + return 0; + } +} + + +/** + * Examine framebuffer state to determine width, height. + */ +static boolean +framebuffer_size(const struct pipe_framebuffer_state *fb, + uint *width, uint *height) +{ + if (fb->cbufs[0]) { + *width = fb->cbufs[0]->width; + *height = fb->cbufs[0]->height; + return TRUE; + } + else if (fb->zsbuf) { + *width = fb->zsbuf->width; + *height = fb->zsbuf->height; + return TRUE; + } + else { + *width = *height = 0; + return FALSE; + } +} + + +/* Push the state into the sarea and/or texture memory. + */ +void +i915_emit_hardware_state(struct i915_context *i915 ) +{ + /* XXX: there must be an easier way */ + const unsigned dwords = ( 14 + + 7 + + I915_MAX_DYNAMIC + + 8 + + 2 + I915_TEX_UNITS*3 + + 2 + I915_TEX_UNITS*3 + + 2 + I915_MAX_CONSTANT*4 + +#if 0 + i915->current.program_len + +#else + i915->fs->program_len + +#endif + 6 + ) * 3/2; /* plus 50% margin */ + const unsigned relocs = ( I915_TEX_UNITS + + 3 + ) * 3/2; /* plus 50% margin */ + +#if 0 + debug_printf("i915_emit_hardware_state: %d dwords, %d relocs\n", dwords, relocs); +#endif + + if(!BEGIN_BATCH(dwords, relocs)) { + FLUSH_BATCH(NULL); + assert(BEGIN_BATCH(dwords, relocs)); + } + + /* 14 dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_INVARIENT) + { + OUT_BATCH(_3DSTATE_AA_CMD | + AA_LINE_ECAAR_WIDTH_ENABLE | + AA_LINE_ECAAR_WIDTH_1_0 | + AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0); + + OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DFLT_Z_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | + CSB_TCB(0, 0) | + CSB_TCB(1, 1) | + CSB_TCB(2, 2) | + CSB_TCB(3, 3) | + CSB_TCB(4, 4) | + CSB_TCB(5, 5) | + CSB_TCB(6, 6) | + CSB_TCB(7, 7)); + + OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | + ENABLE_TEXKILL_3D_4D | + TEXKILL_4D); + + /* Need to initialize this to zero. + */ + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0)); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); + + /* disable indirect state for now + */ + OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); + OUT_BATCH(0); + } + + /* 7 dwords, 1 relocs */ + if (i915->hardware_dirty & I915_HW_IMMEDIATE) + { + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S(0) | + I1_LOAD_S(1) | + I1_LOAD_S(2) | + I1_LOAD_S(4) | + I1_LOAD_S(5) | + I1_LOAD_S(6) | + (5)); + + if(i915->vbo) + OUT_RELOC(i915->vbo, + INTEL_USAGE_VERTEX, + i915->current.immediate[I915_IMMEDIATE_S0]); + else + /* FIXME: we should not do this */ + OUT_BATCH(0); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S1]); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S2]); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S4]); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S5]); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S6]); + } + + /* I915_MAX_DYNAMIC dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_DYNAMIC) + { + int i; + for (i = 0; i < I915_MAX_DYNAMIC; i++) { + OUT_BATCH(i915->current.dynamic[i]); + } + } + + /* 8 dwords, 2 relocs */ + if (i915->hardware_dirty & I915_HW_STATIC) + { + struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; + struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; + + if (cbuf_surface) { + unsigned ctile = BUF_3D_USE_FENCE; + struct i915_texture *tex = (struct i915_texture *) + cbuf_surface->texture; + assert(tex); + + if (tex && tex->sw_tiled) { + ctile = BUF_3D_TILED_SURFACE; + } + + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + + OUT_BATCH(BUF_3D_ID_COLOR_BACK | + BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ + ctile); + + OUT_RELOC(tex->buffer, + INTEL_USAGE_RENDER, + cbuf_surface->offset); + } + + /* What happens if no zbuf?? + */ + if (depth_surface) { + unsigned ztile = BUF_3D_USE_FENCE; + struct i915_texture *tex = (struct i915_texture *) + depth_surface->texture; + assert(tex); + + if (tex && tex->sw_tiled) { + ztile = BUF_3D_TILED_SURFACE; + } + + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + + OUT_BATCH(BUF_3D_ID_DEPTH | + BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ + ztile); + + OUT_RELOC(tex->buffer, + INTEL_USAGE_RENDER, + depth_surface->offset); + } + + { + unsigned cformat, zformat = 0; + + if (cbuf_surface) + cformat = cbuf_surface->format; + else + cformat = PIPE_FORMAT_A8R8G8B8_UNORM; /* arbitrary */ + cformat = translate_format(cformat); + + if (depth_surface) + zformat = translate_depth_format( i915->framebuffer.zsbuf->format ); + + OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); + OUT_BATCH(DSTORG_HORT_BIAS(0x8) | /* .5 */ + DSTORG_VERT_BIAS(0x8) | /* .5 */ + LOD_PRECLAMP_OGL | + TEX_DEFAULT_COLOR_OGL | + cformat | + zformat ); + } + } + +#if 01 + /* texture images */ + /* 2 + I915_TEX_UNITS*3 dwords, I915_TEX_UNITS relocs */ + if (i915->hardware_dirty & (I915_HW_MAP | I915_HW_SAMPLER)) + { + const uint nr = i915->current.sampler_enable_nr; + if (nr) { + const uint enabled = i915->current.sampler_enable_flags; + uint unit; + uint count = 0; + OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); + OUT_BATCH(enabled); + for (unit = 0; unit < I915_TEX_UNITS; unit++) { + if (enabled & (1 << unit)) { + struct intel_buffer *buf = i915->texture[unit]->buffer; + uint offset = 0; + assert(buf); + + count++; + + OUT_RELOC(buf, INTEL_USAGE_SAMPLER, offset); + OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ + OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ + } + } + assert(count == nr); + } + } +#endif + +#if 01 + /* samplers */ + /* 2 + I915_TEX_UNITS*3 dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_SAMPLER) + { + if (i915->current.sampler_enable_nr) { + int i; + + OUT_BATCH( _3DSTATE_SAMPLER_STATE | + (3 * i915->current.sampler_enable_nr) ); + + OUT_BATCH( i915->current.sampler_enable_flags ); + + for (i = 0; i < I915_TEX_UNITS; i++) { + if (i915->current.sampler_enable_flags & (1<current.sampler[i][0] ); + OUT_BATCH( i915->current.sampler[i][1] ); + OUT_BATCH( i915->current.sampler[i][2] ); + } + } + } + } +#endif + + /* constants */ + /* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_PROGRAM) + { + /* Collate the user-defined constants with the fragment shader's + * immediates according to the constant_flags[] array. + */ + const uint nr = i915->fs->num_constants; + if (nr) { + uint i; + + OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); + OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) ); + + for (i = 0; i < nr; i++) { + const uint *c; + if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { + /* grab user-defined constant */ + c = (uint *) i915->current.constants[PIPE_SHADER_FRAGMENT][i]; + } + else { + /* emit program constant */ + c = (uint *) i915->fs->constants[i]; + } +#if 0 /* debug */ + { + float *f = (float *) c; + printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], + (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER + ? "user" : "immediate")); + } +#endif + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); + } + } + } + + /* Fragment program */ + /* i915->current.program_len dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_PROGRAM) + { + uint i; + /* we should always have, at least, a pass-through program */ + assert(i915->fs->program_len > 0); + for (i = 0; i < i915->fs->program_len; i++) { + OUT_BATCH(i915->fs->program[i]); + } + } + + /* drawing surface size */ + /* 6 dwords, 0 relocs */ + { + uint w, h; + boolean k = framebuffer_size(&i915->framebuffer, &w, &h); + (void)k; + assert(k); + + OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(((w - 1) & 0xffff) | ((h - 1) << 16)); + OUT_BATCH(0); + OUT_BATCH(0); + } + + + i915->hardware_dirty = 0; +} diff --git a/src/gallium/drivers/i915/i915_state_immediate.c b/src/gallium/drivers/i915/i915_state_immediate.c new file mode 100644 index 0000000000..8c16bb4e27 --- /dev/null +++ b/src/gallium/drivers/i915/i915_state_immediate.c @@ -0,0 +1,225 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "i915_state_inlines.h" +#include "i915_context.h" +#include "i915_state.h" +#include "i915_reg.h" +#include "util/u_memory.h" + + +/* All state expressable with the LOAD_STATE_IMMEDIATE_1 packet. + * Would like to opportunistically recombine all these fragments into + * a single packet containing only what has changed, but for now emit + * as multiple packets. + */ + + + + +/*********************************************************************** + * S0,S1: Vertex buffer state. + */ +static void upload_S0S1(struct i915_context *i915) +{ + unsigned LIS0, LIS1; + + /* INTEL_NEW_VBO */ + /* TODO: re-use vertex buffers here? */ + LIS0 = i915->vbo_offset; + + /* INTEL_NEW_VERTEX_SIZE -- do this where the vertex size is calculated! + */ + { + unsigned vertex_size = i915->current.vertex_info.size; + + LIS1 = ((vertex_size << 24) | + (vertex_size << 16)); + } + + /* INTEL_NEW_VBO */ + /* TODO: use a vertex generation number to track vbo changes */ + if (1 || + i915->current.immediate[I915_IMMEDIATE_S0] != LIS0 || + i915->current.immediate[I915_IMMEDIATE_S1] != LIS1) + { + i915->current.immediate[I915_IMMEDIATE_S0] = LIS0; + i915->current.immediate[I915_IMMEDIATE_S1] = LIS1; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + +const struct i915_tracked_state i915_upload_S0S1 = { + I915_NEW_VBO | I915_NEW_VERTEX_FORMAT, + upload_S0S1 +}; + + + + +/*********************************************************************** + * S4: Vertex format, rasterization state + */ +static void upload_S2S4(struct i915_context *i915) +{ + unsigned LIS2, LIS4; + + /* I915_NEW_VERTEX_FORMAT */ + { + LIS2 = i915->current.vertex_info.hwfmt[1]; + LIS4 = i915->current.vertex_info.hwfmt[0]; + /* + debug_printf("LIS2: 0x%x LIS4: 0x%x\n", LIS2, LIS4); + */ + assert(LIS4); /* should never be zero? */ + } + + LIS4 |= i915->rasterizer->LIS4; + + if (LIS2 != i915->current.immediate[I915_IMMEDIATE_S2] || + LIS4 != i915->current.immediate[I915_IMMEDIATE_S4]) { + + i915->current.immediate[I915_IMMEDIATE_S2] = LIS2; + i915->current.immediate[I915_IMMEDIATE_S4] = LIS4; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + + +const struct i915_tracked_state i915_upload_S2S4 = { + I915_NEW_RASTERIZER | I915_NEW_VERTEX_FORMAT, + upload_S2S4 +}; + + + +/*********************************************************************** + * + */ +static void upload_S5( struct i915_context *i915 ) +{ + unsigned LIS5 = 0; + + LIS5 |= i915->depth_stencil->stencil_LIS5; + + LIS5 |= i915->blend->LIS5; + +#if 0 + /* I915_NEW_RASTERIZER */ + if (i915->state.Polygon->OffsetFill) { + LIS5 |= S5_GLOBAL_DEPTH_OFFSET_ENABLE; + } +#endif + + + if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) { + i915->current.immediate[I915_IMMEDIATE_S5] = LIS5; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + +const struct i915_tracked_state i915_upload_S5 = { + (I915_NEW_DEPTH_STENCIL | I915_NEW_BLEND | I915_NEW_RASTERIZER), + upload_S5 +}; + + +/*********************************************************************** + */ +static void upload_S6( struct i915_context *i915 ) +{ + unsigned LIS6 = (2 << S6_TRISTRIP_PV_SHIFT); + + /* I915_NEW_FRAMEBUFFER + */ + if (i915->framebuffer.cbufs[0]) + LIS6 |= S6_COLOR_WRITE_ENABLE; + + /* I915_NEW_BLEND + */ + LIS6 |= i915->blend->LIS6; + + /* I915_NEW_DEPTH + */ + LIS6 |= i915->depth_stencil->depth_LIS6; + + if (LIS6 != i915->current.immediate[I915_IMMEDIATE_S6]) { + i915->current.immediate[I915_IMMEDIATE_S6] = LIS6; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + +const struct i915_tracked_state i915_upload_S6 = { + I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER, + upload_S6 +}; + + +/*********************************************************************** + */ +static void upload_S7( struct i915_context *i915 ) +{ + unsigned LIS7; + + /* I915_NEW_RASTERIZER + */ + LIS7 = i915->rasterizer->LIS7; + + if (LIS7 != i915->current.immediate[I915_IMMEDIATE_S7]) { + i915->current.immediate[I915_IMMEDIATE_S7] = LIS7; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + +const struct i915_tracked_state i915_upload_S7 = { + I915_NEW_RASTERIZER, + upload_S7 +}; + + +static const struct i915_tracked_state *atoms[] = { + &i915_upload_S0S1, + &i915_upload_S2S4, + &i915_upload_S5, + &i915_upload_S6, + &i915_upload_S7 +}; + +/* + */ +void i915_update_immediate( struct i915_context *i915 ) +{ + int i; + + for (i = 0; i < Elements(atoms); i++) + if (i915->dirty & atoms[i]->dirty) + atoms[i]->update( i915 ); +} diff --git a/src/gallium/drivers/i915/i915_state_inlines.h b/src/gallium/drivers/i915/i915_state_inlines.h new file mode 100644 index 0000000000..378de8f9c4 --- /dev/null +++ b/src/gallium/drivers/i915/i915_state_inlines.h @@ -0,0 +1,230 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_STATE_INLINES_H +#define I915_STATE_INLINES_H + +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" +#include "i915_reg.h" + + +static INLINE unsigned +i915_translate_compare_func(unsigned func) +{ + switch (func) { + case PIPE_FUNC_NEVER: + return COMPAREFUNC_NEVER; + case PIPE_FUNC_LESS: + return COMPAREFUNC_LESS; + case PIPE_FUNC_LEQUAL: + return COMPAREFUNC_LEQUAL; + case PIPE_FUNC_GREATER: + return COMPAREFUNC_GREATER; + case PIPE_FUNC_GEQUAL: + return COMPAREFUNC_GEQUAL; + case PIPE_FUNC_NOTEQUAL: + return COMPAREFUNC_NOTEQUAL; + case PIPE_FUNC_EQUAL: + return COMPAREFUNC_EQUAL; + case PIPE_FUNC_ALWAYS: + return COMPAREFUNC_ALWAYS; + default: + return COMPAREFUNC_ALWAYS; + } +} + +static INLINE unsigned +i915_translate_stencil_op(unsigned op) +{ + switch (op) { + case PIPE_STENCIL_OP_KEEP: + return STENCILOP_KEEP; + case PIPE_STENCIL_OP_ZERO: + return STENCILOP_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return STENCILOP_REPLACE; + case PIPE_STENCIL_OP_INCR: + return STENCILOP_INCRSAT; + case PIPE_STENCIL_OP_DECR: + return STENCILOP_DECRSAT; + case PIPE_STENCIL_OP_INCR_WRAP: + return STENCILOP_INCR; + case PIPE_STENCIL_OP_DECR_WRAP: + return STENCILOP_DECR; + case PIPE_STENCIL_OP_INVERT: + return STENCILOP_INVERT; + default: + return STENCILOP_ZERO; + } +} + +static INLINE unsigned +i915_translate_blend_factor(unsigned factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ZERO: + return BLENDFACT_ZERO; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return BLENDFACT_SRC_ALPHA; + case PIPE_BLENDFACTOR_ONE: + return BLENDFACT_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return BLENDFACT_SRC_COLR; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return BLENDFACT_INV_SRC_COLR; + case PIPE_BLENDFACTOR_DST_COLOR: + return BLENDFACT_DST_COLR; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return BLENDFACT_INV_DST_COLR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return BLENDFACT_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return BLENDFACT_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return BLENDFACT_INV_DST_ALPHA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return BLENDFACT_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return BLENDFACT_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return BLENDFACT_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return BLENDFACT_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return BLENDFACT_INV_CONST_ALPHA; + default: + return BLENDFACT_ZERO; + } +} + +static INLINE unsigned +i915_translate_blend_func(unsigned mode) +{ + switch (mode) { + case PIPE_BLEND_ADD: + return BLENDFUNC_ADD; + case PIPE_BLEND_MIN: + return BLENDFUNC_MIN; + case PIPE_BLEND_MAX: + return BLENDFUNC_MAX; + case PIPE_BLEND_SUBTRACT: + return BLENDFUNC_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: + return BLENDFUNC_REVERSE_SUBTRACT; + default: + return 0; + } +} + + +static INLINE unsigned +i915_translate_logic_op(unsigned opcode) +{ + switch (opcode) { + case PIPE_LOGICOP_CLEAR: + return LOGICOP_CLEAR; + case PIPE_LOGICOP_AND: + return LOGICOP_AND; + case PIPE_LOGICOP_AND_REVERSE: + return LOGICOP_AND_RVRSE; + case PIPE_LOGICOP_COPY: + return LOGICOP_COPY; + case PIPE_LOGICOP_COPY_INVERTED: + return LOGICOP_COPY_INV; + case PIPE_LOGICOP_AND_INVERTED: + return LOGICOP_AND_INV; + case PIPE_LOGICOP_NOOP: + return LOGICOP_NOOP; + case PIPE_LOGICOP_XOR: + return LOGICOP_XOR; + case PIPE_LOGICOP_OR: + return LOGICOP_OR; + case PIPE_LOGICOP_OR_INVERTED: + return LOGICOP_OR_INV; + case PIPE_LOGICOP_NOR: + return LOGICOP_NOR; + case PIPE_LOGICOP_EQUIV: + return LOGICOP_EQUIV; + case PIPE_LOGICOP_INVERT: + return LOGICOP_INV; + case PIPE_LOGICOP_OR_REVERSE: + return LOGICOP_OR_RVRSE; + case PIPE_LOGICOP_NAND: + return LOGICOP_NAND; + case PIPE_LOGICOP_SET: + return LOGICOP_SET; + default: + return LOGICOP_SET; + } +} + + + +static INLINE boolean i915_validate_vertices( unsigned hw_prim, unsigned nr ) +{ + boolean ok; + + switch (hw_prim) { + case PRIM3D_POINTLIST: + ok = (nr >= 1); + assert(ok); + break; + case PRIM3D_LINELIST: + ok = (nr >= 2) && (nr % 2) == 0; + assert(ok); + break; + case PRIM3D_LINESTRIP: + ok = (nr >= 2); + assert(ok); + break; + case PRIM3D_TRILIST: + ok = (nr >= 3) && (nr % 3) == 0; + assert(ok); + break; + case PRIM3D_TRISTRIP: + ok = (nr >= 3); + assert(ok); + break; + case PRIM3D_TRIFAN: + ok = (nr >= 3); + assert(ok); + break; + case PRIM3D_POLY: + ok = (nr >= 3); + assert(ok); + break; + default: + assert(0); + ok = 0; + break; + } + + return ok; +} + +#endif diff --git a/src/gallium/drivers/i915/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c new file mode 100644 index 0000000000..c5e9084d12 --- /dev/null +++ b/src/gallium/drivers/i915/i915_state_sampler.c @@ -0,0 +1,299 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_memory.h" + +#include "i915_state_inlines.h" +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_state.h" + + +/* + * A note about min_lod & max_lod. + * + * There is a circular dependancy between the sampler state + * and the map state to be submitted to hw. + * + * Two condition must be meet: + * min_lod =< max_lod == true + * max_lod =< last_level == true + * + * + * This is all fine and dandy if it where for the fact that max_lod + * is set on the map state instead of the sampler state. That is + * the max_lod we submit on map is: + * max_lod = MIN2(last_level, max_lod); + * + * So we need to update the map state when we change samplers and + * we need to be change the sampler state when map state is changed. + * The first part is done by calling i915_update_texture in + * i915_update_samplers and the second part is done else where in + * code tracking the state changes. + */ + +static void +i915_update_texture(struct i915_context *i915, + uint unit, + const struct i915_texture *tex, + const struct i915_sampler_state *sampler, + uint state[6]); +/** + * Compute i915 texture sampling state. + * + * Recalculate all state from scratch. Perhaps not the most + * efficient, but this has gotten complex enough that we need + * something which is understandable and reliable. + * \param state returns the 3 words of compute state + */ +static void update_sampler(struct i915_context *i915, + uint unit, + const struct i915_sampler_state *sampler, + const struct i915_texture *tex, + unsigned state[3] ) +{ + const struct pipe_texture *pt = &tex->base; + unsigned minlod, lastlod; + + /* Need to do this after updating the maps, which call the + * intel_finalize_mipmap_tree and hence can update firstLevel: + */ + state[0] = sampler->state[0]; + state[1] = sampler->state[1]; + state[2] = sampler->state[2]; + + if (pt->format == PIPE_FORMAT_YCBCR || + pt->format == PIPE_FORMAT_YCBCR_REV) + state[0] |= SS2_COLORSPACE_CONVERSION; + + /* 3D textures don't seem to respect the border color. + * Fallback if there's ever a danger that they might refer to + * it. + * + * Effectively this means fallback on 3D clamp or + * clamp_to_border. + * + * XXX: Check if this is true on i945. + * XXX: Check if this bug got fixed in release silicon. + */ +#if 0 + { + const unsigned ws = sampler->templ->wrap_s; + const unsigned wt = sampler->templ->wrap_t; + const unsigned wr = sampler->templ->wrap_r; + if (pt->target == PIPE_TEXTURE_3D && + (sampler->templ->min_img_filter != PIPE_TEX_FILTER_NEAREST || + sampler->templ->mag_img_filter != PIPE_TEX_FILTER_NEAREST) && + (ws == PIPE_TEX_WRAP_CLAMP || + wt == PIPE_TEX_WRAP_CLAMP || + wr == PIPE_TEX_WRAP_CLAMP || + ws == PIPE_TEX_WRAP_CLAMP_TO_BORDER || + wt == PIPE_TEX_WRAP_CLAMP_TO_BORDER || + wr == PIPE_TEX_WRAP_CLAMP_TO_BORDER)) { + if (i915->conformance_mode > 0) { + assert(0); + /* sampler->fallback = true; */ + /* TODO */ + } + } + } +#endif + + /* See note at the top of file */ + minlod = sampler->minlod; + lastlod = pt->last_level << 4; + + if (lastlod < minlod) { + minlod = lastlod; + } + + state[1] |= (sampler->minlod << SS3_MIN_LOD_SHIFT); + state[1] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT); +} + + +void i915_update_samplers( struct i915_context *i915 ) +{ + uint unit; + + i915->current.sampler_enable_nr = 0; + i915->current.sampler_enable_flags = 0x0; + + for (unit = 0; unit < i915->num_textures && unit < i915->num_samplers; + unit++) { + /* determine unit enable/disable by looking for a bound texture */ + /* could also examine the fragment program? */ + if (i915->texture[unit]) { + update_sampler( i915, + unit, + i915->sampler[unit], /* sampler state */ + i915->texture[unit], /* texture */ + i915->current.sampler[unit] /* the result */ + ); + i915_update_texture( i915, + unit, + i915->texture[unit], /* texture */ + i915->sampler[unit], /* sampler state */ + i915->current.texbuffer[unit] ); + + i915->current.sampler_enable_nr++; + i915->current.sampler_enable_flags |= (1 << unit); + } + } + + i915->hardware_dirty |= I915_HW_SAMPLER | I915_HW_MAP; +} + + +static uint +translate_texture_format(enum pipe_format pipeFormat) +{ + switch (pipeFormat) { + case PIPE_FORMAT_L8_UNORM: + return MAPSURF_8BIT | MT_8BIT_L8; + case PIPE_FORMAT_I8_UNORM: + return MAPSURF_8BIT | MT_8BIT_I8; + case PIPE_FORMAT_A8_UNORM: + return MAPSURF_8BIT | MT_8BIT_A8; + case PIPE_FORMAT_A8L8_UNORM: + return MAPSURF_16BIT | MT_16BIT_AY88; + case PIPE_FORMAT_R5G6B5_UNORM: + return MAPSURF_16BIT | MT_16BIT_RGB565; + case PIPE_FORMAT_A1R5G5B5_UNORM: + return MAPSURF_16BIT | MT_16BIT_ARGB1555; + case PIPE_FORMAT_A4R4G4B4_UNORM: + return MAPSURF_16BIT | MT_16BIT_ARGB4444; + case PIPE_FORMAT_A8R8G8B8_UNORM: + return MAPSURF_32BIT | MT_32BIT_ARGB8888; + case PIPE_FORMAT_YCBCR_REV: + return (MAPSURF_422 | MT_422_YCRCB_NORMAL); + case PIPE_FORMAT_YCBCR: + return (MAPSURF_422 | MT_422_YCRCB_SWAPY); +#if 0 + case PIPE_FORMAT_RGB_FXT1: + case PIPE_FORMAT_RGBA_FXT1: + return (MAPSURF_COMPRESSED | MT_COMPRESS_FXT1); +#endif + case PIPE_FORMAT_Z16_UNORM: + return (MAPSURF_16BIT | MT_16BIT_L16); +#if 0 + case PIPE_FORMAT_RGBA_DXT1: + case PIPE_FORMAT_RGB_DXT1: + return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT1); + case PIPE_FORMAT_RGBA_DXT3: + return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3); + case PIPE_FORMAT_RGBA_DXT5: + return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); +#endif + case PIPE_FORMAT_S8Z24_UNORM: + return (MAPSURF_32BIT | MT_32BIT_xI824); + default: + debug_printf("i915: translate_texture_format() bad image format %x\n", + pipeFormat); + assert(0); + return 0; + } +} + + +static void +i915_update_texture(struct i915_context *i915, + uint unit, + const struct i915_texture *tex, + const struct i915_sampler_state *sampler, + uint state[6]) +{ + const struct pipe_texture *pt = &tex->base; + uint format, pitch; + const uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + const uint num_levels = pt->last_level; + unsigned max_lod = num_levels * 4; + unsigned tiled = MS3_USE_FENCE_REGS; + + assert(tex); + assert(width); + assert(height); + assert(depth); + + format = translate_texture_format(pt->format); + pitch = tex->stride; + + assert(format); + assert(pitch); + + if (tex->sw_tiled) { + assert(!((pitch - 1) & pitch)); + tiled = MS3_TILED_SURFACE; + } + + /* MS3 state */ + state[0] = + (((height - 1) << MS3_HEIGHT_SHIFT) + | ((width - 1) << MS3_WIDTH_SHIFT) + | format + | tiled); + + /* + * XXX When min_filter != mag_filter and there's just one mipmap level, + * set max_lod = 1 to make sure i915 chooses between min/mag filtering. + */ + + /* See note at the top of file */ + if (max_lod > (sampler->maxlod >> 2)) + max_lod = sampler->maxlod >> 2; + + /* MS4 state */ + state[1] = + ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) + | MS4_CUBE_FACE_ENA_MASK + | ((max_lod) << MS4_MAX_LOD_SHIFT) + | ((depth - 1) << MS4_VOLUME_DEPTH_SHIFT)); +} + + +void +i915_update_textures(struct i915_context *i915) +{ + uint unit; + + for (unit = 0; unit < i915->num_textures && unit < i915->num_samplers; + unit++) { + /* determine unit enable/disable by looking for a bound texture */ + /* could also examine the fragment program? */ + if (i915->texture[unit]) { + i915_update_texture( i915, + unit, + i915->texture[unit], /* texture */ + i915->sampler[unit], /* sampler state */ + i915->current.texbuffer[unit] ); + } + } + + i915->hardware_dirty |= I915_HW_MAP; +} diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c new file mode 100644 index 0000000000..ab8331f3e6 --- /dev/null +++ b/src/gallium/drivers/i915/i915_surface.c @@ -0,0 +1,94 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_context.h" +#include "i915_blit.h" +#include "i915_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_inlines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "util/u_tile.h" +#include "util/u_rect.h" + + +/* Assumes all values are within bounds -- no checking at this level - + * do it higher up if required. + */ +static void +i915_surface_copy(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, unsigned width, unsigned height) +{ + struct i915_texture *dst_tex = (struct i915_texture *)dst->texture; + struct i915_texture *src_tex = (struct i915_texture *)src->texture; + + assert( dst != src ); + assert( dst_tex->base.block.size == src_tex->base.block.size ); + assert( dst_tex->base.block.width == src_tex->base.block.height ); + assert( dst_tex->base.block.height == src_tex->base.block.height ); + assert( dst_tex->base.block.width == 1 ); + assert( dst_tex->base.block.height == 1 ); + + i915_copy_blit( i915_context(pipe), + FALSE, + dst_tex->base.block.size, + (unsigned short) src_tex->stride, src_tex->buffer, src->offset, + (unsigned short) dst_tex->stride, dst_tex->buffer, dst->offset, + (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height ); +} + + +static void +i915_surface_fill(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, unsigned value) +{ + struct i915_texture *tex = (struct i915_texture *)dst->texture; + + assert(tex->base.block.width == 1); + assert(tex->base.block.height == 1); + + i915_fill_blit( i915_context(pipe), + tex->base.block.size, + (unsigned short) tex->stride, + tex->buffer, dst->offset, + (short) dstx, (short) dsty, + (short) width, (short) height, + value ); +} + + +void +i915_init_surface_functions(struct i915_context *i915) +{ + i915->base.surface_copy = i915_surface_copy; + i915->base.surface_fill = i915_surface_fill; +} diff --git a/src/gallium/drivers/i915/i915_texture.c b/src/gallium/drivers/i915/i915_texture.c new file mode 100644 index 0000000000..286c9ace8e --- /dev/null +++ b/src/gallium/drivers/i915/i915_texture.c @@ -0,0 +1,958 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell + * Michel Dänzer + */ + +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "i915_context.h" +#include "i915_texture.h" +#include "i915_debug.h" +#include "i915_screen.h" +#include "intel_winsys.h" + + +/* + * Helper function and arrays + */ + + +/** + * Initial offset for Cube map. + */ +static const int initial_offsets[6][2] = { + {0, 0}, + {0, 2}, + {1, 0}, + {1, 2}, + {1, 1}, + {1, 3} +}; + +/** + * Step offsets for Cube map. + */ +static const int step_offsets[6][2] = { + {0, 2}, + {0, 2}, + {-1, 2}, + {-1, 2}, + {-1, 1}, + {-1, 1} +}; + +static unsigned +power_of_two(unsigned x) +{ + unsigned value = 1; + while (value < x) + value = value << 1; + return value; +} + +static unsigned +round_up(unsigned n, unsigned multiple) +{ + return (n + multiple - 1) & ~(multiple - 1); +} + + +/* + * More advanced helper funcs + */ + + +static void +i915_miptree_set_level_info(struct i915_texture *tex, + unsigned level, + unsigned nr_images, + unsigned w, unsigned h, unsigned d) +{ + struct pipe_texture *pt = &tex->base; + + assert(level < PIPE_MAX_TEXTURE_LEVELS); + + pt->width[level] = w; + pt->height[level] = h; + pt->depth[level] = d; + + pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w); + pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h); + + tex->nr_images[level] = nr_images; + + /* + DBG("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, + level, w, h, d, x, y, tex->level_offset[level]); + */ + + /* Not sure when this would happen, but anyway: + */ + if (tex->image_offset[level]) { + FREE(tex->image_offset[level]); + tex->image_offset[level] = NULL; + } + + assert(nr_images); + assert(!tex->image_offset[level]); + + tex->image_offset[level] = (unsigned *) MALLOC(nr_images * sizeof(unsigned)); + tex->image_offset[level][0] = 0; +} + +static void +i915_miptree_set_image_offset(struct i915_texture *tex, + unsigned level, unsigned img, unsigned x, unsigned y) +{ + if (img == 0 && level == 0) + assert(x == 0 && y == 0); + + assert(img < tex->nr_images[level]); + + tex->image_offset[level][img] = y * tex->stride + x * tex->base.block.size; + + /* + printf("%s level %d img %d pos %d,%d image_offset %x\n", + __FUNCTION__, level, img, x, y, tex->image_offset[level][img]); + */ +} + + +/* + * i915 layout functions, some used by i945 + */ + + +/** + * Special case to deal with scanout textures. + */ +static boolean +i915_scanout_layout(struct i915_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + + if (pt->last_level > 0 || pt->block.size != 4) + return FALSE; + + i915_miptree_set_level_info(tex, 0, 1, + tex->base.width[0], + tex->base.height[0], + 1); + i915_miptree_set_image_offset(tex, 0, 0, 0, 0); + + if (tex->base.width[0] >= 240) { + tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); + tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); + tex->hw_tiled = INTEL_TILE_X; + } else if (tex->base.width[0] == 64 && tex->base.height[0] == 64) { + tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); + tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); + } else { + return FALSE; + } + + debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, + tex->base.width[0], tex->base.height[0], pt->block.size, + tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy); + + return TRUE; +} + +/** + * Special case to deal with shared textures. + */ +static boolean +i915_display_target_layout(struct i915_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + + if (pt->last_level > 0 || pt->block.size != 4) + return FALSE; + + /* fallback to normal textures for small textures */ + if (tex->base.width[0] < 240) + return FALSE; + + i915_miptree_set_level_info(tex, 0, 1, + tex->base.width[0], + tex->base.height[0], + 1); + i915_miptree_set_image_offset(tex, 0, 0, 0, 0); + + tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); + tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); + tex->hw_tiled = INTEL_TILE_X; + + debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, + tex->base.width[0], tex->base.height[0], pt->block.size, + tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy); + + return TRUE; +} + +static void +i915_miptree_layout_2d(struct i915_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + unsigned level; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; + + /* used for scanouts that need special layouts */ + if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) + if (i915_scanout_layout(tex)) + return; + + /* for shared buffers we use some very like scanout */ + if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) + if (i915_display_target_layout(tex)) + return; + + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + tex->total_nblocksy = 0; + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_level_info(tex, level, 1, width, height, 1); + i915_miptree_set_image_offset(tex, level, 0, 0, tex->total_nblocksy); + + nblocksy = round_up(MAX2(2, nblocksy), 2); + + tex->total_nblocksy += nblocksy; + + width = minify(width); + height = minify(height); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); + } +} + +static void +i915_miptree_layout_3d(struct i915_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + unsigned level; + + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; + unsigned stack_nblocksy = 0; + + /* Calculate the size of a single slice. + */ + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + + /* XXX: hardware expects/requires 9 levels at minimum. + */ + for (level = 0; level <= MAX2(8, pt->last_level); level++) { + i915_miptree_set_level_info(tex, level, depth, width, height, depth); + + stack_nblocksy += MAX2(2, nblocksy); + + width = minify(width); + height = minify(height); + depth = minify(depth); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); + } + + /* Fixup depth image_offsets: + */ + depth = pt->depth[0]; + for (level = 0; level <= pt->last_level; level++) { + unsigned i; + for (i = 0; i < depth; i++) + i915_miptree_set_image_offset(tex, level, i, 0, i * stack_nblocksy); + + depth = minify(depth); + } + + /* Multiply slice size by texture depth for total size. It's + * remarkable how wasteful of memory the i915 texture layouts + * are. They are largely fixed in the i945. + */ + tex->total_nblocksy = stack_nblocksy * pt->depth[0]; +} + +static void +i915_miptree_layout_cube(struct i915_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + unsigned width = pt->width[0], height = pt->height[0]; + const unsigned nblocks = pt->nblocksx[0]; + unsigned level; + unsigned face; + + assert(width == height); /* cubemap images are square */ + + /* double pitch for cube layouts */ + tex->stride = round_up(nblocks * pt->block.size * 2, 4); + tex->total_nblocksy = nblocks * 4; + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_level_info(tex, level, 6, width, height, 1); + width /= 2; + height /= 2; + } + + for (face = 0; face < 6; face++) { + unsigned x = initial_offsets[face][0] * nblocks; + unsigned y = initial_offsets[face][1] * nblocks; + unsigned d = nblocks; + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_image_offset(tex, level, face, x, y); + d >>= 1; + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; + } + } +} + +static boolean +i915_miptree_layout(struct i915_texture * tex) +{ + struct pipe_texture *pt = &tex->base; + + switch (pt->target) { + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_2D: + i915_miptree_layout_2d(tex); + break; + case PIPE_TEXTURE_3D: + i915_miptree_layout_3d(tex); + break; + case PIPE_TEXTURE_CUBE: + i915_miptree_layout_cube(tex); + break; + default: + assert(0); + return FALSE; + } + + return TRUE; +} + + +/* + * i945 layout functions + */ + + +static void +i945_miptree_layout_2d(struct i915_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + const int align_x = 2, align_y = 4; + unsigned level; + unsigned x = 0; + unsigned y = 0; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; + + /* used for scanouts that need special layouts */ + if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) + if (i915_scanout_layout(tex)) + return; + + /* for shared buffers we use some very like scanout */ + if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) + if (i915_display_target_layout(tex)) + return; + + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + + /* May need to adjust pitch to accomodate the placement of + * the 2nd mipmap level. This occurs when the alignment + * constraints of mipmap placement push the right edge of the + * 2nd mipmap level out past the width of its parent. + */ + if (pt->last_level > 0) { + unsigned mip1_nblocksx + = align(pf_get_nblocksx(&pt->block, minify(width)), align_x) + + pf_get_nblocksx(&pt->block, minify(minify(width))); + + if (mip1_nblocksx > nblocksx) + tex->stride = mip1_nblocksx * pt->block.size; + } + + /* Pitch must be a whole number of dwords + */ + tex->stride = align(tex->stride, 64); + tex->total_nblocksy = 0; + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_level_info(tex, level, 1, width, height, 1); + i915_miptree_set_image_offset(tex, level, 0, x, y); + + nblocksy = align(nblocksy, align_y); + + /* Because the images are packed better, the final offset + * might not be the maximal one: + */ + tex->total_nblocksy = MAX2(tex->total_nblocksy, y + nblocksy); + + /* Layout_below: step right after second mipmap level. + */ + if (level == 1) { + x += align(nblocksx, align_x); + } + else { + y += nblocksy; + } + + width = minify(width); + height = minify(height); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); + } +} + +static void +i945_miptree_layout_3d(struct i915_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; + unsigned pack_x_pitch, pack_x_nr; + unsigned pack_y_pitch; + unsigned level; + + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + tex->total_nblocksy = 0; + + pack_y_pitch = MAX2(pt->nblocksy[0], 2); + pack_x_pitch = tex->stride / pt->block.size; + pack_x_nr = 1; + + for (level = 0; level <= pt->last_level; level++) { + int x = 0; + int y = 0; + unsigned q, j; + + i915_miptree_set_level_info(tex, level, depth, width, height, depth); + + for (q = 0; q < depth;) { + for (j = 0; j < pack_x_nr && q < depth; j++, q++) { + i915_miptree_set_image_offset(tex, level, q, x, y + tex->total_nblocksy); + x += pack_x_pitch; + } + + x = 0; + y += pack_y_pitch; + } + + tex->total_nblocksy += y; + + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr * pt->block.size <= tex->stride); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + } + + width = minify(width); + height = minify(height); + depth = minify(depth); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); + } +} + +static void +i945_miptree_layout_cube(struct i915_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + unsigned level; + + const unsigned nblocks = pt->nblocksx[0]; + unsigned face; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + + /* + printf("%s %i, %i\n", __FUNCTION__, pt->width[0], pt->height[0]); + */ + + assert(width == height); /* cubemap images are square */ + + /* + * XXX Should only be used for compressed formats. But lets + * keep this code active just in case. + * + * Depending on the size of the largest images, pitch can be + * determined either by the old-style packing of cubemap faces, + * or the final row of 4x4, 2x2 and 1x1 faces below this. + */ + if (nblocks > 32) + tex->stride = round_up(nblocks * pt->block.size * 2, 4); + else + tex->stride = 14 * 8 * pt->block.size; + + tex->total_nblocksy = nblocks * 4; + + /* Set all the levels to effectively occupy the whole rectangular region. + */ + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_level_info(tex, level, 6, width, height, 1); + width /= 2; + height /= 2; + } + + for (face = 0; face < 6; face++) { + unsigned x = initial_offsets[face][0] * nblocks; + unsigned y = initial_offsets[face][1] * nblocks; + unsigned d = nblocks; + +#if 0 /* Fix and enable this code for compressed formats */ + if (nblocks == 4 && face >= 4) { + y = tex->total_height - 4; + x = (face - 4) * 8; + } + else if (nblocks < 4 && (face > 0)) { + y = tex->total_height - 4; + x = face * 8; + } +#endif + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_image_offset(tex, level, face, x, y); + + d >>= 1; + +#if 0 /* Fix and enable this code for compressed formats */ + switch (d) { + case 4: + switch (face) { + case PIPE_TEX_FACE_POS_X: + case PIPE_TEX_FACE_NEG_X: + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; + break; + case PIPE_TEX_FACE_POS_Y: + case PIPE_TEX_FACE_NEG_Y: + y += 12; + x -= 8; + break; + case PIPE_TEX_FACE_POS_Z: + case PIPE_TEX_FACE_NEG_Z: + y = tex->total_height - 4; + x = (face - 4) * 8; + break; + } + case 2: + y = tex->total_height - 4; + x = 16 + face * 8; + break; + + case 1: + x += 48; + break; + default: +#endif + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; +#if 0 + break; + } +#endif + } + } +} + +static boolean +i945_miptree_layout(struct i915_texture * tex) +{ + struct pipe_texture *pt = &tex->base; + + switch (pt->target) { + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_2D: + i945_miptree_layout_2d(tex); + break; + case PIPE_TEXTURE_3D: + i945_miptree_layout_3d(tex); + break; + case PIPE_TEXTURE_CUBE: + i945_miptree_layout_cube(tex); + break; + default: + assert(0); + return FALSE; + } + + return TRUE; +} + + +/* + * Screen texture functions + */ + + +static struct pipe_texture * +i915_texture_create(struct pipe_screen *screen, + const struct pipe_texture *templat) +{ + struct i915_screen *is = i915_screen(screen); + struct intel_winsys *iws = is->iws; + struct i915_texture *tex = CALLOC_STRUCT(i915_texture); + size_t tex_size; + unsigned buf_usage = 0; + + if (!tex) + return NULL; + + tex->base = *templat; + pipe_reference_init(&tex->base.reference, 1); + tex->base.screen = screen; + + tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width[0]); + tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]); + + if (is->is_i945) { + if (!i945_miptree_layout(tex)) + goto fail; + } else { + if (!i915_miptree_layout(tex)) + goto fail; + } + + tex_size = tex->stride * tex->total_nblocksy; + + + + /* for scanouts and cursors, cursors arn't scanouts */ + if (templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY && templat->width[0] != 64) + buf_usage = INTEL_NEW_SCANOUT; + else + buf_usage = INTEL_NEW_TEXTURE; + + tex->buffer = iws->buffer_create(iws, tex_size, 64, buf_usage); + if (!tex->buffer) + goto fail; + + /* setup any hw fences */ + if (tex->hw_tiled) { + assert(tex->sw_tiled == INTEL_TILE_NONE); + iws->buffer_set_fence_reg(iws, tex->buffer, tex->stride, tex->hw_tiled); + } + + +#if 0 + void *ptr = ws->buffer_map(ws, tex->buffer, + PIPE_BUFFER_USAGE_CPU_WRITE); + memset(ptr, 0x80, tex_size); + ws->buffer_unmap(ws, tex->buffer); +#endif + + return &tex->base; + +fail: + FREE(tex); + return NULL; +} + +static struct pipe_texture * +i915_texture_blanket(struct pipe_screen * screen, + const struct pipe_texture *base, + const unsigned *stride, + struct pipe_buffer *buffer) +{ +#if 0 + struct i915_texture *tex; + assert(screen); + + /* Only supports one type */ + if (base->target != PIPE_TEXTURE_2D || + base->last_level != 0 || + base->depth[0] != 1) { + return NULL; + } + + tex = CALLOC_STRUCT(i915_texture); + if (!tex) + return NULL; + + tex->base = *base; + pipe_reference_init(&tex->base.reference, 1); + tex->base.screen = screen; + + tex->stride = stride[0]; + + i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1); + i915_miptree_set_image_offset(tex, 0, 0, 0, 0); + + pipe_buffer_reference(&tex->buffer, buffer); + + return &tex->base; +#else + return NULL; +#endif +} + +static void +i915_texture_destroy(struct pipe_texture *pt) +{ + struct i915_texture *tex = (struct i915_texture *)pt; + struct intel_winsys *iws = i915_screen(pt->screen)->iws; + uint i; + + /* + DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); + */ + + iws->buffer_destroy(iws, tex->buffer); + + for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) + if (tex->image_offset[i]) + FREE(tex->image_offset[i]); + + FREE(tex); +} + + +/* + * Screen surface functions + */ + + +static struct pipe_surface * +i915_get_tex_surface(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) +{ + struct i915_texture *tex = (struct i915_texture *)pt; + struct pipe_surface *ps; + unsigned offset; /* in bytes */ + + if (pt->target == PIPE_TEXTURE_CUBE) { + offset = tex->image_offset[level][face]; + } + else if (pt->target == PIPE_TEXTURE_3D) { + offset = tex->image_offset[level][zslice]; + } + else { + offset = tex->image_offset[level][0]; + assert(face == 0); + assert(zslice == 0); + } + + ps = CALLOC_STRUCT(pipe_surface); + if (ps) { + pipe_reference_init(&ps->reference, 1); + pipe_texture_reference(&ps->texture, pt); + ps->format = pt->format; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->offset = offset; + ps->usage = flags; + } + return ps; +} + +static void +i915_tex_surface_destroy(struct pipe_surface *surf) +{ + pipe_texture_reference(&surf->texture, NULL); + FREE(surf); +} + + +/* + * Screen transfer functions + */ + + +static struct pipe_transfer* +i915_get_tex_transfer(struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned face, unsigned level, unsigned zslice, + enum pipe_transfer_usage usage, unsigned x, unsigned y, + unsigned w, unsigned h) +{ + struct i915_texture *tex = (struct i915_texture *)texture; + struct i915_transfer *trans; + unsigned offset; /* in bytes */ + + if (texture->target == PIPE_TEXTURE_CUBE) { + offset = tex->image_offset[level][face]; + } + else if (texture->target == PIPE_TEXTURE_3D) { + offset = tex->image_offset[level][zslice]; + } + else { + offset = tex->image_offset[level][0]; + assert(face == 0); + assert(zslice == 0); + } + + trans = CALLOC_STRUCT(i915_transfer); + if (trans) { + pipe_texture_reference(&trans->base.texture, texture); + trans->base.format = trans->base.format; + trans->base.x = x; + trans->base.y = y; + trans->base.width = w; + trans->base.height = h; + trans->base.block = texture->block; + trans->base.nblocksx = texture->nblocksx[level]; + trans->base.nblocksy = texture->nblocksy[level]; + trans->base.stride = tex->stride; + trans->offset = offset; + trans->base.usage = usage; + } + return &trans->base; +} + +static void * +i915_transfer_map(struct pipe_screen *screen, + struct pipe_transfer *transfer) +{ + struct i915_texture *tex = (struct i915_texture *)transfer->texture; + struct intel_winsys *iws = i915_screen(tex->base.screen)->iws; + char *map; + boolean write = FALSE; + + if (transfer->usage & PIPE_TRANSFER_WRITE) + write = TRUE; + + map = iws->buffer_map(iws, tex->buffer, write); + if (map == NULL) + return NULL; + + return map + i915_transfer(transfer)->offset + + transfer->y / transfer->block.height * transfer->stride + + transfer->x / transfer->block.width * transfer->block.size; +} + +static void +i915_transfer_unmap(struct pipe_screen *screen, + struct pipe_transfer *transfer) +{ + struct i915_texture *tex = (struct i915_texture *)transfer->texture; + struct intel_winsys *iws = i915_screen(tex->base.screen)->iws; + iws->buffer_unmap(iws, tex->buffer); +} + +static void +i915_tex_transfer_destroy(struct pipe_transfer *trans) +{ + pipe_texture_reference(&trans->texture, NULL); + FREE(trans); +} + + +/* + * Other texture functions + */ + + +void +i915_init_screen_texture_functions(struct i915_screen *is) +{ + is->base.texture_create = i915_texture_create; + is->base.texture_blanket = i915_texture_blanket; + is->base.texture_destroy = i915_texture_destroy; + is->base.get_tex_surface = i915_get_tex_surface; + is->base.tex_surface_destroy = i915_tex_surface_destroy; + is->base.get_tex_transfer = i915_get_tex_transfer; + is->base.transfer_map = i915_transfer_map; + is->base.transfer_unmap = i915_transfer_unmap; + is->base.tex_transfer_destroy = i915_tex_transfer_destroy; +} + +struct pipe_texture * +i915_texture_blanket_intel(struct pipe_screen *screen, + struct pipe_texture *base, + unsigned stride, + struct intel_buffer *buffer) +{ + struct i915_texture *tex; + assert(screen); + + /* Only supports one type */ + if (base->target != PIPE_TEXTURE_2D || + base->last_level != 0 || + base->depth[0] != 1) { + return NULL; + } + + tex = CALLOC_STRUCT(i915_texture); + if (!tex) + return NULL; + + tex->base = *base; + pipe_reference_init(&tex->base.reference, 1); + tex->base.screen = screen; + + tex->stride = stride; + + i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1); + i915_miptree_set_image_offset(tex, 0, 0, 0, 0); + + tex->buffer = buffer; + + return &tex->base; +} + +boolean +i915_get_texture_buffer_intel(struct pipe_texture *texture, + struct intel_buffer **buffer, + unsigned *stride) +{ + struct i915_texture *tex = (struct i915_texture *)texture; + + if (!texture) + return FALSE; + + *stride = tex->stride; + *buffer = tex->buffer; + + return TRUE; +} diff --git a/src/gallium/drivers/i915/i915_texture.h b/src/gallium/drivers/i915/i915_texture.h new file mode 100644 index 0000000000..51a1dd984c --- /dev/null +++ b/src/gallium/drivers/i915/i915_texture.h @@ -0,0 +1,36 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_TEXTURE_H +#define I915_TEXTURE_H + +struct i915_screen; + +extern void +i915_init_screen_texture_functions(struct i915_screen *is); + +#endif /* I915_TEXTURE_H */ diff --git a/src/gallium/drivers/i915/intel_batchbuffer.h b/src/gallium/drivers/i915/intel_batchbuffer.h new file mode 100644 index 0000000000..db12dfd2ac --- /dev/null +++ b/src/gallium/drivers/i915/intel_batchbuffer.h @@ -0,0 +1,87 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_BATCH_H +#define INTEL_BATCH_H + +#include "intel_winsys.h" + +static INLINE boolean +intel_batchbuffer_check(struct intel_batchbuffer *batch, + size_t dwords, + size_t relocs) +{ + return dwords * 4 <= batch->size - (batch->ptr - batch->map) && + relocs <= (batch->max_relocs - batch->relocs); +} + +static INLINE size_t +intel_batchbuffer_space(struct intel_batchbuffer *batch) +{ + return batch->size - (batch->ptr - batch->map); +} + +static INLINE void +intel_batchbuffer_dword(struct intel_batchbuffer *batch, + unsigned dword) +{ + if (intel_batchbuffer_space(batch) < 4) + return; + + *(unsigned *)batch->ptr = dword; + batch->ptr += 4; +} + +static INLINE void +intel_batchbuffer_write(struct intel_batchbuffer *batch, + void *data, + size_t size) +{ + if (intel_batchbuffer_space(batch) < size) + return; + + memcpy(data, batch->ptr, size); + batch->ptr += size; +} + +static INLINE int +intel_batchbuffer_reloc(struct intel_batchbuffer *batch, + struct intel_buffer *buffer, + enum intel_buffer_usage usage, + size_t offset) +{ + return batch->iws->batchbuffer_reloc(batch, buffer, usage, offset); +} + +static INLINE void +intel_batchbuffer_flush(struct intel_batchbuffer *batch, + struct pipe_fence_handle **fence) +{ + batch->iws->batchbuffer_flush(batch, fence); +} + +#endif diff --git a/src/gallium/drivers/i915/intel_winsys.h b/src/gallium/drivers/i915/intel_winsys.h new file mode 100644 index 0000000000..42c5e7470e --- /dev/null +++ b/src/gallium/drivers/i915/intel_winsys.h @@ -0,0 +1,230 @@ +/************************************************************************** + * + * Copyright © 2009 Jakob Bornecrantz + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_WINSYS_H +#define INTEL_WINSYS_H + +#include "pipe/p_compiler.h" + +struct intel_winsys; +struct intel_buffer; +struct intel_batchbuffer; +struct pipe_texture; +struct pipe_fence_handle; + +enum intel_buffer_usage +{ + /* use on textures */ + INTEL_USAGE_RENDER = 0x01, + INTEL_USAGE_SAMPLER = 0x02, + INTEL_USAGE_2D_TARGET = 0x04, + INTEL_USAGE_2D_SOURCE = 0x08, + /* use on vertex */ + INTEL_USAGE_VERTEX = 0x10, +}; + +enum intel_buffer_type +{ + INTEL_NEW_TEXTURE, + INTEL_NEW_SCANOUT, /**< a texture used for scanning out from */ + INTEL_NEW_VERTEX, +}; + +enum intel_buffer_tile +{ + INTEL_TILE_NONE, + INTEL_TILE_X, + INTEL_TILE_Y, +}; + +struct intel_batchbuffer { + + struct intel_winsys *iws; + + /** + * Values exported to speed up the writing the batchbuffer, + * instead of having to go trough a accesor function for + * each dword written. + */ + /*{@*/ + uint8_t *map; + uint8_t *ptr; + size_t size; + + size_t relocs; + size_t max_relocs; + /*@}*/ +}; + +struct intel_winsys { + + /** + * Batchbuffer functions. + */ + /*@{*/ + /** + * Create a new batchbuffer. + */ + struct intel_batchbuffer *(*batchbuffer_create)(struct intel_winsys *iws); + + /** + * Emit a relocation to a buffer. + * Target position in batchbuffer is the same as ptr. + * + * @batch + * @reloc buffer address to be inserted into target. + * @usage how is the hardware going to use the buffer. + * @offset add this to the reloc buffers address + * @target buffer where to write the address, null for batchbuffer. + */ + int (*batchbuffer_reloc)(struct intel_batchbuffer *batch, + struct intel_buffer *reloc, + enum intel_buffer_usage usage, + unsigned offset); + + /** + * Flush a bufferbatch. + */ + void (*batchbuffer_flush)(struct intel_batchbuffer *batch, + struct pipe_fence_handle **fence); + + /** + * Destroy a batchbuffer. + */ + void (*batchbuffer_destroy)(struct intel_batchbuffer *batch); + /*@}*/ + + + /** + * Buffer functions. + */ + /*@{*/ + /** + * Create a buffer. + */ + struct intel_buffer *(*buffer_create)(struct intel_winsys *iws, + unsigned size, unsigned alignment, + enum intel_buffer_type type); + + /** + * Fence a buffer with a fence reg. + * Not to be confused with pipe_fence_handle. + */ + int (*buffer_set_fence_reg)(struct intel_winsys *iws, + struct intel_buffer *buffer, + unsigned stride, + enum intel_buffer_tile tile); + + /** + * Map a buffer. + */ + void *(*buffer_map)(struct intel_winsys *iws, + struct intel_buffer *buffer, + boolean write); + + /** + * Unmap a buffer. + */ + void (*buffer_unmap)(struct intel_winsys *iws, + struct intel_buffer *buffer); + + /** + * Write to a buffer. + * + * Arguments follows pwrite(2) + */ + int (*buffer_write)(struct intel_winsys *iws, + struct intel_buffer *dst, + const void *src, + size_t size, + size_t offset); + + void (*buffer_destroy)(struct intel_winsys *iws, + struct intel_buffer *buffer); + /*@}*/ + + + /** + * Fence functions. + */ + /*@{*/ + /** + * Reference fence and set ptr to fence. + */ + void (*fence_reference)(struct intel_winsys *iws, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence); + + /** + * Check if a fence has finished. + */ + int (*fence_signalled)(struct intel_winsys *iws, + struct pipe_fence_handle *fence); + + /** + * Wait on a fence to finish. + */ + int (*fence_finish)(struct intel_winsys *iws, + struct pipe_fence_handle *fence); + /*@}*/ + + + /** + * Destroy the winsys. + */ + void (*destroy)(struct intel_winsys *iws); +}; + + +/** + * Create i915 pipe_screen. + */ +struct pipe_screen *i915_create_screen(struct intel_winsys *iws, unsigned pci_id); + +/** + * Create a i915 pipe_context. + */ +struct pipe_context *i915_create_context(struct pipe_screen *screen); + +/** + * Get the intel_winsys buffer backing the texture. + * + * TODO UGLY + */ +boolean i915_get_texture_buffer_intel(struct pipe_texture *texture, + struct intel_buffer **buffer, + unsigned *stride); + +/** + * Wrap a intel_winsys buffer with a texture blanket. + * + * TODO UGLY + */ +struct pipe_texture * i915_texture_blanket_intel(struct pipe_screen *screen, + struct pipe_texture *tmplt, + unsigned pitch, + struct intel_buffer *buffer); + +#endif diff --git a/src/gallium/drivers/i915simple/Makefile b/src/gallium/drivers/i915simple/Makefile deleted file mode 100644 index fb533c1796..0000000000 --- a/src/gallium/drivers/i915simple/Makefile +++ /dev/null @@ -1,28 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = i915simple - -C_SOURCES = \ - i915_blit.c \ - i915_buffer.c \ - i915_clear.c \ - i915_flush.c \ - i915_context.c \ - i915_debug.c \ - i915_debug_fp.c \ - i915_state.c \ - i915_state_immediate.c \ - i915_state_dynamic.c \ - i915_state_derived.c \ - i915_state_emit.c \ - i915_state_sampler.c \ - i915_screen.c \ - i915_prim_emit.c \ - i915_prim_vbuf.c \ - i915_texture.c \ - i915_fpc_emit.c \ - i915_fpc_translate.c \ - i915_surface.c - -include ../../Makefile.template diff --git a/src/gallium/drivers/i915simple/SConscript b/src/gallium/drivers/i915simple/SConscript deleted file mode 100644 index 778c4ed0fd..0000000000 --- a/src/gallium/drivers/i915simple/SConscript +++ /dev/null @@ -1,30 +0,0 @@ -Import('*') - -env = env.Clone() - -i915simple = env.ConvenienceLibrary( - target = 'i915simple', - source = [ - 'i915_blit.c', - 'i915_buffer.c', - 'i915_clear.c', - 'i915_context.c', - 'i915_debug.c', - 'i915_debug_fp.c', - 'i915_flush.c', - 'i915_fpc_emit.c', - 'i915_fpc_translate.c', - 'i915_prim_emit.c', - 'i915_prim_vbuf.c', - 'i915_screen.c', - 'i915_state.c', - 'i915_state_derived.c', - 'i915_state_dynamic.c', - 'i915_state_emit.c', - 'i915_state_immediate.c', - 'i915_state_sampler.c', - 'i915_surface.c', - 'i915_texture.c', - ]) - -Export('i915simple') diff --git a/src/gallium/drivers/i915simple/i915_batch.h b/src/gallium/drivers/i915simple/i915_batch.h deleted file mode 100644 index b813784723..0000000000 --- a/src/gallium/drivers/i915simple/i915_batch.h +++ /dev/null @@ -1,47 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef I915_BATCH_H -#define I915_BATCH_H - -#include "intel_batchbuffer.h" - -#define BEGIN_BATCH(dwords, relocs) \ - (intel_batchbuffer_check(i915->batch, dwords, relocs)) - -#define OUT_BATCH(dword) \ - intel_batchbuffer_dword(i915->batch, dword) - -#define OUT_RELOC(buf, usage, offset) \ - intel_batchbuffer_reloc(i915->batch, buf, usage, offset) - -#define FLUSH_BATCH(fence) do { \ - intel_batchbuffer_flush(i915->batch, fence); \ - i915->hardware_dirty = ~0; \ -} while (0) - -#endif diff --git a/src/gallium/drivers/i915simple/i915_blit.c b/src/gallium/drivers/i915simple/i915_blit.c deleted file mode 100644 index 83dfc33528..0000000000 --- a/src/gallium/drivers/i915simple/i915_blit.c +++ /dev/null @@ -1,151 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "i915_blit.h" -#include "i915_reg.h" -#include "i915_batch.h" -#include "i915_debug.h" - -#define FILE_DEBUG_FLAG DEBUG_BLIT - -void -i915_fill_blit(struct i915_context *i915, - unsigned cpp, - unsigned short dst_pitch, - struct intel_buffer *dst_buffer, - unsigned dst_offset, - short x, short y, - short w, short h, - unsigned color) -{ - unsigned BR13, CMD; - - - I915_DBG(i915, - "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", - __FUNCTION__, - dst_buffer, dst_pitch, dst_offset, x, y, w, h); - - switch (cpp) { - case 1: - case 2: - case 3: - BR13 = (((int) dst_pitch) & 0xffff) | - (0xF0 << 16) | (1 << 24); - CMD = XY_COLOR_BLT_CMD; - break; - case 4: - BR13 = (((int) dst_pitch) & 0xffff) | - (0xF0 << 16) | (1 << 24) | (1 << 25); - CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA | - XY_COLOR_BLT_WRITE_RGB); - break; - default: - return; - } - - if (!BEGIN_BATCH(6, 1)) { - FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(6, 1)); - } - OUT_BATCH(CMD); - OUT_BATCH(BR13); - OUT_BATCH((y << 16) | x); - OUT_BATCH(((y + h) << 16) | (x + w)); - OUT_RELOC(dst_buffer, INTEL_USAGE_2D_TARGET, dst_offset); - OUT_BATCH(color); - FLUSH_BATCH(NULL); -} - -void -i915_copy_blit(struct i915_context *i915, - unsigned do_flip, - unsigned cpp, - unsigned short src_pitch, - struct intel_buffer *src_buffer, - unsigned src_offset, - unsigned short dst_pitch, - struct intel_buffer *dst_buffer, - unsigned dst_offset, - short src_x, short src_y, - short dst_x, short dst_y, - short w, short h) -{ - unsigned CMD, BR13; - int dst_y2 = dst_y + h; - int dst_x2 = dst_x + w; - - - I915_DBG(i915, - "%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", - __FUNCTION__, - src_buffer, src_pitch, src_offset, src_x, src_y, - dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); - - switch (cpp) { - case 1: - case 2: - case 3: - BR13 = (((int) dst_pitch) & 0xffff) | - (0xCC << 16) | (1 << 24); - CMD = XY_SRC_COPY_BLT_CMD; - break; - case 4: - BR13 = (((int) dst_pitch) & 0xffff) | - (0xCC << 16) | (1 << 24) | (1 << 25); - CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | - XY_SRC_COPY_BLT_WRITE_RGB); - break; - default: - return; - } - - if (dst_y2 < dst_y || dst_x2 < dst_x) { - return; - } - - /* Hardware can handle negative pitches but loses the ability to do - * proper overlapping blits in that case. We don't really have a - * need for either at this stage. - */ - assert (dst_pitch > 0 && src_pitch > 0); - - if (!BEGIN_BATCH(8, 2)) { - FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(8, 2)); - } - OUT_BATCH(CMD); - OUT_BATCH(BR13); - OUT_BATCH((dst_y << 16) | dst_x); - OUT_BATCH((dst_y2 << 16) | dst_x2); - OUT_RELOC(dst_buffer, INTEL_USAGE_2D_TARGET, dst_offset); - OUT_BATCH((src_y << 16) | src_x); - OUT_BATCH(((int) src_pitch & 0xffff)); - OUT_RELOC(src_buffer, INTEL_USAGE_2D_SOURCE, src_offset); - FLUSH_BATCH(NULL); -} diff --git a/src/gallium/drivers/i915simple/i915_blit.h b/src/gallium/drivers/i915simple/i915_blit.h deleted file mode 100644 index 8ce3220cfd..0000000000 --- a/src/gallium/drivers/i915simple/i915_blit.h +++ /dev/null @@ -1,55 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef I915_BLIT_H -#define I915_BLIT_H - -#include "i915_context.h" - -extern void i915_copy_blit(struct i915_context *i915, - unsigned do_flip, - unsigned cpp, - unsigned short src_pitch, - struct intel_buffer *src_buffer, - unsigned src_offset, - unsigned short dst_pitch, - struct intel_buffer *dst_buffer, - unsigned dst_offset, - short srcx, short srcy, - short dstx, short dsty, - short w, short h); - -extern void i915_fill_blit(struct i915_context *i915, - unsigned cpp, - unsigned short dst_pitch, - struct intel_buffer *dst_buffer, - unsigned dst_offset, - short x, short y, - short w, short h, unsigned color); - - -#endif diff --git a/src/gallium/drivers/i915simple/i915_buffer.c b/src/gallium/drivers/i915simple/i915_buffer.c deleted file mode 100644 index effeba1297..0000000000 --- a/src/gallium/drivers/i915simple/i915_buffer.c +++ /dev/null @@ -1,136 +0,0 @@ -/************************************************************************** - * - * Copyright © 2009 Jakob Bornecrantz - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "util/u_memory.h" -#include "i915_screen.h" -#include "i915_buffer.h" - -struct intel_buffer; - -struct i915_buffer -{ - struct pipe_buffer base; - - struct intel_buffer *ibuf; /** hw buffer */ - - void *data; /**< user and malloc data */ - boolean own; /**< we own the data incase of malloc */ -}; - -static INLINE struct i915_buffer * -i915_buffer(struct pipe_buffer *buffer) -{ - return (struct i915_buffer *)buffer; -} - -static struct pipe_buffer * -i915_buffer_create(struct pipe_screen *screen, - unsigned alignment, - unsigned usage, - unsigned size) -{ - struct i915_buffer *buf = CALLOC_STRUCT(i915_buffer); - - if (!buf) - return NULL; - - pipe_reference_init(&buf->base.reference, 1); - buf->base.alignment = alignment; - buf->base.screen = screen; - buf->base.usage = usage; - buf->base.size = size; - buf->data = MALLOC(size); - buf->own = TRUE; - - if (!buf->data) - goto err; - - return &buf->base; - -err: - FREE(buf); - return NULL; -} - -static struct pipe_buffer * -i915_user_buffer_create(struct pipe_screen *screen, - void *ptr, - unsigned bytes) -{ - struct i915_buffer *buf = CALLOC_STRUCT(i915_buffer); - - if (!buf) - return NULL; - - pipe_reference_init(&buf->base.reference, 1); - buf->base.alignment = 0; - buf->base.screen = screen; - buf->base.usage = 0; - buf->base.size = bytes; - buf->data = ptr; - buf->own = FALSE; - - return &buf->base; -} - -static void * -i915_buffer_map(struct pipe_screen *screen, - struct pipe_buffer *buffer, - unsigned usage) -{ - struct i915_buffer *buf = i915_buffer(buffer); - assert(!buf->ibuf); - return buf->data; -} - -static void -i915_buffer_unmap(struct pipe_screen *screen, - struct pipe_buffer *buffer) -{ - struct i915_buffer *buf = i915_buffer(buffer); - assert(!buf->ibuf); -} - -static void -i915_buffer_destroy(struct pipe_buffer *buffer) -{ - struct i915_buffer *buf = i915_buffer(buffer); - assert(!buf->ibuf); - - if (buf->own) - FREE(buf->data); - FREE(buf); -} - -void i915_init_screen_buffer_functions(struct i915_screen *screen) -{ - screen->base.buffer_create = i915_buffer_create; - screen->base.user_buffer_create = i915_user_buffer_create; - screen->base.buffer_map = i915_buffer_map; - screen->base.buffer_map_range = NULL; - screen->base.buffer_flush_mapped_range = NULL; - screen->base.buffer_unmap = i915_buffer_unmap; - screen->base.buffer_destroy = i915_buffer_destroy; -} diff --git a/src/gallium/drivers/i915simple/i915_buffer.h b/src/gallium/drivers/i915simple/i915_buffer.h deleted file mode 100644 index 80fda7c62f..0000000000 --- a/src/gallium/drivers/i915simple/i915_buffer.h +++ /dev/null @@ -1,31 +0,0 @@ -/************************************************************************** - * - * Copyright © 2009 Jakob Bornecrantz - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef I915_BUFFER_H -#define I915_BUFFER_H - -void i915_init_screen_buffer_functions(struct i915_screen *screen); - -#endif diff --git a/src/gallium/drivers/i915simple/i915_clear.c b/src/gallium/drivers/i915simple/i915_clear.c deleted file mode 100644 index 90530f2826..0000000000 --- a/src/gallium/drivers/i915simple/i915_clear.c +++ /dev/null @@ -1,48 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: - * Brian Paul - */ - - -#include "util/u_clear.h" -#include "i915_context.h" -#include "i915_state.h" - - -/** - * Clear the given buffers to the specified values. - * No masking, no scissor (clear entire buffer). - */ -void -i915_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, - double depth, unsigned stencil) -{ - util_clear(pipe, &i915_context(pipe)->framebuffer, buffers, rgba, depth, - stencil); -} diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c deleted file mode 100644 index e745f3342d..0000000000 --- a/src/gallium/drivers/i915simple/i915_context.c +++ /dev/null @@ -1,244 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "i915_context.h" -#include "i915_state.h" -#include "i915_screen.h" -#include "i915_batch.h" -#include "i915_texture.h" -#include "i915_reg.h" - -#include "draw/draw_context.h" -#include "pipe/p_defines.h" -#include "pipe/internal/p_winsys_screen.h" -#include "pipe/p_inlines.h" -#include "util/u_memory.h" -#include "pipe/p_screen.h" - - -/* - * Draw functions - */ - - -static boolean -i915_draw_range_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned min_index, - unsigned max_index, - unsigned prim, unsigned start, unsigned count) -{ - struct i915_context *i915 = i915_context(pipe); - struct draw_context *draw = i915->draw; - unsigned i; - - if (i915->dirty) - i915_update_derived(i915); - - /* - * Map vertex buffers - */ - for (i = 0; i < i915->num_vertex_buffers; i++) { - void *buf = pipe_buffer_map(pipe->screen, i915->vertex_buffer[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_vertex_buffer(draw, i, buf); - } - - /* - * Map index buffer, if present - */ - if (indexBuffer) { - void *mapped_indexes = pipe_buffer_map(pipe->screen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer_range(draw, indexSize, - min_index, - max_index, - mapped_indexes); - } else { - draw_set_mapped_element_buffer(draw, 0, NULL); - } - - - draw_set_mapped_constant_buffer(draw, - i915->current.constants[PIPE_SHADER_VERTEX], - (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * - 4 * sizeof(float))); - - /* - * Do the drawing - */ - draw_arrays(i915->draw, prim, start, count); - - /* - * unmap vertex/index buffers - */ - for (i = 0; i < i915->num_vertex_buffers; i++) { - pipe_buffer_unmap(pipe->screen, i915->vertex_buffer[i].buffer); - draw_set_mapped_vertex_buffer(draw, i, NULL); - } - - if (indexBuffer) { - pipe_buffer_unmap(pipe->screen, indexBuffer); - draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL); - } - - return TRUE; -} - -static boolean -i915_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned prim, unsigned start, unsigned count) -{ - return i915_draw_range_elements(pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - prim, start, count); -} - -static boolean -i915_draw_arrays(struct pipe_context *pipe, - unsigned prim, unsigned start, unsigned count) -{ - return i915_draw_elements(pipe, NULL, 0, prim, start, count); -} - - -/* - * Is referenced functions - */ - - -static unsigned int -i915_is_texture_referenced(struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level) -{ - /** - * FIXME: Return the corrent result. We can't alays return referenced - * since it causes a double flush within the vbo module. - */ -#if 0 - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -#else - return 0; -#endif -} - -static unsigned int -i915_is_buffer_referenced(struct pipe_context *pipe, - struct pipe_buffer *buf) -{ - /** - * FIXME: Return the corrent result. We can't alays return referenced - * since it causes a double flush within the vbo module. - */ -#if 0 - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -#else - return 0; -#endif -} - - -/* - * Generic context functions - */ - - -static void i915_destroy(struct pipe_context *pipe) -{ - struct i915_context *i915 = i915_context(pipe); - int i; - - draw_destroy(i915->draw); - - if(i915->batch) - i915->iws->batchbuffer_destroy(i915->batch); - - /* unbind framebuffer */ - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - pipe_surface_reference(&i915->framebuffer.cbufs[i], NULL); - } - pipe_surface_reference(&i915->framebuffer.zsbuf, NULL); - - FREE(i915); -} - -struct pipe_context * -i915_create_context(struct pipe_screen *screen) -{ - struct i915_context *i915; - - i915 = CALLOC_STRUCT(i915_context); - if (i915 == NULL) - return NULL; - - i915->iws = i915_screen(screen)->iws; - i915->base.winsys = NULL; - i915->base.screen = screen; - - i915->base.destroy = i915_destroy; - - i915->base.clear = i915_clear; - - i915->base.draw_arrays = i915_draw_arrays; - i915->base.draw_elements = i915_draw_elements; - i915->base.draw_range_elements = i915_draw_range_elements; - - i915->base.is_texture_referenced = i915_is_texture_referenced; - i915->base.is_buffer_referenced = i915_is_buffer_referenced; - - /* - * Create drawing context and plug our rendering stage into it. - */ - i915->draw = draw_create(); - assert(i915->draw); - if (!debug_get_bool_option("I915_NO_VBUF", FALSE)) { - draw_set_rasterize_stage(i915->draw, i915_draw_vbuf_stage(i915)); - } else { - draw_set_rasterize_stage(i915->draw, i915_draw_render_stage(i915)); - } - - i915_init_surface_functions(i915); - i915_init_state_functions(i915); - i915_init_flush_functions(i915); - - draw_install_aaline_stage(i915->draw, &i915->base); - draw_install_aapoint_stage(i915->draw, &i915->base); - - i915->dirty = ~0; - i915->hardware_dirty = ~0; - - /* Batch stream debugging is a bit hacked up at the moment: - */ - i915->batch = i915->iws->batchbuffer_create(i915->iws); - - return &i915->base; -} diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h deleted file mode 100644 index 234b441ce6..0000000000 --- a/src/gallium/drivers/i915simple/i915_context.h +++ /dev/null @@ -1,350 +0,0 @@ - /************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef I915_CONTEXT_H -#define I915_CONTEXT_H - - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "draw/draw_vertex.h" - -#include "tgsi/tgsi_scan.h" - - -struct intel_winsys; -struct intel_buffer; -struct intel_batchbuffer; - - -#define I915_TEX_UNITS 8 - -#define I915_DYNAMIC_MODES4 0 -#define I915_DYNAMIC_DEPTHSCALE_0 1 /* just the header */ -#define I915_DYNAMIC_DEPTHSCALE_1 2 -#define I915_DYNAMIC_IAB 3 -#define I915_DYNAMIC_BC_0 4 /* just the header */ -#define I915_DYNAMIC_BC_1 5 -#define I915_DYNAMIC_BFO_0 6 -#define I915_DYNAMIC_BFO_1 7 -#define I915_DYNAMIC_STP_0 8 -#define I915_DYNAMIC_STP_1 9 -#define I915_DYNAMIC_SC_ENA_0 10 -#define I915_DYNAMIC_SC_RECT_0 11 -#define I915_DYNAMIC_SC_RECT_1 12 -#define I915_DYNAMIC_SC_RECT_2 13 -#define I915_MAX_DYNAMIC 14 - - -#define I915_IMMEDIATE_S0 0 -#define I915_IMMEDIATE_S1 1 -#define I915_IMMEDIATE_S2 2 -#define I915_IMMEDIATE_S3 3 -#define I915_IMMEDIATE_S4 4 -#define I915_IMMEDIATE_S5 5 -#define I915_IMMEDIATE_S6 6 -#define I915_IMMEDIATE_S7 7 -#define I915_MAX_IMMEDIATE 8 - -/* These must mach the order of LI0_STATE_* bits, as they will be used - * to generate hardware packets: - */ -#define I915_CACHE_STATIC 0 -#define I915_CACHE_DYNAMIC 1 /* handled specially */ -#define I915_CACHE_SAMPLER 2 -#define I915_CACHE_MAP 3 -#define I915_CACHE_PROGRAM 4 -#define I915_CACHE_CONSTANTS 5 -#define I915_MAX_CACHE 6 - -#define I915_MAX_CONSTANT 32 - - -/** See constant_flags[] below */ -#define I915_CONSTFLAG_USER 0x1f - - -/** - * Subclass of pipe_shader_state - */ -struct i915_fragment_shader -{ - struct pipe_shader_state state; - - struct tgsi_shader_info info; - - uint *program; - uint program_len; - - /** - * constants introduced during translation. - * These are placed at the end of the constant buffer and grow toward - * the beginning (eg: slot 31, 30 29, ...) - * User-provided constants start at 0. - * This allows both types of constants to co-exist (until there's too many) - * and doesn't require regenerating/changing the fragment program to - * shuffle constants around. - */ - uint num_constants; - float constants[I915_MAX_CONSTANT][4]; - - /** - * Status of each constant - * if I915_CONSTFLAG_PARAM, the value must be taken from the corresponding - * slot of the user's constant buffer. (set by pipe->set_constant_buffer()) - * Else, the bitmask indicates which components are occupied by immediates. - */ - ubyte constant_flags[I915_MAX_CONSTANT]; -}; - - -struct i915_cache_context; - -/* Use to calculate differences between state emitted to hardware and - * current driver-calculated state. - */ -struct i915_state -{ - unsigned immediate[I915_MAX_IMMEDIATE]; - unsigned dynamic[I915_MAX_DYNAMIC]; - - float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4]; - /** number of constants passed in through a constant buffer */ - uint num_user_constants[PIPE_SHADER_TYPES]; - - /* texture sampler state */ - unsigned sampler[I915_TEX_UNITS][3]; - unsigned sampler_enable_flags; - unsigned sampler_enable_nr; - - /* texture image buffers */ - unsigned texbuffer[I915_TEX_UNITS][2]; - - /** Describes the current hardware vertex layout */ - struct vertex_info vertex_info; - - unsigned id; /* track lost context events */ -}; - -struct i915_blend_state { - unsigned iab; - unsigned modes4; - unsigned LIS5; - unsigned LIS6; -}; - -struct i915_depth_stencil_state { - unsigned stencil_modes4; - unsigned bfo[2]; - unsigned stencil_LIS5; - unsigned depth_LIS6; -}; - -struct i915_rasterizer_state { - int light_twoside : 1; - unsigned st; - enum interp_mode color_interp; - - unsigned LIS4; - unsigned LIS7; - unsigned sc[1]; - - const struct pipe_rasterizer_state *templ; - - union { float f; unsigned u; } ds[2]; -}; - -struct i915_sampler_state { - unsigned state[3]; - const struct pipe_sampler_state *templ; - unsigned minlod; - unsigned maxlod; -}; - -struct i915_texture { - struct pipe_texture base; - - /* Derived from the above: - */ - unsigned stride; - unsigned depth_stride; /* per-image on i945? */ - unsigned total_nblocksy; - - unsigned sw_tiled; /**< tiled with software flags */ - unsigned hw_tiled; /**< tiled with hardware fences */ - - unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; - - /* Explicitly store the offset of each image for each cube face or - * depth value. Pretty much have to accept that hardware formats - * are going to be so diverse that there is no unified way to - * compute the offsets of depth/cube images within a mipmap level, - * so have to store them as a lookup table: - */ - unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ - - /* The data is held here: - */ - struct intel_buffer *buffer; -}; - -struct i915_context -{ - struct pipe_context base; - - struct intel_winsys *iws; - - struct draw_context *draw; - - /* The most recent drawing state as set by the driver: - */ - const struct i915_blend_state *blend; - const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS]; - const struct i915_depth_stencil_state *depth_stencil; - const struct i915_rasterizer_state *rasterizer; - - struct i915_fragment_shader *fs; - - struct pipe_blend_color blend_color; - struct pipe_clip_state clip; - struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; - struct pipe_framebuffer_state framebuffer; - struct pipe_poly_stipple poly_stipple; - struct pipe_scissor_state scissor; - struct i915_texture *texture[PIPE_MAX_SAMPLERS]; - struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - - unsigned dirty; - - unsigned num_samplers; - unsigned num_textures; - unsigned num_vertex_elements; - unsigned num_vertex_buffers; - - struct intel_batchbuffer *batch; - - /** Vertex buffer */ - struct intel_buffer *vbo; - size_t vbo_offset; - unsigned vbo_flushed; - - struct i915_state current; - unsigned hardware_dirty; - - unsigned debug; -}; - -/* A flag for each state_tracker state object: - */ -#define I915_NEW_VIEWPORT 0x1 -#define I915_NEW_RASTERIZER 0x2 -#define I915_NEW_FS 0x4 -#define I915_NEW_BLEND 0x8 -#define I915_NEW_CLIP 0x10 -#define I915_NEW_SCISSOR 0x20 -#define I915_NEW_STIPPLE 0x40 -#define I915_NEW_FRAMEBUFFER 0x80 -#define I915_NEW_ALPHA_TEST 0x100 -#define I915_NEW_DEPTH_STENCIL 0x200 -#define I915_NEW_SAMPLER 0x400 -#define I915_NEW_TEXTURE 0x800 -#define I915_NEW_CONSTANTS 0x1000 -#define I915_NEW_VBO 0x2000 -#define I915_NEW_VS 0x4000 - - -/* Driver's internally generated state flags: - */ -#define I915_NEW_VERTEX_FORMAT 0x10000 - - -/* Dirty flags for hardware emit - */ -#define I915_HW_STATIC (1<ptr + stream->offset); - - if (len == 0) { - PRINTF(stream, "Error - zero length packet (0x%08x)\n", stream->ptr[0]); - assert(0); - return FALSE; - } - - if (stream->print_addresses) - PRINTF(stream, "%08x: ", stream->offset); - - - PRINTF(stream, "%s (%d dwords):\n", name, len); - for (i = 0; i < len; i++) - PRINTF(stream, "\t0x%08x\n", ptr[i]); - PRINTF(stream, "\n"); - - stream->offset += len * sizeof(unsigned); - - return TRUE; -} - - -static const char *get_prim_name( unsigned val ) -{ - switch (val & PRIM3D_MASK) { - case PRIM3D_TRILIST: return "TRILIST"; break; - case PRIM3D_TRISTRIP: return "TRISTRIP"; break; - case PRIM3D_TRISTRIP_RVRSE: return "TRISTRIP_RVRSE"; break; - case PRIM3D_TRIFAN: return "TRIFAN"; break; - case PRIM3D_POLY: return "POLY"; break; - case PRIM3D_LINELIST: return "LINELIST"; break; - case PRIM3D_LINESTRIP: return "LINESTRIP"; break; - case PRIM3D_RECTLIST: return "RECTLIST"; break; - case PRIM3D_POINTLIST: return "POINTLIST"; break; - case PRIM3D_DIB: return "DIB"; break; - case PRIM3D_CLEAR_RECT: return "CLEAR_RECT"; break; - case PRIM3D_ZONE_INIT: return "ZONE_INIT"; break; - default: return "????"; break; - } -} - -static boolean debug_prim( struct debug_stream *stream, const char *name, - boolean dump_floats, - unsigned len ) -{ - unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); - const char *prim = get_prim_name( ptr[0] ); - unsigned i; - - - - PRINTF(stream, "%s %s (%d dwords):\n", name, prim, len); - PRINTF(stream, "\t0x%08x\n", ptr[0]); - for (i = 1; i < len; i++) { - if (dump_floats) - PRINTF(stream, "\t0x%08x // %f\n", ptr[i], *(float *)&ptr[i]); - else - PRINTF(stream, "\t0x%08x\n", ptr[i]); - } - - - PRINTF(stream, "\n"); - - stream->offset += len * sizeof(unsigned); - - return TRUE; -} - - - - -static boolean debug_program( struct debug_stream *stream, const char *name, unsigned len ) -{ - unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); - - if (len == 0) { - PRINTF(stream, "Error - zero length packet (0x%08x)\n", stream->ptr[0]); - assert(0); - return FALSE; - } - - if (stream->print_addresses) - PRINTF(stream, "%08x: ", stream->offset); - - PRINTF(stream, "%s (%d dwords):\n", name, len); - i915_disassemble_program( stream, ptr, len ); - - stream->offset += len * sizeof(unsigned); - return TRUE; -} - - -static boolean debug_chain( struct debug_stream *stream, const char *name, unsigned len ) -{ - unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); - unsigned old_offset = stream->offset + len * sizeof(unsigned); - unsigned i; - - PRINTF(stream, "%s (%d dwords):\n", name, len); - for (i = 0; i < len; i++) - PRINTF(stream, "\t0x%08x\n", ptr[i]); - - stream->offset = ptr[1] & ~0x3; - - if (stream->offset < old_offset) - PRINTF(stream, "\n... skipping backwards from 0x%x --> 0x%x ...\n\n", - old_offset, stream->offset ); - else - PRINTF(stream, "\n... skipping from 0x%x --> 0x%x ...\n\n", - old_offset, stream->offset ); - - - return TRUE; -} - - -static boolean debug_variable_length_prim( struct debug_stream *stream ) -{ - unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); - const char *prim = get_prim_name( ptr[0] ); - unsigned i, len; - - ushort *idx = (ushort *)(ptr+1); - for (i = 0; idx[i] != 0xffff; i++) - ; - - len = 1+(i+2)/2; - - PRINTF(stream, "3DPRIM, %s variable length %d indicies (%d dwords):\n", prim, i, len); - for (i = 0; i < len; i++) - PRINTF(stream, "\t0x%08x\n", ptr[i]); - PRINTF(stream, "\n"); - - stream->offset += len * sizeof(unsigned); - return TRUE; -} - - -static void -BITS( - struct debug_stream *stream, - unsigned dw, - unsigned hi, - unsigned lo, - const char *fmt, - ... ) -{ - va_list args; - unsigned himask = ~0UL >> (31 - (hi)); - - PRINTF(stream, "\t\t "); - - va_start( args, fmt ); - debug_vprintf( fmt, args ); - va_end( args ); - - PRINTF(stream, ": 0x%x\n", ((dw) & himask) >> (lo)); -} - -#ifdef DEBUG -#define MBZ( dw, hi, lo) do { \ - unsigned x = (dw) >> (lo); \ - unsigned lomask = (1 << (lo)) - 1; \ - unsigned himask; \ - himask = (1UL << (hi)) - 1; \ - assert ((x & himask & ~lomask) == 0); \ -} while (0) -#else -#define MBZ( dw, hi, lo) do { \ -} while (0) -#endif - -static void -FLAG( - struct debug_stream *stream, - unsigned dw, - unsigned bit, - const char *fmt, - ... ) -{ - if (((dw) >> (bit)) & 1) { - va_list args; - - PRINTF(stream, "\t\t "); - - va_start( args, fmt ); - debug_vprintf( fmt, args ); - va_end( args ); - - PRINTF(stream, "\n"); - } -} - -static boolean debug_load_immediate( struct debug_stream *stream, - const char *name, - unsigned len ) -{ - unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); - unsigned bits = (ptr[0] >> 4) & 0xff; - unsigned j = 0; - - PRINTF(stream, "%s (%d dwords, flags: %x):\n", name, len, bits); - PRINTF(stream, "\t0x%08x\n", ptr[j++]); - - if (bits & (1<<0)) { - PRINTF(stream, "\t LIS0: 0x%08x\n", ptr[j]); - PRINTF(stream, "\t vb address: 0x%08x\n", (ptr[j] & ~0x3)); - BITS(stream, ptr[j], 0, 0, "vb invalidate disable"); - j++; - } - if (bits & (1<<1)) { - PRINTF(stream, "\t LIS1: 0x%08x\n", ptr[j]); - BITS(stream, ptr[j], 29, 24, "vb dword width"); - BITS(stream, ptr[j], 21, 16, "vb dword pitch"); - BITS(stream, ptr[j], 15, 0, "vb max index"); - j++; - } - if (bits & (1<<2)) { - int i; - PRINTF(stream, "\t LIS2: 0x%08x\n", ptr[j]); - for (i = 0; i < 8; i++) { - unsigned tc = (ptr[j] >> (i * 4)) & 0xf; - if (tc != 0xf) - BITS(stream, tc, 3, 0, "tex coord %d", i); - } - j++; - } - if (bits & (1<<3)) { - PRINTF(stream, "\t LIS3: 0x%08x\n", ptr[j]); - j++; - } - if (bits & (1<<4)) { - PRINTF(stream, "\t LIS4: 0x%08x\n", ptr[j]); - BITS(stream, ptr[j], 31, 23, "point width"); - BITS(stream, ptr[j], 22, 19, "line width"); - FLAG(stream, ptr[j], 18, "alpha flatshade"); - FLAG(stream, ptr[j], 17, "fog flatshade"); - FLAG(stream, ptr[j], 16, "spec flatshade"); - FLAG(stream, ptr[j], 15, "rgb flatshade"); - BITS(stream, ptr[j], 14, 13, "cull mode"); - FLAG(stream, ptr[j], 12, "vfmt: point width"); - FLAG(stream, ptr[j], 11, "vfmt: specular/fog"); - FLAG(stream, ptr[j], 10, "vfmt: rgba"); - FLAG(stream, ptr[j], 9, "vfmt: depth offset"); - BITS(stream, ptr[j], 8, 6, "vfmt: position (2==xyzw)"); - FLAG(stream, ptr[j], 5, "force dflt diffuse"); - FLAG(stream, ptr[j], 4, "force dflt specular"); - FLAG(stream, ptr[j], 3, "local depth offset enable"); - FLAG(stream, ptr[j], 2, "vfmt: fp32 fog coord"); - FLAG(stream, ptr[j], 1, "sprite point"); - FLAG(stream, ptr[j], 0, "antialiasing"); - j++; - } - if (bits & (1<<5)) { - PRINTF(stream, "\t LIS5: 0x%08x\n", ptr[j]); - BITS(stream, ptr[j], 31, 28, "rgba write disables"); - FLAG(stream, ptr[j], 27, "force dflt point width"); - FLAG(stream, ptr[j], 26, "last pixel enable"); - FLAG(stream, ptr[j], 25, "global z offset enable"); - FLAG(stream, ptr[j], 24, "fog enable"); - BITS(stream, ptr[j], 23, 16, "stencil ref"); - BITS(stream, ptr[j], 15, 13, "stencil test"); - BITS(stream, ptr[j], 12, 10, "stencil fail op"); - BITS(stream, ptr[j], 9, 7, "stencil pass z fail op"); - BITS(stream, ptr[j], 6, 4, "stencil pass z pass op"); - FLAG(stream, ptr[j], 3, "stencil write enable"); - FLAG(stream, ptr[j], 2, "stencil test enable"); - FLAG(stream, ptr[j], 1, "color dither enable"); - FLAG(stream, ptr[j], 0, "logiop enable"); - j++; - } - if (bits & (1<<6)) { - PRINTF(stream, "\t LIS6: 0x%08x\n", ptr[j]); - FLAG(stream, ptr[j], 31, "alpha test enable"); - BITS(stream, ptr[j], 30, 28, "alpha func"); - BITS(stream, ptr[j], 27, 20, "alpha ref"); - FLAG(stream, ptr[j], 19, "depth test enable"); - BITS(stream, ptr[j], 18, 16, "depth func"); - FLAG(stream, ptr[j], 15, "blend enable"); - BITS(stream, ptr[j], 14, 12, "blend func"); - BITS(stream, ptr[j], 11, 8, "blend src factor"); - BITS(stream, ptr[j], 7, 4, "blend dst factor"); - FLAG(stream, ptr[j], 3, "depth write enable"); - FLAG(stream, ptr[j], 2, "color write enable"); - BITS(stream, ptr[j], 1, 0, "provoking vertex"); - j++; - } - - - PRINTF(stream, "\n"); - - assert(j == len); - - stream->offset += len * sizeof(unsigned); - - return TRUE; -} - - - -static boolean debug_load_indirect( struct debug_stream *stream, - const char *name, - unsigned len ) -{ - unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); - unsigned bits = (ptr[0] >> 8) & 0x3f; - unsigned i, j = 0; - - PRINTF(stream, "%s (%d dwords):\n", name, len); - PRINTF(stream, "\t0x%08x\n", ptr[j++]); - - for (i = 0; i < 6; i++) { - if (bits & (1<offset += len * sizeof(unsigned); - - return TRUE; -} - -static void BR13( struct debug_stream *stream, - unsigned val ) -{ - PRINTF(stream, "\t0x%08x\n", val); - FLAG(stream, val, 30, "clipping enable"); - BITS(stream, val, 25, 24, "color depth (3==32bpp)"); - BITS(stream, val, 23, 16, "raster op"); - BITS(stream, val, 15, 0, "dest pitch"); -} - - -static void BR22( struct debug_stream *stream, - unsigned val ) -{ - PRINTF(stream, "\t0x%08x\n", val); - BITS(stream, val, 31, 16, "dest y1"); - BITS(stream, val, 15, 0, "dest x1"); -} - -static void BR23( struct debug_stream *stream, - unsigned val ) -{ - PRINTF(stream, "\t0x%08x\n", val); - BITS(stream, val, 31, 16, "dest y2"); - BITS(stream, val, 15, 0, "dest x2"); -} - -static void BR09( struct debug_stream *stream, - unsigned val ) -{ - PRINTF(stream, "\t0x%08x -- dest address\n", val); -} - -static void BR26( struct debug_stream *stream, - unsigned val ) -{ - PRINTF(stream, "\t0x%08x\n", val); - BITS(stream, val, 31, 16, "src y1"); - BITS(stream, val, 15, 0, "src x1"); -} - -static void BR11( struct debug_stream *stream, - unsigned val ) -{ - PRINTF(stream, "\t0x%08x\n", val); - BITS(stream, val, 15, 0, "src pitch"); -} - -static void BR12( struct debug_stream *stream, - unsigned val ) -{ - PRINTF(stream, "\t0x%08x -- src address\n", val); -} - -static void BR16( struct debug_stream *stream, - unsigned val ) -{ - PRINTF(stream, "\t0x%08x -- color\n", val); -} - -static boolean debug_copy_blit( struct debug_stream *stream, - const char *name, - unsigned len ) -{ - unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); - int j = 0; - - PRINTF(stream, "%s (%d dwords):\n", name, len); - PRINTF(stream, "\t0x%08x\n", ptr[j++]); - - BR13(stream, ptr[j++]); - BR22(stream, ptr[j++]); - BR23(stream, ptr[j++]); - BR09(stream, ptr[j++]); - BR26(stream, ptr[j++]); - BR11(stream, ptr[j++]); - BR12(stream, ptr[j++]); - - stream->offset += len * sizeof(unsigned); - assert(j == len); - return TRUE; -} - -static boolean debug_color_blit( struct debug_stream *stream, - const char *name, - unsigned len ) -{ - unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); - int j = 0; - - PRINTF(stream, "%s (%d dwords):\n", name, len); - PRINTF(stream, "\t0x%08x\n", ptr[j++]); - - BR13(stream, ptr[j++]); - BR22(stream, ptr[j++]); - BR23(stream, ptr[j++]); - BR09(stream, ptr[j++]); - BR16(stream, ptr[j++]); - - stream->offset += len * sizeof(unsigned); - assert(j == len); - return TRUE; -} - -static boolean debug_modes4( struct debug_stream *stream, - const char *name, - unsigned len ) -{ - unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); - int j = 0; - - PRINTF(stream, "%s (%d dwords):\n", name, len); - PRINTF(stream, "\t0x%08x\n", ptr[j]); - BITS(stream, ptr[j], 21, 18, "logicop func"); - FLAG(stream, ptr[j], 17, "stencil test mask modify-enable"); - FLAG(stream, ptr[j], 16, "stencil write mask modify-enable"); - BITS(stream, ptr[j], 15, 8, "stencil test mask"); - BITS(stream, ptr[j], 7, 0, "stencil write mask"); - j++; - - stream->offset += len * sizeof(unsigned); - assert(j == len); - return TRUE; -} - -static boolean debug_map_state( struct debug_stream *stream, - const char *name, - unsigned len ) -{ - unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); - unsigned j = 0; - - PRINTF(stream, "%s (%d dwords):\n", name, len); - PRINTF(stream, "\t0x%08x\n", ptr[j++]); - - { - PRINTF(stream, "\t0x%08x\n", ptr[j]); - BITS(stream, ptr[j], 15, 0, "map mask"); - j++; - } - - while (j < len) { - { - PRINTF(stream, "\t TMn.0: 0x%08x\n", ptr[j]); - PRINTF(stream, "\t map address: 0x%08x\n", (ptr[j] & ~0x3)); - FLAG(stream, ptr[j], 1, "vertical line stride"); - FLAG(stream, ptr[j], 0, "vertical line stride offset"); - j++; - } - - { - PRINTF(stream, "\t TMn.1: 0x%08x\n", ptr[j]); - BITS(stream, ptr[j], 31, 21, "height"); - BITS(stream, ptr[j], 20, 10, "width"); - BITS(stream, ptr[j], 9, 7, "surface format"); - BITS(stream, ptr[j], 6, 3, "texel format"); - FLAG(stream, ptr[j], 2, "use fence regs"); - FLAG(stream, ptr[j], 1, "tiled surface"); - FLAG(stream, ptr[j], 0, "tile walk ymajor"); - j++; - } - { - PRINTF(stream, "\t TMn.2: 0x%08x\n", ptr[j]); - BITS(stream, ptr[j], 31, 21, "dword pitch"); - BITS(stream, ptr[j], 20, 15, "cube face enables"); - BITS(stream, ptr[j], 14, 9, "max lod"); - FLAG(stream, ptr[j], 8, "mip layout right"); - BITS(stream, ptr[j], 7, 0, "depth"); - j++; - } - } - - stream->offset += len * sizeof(unsigned); - assert(j == len); - return TRUE; -} - -static boolean debug_sampler_state( struct debug_stream *stream, - const char *name, - unsigned len ) -{ - unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); - unsigned j = 0; - - PRINTF(stream, "%s (%d dwords):\n", name, len); - PRINTF(stream, "\t0x%08x\n", ptr[j++]); - - { - PRINTF(stream, "\t0x%08x\n", ptr[j]); - BITS(stream, ptr[j], 15, 0, "sampler mask"); - j++; - } - - while (j < len) { - { - PRINTF(stream, "\t TSn.0: 0x%08x\n", ptr[j]); - FLAG(stream, ptr[j], 31, "reverse gamma"); - FLAG(stream, ptr[j], 30, "planar to packed"); - FLAG(stream, ptr[j], 29, "yuv->rgb"); - BITS(stream, ptr[j], 28, 27, "chromakey index"); - BITS(stream, ptr[j], 26, 22, "base mip level"); - BITS(stream, ptr[j], 21, 20, "mip mode filter"); - BITS(stream, ptr[j], 19, 17, "mag mode filter"); - BITS(stream, ptr[j], 16, 14, "min mode filter"); - BITS(stream, ptr[j], 13, 5, "lod bias (s4.4)"); - FLAG(stream, ptr[j], 4, "shadow enable"); - FLAG(stream, ptr[j], 3, "max-aniso-4"); - BITS(stream, ptr[j], 2, 0, "shadow func"); - j++; - } - - { - PRINTF(stream, "\t TSn.1: 0x%08x\n", ptr[j]); - BITS(stream, ptr[j], 31, 24, "min lod"); - MBZ( ptr[j], 23, 18 ); - FLAG(stream, ptr[j], 17, "kill pixel enable"); - FLAG(stream, ptr[j], 16, "keyed tex filter mode"); - FLAG(stream, ptr[j], 15, "chromakey enable"); - BITS(stream, ptr[j], 14, 12, "tcx wrap mode"); - BITS(stream, ptr[j], 11, 9, "tcy wrap mode"); - BITS(stream, ptr[j], 8, 6, "tcz wrap mode"); - FLAG(stream, ptr[j], 5, "normalized coords"); - BITS(stream, ptr[j], 4, 1, "map (surface) index"); - FLAG(stream, ptr[j], 0, "EAST deinterlacer enable"); - j++; - } - { - PRINTF(stream, "\t TSn.2: 0x%08x (default color)\n", ptr[j]); - j++; - } - } - - stream->offset += len * sizeof(unsigned); - assert(j == len); - return TRUE; -} - -static boolean debug_dest_vars( struct debug_stream *stream, - const char *name, - unsigned len ) -{ - unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); - int j = 0; - - PRINTF(stream, "%s (%d dwords):\n", name, len); - PRINTF(stream, "\t0x%08x\n", ptr[j++]); - - { - PRINTF(stream, "\t0x%08x\n", ptr[j]); - FLAG(stream, ptr[j], 31, "early classic ztest"); - FLAG(stream, ptr[j], 30, "opengl tex default color"); - FLAG(stream, ptr[j], 29, "bypass iz"); - FLAG(stream, ptr[j], 28, "lod preclamp"); - BITS(stream, ptr[j], 27, 26, "dither pattern"); - FLAG(stream, ptr[j], 25, "linear gamma blend"); - FLAG(stream, ptr[j], 24, "debug dither"); - BITS(stream, ptr[j], 23, 20, "dstorg x"); - BITS(stream, ptr[j], 19, 16, "dstorg y"); - MBZ (ptr[j], 15, 15 ); - BITS(stream, ptr[j], 14, 12, "422 write select"); - BITS(stream, ptr[j], 11, 8, "cbuf format"); - BITS(stream, ptr[j], 3, 2, "zbuf format"); - FLAG(stream, ptr[j], 1, "vert line stride"); - FLAG(stream, ptr[j], 1, "vert line stride offset"); - j++; - } - - stream->offset += len * sizeof(unsigned); - assert(j == len); - return TRUE; -} - -static boolean debug_buf_info( struct debug_stream *stream, - const char *name, - unsigned len ) -{ - unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); - int j = 0; - - PRINTF(stream, "%s (%d dwords):\n", name, len); - PRINTF(stream, "\t0x%08x\n", ptr[j++]); - - { - PRINTF(stream, "\t0x%08x\n", ptr[j]); - BITS(stream, ptr[j], 28, 28, "aux buffer id"); - BITS(stream, ptr[j], 27, 24, "buffer id (7=depth, 3=back)"); - FLAG(stream, ptr[j], 23, "use fence regs"); - FLAG(stream, ptr[j], 22, "tiled surface"); - FLAG(stream, ptr[j], 21, "tile walk ymajor"); - MBZ (ptr[j], 20, 14); - BITS(stream, ptr[j], 13, 2, "dword pitch"); - MBZ (ptr[j], 2, 0); - j++; - } - - PRINTF(stream, "\t0x%08x -- buffer base address\n", ptr[j++]); - - stream->offset += len * sizeof(unsigned); - assert(j == len); - return TRUE; -} - -static boolean i915_debug_packet( struct debug_stream *stream ) -{ - unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); - unsigned cmd = *ptr; - - switch (((cmd >> 29) & 0x7)) { - case 0x0: - switch ((cmd >> 23) & 0x3f) { - case 0x0: - return debug(stream, "MI_NOOP", 1); - case 0x3: - return debug(stream, "MI_WAIT_FOR_EVENT", 1); - case 0x4: - return debug(stream, "MI_FLUSH", 1); - case 0xA: - debug(stream, "MI_BATCH_BUFFER_END", 1); - return FALSE; - case 0x22: - return debug(stream, "MI_LOAD_REGISTER_IMM", 3); - case 0x31: - return debug_chain(stream, "MI_BATCH_BUFFER_START", 2); - default: - (void)debug(stream, "UNKNOWN 0x0 case!", 1); - assert(0); - break; - } - break; - case 0x1: - (void) debug(stream, "UNKNOWN 0x1 case!", 1); - assert(0); - break; - case 0x2: - switch ((cmd >> 22) & 0xff) { - case 0x50: - return debug_color_blit(stream, "XY_COLOR_BLT", (cmd & 0xff) + 2); - case 0x53: - return debug_copy_blit(stream, "XY_SRC_COPY_BLT", (cmd & 0xff) + 2); - default: - return debug(stream, "blit command", (cmd & 0xff) + 2); - } - break; - case 0x3: - switch ((cmd >> 24) & 0x1f) { - case 0x6: - return debug(stream, "3DSTATE_ANTI_ALIASING", 1); - case 0x7: - return debug(stream, "3DSTATE_RASTERIZATION_RULES", 1); - case 0x8: - return debug(stream, "3DSTATE_BACKFACE_STENCIL_OPS", 2); - case 0x9: - return debug(stream, "3DSTATE_BACKFACE_STENCIL_MASKS", 1); - case 0xb: - return debug(stream, "3DSTATE_INDEPENDENT_ALPHA_BLEND", 1); - case 0xc: - return debug(stream, "3DSTATE_MODES5", 1); - case 0xd: - return debug_modes4(stream, "3DSTATE_MODES4", 1); - case 0x15: - return debug(stream, "3DSTATE_FOG_COLOR", 1); - case 0x16: - return debug(stream, "3DSTATE_COORD_SET_BINDINGS", 1); - case 0x1c: - /* 3DState16NP */ - switch((cmd >> 19) & 0x1f) { - case 0x10: - return debug(stream, "3DSTATE_SCISSOR_ENABLE", 1); - case 0x11: - return debug(stream, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE", 1); - default: - (void) debug(stream, "UNKNOWN 0x1c case!", 1); - assert(0); - break; - } - break; - case 0x1d: - /* 3DStateMW */ - switch ((cmd >> 16) & 0xff) { - case 0x0: - return debug_map_state(stream, "3DSTATE_MAP_STATE", (cmd & 0x1f) + 2); - case 0x1: - return debug_sampler_state(stream, "3DSTATE_SAMPLER_STATE", (cmd & 0x1f) + 2); - case 0x4: - return debug_load_immediate(stream, "3DSTATE_LOAD_STATE_IMMEDIATE", (cmd & 0xf) + 2); - case 0x5: - return debug_program(stream, "3DSTATE_PIXEL_SHADER_PROGRAM", (cmd & 0x1ff) + 2); - case 0x6: - return debug(stream, "3DSTATE_PIXEL_SHADER_CONSTANTS", (cmd & 0xff) + 2); - case 0x7: - return debug_load_indirect(stream, "3DSTATE_LOAD_INDIRECT", (cmd & 0xff) + 2); - case 0x80: - return debug(stream, "3DSTATE_DRAWING_RECTANGLE", (cmd & 0xffff) + 2); - case 0x81: - return debug(stream, "3DSTATE_SCISSOR_RECTANGLE", (cmd & 0xffff) + 2); - case 0x83: - return debug(stream, "3DSTATE_SPAN_STIPPLE", (cmd & 0xffff) + 2); - case 0x85: - return debug_dest_vars(stream, "3DSTATE_DEST_BUFFER_VARS", (cmd & 0xffff) + 2); - case 0x88: - return debug(stream, "3DSTATE_CONSTANT_BLEND_COLOR", (cmd & 0xffff) + 2); - case 0x89: - return debug(stream, "3DSTATE_FOG_MODE", (cmd & 0xffff) + 2); - case 0x8e: - return debug_buf_info(stream, "3DSTATE_BUFFER_INFO", (cmd & 0xffff) + 2); - case 0x97: - return debug(stream, "3DSTATE_DEPTH_OFFSET_SCALE", (cmd & 0xffff) + 2); - case 0x98: - return debug(stream, "3DSTATE_DEFAULT_Z", (cmd & 0xffff) + 2); - case 0x99: - return debug(stream, "3DSTATE_DEFAULT_DIFFUSE", (cmd & 0xffff) + 2); - case 0x9a: - return debug(stream, "3DSTATE_DEFAULT_SPECULAR", (cmd & 0xffff) + 2); - case 0x9c: - return debug(stream, "3DSTATE_CLEAR_PARAMETERS", (cmd & 0xffff) + 2); - default: - assert(0); - return 0; - } - break; - case 0x1e: - if (cmd & (1 << 23)) - return debug(stream, "???", (cmd & 0xffff) + 1); - else - return debug(stream, "", 1); - break; - case 0x1f: - if ((cmd & (1 << 23)) == 0) - return debug_prim(stream, "3DPRIM (inline)", 1, (cmd & 0x1ffff) + 2); - else if (cmd & (1 << 17)) - { - if ((cmd & 0xffff) == 0) - return debug_variable_length_prim(stream); - else - return debug_prim(stream, "3DPRIM (indexed)", 0, (((cmd & 0xffff) + 1) / 2) + 1); - } - else - return debug_prim(stream, "3DPRIM (indirect sequential)", 0, 2); - break; - default: - return debug(stream, "", 0); - } - default: - assert(0); - return 0; - } - - assert(0); - return 0; -} - - - -void -i915_dump_batchbuffer( struct intel_batchbuffer *batch ) -{ - struct debug_stream stream; - unsigned *start = (unsigned*)batch->map; - unsigned *end = (unsigned*)batch->ptr; - unsigned long bytes = (unsigned long) (end - start) * 4; - boolean done = FALSE; - - stream.offset = 0; - stream.ptr = (char *)start; - stream.print_addresses = 0; - - if (!start || !end) { - debug_printf( "\n\nBATCH: ???\n"); - return; - } - - debug_printf( "\n\nBATCH: (%d)\n", bytes / 4); - - while (!done && - stream.offset < bytes) - { - if (!i915_debug_packet( &stream )) - break; - - assert(stream.offset <= bytes && - stream.offset >= 0); - } - - debug_printf( "END-BATCH\n\n\n"); -} - - diff --git a/src/gallium/drivers/i915simple/i915_debug.h b/src/gallium/drivers/i915simple/i915_debug.h deleted file mode 100644 index dd9b86e17b..0000000000 --- a/src/gallium/drivers/i915simple/i915_debug.h +++ /dev/null @@ -1,114 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell - */ - -#ifndef I915_DEBUG_H -#define I915_DEBUG_H - -#include - -struct i915_context; - -struct debug_stream -{ - unsigned offset; /* current gtt offset */ - char *ptr; /* pointer to gtt offset zero */ - char *end; /* pointer to gtt offset zero */ - unsigned print_addresses; -}; - - -/* Internal functions - */ -void i915_disassemble_program(struct debug_stream *stream, - const unsigned *program, unsigned sz); - -void i915_print_ureg(const char *msg, unsigned ureg); - - -#define DEBUG_BATCH 0x1 -#define DEBUG_BLIT 0x2 -#define DEBUG_BUFFER 0x4 -#define DEBUG_CONSTANTS 0x8 -#define DEBUG_CONTEXT 0x10 -#define DEBUG_DRAW 0x20 -#define DEBUG_DYNAMIC 0x40 -#define DEBUG_FLUSH 0x80 -#define DEBUG_MAP 0x100 -#define DEBUG_PROGRAM 0x200 -#define DEBUG_REGIONS 0x400 -#define DEBUG_SAMPLER 0x800 -#define DEBUG_STATIC 0x1000 -#define DEBUG_SURFACE 0x2000 -#define DEBUG_WINSYS 0x4000 - -#include "pipe/p_compiler.h" - -#if defined(DEBUG) && defined(FILE_DEBUG_FLAG) - -#include "pipe/internal/p_winsys_screen.h" - -static INLINE void -I915_DBG( - struct i915_context *i915, - const char *fmt, - ... ) -{ - if ((i915)->debug & FILE_DEBUG_FLAG) { - va_list args; - - va_start( args, fmt ); - debug_vprintf( fmt, args ); - va_end( args ); - } -} - -#else - -static INLINE void -I915_DBG( - struct i915_context *i915, - const char *fmt, - ... ) -{ - (void) i915; - (void) fmt; -} - -#endif - - -struct intel_batchbuffer; - -void i915_dump_batchbuffer( struct intel_batchbuffer *i915 ); - -void i915_debug_init( struct i915_context *i915 ); - - -#endif diff --git a/src/gallium/drivers/i915simple/i915_debug_fp.c b/src/gallium/drivers/i915simple/i915_debug_fp.c deleted file mode 100644 index 9c5b117b6d..0000000000 --- a/src/gallium/drivers/i915simple/i915_debug_fp.c +++ /dev/null @@ -1,363 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "i915_reg.h" -#include "i915_debug.h" -#include "pipe/internal/p_winsys_screen.h" -#include "util/u_memory.h" - - -static void -PRINTF( - struct debug_stream *stream, - const char *fmt, - ... ) -{ - va_list args; - - va_start( args, fmt ); - debug_vprintf( fmt, args ); - va_end( args ); -} - - -static const char *opcodes[0x20] = { - "NOP", - "ADD", - "MOV", - "MUL", - "MAD", - "DP2ADD", - "DP3", - "DP4", - "FRC", - "RCP", - "RSQ", - "EXP", - "LOG", - "CMP", - "MIN", - "MAX", - "FLR", - "MOD", - "TRC", - "SGE", - "SLT", - "TEXLD", - "TEXLDP", - "TEXLDB", - "TEXKILL", - "DCL", - "0x1a", - "0x1b", - "0x1c", - "0x1d", - "0x1e", - "0x1f", -}; - - -static const int args[0x20] = { - 0, /* 0 nop */ - 2, /* 1 add */ - 1, /* 2 mov */ - 2, /* 3 m ul */ - 3, /* 4 mad */ - 3, /* 5 dp2add */ - 2, /* 6 dp3 */ - 2, /* 7 dp4 */ - 1, /* 8 frc */ - 1, /* 9 rcp */ - 1, /* a rsq */ - 1, /* b exp */ - 1, /* c log */ - 3, /* d cmp */ - 2, /* e min */ - 2, /* f max */ - 1, /* 10 flr */ - 1, /* 11 mod */ - 1, /* 12 trc */ - 2, /* 13 sge */ - 2, /* 14 slt */ - 1, - 1, - 1, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, -}; - - -static const char *regname[0x8] = { - "R", - "T", - "CONST", - "S", - "OC", - "OD", - "U", - "UNKNOWN", -}; - -static void -print_reg_type_nr(struct debug_stream *stream, unsigned type, unsigned nr) -{ - switch (type) { - case REG_TYPE_T: - switch (nr) { - case T_DIFFUSE: - PRINTF(stream, "T_DIFFUSE"); - return; - case T_SPECULAR: - PRINTF(stream, "T_SPECULAR"); - return; - case T_FOG_W: - PRINTF(stream, "T_FOG_W"); - return; - default: - PRINTF(stream, "T_TEX%d", nr); - return; - } - case REG_TYPE_OC: - if (nr == 0) { - PRINTF(stream, "oC"); - return; - } - break; - case REG_TYPE_OD: - if (nr == 0) { - PRINTF(stream, "oD"); - return; - } - break; - default: - break; - } - - PRINTF(stream, "%s[%d]", regname[type], nr); -} - -#define REG_SWIZZLE_MASK 0x7777 -#define REG_NEGATE_MASK 0x8888 - -#define REG_SWIZZLE_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) | \ - (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) | \ - (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) | \ - (SRC_W << A2_SRC2_CHANNEL_W_SHIFT)) - - -static void -print_reg_neg_swizzle(struct debug_stream *stream, unsigned reg) -{ - int i; - - if ((reg & REG_SWIZZLE_MASK) == REG_SWIZZLE_XYZW && - (reg & REG_NEGATE_MASK) == 0) - return; - - PRINTF(stream, "."); - - for (i = 3; i >= 0; i--) { - if (reg & (1 << ((i * 4) + 3))) - PRINTF(stream, "-"); - - switch ((reg >> (i * 4)) & 0x7) { - case 0: - PRINTF(stream, "x"); - break; - case 1: - PRINTF(stream, "y"); - break; - case 2: - PRINTF(stream, "z"); - break; - case 3: - PRINTF(stream, "w"); - break; - case 4: - PRINTF(stream, "0"); - break; - case 5: - PRINTF(stream, "1"); - break; - default: - PRINTF(stream, "?"); - break; - } - } -} - - -static void -print_src_reg(struct debug_stream *stream, unsigned dword) -{ - unsigned nr = (dword >> A2_SRC2_NR_SHIFT) & REG_NR_MASK; - unsigned type = (dword >> A2_SRC2_TYPE_SHIFT) & REG_TYPE_MASK; - print_reg_type_nr(stream, type, nr); - print_reg_neg_swizzle(stream, dword); -} - - -static void -print_dest_reg(struct debug_stream *stream, unsigned dword) -{ - unsigned nr = (dword >> A0_DEST_NR_SHIFT) & REG_NR_MASK; - unsigned type = (dword >> A0_DEST_TYPE_SHIFT) & REG_TYPE_MASK; - print_reg_type_nr(stream, type, nr); - if ((dword & A0_DEST_CHANNEL_ALL) == A0_DEST_CHANNEL_ALL) - return; - PRINTF(stream, "."); - if (dword & A0_DEST_CHANNEL_X) - PRINTF(stream, "x"); - if (dword & A0_DEST_CHANNEL_Y) - PRINTF(stream, "y"); - if (dword & A0_DEST_CHANNEL_Z) - PRINTF(stream, "z"); - if (dword & A0_DEST_CHANNEL_W) - PRINTF(stream, "w"); -} - - -#define GET_SRC0_REG(r0, r1) ((r0<<14)|(r1>>A1_SRC0_CHANNEL_W_SHIFT)) -#define GET_SRC1_REG(r0, r1) ((r0<<8)|(r1>>A2_SRC1_CHANNEL_W_SHIFT)) -#define GET_SRC2_REG(r) (r) - - -static void -print_arith_op(struct debug_stream *stream, - unsigned opcode, const unsigned * program) -{ - if (opcode != A0_NOP) { - print_dest_reg(stream, program[0]); - if (program[0] & A0_DEST_SATURATE) - PRINTF(stream, " = SATURATE "); - else - PRINTF(stream, " = "); - } - - PRINTF(stream, "%s ", opcodes[opcode]); - - print_src_reg(stream, GET_SRC0_REG(program[0], program[1])); - if (args[opcode] == 1) { - PRINTF(stream, "\n"); - return; - } - - PRINTF(stream, ", "); - print_src_reg(stream, GET_SRC1_REG(program[1], program[2])); - if (args[opcode] == 2) { - PRINTF(stream, "\n"); - return; - } - - PRINTF(stream, ", "); - print_src_reg(stream, GET_SRC2_REG(program[2])); - PRINTF(stream, "\n"); - return; -} - - -static void -print_tex_op(struct debug_stream *stream, - unsigned opcode, const unsigned * program) -{ - print_dest_reg(stream, program[0] | A0_DEST_CHANNEL_ALL); - PRINTF(stream, " = "); - - PRINTF(stream, "%s ", opcodes[opcode]); - - PRINTF(stream, "S[%d],", program[0] & T0_SAMPLER_NR_MASK); - - print_reg_type_nr(stream, - (program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) & - REG_TYPE_MASK, - (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK); - PRINTF(stream, "\n"); -} - -static void -print_texkil_op(struct debug_stream *stream, - unsigned opcode, const unsigned * program) -{ - PRINTF(stream, "TEXKIL "); - - print_reg_type_nr(stream, - (program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) & - REG_TYPE_MASK, - (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK); - PRINTF(stream, "\n"); -} - -static void -print_dcl_op(struct debug_stream *stream, - unsigned opcode, const unsigned * program) -{ - PRINTF(stream, "%s ", opcodes[opcode]); - print_dest_reg(stream, - program[0] | A0_DEST_CHANNEL_ALL); - PRINTF(stream, "\n"); -} - - -void -i915_disassemble_program(struct debug_stream *stream, - const unsigned * program, unsigned sz) -{ - unsigned i; - - PRINTF(stream, "\t\tBEGIN\n"); - - assert((program[0] & 0x1ff) + 2 == sz); - - program++; - for (i = 1; i < sz; i += 3, program += 3) { - unsigned opcode = program[0] & (0x1f << 24); - - PRINTF(stream, "\t\t"); - - if ((int) opcode >= A0_NOP && opcode <= A0_SLT) - print_arith_op(stream, opcode >> 24, program); - else if (opcode >= T0_TEXLD && opcode < T0_TEXKILL) - print_tex_op(stream, opcode >> 24, program); - else if (opcode == T0_TEXKILL) - print_texkil_op(stream, opcode >> 24, program); - else if (opcode == D0_DCL) - print_dcl_op(stream, opcode >> 24, program); - else - PRINTF(stream, "Unknown opcode 0x%x\n", opcode); - } - - PRINTF(stream, "\t\tEND\n\n"); -} - - diff --git a/src/gallium/drivers/i915simple/i915_flush.c b/src/gallium/drivers/i915simple/i915_flush.c deleted file mode 100644 index 1582168eba..0000000000 --- a/src/gallium/drivers/i915simple/i915_flush.c +++ /dev/null @@ -1,86 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Author: - * Keith Whitwell - */ - - -#include "pipe/p_defines.h" -#include "draw/draw_context.h" -#include "i915_context.h" -#include "i915_reg.h" -#include "i915_batch.h" - - -static void i915_flush( struct pipe_context *pipe, - unsigned flags, - struct pipe_fence_handle **fence ) -{ - struct i915_context *i915 = i915_context(pipe); - - draw_flush(i915->draw); - -#if 0 - /* Do we need to emit an MI_FLUSH command to flush the hardware - * caches? - */ - if (flags & (PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE)) { - unsigned flush = MI_FLUSH; - - if (!(flags & PIPE_FLUSH_RENDER_CACHE)) - flush |= INHIBIT_FLUSH_RENDER_CACHE; - - if (flags & PIPE_FLUSH_TEXTURE_CACHE) - flush |= FLUSH_MAP_CACHE; - - if (!BEGIN_BATCH(1, 0)) { - FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(1, 0)); - } - OUT_BATCH( flush ); - } -#endif - -#if 0 - if (i915->batch->map == i915->batch->ptr) { - return; - } -#endif - - /* If there are no flags, just flush pending commands to hardware: - */ - FLUSH_BATCH(fence); - i915->vbo_flushed = 1; -} - - - -void i915_init_flush_functions( struct i915_context *i915 ) -{ - i915->base.flush = i915_flush; -} diff --git a/src/gallium/drivers/i915simple/i915_fpc.h b/src/gallium/drivers/i915simple/i915_fpc.h deleted file mode 100644 index 2f0f99d046..0000000000 --- a/src/gallium/drivers/i915simple/i915_fpc.h +++ /dev/null @@ -1,207 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef I915_FPC_H -#define I915_FPC_H - - -#include "i915_context.h" -#include "i915_reg.h" - - - -#define I915_PROGRAM_SIZE 192 - - - -/** - * Program translation state - */ -struct i915_fp_compile { - struct i915_fragment_shader *shader; /* the shader we're compiling */ - - boolean used_constants[I915_MAX_CONSTANT]; - - /** maps TGSI immediate index to constant slot */ - uint num_immediates; - uint immediates_map[I915_MAX_CONSTANT]; - float immediates[I915_MAX_CONSTANT][4]; - - boolean first_instruction; - - uint declarations[I915_PROGRAM_SIZE]; - uint program[I915_PROGRAM_SIZE]; - - uint *csr; /**< Cursor, points into program. */ - - uint *decl; /**< Cursor, points into declarations. */ - - uint decl_s; /**< flags for which s regs need to be decl'd */ - uint decl_t; /**< flags for which t regs need to be decl'd */ - - uint temp_flag; /**< Tracks temporary regs which are in use */ - uint utemp_flag; /**< Tracks TYPE_U temporary regs which are in use */ - - uint nr_tex_indirect; - uint nr_tex_insn; - uint nr_alu_insn; - uint nr_decl_insn; - - boolean error; /**< Set if i915_program_error() is called */ - uint wpos_tex; - uint NumNativeInstructions; - uint NumNativeAluInstructions; - uint NumNativeTexInstructions; - uint NumNativeTexIndirections; -}; - - -/* Having zero and one in here makes the definition of swizzle a lot - * easier. - */ -#define UREG_TYPE_SHIFT 29 -#define UREG_NR_SHIFT 24 -#define UREG_CHANNEL_X_NEGATE_SHIFT 23 -#define UREG_CHANNEL_X_SHIFT 20 -#define UREG_CHANNEL_Y_NEGATE_SHIFT 19 -#define UREG_CHANNEL_Y_SHIFT 16 -#define UREG_CHANNEL_Z_NEGATE_SHIFT 15 -#define UREG_CHANNEL_Z_SHIFT 12 -#define UREG_CHANNEL_W_NEGATE_SHIFT 11 -#define UREG_CHANNEL_W_SHIFT 8 -#define UREG_CHANNEL_ZERO_NEGATE_MBZ 5 -#define UREG_CHANNEL_ZERO_SHIFT 4 -#define UREG_CHANNEL_ONE_NEGATE_MBZ 1 -#define UREG_CHANNEL_ONE_SHIFT 0 - -#define UREG_BAD 0xffffffff /* not a valid ureg */ - -#define X SRC_X -#define Y SRC_Y -#define Z SRC_Z -#define W SRC_W -#define ZERO SRC_ZERO -#define ONE SRC_ONE - -/* Construct a ureg: - */ -#define UREG( type, nr ) (((type)<< UREG_TYPE_SHIFT) | \ - ((nr) << UREG_NR_SHIFT) | \ - (X << UREG_CHANNEL_X_SHIFT) | \ - (Y << UREG_CHANNEL_Y_SHIFT) | \ - (Z << UREG_CHANNEL_Z_SHIFT) | \ - (W << UREG_CHANNEL_W_SHIFT) | \ - (ZERO << UREG_CHANNEL_ZERO_SHIFT) | \ - (ONE << UREG_CHANNEL_ONE_SHIFT)) - -#define GET_CHANNEL_SRC( reg, channel ) ((reg<<(channel*4)) & (0xf<<20)) -#define CHANNEL_SRC( src, channel ) (src>>(channel*4)) - -#define GET_UREG_TYPE(reg) (((reg)>>UREG_TYPE_SHIFT)®_TYPE_MASK) -#define GET_UREG_NR(reg) (((reg)>>UREG_NR_SHIFT)®_NR_MASK) - - - -#define UREG_XYZW_CHANNEL_MASK 0x00ffff00 - -/* One neat thing about the UREG representation: - */ -static INLINE int -swizzle(int reg, uint x, uint y, uint z, uint w) -{ - assert(x <= SRC_ONE); - assert(y <= SRC_ONE); - assert(z <= SRC_ONE); - assert(w <= SRC_ONE); - return ((reg & ~UREG_XYZW_CHANNEL_MASK) | - CHANNEL_SRC(GET_CHANNEL_SRC(reg, x), 0) | - CHANNEL_SRC(GET_CHANNEL_SRC(reg, y), 1) | - CHANNEL_SRC(GET_CHANNEL_SRC(reg, z), 2) | - CHANNEL_SRC(GET_CHANNEL_SRC(reg, w), 3)); -} - - - -/*********************************************************************** - * Public interface for the compiler - */ -extern void -i915_translate_fragment_program( struct i915_context *i915, - struct i915_fragment_shader *fs); - - - -extern uint i915_get_temp(struct i915_fp_compile *p); -extern uint i915_get_utemp(struct i915_fp_compile *p); -extern void i915_release_utemps(struct i915_fp_compile *p); - - -extern uint i915_emit_texld(struct i915_fp_compile *p, - uint dest, - uint destmask, - uint sampler, uint coord, uint op); - -extern uint i915_emit_arith(struct i915_fp_compile *p, - uint op, - uint dest, - uint mask, - uint saturate, - uint src0, uint src1, uint src2); - -extern uint i915_emit_decl(struct i915_fp_compile *p, - uint type, uint nr, uint d0_flags); - - -extern uint i915_emit_const1f(struct i915_fp_compile *p, float c0); - -extern uint i915_emit_const2f(struct i915_fp_compile *p, - float c0, float c1); - -extern uint i915_emit_const4fv(struct i915_fp_compile *p, - const float * c); - -extern uint i915_emit_const4f(struct i915_fp_compile *p, - float c0, float c1, - float c2, float c3); - - -/*====================================================================== - * i915_fpc_debug.c - */ -extern void i915_disassemble_program(const uint * program, uint sz); - - -/*====================================================================== - * i915_fpc_translate.c - */ - -extern void -i915_program_error(struct i915_fp_compile *p, const char *msg, ...); - - -#endif diff --git a/src/gallium/drivers/i915simple/i915_fpc_emit.c b/src/gallium/drivers/i915simple/i915_fpc_emit.c deleted file mode 100644 index b054ce41d3..0000000000 --- a/src/gallium/drivers/i915simple/i915_fpc_emit.c +++ /dev/null @@ -1,375 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "i915_reg.h" -#include "i915_context.h" -#include "i915_fpc.h" -#include "util/u_math.h" - - -#define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) -#define D0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) -#define T0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) -#define A0_SRC0( reg ) (((reg)&UREG_MASK)>>UREG_A0_SRC0_SHIFT_LEFT) -#define A1_SRC0( reg ) (((reg)&UREG_MASK)<>UREG_A1_SRC1_SHIFT_LEFT) -#define A2_SRC1( reg ) (((reg)&UREG_MASK)<>UREG_A2_SRC2_SHIFT_LEFT) - -/* These are special, and don't have swizzle/negate bits. - */ -#define T0_SAMPLER( reg ) (GET_UREG_NR(reg)<temp_flag); - if (!bit) { - i915_program_error(p, "i915_get_temp: out of temporaries\n"); - return 0; - } - - p->temp_flag |= 1 << (bit - 1); - return bit - 1; -} - - -static void -i915_release_temp(struct i915_fp_compile *p, int reg) -{ - p->temp_flag &= ~(1 << reg); -} - - -/** - * Get unpreserved temporary, a temp whose value is not preserved between - * PS program phases. - */ -uint -i915_get_utemp(struct i915_fp_compile * p) -{ - int bit = ffs(~p->utemp_flag); - if (!bit) { - i915_program_error(p, "i915_get_utemp: out of temporaries\n"); - return 0; - } - - p->utemp_flag |= 1 << (bit - 1); - return UREG(REG_TYPE_U, (bit - 1)); -} - -void -i915_release_utemps(struct i915_fp_compile *p) -{ - p->utemp_flag = ~0x7; -} - - -uint -i915_emit_decl(struct i915_fp_compile *p, - uint type, uint nr, uint d0_flags) -{ - uint reg = UREG(type, nr); - - if (type == REG_TYPE_T) { - if (p->decl_t & (1 << nr)) - return reg; - - p->decl_t |= (1 << nr); - } - else if (type == REG_TYPE_S) { - if (p->decl_s & (1 << nr)) - return reg; - - p->decl_s |= (1 << nr); - } - else - return reg; - - *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags); - *(p->decl++) = D1_MBZ; - *(p->decl++) = D2_MBZ; - - p->nr_decl_insn++; - return reg; -} - -uint -i915_emit_arith(struct i915_fp_compile * p, - uint op, - uint dest, - uint mask, - uint saturate, uint src0, uint src1, uint src2) -{ - uint c[3]; - uint nr_const = 0; - - assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); - dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)); - assert(dest); - - if (GET_UREG_TYPE(src0) == REG_TYPE_CONST) - c[nr_const++] = 0; - if (GET_UREG_TYPE(src1) == REG_TYPE_CONST) - c[nr_const++] = 1; - if (GET_UREG_TYPE(src2) == REG_TYPE_CONST) - c[nr_const++] = 2; - - /* Recursively call this function to MOV additional const values - * into temporary registers. Use utemp registers for this - - * currently shouldn't be possible to run out, but keep an eye on - * this. - */ - if (nr_const > 1) { - uint s[3], first, i, old_utemp_flag; - - s[0] = src0; - s[1] = src1; - s[2] = src2; - old_utemp_flag = p->utemp_flag; - - first = GET_UREG_NR(s[c[0]]); - for (i = 1; i < nr_const; i++) { - if (GET_UREG_NR(s[c[i]]) != first) { - uint tmp = i915_get_utemp(p); - - i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, - s[c[i]], 0, 0); - s[c[i]] = tmp; - } - } - - src0 = s[0]; - src1 = s[1]; - src2 = s[2]; - p->utemp_flag = old_utemp_flag; /* restore */ - } - - *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0)); - *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1)); - *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2)); - - p->nr_alu_insn++; - return dest; -} - - -/** - * Emit a texture load or texkill instruction. - * \param dest the dest i915 register - * \param destmask the dest register writemask - * \param sampler the i915 sampler register - * \param coord the i915 source texcoord operand - * \param opcode the instruction opcode - */ -uint i915_emit_texld( struct i915_fp_compile *p, - uint dest, - uint destmask, - uint sampler, - uint coord, - uint opcode ) -{ - const uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord)); - int temp = -1; - - if (coord != k) { - /* texcoord is swizzled or negated. Need to allocate a new temporary - * register (a utemp / unpreserved temp) won't do. - */ - uint tempReg; - - temp = i915_get_temp(p); /* get temp reg index */ - tempReg = UREG(REG_TYPE_R, temp); /* make i915 register */ - - i915_emit_arith( p, A0_MOV, - tempReg, A0_DEST_CHANNEL_ALL, /* dest reg, writemask */ - 0, /* saturate */ - coord, 0, 0 ); /* src0, src1, src2 */ - - /* new src texcoord is tempReg */ - coord = tempReg; - } - - /* Don't worry about saturate as we only support - */ - if (destmask != A0_DEST_CHANNEL_ALL) { - /* if not writing to XYZW... */ - uint tmp = i915_get_utemp(p); - i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode ); - i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 ); - /* XXX release utemp here? */ - } - else { - assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); - assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); - - /* is the sampler coord a texcoord input reg? */ - if (GET_UREG_TYPE(coord) != REG_TYPE_T) { - p->nr_tex_indirect++; - } - - *(p->csr++) = (opcode | - T0_DEST( dest ) | - T0_SAMPLER( sampler )); - - *(p->csr++) = T1_ADDRESS_REG( coord ); - *(p->csr++) = T2_MBZ; - - p->nr_tex_insn++; - } - - if (temp >= 0) - i915_release_temp(p, temp); - - return dest; -} - - -uint -i915_emit_const1f(struct i915_fp_compile * p, float c0) -{ - struct i915_fragment_shader *ifs = p->shader; - unsigned reg, idx; - - if (c0 == 0.0) - return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO); - if (c0 == 1.0) - return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE); - - for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { - if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER) - continue; - for (idx = 0; idx < 4; idx++) { - if (!(ifs->constant_flags[reg] & (1 << idx)) || - ifs->constants[reg][idx] == c0) { - ifs->constants[reg][idx] = c0; - ifs->constant_flags[reg] |= 1 << idx; - if (reg + 1 > ifs->num_constants) - ifs->num_constants = reg + 1; - return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE); - } - } - } - - i915_program_error(p, "i915_emit_const1f: out of constants\n"); - return 0; -} - -uint -i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) -{ - struct i915_fragment_shader *ifs = p->shader; - unsigned reg, idx; - - if (c0 == 0.0) - return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W); - if (c0 == 1.0) - return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W); - - if (c1 == 0.0) - return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W); - if (c1 == 1.0) - return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); - - for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { - if (ifs->constant_flags[reg] == 0xf || - ifs->constant_flags[reg] == I915_CONSTFLAG_USER) - continue; - for (idx = 0; idx < 3; idx++) { - if (!(ifs->constant_flags[reg] & (3 << idx))) { - ifs->constants[reg][idx + 0] = c0; - ifs->constants[reg][idx + 1] = c1; - ifs->constant_flags[reg] |= 3 << idx; - if (reg + 1 > ifs->num_constants) - ifs->num_constants = reg + 1; - return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE); - } - } - } - - i915_program_error(p, "i915_emit_const2f: out of constants\n"); - return 0; -} - - - -uint -i915_emit_const4f(struct i915_fp_compile * p, - float c0, float c1, float c2, float c3) -{ - struct i915_fragment_shader *ifs = p->shader; - unsigned reg; - - for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { - if (ifs->constant_flags[reg] == 0xf && - ifs->constants[reg][0] == c0 && - ifs->constants[reg][1] == c1 && - ifs->constants[reg][2] == c2 && - ifs->constants[reg][3] == c3) { - return UREG(REG_TYPE_CONST, reg); - } - else if (ifs->constant_flags[reg] == 0) { - - ifs->constants[reg][0] = c0; - ifs->constants[reg][1] = c1; - ifs->constants[reg][2] = c2; - ifs->constants[reg][3] = c3; - ifs->constant_flags[reg] = 0xf; - if (reg + 1 > ifs->num_constants) - ifs->num_constants = reg + 1; - return UREG(REG_TYPE_CONST, reg); - } - } - - i915_program_error(p, "i915_emit_const4f: out of constants\n"); - return 0; -} - - -uint -i915_emit_const4fv(struct i915_fp_compile * p, const float * c) -{ - return i915_emit_const4f(p, c[0], c[1], c[2], c[3]); -} diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c deleted file mode 100644 index 89504ced27..0000000000 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ /dev/null @@ -1,1202 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include - -#include "i915_reg.h" -#include "i915_context.h" -#include "i915_fpc.h" - -#include "pipe/p_shader_tokens.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "util/u_string.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_dump.h" - -#include "draw/draw_vertex.h" - - -/** - * Simple pass-through fragment shader to use when we don't have - * a real shader (or it fails to compile for some reason). - */ -static unsigned passthrough[] = -{ - _3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1), - - /* declare input color: - */ - (D0_DCL | - (REG_TYPE_T << D0_TYPE_SHIFT) | - (T_DIFFUSE << D0_NR_SHIFT) | - D0_CHANNEL_ALL), - 0, - 0, - - /* move to output color: - */ - (A0_MOV | - (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | - A0_DEST_CHANNEL_ALL | - (REG_TYPE_T << A0_SRC0_TYPE_SHIFT) | - (T_DIFFUSE << A0_SRC0_NR_SHIFT)), - 0x01230000, /* .xyzw */ - 0 -}; - - -/* 1, -1/3!, 1/5!, -1/7! */ -static const float sin_constants[4] = { 1.0, - -1.0f / (3 * 2 * 1), - 1.0f / (5 * 4 * 3 * 2 * 1), - -1.0f / (7 * 6 * 5 * 4 * 3 * 2 * 1) -}; - -/* 1, -1/2!, 1/4!, -1/6! */ -static const float cos_constants[4] = { 1.0, - -1.0f / (2 * 1), - 1.0f / (4 * 3 * 2 * 1), - -1.0f / (6 * 5 * 4 * 3 * 2 * 1) -}; - - - -/** - * component-wise negation of ureg - */ -static INLINE int -negate(int reg, int x, int y, int z, int w) -{ - /* Another neat thing about the UREG representation */ - return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) | - ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) | - ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) | - ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT)); -} - - -/** - * In the event of a translation failure, we'll generate a simple color - * pass-through program. - */ -static void -i915_use_passthrough_shader(struct i915_fragment_shader *fs) -{ - fs->program = (uint *) MALLOC(sizeof(passthrough)); - if (fs->program) { - memcpy(fs->program, passthrough, sizeof(passthrough)); - fs->program_len = Elements(passthrough); - } - fs->num_constants = 0; -} - - -void -i915_program_error(struct i915_fp_compile *p, const char *msg, ...) -{ - va_list args; - char buffer[1024]; - - debug_printf("i915_program_error: "); - va_start( args, msg ); - util_vsnprintf( buffer, sizeof(buffer), msg, args ); - va_end( args ); - debug_printf(buffer); - debug_printf("\n"); - - p->error = 1; -} - - - -/** - * Construct a ureg for the given source register. Will emit - * constants, apply swizzling and negation as needed. - */ -static uint -src_vector(struct i915_fp_compile *p, - const struct tgsi_full_src_register *source) -{ - uint index = source->SrcRegister.Index; - uint src = 0, sem_name, sem_ind; - - switch (source->SrcRegister.File) { - case TGSI_FILE_TEMPORARY: - if (source->SrcRegister.Index >= I915_MAX_TEMPORARY) { - i915_program_error(p, "Exceeded max temporary reg"); - return 0; - } - src = UREG(REG_TYPE_R, index); - break; - case TGSI_FILE_INPUT: - /* XXX: Packing COL1, FOGC into a single attribute works for - * texenv programs, but will fail for real fragment programs - * that use these attributes and expect them to be a full 4 - * components wide. Could use a texcoord to pass these - * attributes if necessary, but that won't work in the general - * case. - * - * We also use a texture coordinate to pass wpos when possible. - */ - - sem_name = p->shader->info.input_semantic_name[index]; - sem_ind = p->shader->info.input_semantic_index[index]; - - switch (sem_name) { - case TGSI_SEMANTIC_POSITION: - debug_printf("SKIP SEM POS\n"); - /* - assert(p->wpos_tex != -1); - src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL); - */ - break; - case TGSI_SEMANTIC_COLOR: - if (sem_ind == 0) { - src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); - } - else { - /* secondary color */ - assert(sem_ind == 1); - src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); - src = swizzle(src, X, Y, Z, ONE); - } - break; - case TGSI_SEMANTIC_FOG: - src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); - src = swizzle(src, W, W, W, W); - break; - case TGSI_SEMANTIC_GENERIC: - /* usually a texcoord */ - src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + sem_ind, D0_CHANNEL_ALL); - break; - default: - i915_program_error(p, "Bad source->Index"); - return 0; - } - break; - - case TGSI_FILE_IMMEDIATE: - assert(index < p->num_immediates); - index = p->immediates_map[index]; - /* fall-through */ - case TGSI_FILE_CONSTANT: - src = UREG(REG_TYPE_CONST, index); - break; - - default: - i915_program_error(p, "Bad source->File"); - return 0; - } - - if (source->SrcRegister.Extended) { - src = swizzle(src, - source->SrcRegisterExtSwz.ExtSwizzleX, - source->SrcRegisterExtSwz.ExtSwizzleY, - source->SrcRegisterExtSwz.ExtSwizzleZ, - source->SrcRegisterExtSwz.ExtSwizzleW); - } - else { - src = swizzle(src, - source->SrcRegister.SwizzleX, - source->SrcRegister.SwizzleY, - source->SrcRegister.SwizzleZ, - source->SrcRegister.SwizzleW); - } - - - /* There's both negate-all-components and per-component negation. - * Try to handle both here. - */ - { - int nx = source->SrcRegisterExtSwz.NegateX; - int ny = source->SrcRegisterExtSwz.NegateY; - int nz = source->SrcRegisterExtSwz.NegateZ; - int nw = source->SrcRegisterExtSwz.NegateW; - if (source->SrcRegister.Negate) { - nx = !nx; - ny = !ny; - nz = !nz; - nw = !nw; - } - src = negate(src, nx, ny, nz, nw); - } - - /* no abs() or post-abs negation */ -#if 0 - /* XXX assertions disabled to allow arbfplight.c to run */ - /* XXX enable these assertions, or fix things */ - assert(!source->SrcRegisterExtMod.Absolute); - assert(!source->SrcRegisterExtMod.Negate); -#endif - return src; -} - - -/** - * Construct a ureg for a destination register. - */ -static uint -get_result_vector(struct i915_fp_compile *p, - const struct tgsi_full_dst_register *dest) -{ - switch (dest->DstRegister.File) { - case TGSI_FILE_OUTPUT: - { - uint sem_name = p->shader->info.output_semantic_name[dest->DstRegister.Index]; - switch (sem_name) { - case TGSI_SEMANTIC_POSITION: - return UREG(REG_TYPE_OD, 0); - case TGSI_SEMANTIC_COLOR: - return UREG(REG_TYPE_OC, 0); - default: - i915_program_error(p, "Bad inst->DstReg.Index/semantics"); - return 0; - } - } - case TGSI_FILE_TEMPORARY: - return UREG(REG_TYPE_R, dest->DstRegister.Index); - default: - i915_program_error(p, "Bad inst->DstReg.File"); - return 0; - } -} - - -/** - * Compute flags for saturation and writemask. - */ -static uint -get_result_flags(const struct tgsi_full_instruction *inst) -{ - const uint writeMask - = inst->FullDstRegisters[0].DstRegister.WriteMask; - uint flags = 0x0; - - if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) - flags |= A0_DEST_SATURATE; - - if (writeMask & TGSI_WRITEMASK_X) - flags |= A0_DEST_CHANNEL_X; - if (writeMask & TGSI_WRITEMASK_Y) - flags |= A0_DEST_CHANNEL_Y; - if (writeMask & TGSI_WRITEMASK_Z) - flags |= A0_DEST_CHANNEL_Z; - if (writeMask & TGSI_WRITEMASK_W) - flags |= A0_DEST_CHANNEL_W; - - return flags; -} - - -/** - * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token - */ -static uint -translate_tex_src_target(struct i915_fp_compile *p, uint tex) -{ - switch (tex) { - case TGSI_TEXTURE_SHADOW1D: - /* fall-through */ - case TGSI_TEXTURE_1D: - return D0_SAMPLE_TYPE_2D; - - case TGSI_TEXTURE_SHADOW2D: - /* fall-through */ - case TGSI_TEXTURE_2D: - return D0_SAMPLE_TYPE_2D; - - case TGSI_TEXTURE_SHADOWRECT: - /* fall-through */ - case TGSI_TEXTURE_RECT: - return D0_SAMPLE_TYPE_2D; - - case TGSI_TEXTURE_3D: - return D0_SAMPLE_TYPE_VOLUME; - - case TGSI_TEXTURE_CUBE: - return D0_SAMPLE_TYPE_CUBE; - - default: - i915_program_error(p, "TexSrc type"); - return 0; - } -} - - -/** - * Generate texel lookup instruction. - */ -static void -emit_tex(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst, - uint opcode) -{ - uint texture = inst->InstructionExtTexture.Texture; - uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; - uint tex = translate_tex_src_target( p, texture ); - uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); - uint coord = src_vector( p, &inst->FullSrcRegisters[0]); - - i915_emit_texld( p, - get_result_vector( p, &inst->FullDstRegisters[0] ), - get_result_flags( inst ), - sampler, - coord, - opcode); -} - - -/** - * Generate a simple arithmetic instruction - * \param opcode the i915 opcode - * \param numArgs the number of input/src arguments - */ -static void -emit_simple_arith(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst, - uint opcode, uint numArgs) -{ - uint arg1, arg2, arg3; - - assert(numArgs <= 3); - - arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->FullSrcRegisters[0] ); - arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->FullSrcRegisters[1] ); - arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->FullSrcRegisters[2] ); - - i915_emit_arith( p, - opcode, - get_result_vector( p, &inst->FullDstRegisters[0]), - get_result_flags( inst ), 0, - arg1, - arg2, - arg3 ); -} - - -/** As above, but swap the first two src regs */ -static void -emit_simple_arith_swap2(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst, - uint opcode, uint numArgs) -{ - struct tgsi_full_instruction inst2; - - assert(numArgs == 2); - - /* transpose first two registers */ - inst2 = *inst; - inst2.FullSrcRegisters[0] = inst->FullSrcRegisters[1]; - inst2.FullSrcRegisters[1] = inst->FullSrcRegisters[0]; - - emit_simple_arith(p, &inst2, opcode, numArgs); -} - - -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif - -/* - * Translate TGSI instruction to i915 instruction. - * - * Possible concerns: - * - * SIN, COS -- could use another taylor step? - * LIT -- results seem a little different to sw mesa - * LOG -- different to mesa on negative numbers, but this is conformant. - */ -static void -i915_translate_instruction(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst) -{ - uint writemask; - uint src0, src1, src2, flags; - uint tmp = 0; - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - i915_emit_arith(p, - A0_MAX, - get_result_vector(p, &inst->FullDstRegisters[0]), - get_result_flags(inst), 0, - src0, negate(src0, 1, 1, 1, 1), 0); - break; - - case TGSI_OPCODE_ADD: - emit_simple_arith(p, inst, A0_ADD, 2); - break; - - case TGSI_OPCODE_CMP: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); - src2 = src_vector(p, &inst->FullSrcRegisters[2]); - i915_emit_arith(p, A0_CMP, - get_result_vector(p, &inst->FullDstRegisters[0]), - get_result_flags(inst), - 0, src0, src2, src1); /* NOTE: order of src2, src1 */ - break; - - case TGSI_OPCODE_COS: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - tmp = i915_get_utemp(p); - - i915_emit_arith(p, - A0_MUL, - tmp, A0_DEST_CHANNEL_X, 0, - src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); - - i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); - - /* By choosing different taylor constants, could get rid of this mul: - */ - i915_emit_arith(p, - A0_MUL, - tmp, A0_DEST_CHANNEL_X, 0, - tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0); - - /* - * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 - * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1 - * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 - * result = DP4 t0, cos_constants - */ - i915_emit_arith(p, - A0_MUL, - tmp, A0_DEST_CHANNEL_XY, 0, - swizzle(tmp, X, X, ONE, ONE), - swizzle(tmp, X, ONE, ONE, ONE), 0); - - i915_emit_arith(p, - A0_MUL, - tmp, A0_DEST_CHANNEL_XYZ, 0, - swizzle(tmp, X, Y, X, ONE), - swizzle(tmp, X, X, ONE, ONE), 0); - - i915_emit_arith(p, - A0_MUL, - tmp, A0_DEST_CHANNEL_XYZ, 0, - swizzle(tmp, X, X, Z, ONE), - swizzle(tmp, Z, ONE, ONE, ONE), 0); - - i915_emit_arith(p, - A0_DP4, - get_result_vector(p, &inst->FullDstRegisters[0]), - get_result_flags(inst), 0, - swizzle(tmp, ONE, Z, Y, X), - i915_emit_const4fv(p, cos_constants), 0); - break; - - case TGSI_OPCODE_DP3: - emit_simple_arith(p, inst, A0_DP3, 2); - break; - - case TGSI_OPCODE_DP4: - emit_simple_arith(p, inst, A0_DP4, 2); - break; - - case TGSI_OPCODE_DPH: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); - - i915_emit_arith(p, - A0_DP4, - get_result_vector(p, &inst->FullDstRegisters[0]), - get_result_flags(inst), 0, - swizzle(src0, X, Y, Z, ONE), src1, 0); - break; - - case TGSI_OPCODE_DST: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); - - /* result[0] = 1 * 1; - * result[1] = a[1] * b[1]; - * result[2] = a[2] * 1; - * result[3] = 1 * b[3]; - */ - i915_emit_arith(p, - A0_MUL, - get_result_vector(p, &inst->FullDstRegisters[0]), - get_result_flags(inst), 0, - swizzle(src0, ONE, Y, Z, ONE), - swizzle(src1, ONE, Y, ONE, W), 0); - break; - - case TGSI_OPCODE_END: - /* no-op */ - break; - - case TGSI_OPCODE_EX2: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - - i915_emit_arith(p, - A0_EXP, - get_result_vector(p, &inst->FullDstRegisters[0]), - get_result_flags(inst), 0, - swizzle(src0, X, X, X, X), 0, 0); - break; - - case TGSI_OPCODE_FLR: - emit_simple_arith(p, inst, A0_FLR, 1); - break; - - case TGSI_OPCODE_FRC: - emit_simple_arith(p, inst, A0_FRC, 1); - break; - - case TGSI_OPCODE_KIL: - /* kill if src[0].x < 0 || src[0].y < 0 ... */ - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - tmp = i915_get_utemp(p); - - i915_emit_texld(p, - tmp, /* dest reg: a dummy reg */ - A0_DEST_CHANNEL_ALL, /* dest writemask */ - 0, /* sampler */ - src0, /* coord*/ - T0_TEXKILL); /* opcode */ - break; - - case TGSI_OPCODE_KILP: - assert(0); /* not tested yet */ - break; - - case TGSI_OPCODE_LG2: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - - i915_emit_arith(p, - A0_LOG, - get_result_vector(p, &inst->FullDstRegisters[0]), - get_result_flags(inst), 0, - swizzle(src0, X, X, X, X), 0, 0); - break; - - case TGSI_OPCODE_LIT: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - tmp = i915_get_utemp(p); - - /* tmp = max( a.xyzw, a.00zw ) - * XXX: Clamp tmp.w to -128..128 - * tmp.y = log(tmp.y) - * tmp.y = tmp.w * tmp.y - * tmp.y = exp(tmp.y) - * result = cmp (a.11-x1, a.1x01, a.1xy1 ) - */ - i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, - src0, swizzle(src0, ZERO, ZERO, Z, W), 0); - - i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, - swizzle(tmp, Y, Y, Y, Y), 0, 0); - - i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, - swizzle(tmp, ZERO, Y, ZERO, ZERO), - swizzle(tmp, ZERO, W, ZERO, ZERO), 0); - - i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, - swizzle(tmp, Y, Y, Y, Y), 0, 0); - - i915_emit_arith(p, A0_CMP, - get_result_vector(p, &inst->FullDstRegisters[0]), - get_result_flags(inst), 0, - negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), - swizzle(tmp, ONE, X, ZERO, ONE), - swizzle(tmp, ONE, X, Y, ONE)); - - break; - - case TGSI_OPCODE_LRP: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); - src2 = src_vector(p, &inst->FullSrcRegisters[2]); - flags = get_result_flags(inst); - tmp = i915_get_utemp(p); - - /* b*a + c*(1-a) - * - * b*a + c - ca - * - * tmp = b*a + c, - * result = (-c)*a + tmp - */ - i915_emit_arith(p, A0_MAD, tmp, - flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); - - i915_emit_arith(p, A0_MAD, - get_result_vector(p, &inst->FullDstRegisters[0]), - flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); - break; - - case TGSI_OPCODE_MAD: - emit_simple_arith(p, inst, A0_MAD, 3); - break; - - case TGSI_OPCODE_MAX: - emit_simple_arith(p, inst, A0_MAX, 2); - break; - - case TGSI_OPCODE_MIN: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); - tmp = i915_get_utemp(p); - flags = get_result_flags(inst); - - i915_emit_arith(p, - A0_MAX, - tmp, flags & A0_DEST_CHANNEL_ALL, 0, - negate(src0, 1, 1, 1, 1), - negate(src1, 1, 1, 1, 1), 0); - - i915_emit_arith(p, - A0_MOV, - get_result_vector(p, &inst->FullDstRegisters[0]), - flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0); - break; - - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: - emit_simple_arith(p, inst, A0_MOV, 1); - break; - - case TGSI_OPCODE_MUL: - emit_simple_arith(p, inst, A0_MUL, 2); - break; - - case TGSI_OPCODE_POW: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); - tmp = i915_get_utemp(p); - flags = get_result_flags(inst); - - /* XXX: masking on intermediate values, here and elsewhere. - */ - i915_emit_arith(p, - A0_LOG, - tmp, A0_DEST_CHANNEL_X, 0, - swizzle(src0, X, X, X, X), 0, 0); - - i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); - - i915_emit_arith(p, - A0_EXP, - get_result_vector(p, &inst->FullDstRegisters[0]), - flags, 0, swizzle(tmp, X, X, X, X), 0, 0); - break; - - case TGSI_OPCODE_RET: - /* XXX: no-op? */ - break; - - case TGSI_OPCODE_RCP: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - - i915_emit_arith(p, - A0_RCP, - get_result_vector(p, &inst->FullDstRegisters[0]), - get_result_flags(inst), 0, - swizzle(src0, X, X, X, X), 0, 0); - break; - - case TGSI_OPCODE_RSQ: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - - i915_emit_arith(p, - A0_RSQ, - get_result_vector(p, &inst->FullDstRegisters[0]), - get_result_flags(inst), 0, - swizzle(src0, X, X, X, X), 0, 0); - break; - - case TGSI_OPCODE_SCS: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - tmp = i915_get_utemp(p); - - /* - * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 - * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x - * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x - * scs.x = DP4 t1, sin_constants - * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 - * scs.y = DP4 t1, cos_constants - */ - i915_emit_arith(p, - A0_MUL, - tmp, A0_DEST_CHANNEL_XY, 0, - swizzle(src0, X, X, ONE, ONE), - swizzle(src0, X, ONE, ONE, ONE), 0); - - i915_emit_arith(p, - A0_MUL, - tmp, A0_DEST_CHANNEL_ALL, 0, - swizzle(tmp, X, Y, X, Y), - swizzle(tmp, X, X, ONE, ONE), 0); - - writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; - - if (writemask & TGSI_WRITEMASK_Y) { - uint tmp1; - - if (writemask & TGSI_WRITEMASK_X) - tmp1 = i915_get_utemp(p); - else - tmp1 = tmp; - - i915_emit_arith(p, - A0_MUL, - tmp1, A0_DEST_CHANNEL_ALL, 0, - swizzle(tmp, X, Y, Y, W), - swizzle(tmp, X, Z, ONE, ONE), 0); - - i915_emit_arith(p, - A0_DP4, - get_result_vector(p, &inst->FullDstRegisters[0]), - A0_DEST_CHANNEL_Y, 0, - swizzle(tmp1, W, Z, Y, X), - i915_emit_const4fv(p, sin_constants), 0); - } - - if (writemask & TGSI_WRITEMASK_X) { - i915_emit_arith(p, - A0_MUL, - tmp, A0_DEST_CHANNEL_XYZ, 0, - swizzle(tmp, X, X, Z, ONE), - swizzle(tmp, Z, ONE, ONE, ONE), 0); - - i915_emit_arith(p, - A0_DP4, - get_result_vector(p, &inst->FullDstRegisters[0]), - A0_DEST_CHANNEL_X, 0, - swizzle(tmp, ONE, Z, Y, X), - i915_emit_const4fv(p, cos_constants), 0); - } - break; - - case TGSI_OPCODE_SGE: - emit_simple_arith(p, inst, A0_SGE, 2); - break; - - case TGSI_OPCODE_SLE: - /* like SGE, but swap reg0, reg1 */ - emit_simple_arith_swap2(p, inst, A0_SGE, 2); - break; - - case TGSI_OPCODE_SIN: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - tmp = i915_get_utemp(p); - - i915_emit_arith(p, - A0_MUL, - tmp, A0_DEST_CHANNEL_X, 0, - src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); - - i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); - - /* By choosing different taylor constants, could get rid of this mul: - */ - i915_emit_arith(p, - A0_MUL, - tmp, A0_DEST_CHANNEL_X, 0, - tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0); - - /* - * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 - * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x - * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x - * result = DP4 t1.wzyx, sin_constants - */ - i915_emit_arith(p, - A0_MUL, - tmp, A0_DEST_CHANNEL_XY, 0, - swizzle(tmp, X, X, ONE, ONE), - swizzle(tmp, X, ONE, ONE, ONE), 0); - - i915_emit_arith(p, - A0_MUL, - tmp, A0_DEST_CHANNEL_ALL, 0, - swizzle(tmp, X, Y, X, Y), - swizzle(tmp, X, X, ONE, ONE), 0); - - i915_emit_arith(p, - A0_MUL, - tmp, A0_DEST_CHANNEL_ALL, 0, - swizzle(tmp, X, Y, Y, W), - swizzle(tmp, X, Z, ONE, ONE), 0); - - i915_emit_arith(p, - A0_DP4, - get_result_vector(p, &inst->FullDstRegisters[0]), - get_result_flags(inst), 0, - swizzle(tmp, W, Z, Y, X), - i915_emit_const4fv(p, sin_constants), 0); - break; - - case TGSI_OPCODE_SLT: - emit_simple_arith(p, inst, A0_SLT, 2); - break; - - case TGSI_OPCODE_SGT: - /* like SLT, but swap reg0, reg1 */ - emit_simple_arith_swap2(p, inst, A0_SLT, 2); - break; - - case TGSI_OPCODE_SUB: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); - - i915_emit_arith(p, - A0_ADD, - get_result_vector(p, &inst->FullDstRegisters[0]), - get_result_flags(inst), 0, - src0, negate(src1, 1, 1, 1, 1), 0); - break; - - case TGSI_OPCODE_TEX: - emit_tex(p, inst, T0_TEXLD); - break; - - case TGSI_OPCODE_TXB: - emit_tex(p, inst, T0_TEXLDB); - break; - - case TGSI_OPCODE_TXP: - emit_tex(p, inst, T0_TEXLDP); - break; - - case TGSI_OPCODE_XPD: - /* Cross product: - * result.x = src0.y * src1.z - src0.z * src1.y; - * result.y = src0.z * src1.x - src0.x * src1.z; - * result.z = src0.x * src1.y - src0.y * src1.x; - * result.w = undef; - */ - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); - tmp = i915_get_utemp(p); - - i915_emit_arith(p, - A0_MUL, - tmp, A0_DEST_CHANNEL_ALL, 0, - swizzle(src0, Z, X, Y, ONE), - swizzle(src1, Y, Z, X, ONE), 0); - - i915_emit_arith(p, - A0_MAD, - get_result_vector(p, &inst->FullDstRegisters[0]), - get_result_flags(inst), 0, - swizzle(src0, Y, Z, X, ONE), - swizzle(src1, Z, X, Y, ONE), - negate(tmp, 1, 1, 1, 0)); - break; - - default: - i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode); - p->error = 1; - return; - } - - i915_release_utemps(p); -} - - -/** - * Translate TGSI fragment shader into i915 hardware instructions. - * \param p the translation state - * \param tokens the TGSI token array - */ -static void -i915_translate_instructions(struct i915_fp_compile *p, - const struct tgsi_token *tokens) -{ - struct i915_fragment_shader *ifs = p->shader; - struct tgsi_parse_context parse; - - tgsi_parse_init( &parse, tokens ); - - while( !tgsi_parse_end_of_tokens( &parse ) ) { - - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - if (parse.FullToken.FullDeclaration.Declaration.File - == TGSI_FILE_CONSTANT) { - uint i; - for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; - i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; - i++) { - assert(ifs->constant_flags[i] == 0x0); - ifs->constant_flags[i] = I915_CONSTFLAG_USER; - ifs->num_constants = MAX2(ifs->num_constants, i + 1); - } - } - else if (parse.FullToken.FullDeclaration.Declaration.File - == TGSI_FILE_TEMPORARY) { - uint i; - for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; - i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; - i++) { - assert(i < I915_MAX_TEMPORARY); - /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ - p->temp_flag |= (1 << i); /* mark temp as used */ - } - } - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - const struct tgsi_full_immediate *imm - = &parse.FullToken.FullImmediate; - const uint pos = p->num_immediates++; - uint j; - assert( imm->Immediate.NrTokens <= 4 + 1 ); - for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { - p->immediates[pos][j] = imm->u[j].Float; - } - } - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - if (p->first_instruction) { - /* resolve location of immediates */ - uint i, j; - for (i = 0; i < p->num_immediates; i++) { - /* find constant slot for this immediate */ - for (j = 0; j < I915_MAX_CONSTANT; j++) { - if (ifs->constant_flags[j] == 0x0) { - memcpy(ifs->constants[j], - p->immediates[i], - 4 * sizeof(float)); - /*printf("immediate %d maps to const %d\n", i, j);*/ - ifs->constant_flags[j] = 0xf; /* all four comps used */ - p->immediates_map[i] = j; - ifs->num_constants = MAX2(ifs->num_constants, j + 1); - break; - } - } - } - - p->first_instruction = FALSE; - } - - i915_translate_instruction(p, &parse.FullToken.FullInstruction); - break; - - default: - assert( 0 ); - } - - } /* while */ - - tgsi_parse_free (&parse); -} - - -static struct i915_fp_compile * -i915_init_compile(struct i915_context *i915, - struct i915_fragment_shader *ifs) -{ - struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); - - p->shader = ifs; - - /* Put new constants at end of const buffer, growing downward. - * The problem is we don't know how many user-defined constants might - * be specified with pipe->set_constant_buffer(). - * Should pre-scan the user's program to determine the highest-numbered - * constant referenced. - */ - ifs->num_constants = 0; - memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); - - p->first_instruction = TRUE; - - p->nr_tex_indirect = 1; /* correct? */ - p->nr_tex_insn = 0; - p->nr_alu_insn = 0; - p->nr_decl_insn = 0; - - p->csr = p->program; - p->decl = p->declarations; - p->decl_s = 0; - p->decl_t = 0; - p->temp_flag = ~0x0 << I915_MAX_TEMPORARY; - p->utemp_flag = ~0x7; - - p->wpos_tex = -1; - - /* initialize the first program word */ - *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM; - - return p; -} - - -/* Copy compile results to the fragment program struct and destroy the - * compilation context. - */ -static void -i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) -{ - struct i915_fragment_shader *ifs = p->shader; - unsigned long program_size = (unsigned long) (p->csr - p->program); - unsigned long decl_size = (unsigned long) (p->decl - p->declarations); - - if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) - i915_program_error(p, "Exceeded max nr indirect texture lookups"); - - if (p->nr_tex_insn > I915_MAX_TEX_INSN) - i915_program_error(p, "Exceeded max TEX instructions"); - - if (p->nr_alu_insn > I915_MAX_ALU_INSN) - i915_program_error(p, "Exceeded max ALU instructions"); - - if (p->nr_decl_insn > I915_MAX_DECL_INSN) - i915_program_error(p, "Exceeded max DECL instructions"); - - if (p->error) { - p->NumNativeInstructions = 0; - p->NumNativeAluInstructions = 0; - p->NumNativeTexInstructions = 0; - p->NumNativeTexIndirections = 0; - - i915_use_passthrough_shader(ifs); - } - else { - p->NumNativeInstructions - = p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn; - p->NumNativeAluInstructions = p->nr_alu_insn; - p->NumNativeTexInstructions = p->nr_tex_insn; - p->NumNativeTexIndirections = p->nr_tex_indirect; - - /* patch in the program length */ - p->declarations[0] |= program_size + decl_size - 2; - - /* Copy compilation results to fragment program struct: - */ - assert(!ifs->program); - ifs->program - = (uint *) MALLOC((program_size + decl_size) * sizeof(uint)); - if (ifs->program) { - ifs->program_len = program_size + decl_size; - - memcpy(ifs->program, - p->declarations, - decl_size * sizeof(uint)); - - memcpy(ifs->program + decl_size, - p->program, - program_size * sizeof(uint)); - } - } - - /* Release the compilation struct: - */ - FREE(p); -} - - -/** - * Find an unused texture coordinate slot to use for fragment WPOS. - * Update p->fp->wpos_tex with the result (-1 if no used texcoord slot is found). - */ -static void -i915_find_wpos_space(struct i915_fp_compile *p) -{ -#if 0 - const uint inputs - = p->shader->inputs_read | (1 << TGSI_ATTRIB_POS); /*XXX hack*/ - uint i; - - p->wpos_tex = -1; - - if (inputs & (1 << TGSI_ATTRIB_POS)) { - for (i = 0; i < I915_TEX_UNITS; i++) { - if ((inputs & (1 << (TGSI_ATTRIB_TEX0 + i))) == 0) { - p->wpos_tex = i; - return; - } - } - - i915_program_error(p, "No free texcoord for wpos value"); - } -#else - if (p->shader->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) { - /* frag shader using the fragment position input */ -#if 0 - assert(0); -#endif - } -#endif -} - - - - -/** - * Rather than trying to intercept and jiggle depth writes during - * emit, just move the value into its correct position at the end of - * the program: - */ -static void -i915_fixup_depth_write(struct i915_fp_compile *p) -{ - /* XXX assuming pos/depth is always in output[0] */ - if (p->shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { - const uint depth = UREG(REG_TYPE_OD, 0); - - i915_emit_arith(p, - A0_MOV, /* opcode */ - depth, /* dest reg */ - A0_DEST_CHANNEL_W, /* write mask */ - 0, /* saturate? */ - swizzle(depth, X, Y, Z, Z), /* src0 */ - 0, 0 /* src1, src2 */); - } -} - - -void -i915_translate_fragment_program( struct i915_context *i915, - struct i915_fragment_shader *fs) -{ - struct i915_fp_compile *p = i915_init_compile(i915, fs); - const struct tgsi_token *tokens = fs->state.tokens; - - i915_find_wpos_space(p); - -#if 0 - tgsi_dump(tokens, 0); -#endif - - i915_translate_instructions(p, tokens); - i915_fixup_depth_write(p); - - i915_fini_compile(i915, p); -} diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c deleted file mode 100644 index d9a5c40ab9..0000000000 --- a/src/gallium/drivers/i915simple/i915_prim_emit.c +++ /dev/null @@ -1,219 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "draw/draw_pipe.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "util/u_pack_color.h" - -#include "i915_context.h" -#include "i915_reg.h" -#include "i915_state.h" -#include "i915_batch.h" - - - -/** - * Primitive emit to hardware. No support for vertex buffers or any - * nice fast paths. - */ -struct setup_stage { - struct draw_stage stage; /**< This must be first (base class) */ - - struct i915_context *i915; -}; - - - -/** - * Basically a cast wrapper. - */ -static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) -{ - return (struct setup_stage *)stage; -} - - -/** - * Extract the needed fields from vertex_header and emit i915 dwords. - * Recall that the vertices are constructed by the 'draw' module and - * have a couple of slots at the beginning (1-dword header, 4-dword - * clip pos) that we ignore here. - */ -static INLINE void -emit_hw_vertex( struct i915_context *i915, - const struct vertex_header *vertex) -{ - const struct vertex_info *vinfo = &i915->current.vertex_info; - uint i; - uint count = 0; /* for debug/sanity */ - - assert(!i915->dirty); - - for (i = 0; i < vinfo->num_attribs; i++) { - const uint j = vinfo->attrib[i].src_index; - const float *attrib = vertex->data[j]; - switch (vinfo->attrib[i].emit) { - case EMIT_1F: - OUT_BATCH( fui(attrib[0]) ); - count++; - break; - case EMIT_2F: - OUT_BATCH( fui(attrib[0]) ); - OUT_BATCH( fui(attrib[1]) ); - count += 2; - break; - case EMIT_3F: - OUT_BATCH( fui(attrib[0]) ); - OUT_BATCH( fui(attrib[1]) ); - OUT_BATCH( fui(attrib[2]) ); - count += 3; - break; - case EMIT_4F: - OUT_BATCH( fui(attrib[0]) ); - OUT_BATCH( fui(attrib[1]) ); - OUT_BATCH( fui(attrib[2]) ); - OUT_BATCH( fui(attrib[3]) ); - count += 4; - break; - case EMIT_4UB: - OUT_BATCH( pack_ub4(float_to_ubyte( attrib[2] ), - float_to_ubyte( attrib[1] ), - float_to_ubyte( attrib[0] ), - float_to_ubyte( attrib[3] )) ); - count += 1; - break; - default: - assert(0); - } - } - assert(count == vinfo->size); -} - - - -static INLINE void -emit_prim( struct draw_stage *stage, - struct prim_header *prim, - unsigned hwprim, - unsigned nr ) -{ - struct i915_context *i915 = setup_stage(stage)->i915; - unsigned vertex_size; - unsigned i; - - if (i915->dirty) - i915_update_derived( i915 ); - - if (i915->hardware_dirty) - i915_emit_hardware_state( i915 ); - - /* need to do this after validation! */ - vertex_size = i915->current.vertex_info.size * 4; /* in bytes */ - assert(vertex_size >= 12); /* never smaller than 12 bytes */ - - if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { - FLUSH_BATCH(NULL); - - /* Make sure state is re-emitted after a flush: - */ - i915_update_derived( i915 ); - i915_emit_hardware_state( i915 ); - - if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { - assert(0); - return; - } - } - - /* Emit each triangle as a single primitive. I told you this was - * simple. - */ - OUT_BATCH(_3DPRIMITIVE | - hwprim | - ((4 + vertex_size * nr)/4 - 2)); - - for (i = 0; i < nr; i++) - emit_hw_vertex(i915, prim->v[i]); -} - - -static void -setup_tri( struct draw_stage *stage, struct prim_header *prim ) -{ - emit_prim( stage, prim, PRIM3D_TRILIST, 3 ); -} - - -static void -setup_line(struct draw_stage *stage, struct prim_header *prim) -{ - emit_prim( stage, prim, PRIM3D_LINELIST, 2 ); -} - - -static void -setup_point(struct draw_stage *stage, struct prim_header *prim) -{ - emit_prim( stage, prim, PRIM3D_POINTLIST, 1 ); -} - - -static void setup_flush( struct draw_stage *stage, unsigned flags ) -{ -} - -static void reset_stipple_counter( struct draw_stage *stage ) -{ -} - -static void render_destroy( struct draw_stage *stage ) -{ - FREE( stage ); -} - - -/** - * Create a new primitive setup/render stage. This gets plugged into - * the 'draw' module's pipeline. - */ -struct draw_stage *i915_draw_render_stage( struct i915_context *i915 ) -{ - struct setup_stage *setup = CALLOC_STRUCT(setup_stage); - - setup->i915 = i915; - setup->stage.draw = i915->draw; - setup->stage.point = setup_point; - setup->stage.line = setup_line; - setup->stage.tri = setup_tri; - setup->stage.flush = setup_flush; - setup->stage.reset_stipple_counter = reset_stipple_counter; - setup->stage.destroy = render_destroy; - - return &setup->stage; -} diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c deleted file mode 100644 index 8a3e466c84..0000000000 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ /dev/null @@ -1,645 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \file - * Build post-transformation, post-clipping vertex buffers and element - * lists by hooking into the end of the primitive pipeline and - * manipulating the vertex_id field in the vertex headers. - * - * XXX: work in progress - * - * \author José Fonseca - * \author Keith Whitwell - */ - - -#include "draw/draw_context.h" -#include "draw/draw_vbuf.h" -#include "util/u_debug.h" -#include "pipe/p_inlines.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "util/u_fifo.h" - -#include "i915_context.h" -#include "i915_reg.h" -#include "i915_batch.h" -#include "i915_state.h" - - -/** - * Primitive renderer for i915. - */ -struct i915_vbuf_render { - struct vbuf_render base; - - struct i915_context *i915; - - /** Vertex size in bytes */ - size_t vertex_size; - - /** Software primitive */ - unsigned prim; - - /** Hardware primitive */ - unsigned hwprim; - - /** Genereate a vertex list */ - unsigned fallback; - - /* Stuff for the vbo */ - struct intel_buffer *vbo; - size_t vbo_size; - size_t vbo_offset; - void *vbo_ptr; - size_t vbo_max_used; - - /* stuff for the pool */ - struct util_fifo *pool_fifo; - unsigned pool_used; - unsigned pool_buffer_size; - boolean pool_not_used; -}; - - -/** - * Basically a cast wrapper. - */ -static INLINE struct i915_vbuf_render * -i915_vbuf_render(struct vbuf_render *render) -{ - assert(render); - return (struct i915_vbuf_render *)render; -} - -static const struct vertex_info * -i915_vbuf_render_get_vertex_info(struct vbuf_render *render) -{ - struct i915_vbuf_render *i915_render = i915_vbuf_render(render); - struct i915_context *i915 = i915_render->i915; - - if (i915->dirty) { - /* make sure we have up to date vertex layout */ - i915_update_derived(i915); - } - - return &i915->current.vertex_info; -} - -static boolean -i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size) -{ - struct i915_context *i915 = i915_render->i915; - - if (i915_render->vbo_size < size + i915_render->vbo_offset) - return FALSE; - - if (i915->vbo_flushed) - return FALSE; - - return TRUE; -} - -static void -i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) -{ - struct i915_context *i915 = i915_render->i915; - struct intel_winsys *iws = i915->iws; - - if (i915_render->vbo) { - if (i915_render->pool_not_used) - iws->buffer_destroy(iws, i915_render->vbo); - else - u_fifo_add(i915_render->pool_fifo, i915_render->vbo); - i915_render->vbo = NULL; - } - - i915->vbo_flushed = 0; - - i915_render->vbo_size = MAX2(size, i915_render->pool_buffer_size); - i915_render->vbo_offset = 0; - - if (i915_render->vbo_size != i915_render->pool_buffer_size) { - i915_render->pool_not_used = TRUE; - i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, - INTEL_NEW_VERTEX); - } else { - i915_render->pool_not_used = FALSE; - - if (i915_render->pool_used >= 2) { - FLUSH_BATCH(NULL); - i915->vbo_flushed = 0; - i915_render->pool_used = 0; - } - u_fifo_pop(i915_render->pool_fifo, (void**)&i915_render->vbo); - } -} - -static boolean -i915_vbuf_render_allocate_vertices(struct vbuf_render *render, - ushort vertex_size, - ushort nr_vertices) -{ - struct i915_vbuf_render *i915_render = i915_vbuf_render(render); - struct i915_context *i915 = i915_render->i915; - size_t size = (size_t)vertex_size * (size_t)nr_vertices; - - /* FIXME: handle failure */ - assert(!i915->vbo); - - if (!i915_vbuf_render_reserve(i915_render, size)) { - - if (i915->vbo_flushed) - i915_render->pool_used = 0; - - i915_vbuf_render_new_buf(i915_render, size); - } - - i915_render->vertex_size = vertex_size; - i915->vbo = i915_render->vbo; - i915->vbo_offset = i915_render->vbo_offset; - i915->dirty |= I915_NEW_VBO; - - if (!i915_render->vbo) - return FALSE; - return TRUE; -} - -static void * -i915_vbuf_render_map_vertices(struct vbuf_render *render) -{ - struct i915_vbuf_render *i915_render = i915_vbuf_render(render); - struct i915_context *i915 = i915_render->i915; - struct intel_winsys *iws = i915->iws; - - if (i915->vbo_flushed) - debug_printf("%s bad vbo flush occured stalling on hw\n", __FUNCTION__); - - i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE); - - return (unsigned char *)i915_render->vbo_ptr + i915->vbo_offset; -} - -static void -i915_vbuf_render_unmap_vertices(struct vbuf_render *render, - ushort min_index, - ushort max_index) -{ - struct i915_vbuf_render *i915_render = i915_vbuf_render(render); - struct i915_context *i915 = i915_render->i915; - struct intel_winsys *iws = i915->iws; - - i915_render->vbo_max_used = MAX2(i915_render->vbo_max_used, i915_render->vertex_size * (max_index + 1)); - iws->buffer_unmap(iws, i915_render->vbo); -} - -static boolean -i915_vbuf_render_set_primitive(struct vbuf_render *render, - unsigned prim) -{ - struct i915_vbuf_render *i915_render = i915_vbuf_render(render); - i915_render->prim = prim; - - switch(prim) { - case PIPE_PRIM_POINTS: - i915_render->hwprim = PRIM3D_POINTLIST; - i915_render->fallback = 0; - return TRUE; - case PIPE_PRIM_LINES: - i915_render->hwprim = PRIM3D_LINELIST; - i915_render->fallback = 0; - return TRUE; - case PIPE_PRIM_LINE_LOOP: - i915_render->hwprim = PRIM3D_LINELIST; - i915_render->fallback = PIPE_PRIM_LINE_LOOP; - return TRUE; - case PIPE_PRIM_LINE_STRIP: - i915_render->hwprim = PRIM3D_LINESTRIP; - i915_render->fallback = 0; - return TRUE; - case PIPE_PRIM_TRIANGLES: - i915_render->hwprim = PRIM3D_TRILIST; - i915_render->fallback = 0; - return TRUE; - case PIPE_PRIM_TRIANGLE_STRIP: - i915_render->hwprim = PRIM3D_TRISTRIP; - i915_render->fallback = 0; - return TRUE; - case PIPE_PRIM_TRIANGLE_FAN: - i915_render->hwprim = PRIM3D_TRIFAN; - i915_render->fallback = 0; - return TRUE; - case PIPE_PRIM_QUADS: - i915_render->hwprim = PRIM3D_TRILIST; - i915_render->fallback = PIPE_PRIM_QUADS; - return TRUE; - case PIPE_PRIM_QUAD_STRIP: - i915_render->hwprim = PRIM3D_TRILIST; - i915_render->fallback = PIPE_PRIM_QUAD_STRIP; - return TRUE; - case PIPE_PRIM_POLYGON: - i915_render->hwprim = PRIM3D_POLY; - i915_render->fallback = 0; - return TRUE; - default: - /* FIXME: Actually, can handle a lot more just fine... */ - return FALSE; - } -} - -/** - * Used for fallbacks in draw_arrays - */ -static void -draw_arrays_generate_indices(struct vbuf_render *render, - unsigned start, uint nr, - unsigned type) -{ - struct i915_vbuf_render *i915_render = i915_vbuf_render(render); - struct i915_context *i915 = i915_render->i915; - unsigned i; - unsigned end = start + nr; - switch(type) { - case 0: - for (i = start; i+1 < end; i += 2) - OUT_BATCH((i+0) | (i+1) << 16); - if (i < end) - OUT_BATCH(i); - break; - case PIPE_PRIM_LINE_LOOP: - if (nr >= 2) { - for (i = start + 1; i < end; i++) - OUT_BATCH((i-0) | (i+0) << 16); - OUT_BATCH((i-0) | ( start) << 16); - } - break; - case PIPE_PRIM_QUADS: - for (i = start; i + 3 < end; i += 4) { - OUT_BATCH((i+0) | (i+1) << 16); - OUT_BATCH((i+3) | (i+1) << 16); - OUT_BATCH((i+2) | (i+3) << 16); - } - break; - case PIPE_PRIM_QUAD_STRIP: - for (i = start; i + 3 < end; i += 2) { - OUT_BATCH((i+0) | (i+1) << 16); - OUT_BATCH((i+3) | (i+2) << 16); - OUT_BATCH((i+0) | (i+3) << 16); - } - break; - default: - assert(0); - } -} - -static unsigned -draw_arrays_calc_nr_indices(uint nr, unsigned type) -{ - switch (type) { - case 0: - return nr; - case PIPE_PRIM_LINE_LOOP: - if (nr >= 2) - return nr * 2; - else - return 0; - case PIPE_PRIM_QUADS: - return (nr / 4) * 6; - case PIPE_PRIM_QUAD_STRIP: - return ((nr - 2) / 2) * 6; - default: - assert(0); - return 0; - } -} - -static void -draw_arrays_fallback(struct vbuf_render *render, - unsigned start, - uint nr) -{ - struct i915_vbuf_render *i915_render = i915_vbuf_render(render); - struct i915_context *i915 = i915_render->i915; - unsigned nr_indices; - - if (i915->dirty) - i915_update_derived(i915); - - if (i915->hardware_dirty) - i915_emit_hardware_state(i915); - - nr_indices = draw_arrays_calc_nr_indices(nr, i915_render->fallback); - if (!nr_indices) - return; - - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { - FLUSH_BATCH(NULL); - - /* Make sure state is re-emitted after a flush: - */ - i915_update_derived(i915); - i915_emit_hardware_state(i915); - i915->vbo_flushed = 1; - - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { - assert(0); - goto out; - } - } - OUT_BATCH(_3DPRIMITIVE | - PRIM_INDIRECT | - i915_render->hwprim | - PRIM_INDIRECT_ELTS | - nr_indices); - - draw_arrays_generate_indices(render, start, nr, i915_render->fallback); - -out: - return; -} - -static void -i915_vbuf_render_draw_arrays(struct vbuf_render *render, - unsigned start, - uint nr) -{ - struct i915_vbuf_render *i915_render = i915_vbuf_render(render); - struct i915_context *i915 = i915_render->i915; - - if (i915_render->fallback) { - draw_arrays_fallback(render, start, nr); - return; - } - - if (i915->dirty) - i915_update_derived(i915); - - if (i915->hardware_dirty) - i915_emit_hardware_state(i915); - - if (!BEGIN_BATCH(2, 0)) { - FLUSH_BATCH(NULL); - - /* Make sure state is re-emitted after a flush: - */ - i915_update_derived(i915); - i915_emit_hardware_state(i915); - i915->vbo_flushed = 1; - - if (!BEGIN_BATCH(2, 0)) { - assert(0); - goto out; - } - } - - OUT_BATCH(_3DPRIMITIVE | - PRIM_INDIRECT | - PRIM_INDIRECT_SEQUENTIAL | - i915_render->hwprim | - nr); - OUT_BATCH(start); /* Beginning vertex index */ - -out: - return; -} - -/** - * Used for normal and fallback emitting of indices - * If type is zero normal operation assumed. - */ -static void -draw_generate_indices(struct vbuf_render *render, - const ushort *indices, - uint nr_indices, - unsigned type) -{ - struct i915_vbuf_render *i915_render = i915_vbuf_render(render); - struct i915_context *i915 = i915_render->i915; - unsigned i; - - switch(type) { - case 0: - for (i = 0; i + 1 < nr_indices; i += 2) { - OUT_BATCH(indices[i] | indices[i+1] << 16); - } - if (i < nr_indices) { - OUT_BATCH(indices[i]); - } - break; - case PIPE_PRIM_LINE_LOOP: - if (nr_indices >= 2) { - for (i = 1; i < nr_indices; i++) - OUT_BATCH(indices[i-1] | indices[i] << 16); - OUT_BATCH(indices[i-1] | indices[0] << 16); - } - break; - case PIPE_PRIM_QUADS: - for (i = 0; i + 3 < nr_indices; i += 4) { - OUT_BATCH(indices[i+0] | indices[i+1] << 16); - OUT_BATCH(indices[i+3] | indices[i+1] << 16); - OUT_BATCH(indices[i+2] | indices[i+3] << 16); - } - break; - case PIPE_PRIM_QUAD_STRIP: - for (i = 0; i + 3 < nr_indices; i += 2) { - OUT_BATCH(indices[i+0] | indices[i+1] << 16); - OUT_BATCH(indices[i+3] | indices[i+2] << 16); - OUT_BATCH(indices[i+0] | indices[i+3] << 16); - } - break; - default: - assert(0); - break; - } -} - -static unsigned -draw_calc_nr_indices(uint nr_indices, unsigned type) -{ - switch (type) { - case 0: - return nr_indices; - case PIPE_PRIM_LINE_LOOP: - if (nr_indices >= 2) - return nr_indices * 2; - else - return 0; - case PIPE_PRIM_QUADS: - return (nr_indices / 4) * 6; - case PIPE_PRIM_QUAD_STRIP: - return ((nr_indices - 2) / 2) * 6; - default: - assert(0); - return 0; - } -} - -static void -i915_vbuf_render_draw(struct vbuf_render *render, - const ushort *indices, - uint nr_indices) -{ - struct i915_vbuf_render *i915_render = i915_vbuf_render(render); - struct i915_context *i915 = i915_render->i915; - unsigned save_nr_indices; - - save_nr_indices = nr_indices; - - nr_indices = draw_calc_nr_indices(nr_indices, i915_render->fallback); - if (!nr_indices) - return; - - if (i915->dirty) - i915_update_derived(i915); - - if (i915->hardware_dirty) - i915_emit_hardware_state(i915); - - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { - FLUSH_BATCH(NULL); - - /* Make sure state is re-emitted after a flush: - */ - i915_update_derived(i915); - i915_emit_hardware_state(i915); - i915->vbo_flushed = 1; - - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { - assert(0); - goto out; - } - } - - OUT_BATCH(_3DPRIMITIVE | - PRIM_INDIRECT | - i915_render->hwprim | - PRIM_INDIRECT_ELTS | - nr_indices); - draw_generate_indices(render, - indices, - save_nr_indices, - i915_render->fallback); - -out: - return; -} - -static void -i915_vbuf_render_release_vertices(struct vbuf_render *render) -{ - struct i915_vbuf_render *i915_render = i915_vbuf_render(render); - struct i915_context *i915 = i915_render->i915; - - assert(i915->vbo); - - i915_render->vbo_offset += i915_render->vbo_max_used; - i915_render->vbo_max_used = 0; - i915->vbo = NULL; - i915->dirty |= I915_NEW_VBO; -} - -static void -i915_vbuf_render_destroy(struct vbuf_render *render) -{ - struct i915_vbuf_render *i915_render = i915_vbuf_render(render); - FREE(i915_render); -} - -/** - * Create a new primitive render. - */ -static struct vbuf_render * -i915_vbuf_render_create(struct i915_context *i915) -{ - struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render); - struct intel_winsys *iws = i915->iws; - int i; - - i915_render->i915 = i915; - - i915_render->base.max_vertex_buffer_bytes = 128*1024; - - /* NOTE: it must be such that state and vertices indices fit in a single - * batch buffer. - */ - i915_render->base.max_indices = 16*1024; - - i915_render->base.get_vertex_info = i915_vbuf_render_get_vertex_info; - i915_render->base.allocate_vertices = i915_vbuf_render_allocate_vertices; - i915_render->base.map_vertices = i915_vbuf_render_map_vertices; - i915_render->base.unmap_vertices = i915_vbuf_render_unmap_vertices; - i915_render->base.set_primitive = i915_vbuf_render_set_primitive; - i915_render->base.draw = i915_vbuf_render_draw; - i915_render->base.draw_arrays = i915_vbuf_render_draw_arrays; - i915_render->base.release_vertices = i915_vbuf_render_release_vertices; - i915_render->base.destroy = i915_vbuf_render_destroy; - - - i915_render->vbo = NULL; - i915_render->vbo_size = 0; - i915_render->vbo_offset = 0; - - i915_render->pool_used = FALSE; - i915_render->pool_buffer_size = 128 * 4096; - i915_render->pool_fifo = u_fifo_create(6); - for (i = 0; i < 6; i++) - u_fifo_add(i915_render->pool_fifo, - iws->buffer_create(iws, i915_render->pool_buffer_size, 64, - INTEL_NEW_VERTEX)); - -#if 0 - /* TODO JB: is this realy needed? */ - i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE); - iws->buffer_unmap(iws, i915_render->vbo); -#endif - - return &i915_render->base; -} - -/** - * Create a new primitive vbuf/render stage. - */ -struct draw_stage *i915_draw_vbuf_stage(struct i915_context *i915) -{ - struct vbuf_render *render; - struct draw_stage *stage; - - render = i915_vbuf_render_create(i915); - if(!render) - return NULL; - - stage = draw_vbuf_stage(i915->draw, render); - if(!stage) { - render->destroy(render); - return NULL; - } - /** TODO JB: this shouldn't be here */ - draw_set_render(i915->draw, render); - - return stage; -} diff --git a/src/gallium/drivers/i915simple/i915_reg.h b/src/gallium/drivers/i915simple/i915_reg.h deleted file mode 100644 index 04620fec68..0000000000 --- a/src/gallium/drivers/i915simple/i915_reg.h +++ /dev/null @@ -1,978 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef I915_REG_H -#define I915_REG_H - - -#define I915_SET_FIELD( var, mask, value ) (var &= ~(mask), var |= value) - -#define CMD_3D (0x3<<29) - -#define PRIM3D_INLINE (CMD_3D | (0x1f<<24)) -#define PRIM3D_TRILIST (0x0<<18) -#define PRIM3D_TRISTRIP (0x1<<18) -#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) -#define PRIM3D_TRIFAN (0x3<<18) -#define PRIM3D_POLY (0x4<<18) -#define PRIM3D_LINELIST (0x5<<18) -#define PRIM3D_LINESTRIP (0x6<<18) -#define PRIM3D_RECTLIST (0x7<<18) -#define PRIM3D_POINTLIST (0x8<<18) -#define PRIM3D_DIB (0x9<<18) -#define PRIM3D_CLEAR_RECT (0xa<<18) -#define PRIM3D_ZONE_INIT (0xd<<18) -#define PRIM3D_MASK (0x1f<<18) - -/* p137 */ -#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24)) -#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16) -#define AA_LINE_ECAAR_WIDTH_0_5 0 -#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14) -#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14) -#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14) -#define AA_LINE_REGION_WIDTH_ENABLE (1<<8) -#define AA_LINE_REGION_WIDTH_0_5 0 -#define AA_LINE_REGION_WIDTH_1_0 (1<<6) -#define AA_LINE_REGION_WIDTH_2_0 (2<<6) -#define AA_LINE_REGION_WIDTH_4_0 (3<<6) - -/* 3DSTATE_BACKFACE_STENCIL_OPS, p138*/ -#define _3DSTATE_BACKFACE_STENCIL_OPS (CMD_3D | (0x8<<24)) -#define BFO_ENABLE_STENCIL_REF (1<<23) -#define BFO_STENCIL_REF_SHIFT 15 -#define BFO_STENCIL_REF_MASK (0xff<<15) -#define BFO_ENABLE_STENCIL_FUNCS (1<<14) -#define BFO_STENCIL_TEST_SHIFT 11 -#define BFO_STENCIL_TEST_MASK (0x7<<11) -#define BFO_STENCIL_FAIL_SHIFT 8 -#define BFO_STENCIL_FAIL_MASK (0x7<<8) -#define BFO_STENCIL_PASS_Z_FAIL_SHIFT 5 -#define BFO_STENCIL_PASS_Z_FAIL_MASK (0x7<<5) -#define BFO_STENCIL_PASS_Z_PASS_SHIFT 2 -#define BFO_STENCIL_PASS_Z_PASS_MASK (0x7<<2) -#define BFO_ENABLE_STENCIL_TWO_SIDE (1<<1) -#define BFO_STENCIL_TWO_SIDE (1<<0) - - -/* 3DSTATE_BACKFACE_STENCIL_MASKS, p140 */ -#define _3DSTATE_BACKFACE_STENCIL_MASKS (CMD_3D | (0x9<<24)) -#define BFM_ENABLE_STENCIL_TEST_MASK (1<<17) -#define BFM_ENABLE_STENCIL_WRITE_MASK (1<<16) -#define BFM_STENCIL_TEST_MASK_SHIFT 8 -#define BFM_STENCIL_TEST_MASK_MASK (0xff<<8) -#define BFM_STENCIL_WRITE_MASK_SHIFT 0 -#define BFM_STENCIL_WRITE_MASK_MASK (0xff<<0) - - - -/* 3DSTATE_BIN_CONTROL p141 */ - -/* p143 */ -#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) -/* Dword 1 */ -#define BUF_3D_ID_COLOR_BACK (0x3<<24) -#define BUF_3D_ID_DEPTH (0x7<<24) -#define BUF_3D_USE_FENCE (1<<23) -#define BUF_3D_TILED_SURFACE (1<<22) -#define BUF_3D_TILE_WALK_X 0 -#define BUF_3D_TILE_WALK_Y (1<<21) -#define BUF_3D_PITCH(x) (((x)/4)<<2) -/* Dword 2 */ -#define BUF_3D_ADDR(x) ((x) & ~0x3) - - -/* 3DSTATE_CHROMA_KEY */ - -/* 3DSTATE_CLEAR_PARAMETERS, p150 */ -#define _3DSTATE_CLEAR_PARAMETERS (CMD_3D | (0x1d<<24) | (0x9c<<16) | 5) -/* Dword 1 */ -#define CLEARPARAM_CLEAR_RECT (1 << 16) -#define CLEARPARAM_ZONE_INIT (0 << 16) -#define CLEARPARAM_WRITE_COLOR (1 << 2) -#define CLEARPARAM_WRITE_DEPTH (1 << 1) -#define CLEARPARAM_WRITE_STENCIL (1 << 0) - -/* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */ -#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16)) - - - -/* 3DSTATE_COORD_SET_BINDINGS, p154 */ -#define _3DSTATE_COORD_SET_BINDINGS (CMD_3D | (0x16<<24)) -#define CSB_TCB(iunit, eunit) ((eunit)<<(iunit*3)) - -/* p156 */ -#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16)) - -/* p157 */ -#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16)) - -/* p158 */ -#define _3DSTATE_DFLT_Z_CMD (CMD_3D | (0x1d<<24) | (0x98<<16)) - - -/* 3DSTATE_DEPTH_OFFSET_SCALE, p159 */ -#define _3DSTATE_DEPTH_OFFSET_SCALE (CMD_3D | (0x1d<<24) | (0x97<<16)) -/* scale in dword 1 */ - - -/* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */ -#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<<19) | 0x2) - -/* p161 */ -#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) -/* Dword 1 */ -#define TEX_DEFAULT_COLOR_OGL (0<<30) -#define TEX_DEFAULT_COLOR_D3D (1<<30) -#define ZR_EARLY_DEPTH (1<<29) -#define LOD_PRECLAMP_OGL (1<<28) -#define LOD_PRECLAMP_D3D (0<<28) -#define DITHER_FULL_ALWAYS (0<<26) -#define DITHER_FULL_ON_FB_BLEND (1<<26) -#define DITHER_CLAMPED_ALWAYS (2<<26) -#define LINEAR_GAMMA_BLEND_32BPP (1<<25) -#define DEBUG_DISABLE_ENH_DITHER (1<<24) -#define DSTORG_HORT_BIAS(x) ((x)<<20) -#define DSTORG_VERT_BIAS(x) ((x)<<16) -#define COLOR_4_2_2_CHNL_WRT_ALL 0 -#define COLOR_4_2_2_CHNL_WRT_Y (1<<12) -#define COLOR_4_2_2_CHNL_WRT_CR (2<<12) -#define COLOR_4_2_2_CHNL_WRT_CB (3<<12) -#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12) -#define COLOR_BUF_8BIT 0 -#define COLOR_BUF_RGB555 (1<<8) -#define COLOR_BUF_RGB565 (2<<8) -#define COLOR_BUF_ARGB8888 (3<<8) -#define DEPTH_FRMT_16_FIXED 0 -#define DEPTH_FRMT_16_FLOAT (1<<2) -#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2) -#define VERT_LINE_STRIDE_1 (1<<1) -#define VERT_LINE_STRIDE_0 (0<<1) -#define VERT_LINE_STRIDE_OFS_1 1 -#define VERT_LINE_STRIDE_OFS_0 0 - -/* p166 */ -#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3) -/* Dword 1 */ -#define DRAW_RECT_DIS_DEPTH_OFS (1<<30) -#define DRAW_DITHER_OFS_X(x) ((x)<<26) -#define DRAW_DITHER_OFS_Y(x) ((x)<<24) -/* Dword 2 */ -#define DRAW_YMIN(x) ((x)<<16) -#define DRAW_XMIN(x) (x) -/* Dword 3 */ -#define DRAW_YMAX(x) ((x)<<16) -#define DRAW_XMAX(x) (x) -/* Dword 4 */ -#define DRAW_YORG(x) ((x)<<16) -#define DRAW_XORG(x) (x) - - -/* 3DSTATE_FILTER_COEFFICIENTS_4X4, p170 */ - -/* 3DSTATE_FILTER_COEFFICIENTS_6X5, p172 */ - - -/* _3DSTATE_FOG_COLOR, p173 */ -#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24)) -#define FOG_COLOR_RED(x) ((x)<<16) -#define FOG_COLOR_GREEN(x) ((x)<<8) -#define FOG_COLOR_BLUE(x) (x) - -/* _3DSTATE_FOG_MODE, p174 */ -#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2) -/* Dword 1 */ -#define FMC1_FOGFUNC_MODIFY_ENABLE (1<<31) -#define FMC1_FOGFUNC_VERTEX (0<<28) -#define FMC1_FOGFUNC_PIXEL_EXP (1<<28) -#define FMC1_FOGFUNC_PIXEL_EXP2 (2<<28) -#define FMC1_FOGFUNC_PIXEL_LINEAR (3<<28) -#define FMC1_FOGFUNC_MASK (3<<28) -#define FMC1_FOGINDEX_MODIFY_ENABLE (1<<27) -#define FMC1_FOGINDEX_Z (0<<25) -#define FMC1_FOGINDEX_W (1<<25) -#define FMC1_C1_C2_MODIFY_ENABLE (1<<24) -#define FMC1_DENSITY_MODIFY_ENABLE (1<<23) -#define FMC1_C1_ONE (1<<13) -#define FMC1_C1_MASK (0xffff<<4) -/* Dword 2 */ -#define FMC2_C2_ONE (1<<16) -/* Dword 3 */ -#define FMC3_D_ONE (1<<16) - - - -/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p177 */ -#define _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24)) -#define IAB_MODIFY_ENABLE (1<<23) -#define IAB_ENABLE (1<<22) -#define IAB_MODIFY_FUNC (1<<21) -#define IAB_FUNC_SHIFT 16 -#define IAB_MODIFY_SRC_FACTOR (1<<11) -#define IAB_SRC_FACTOR_SHIFT 6 -#define IAB_SRC_FACTOR_MASK (BLENDFACT_MASK<<6) -#define IAB_MODIFY_DST_FACTOR (1<<5) -#define IAB_DST_FACTOR_SHIFT 0 -#define IAB_DST_FACTOR_MASK (BLENDFACT_MASK<<0) - - -#define BLENDFUNC_ADD 0x0 -#define BLENDFUNC_SUBTRACT 0x1 -#define BLENDFUNC_REVERSE_SUBTRACT 0x2 -#define BLENDFUNC_MIN 0x3 -#define BLENDFUNC_MAX 0x4 -#define BLENDFUNC_MASK 0x7 - -/* 3DSTATE_LOAD_INDIRECT, p180 */ - -#define _3DSTATE_LOAD_INDIRECT (CMD_3D|(0x1d<<24)|(0x7<<16)) -#define LI0_STATE_STATIC_INDIRECT (0x01<<8) -#define LI0_STATE_DYNAMIC_INDIRECT (0x02<<8) -#define LI0_STATE_SAMPLER (0x04<<8) -#define LI0_STATE_MAP (0x08<<8) -#define LI0_STATE_PROGRAM (0x10<<8) -#define LI0_STATE_CONSTANTS (0x20<<8) - -#define SIS0_BUFFER_ADDRESS(x) ((x)&~0x3) -#define SIS0_FORCE_LOAD (1<<1) -#define SIS0_BUFFER_VALID (1<<0) -#define SIS1_BUFFER_LENGTH(x) ((x)&0xff) - -#define DIS0_BUFFER_ADDRESS(x) ((x)&~0x3) -#define DIS0_BUFFER_RESET (1<<1) -#define DIS0_BUFFER_VALID (1<<0) - -#define SSB0_BUFFER_ADDRESS(x) ((x)&~0x3) -#define SSB0_FORCE_LOAD (1<<1) -#define SSB0_BUFFER_VALID (1<<0) -#define SSB1_BUFFER_LENGTH(x) ((x)&0xff) - -#define MSB0_BUFFER_ADDRESS(x) ((x)&~0x3) -#define MSB0_FORCE_LOAD (1<<1) -#define MSB0_BUFFER_VALID (1<<0) -#define MSB1_BUFFER_LENGTH(x) ((x)&0xff) - -#define PSP0_BUFFER_ADDRESS(x) ((x)&~0x3) -#define PSP0_FORCE_LOAD (1<<1) -#define PSP0_BUFFER_VALID (1<<0) -#define PSP1_BUFFER_LENGTH(x) ((x)&0xff) - -#define PSC0_BUFFER_ADDRESS(x) ((x)&~0x3) -#define PSC0_FORCE_LOAD (1<<1) -#define PSC0_BUFFER_VALID (1<<0) -#define PSC1_BUFFER_LENGTH(x) ((x)&0xff) - - - - - -/* _3DSTATE_RASTERIZATION_RULES */ -#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24)) -#define ENABLE_POINT_RASTER_RULE (1<<15) -#define OGL_POINT_RASTER_RULE (1<<13) -#define ENABLE_TEXKILL_3D_4D (1<<10) -#define TEXKILL_3D (0<<9) -#define TEXKILL_4D (1<<9) -#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8) -#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5) -#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6) -#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3) - -/* _3DSTATE_SCISSOR_ENABLE, p256 */ -#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19)) -#define ENABLE_SCISSOR_RECT ((1<<1) | 1) -#define DISABLE_SCISSOR_RECT (1<<1) - -/* _3DSTATE_SCISSOR_RECTANGLE_0, p257 */ -#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1) -/* Dword 1 */ -#define SCISSOR_RECT_0_YMIN(x) ((x)<<16) -#define SCISSOR_RECT_0_XMIN(x) (x) -/* Dword 2 */ -#define SCISSOR_RECT_0_YMAX(x) ((x)<<16) -#define SCISSOR_RECT_0_XMAX(x) (x) - -/* p189 */ -#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 ((0x3<<29)|(0x1d<<24)|(0x04<<16)) -#define I1_LOAD_S(n) (1<<(4+n)) - -#define S0_VB_OFFSET_MASK 0xffffffc -#define S0_AUTO_CACHE_INV_DISABLE (1<<0) - -#define S1_VERTEX_WIDTH_SHIFT 24 -#define S1_VERTEX_WIDTH_MASK (0x3f<<24) -#define S1_VERTEX_PITCH_SHIFT 16 -#define S1_VERTEX_PITCH_MASK (0x3f<<16) - -#define TEXCOORDFMT_2D 0x0 -#define TEXCOORDFMT_3D 0x1 -#define TEXCOORDFMT_4D 0x2 -#define TEXCOORDFMT_1D 0x3 -#define TEXCOORDFMT_2D_16 0x4 -#define TEXCOORDFMT_4D_16 0x5 -#define TEXCOORDFMT_NOT_PRESENT 0xf -#define S2_TEXCOORD_FMT0_MASK 0xf -#define S2_TEXCOORD_FMT1_SHIFT 4 -#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4)) -#define S2_TEXCOORD_NONE (~0) - -/* S3 not interesting */ - -#define S4_POINT_WIDTH_SHIFT 23 -#define S4_POINT_WIDTH_MASK (0x1ff<<23) -#define S4_LINE_WIDTH_SHIFT 19 -#define S4_LINE_WIDTH_ONE (0x2<<19) -#define S4_LINE_WIDTH_MASK (0xf<<19) -#define S4_FLATSHADE_ALPHA (1<<18) -#define S4_FLATSHADE_FOG (1<<17) -#define S4_FLATSHADE_SPECULAR (1<<16) -#define S4_FLATSHADE_COLOR (1<<15) -#define S4_CULLMODE_BOTH (0<<13) -#define S4_CULLMODE_NONE (1<<13) -#define S4_CULLMODE_CW (2<<13) -#define S4_CULLMODE_CCW (3<<13) -#define S4_CULLMODE_MASK (3<<13) -#define S4_VFMT_POINT_WIDTH (1<<12) -#define S4_VFMT_SPEC_FOG (1<<11) -#define S4_VFMT_COLOR (1<<10) -#define S4_VFMT_DEPTH_OFFSET (1<<9) -#define S4_VFMT_XYZ (1<<6) -#define S4_VFMT_XYZW (2<<6) -#define S4_VFMT_XY (3<<6) -#define S4_VFMT_XYW (4<<6) -#define S4_VFMT_XYZW_MASK (7<<6) -#define S4_FORCE_DEFAULT_DIFFUSE (1<<5) -#define S4_FORCE_DEFAULT_SPECULAR (1<<4) -#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3) -#define S4_VFMT_FOG_PARAM (1<<2) -#define S4_SPRITE_POINT_ENABLE (1<<1) -#define S4_LINE_ANTIALIAS_ENABLE (1<<0) - -#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \ - S4_VFMT_SPEC_FOG | \ - S4_VFMT_COLOR | \ - S4_VFMT_DEPTH_OFFSET | \ - S4_VFMT_XYZW_MASK | \ - S4_VFMT_FOG_PARAM) - - -#define S5_WRITEDISABLE_ALPHA (1<<31) -#define S5_WRITEDISABLE_RED (1<<30) -#define S5_WRITEDISABLE_GREEN (1<<29) -#define S5_WRITEDISABLE_BLUE (1<<28) -#define S5_WRITEDISABLE_MASK (0xf<<28) -#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27) -#define S5_LAST_PIXEL_ENABLE (1<<26) -#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25) -#define S5_FOG_ENABLE (1<<24) -#define S5_STENCIL_REF_SHIFT 16 -#define S5_STENCIL_REF_MASK (0xff<<16) -#define S5_STENCIL_TEST_FUNC_SHIFT 13 -#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13) -#define S5_STENCIL_FAIL_SHIFT 10 -#define S5_STENCIL_FAIL_MASK (0x7<<10) -#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7 -#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7) -#define S5_STENCIL_PASS_Z_PASS_SHIFT 4 -#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4) -#define S5_STENCIL_WRITE_ENABLE (1<<3) -#define S5_STENCIL_TEST_ENABLE (1<<2) -#define S5_COLOR_DITHER_ENABLE (1<<1) -#define S5_LOGICOP_ENABLE (1<<0) - - -#define S6_ALPHA_TEST_ENABLE (1<<31) -#define S6_ALPHA_TEST_FUNC_SHIFT 28 -#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28) -#define S6_ALPHA_REF_SHIFT 20 -#define S6_ALPHA_REF_MASK (0xff<<20) -#define S6_DEPTH_TEST_ENABLE (1<<19) -#define S6_DEPTH_TEST_FUNC_SHIFT 16 -#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16) -#define S6_CBUF_BLEND_ENABLE (1<<15) -#define S6_CBUF_BLEND_FUNC_SHIFT 12 -#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12) -#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8 -#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8) -#define S6_CBUF_DST_BLEND_FACT_SHIFT 4 -#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4) -#define S6_DEPTH_WRITE_ENABLE (1<<3) -#define S6_COLOR_WRITE_ENABLE (1<<2) -#define S6_TRISTRIP_PV_SHIFT 0 -#define S6_TRISTRIP_PV_MASK (0x3<<0) - -#define S7_DEPTH_OFFSET_CONST_MASK ~0 - - - -#define DST_BLND_FACT(f) ((f)<= 0.0) ? src1 : src2 */ -#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */ -#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */ -#define A0_FLR (0x10<<24) /* dst = floor(src0) */ -#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */ -#define A0_TRC (0x12<<24) /* dst = int(src0) */ -#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */ -#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */ -#define A0_DEST_SATURATE (1<<22) -#define A0_DEST_TYPE_SHIFT 19 -/* Allow: R, OC, OD, U */ -#define A0_DEST_NR_SHIFT 14 -/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ -#define A0_DEST_CHANNEL_X (1<<10) -#define A0_DEST_CHANNEL_Y (2<<10) -#define A0_DEST_CHANNEL_Z (4<<10) -#define A0_DEST_CHANNEL_W (8<<10) -#define A0_DEST_CHANNEL_ALL (0xf<<10) -#define A0_DEST_CHANNEL_SHIFT 10 -#define A0_SRC0_TYPE_SHIFT 7 -#define A0_SRC0_NR_SHIFT 2 - -#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y) -#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z) - - -#define SRC_X 0 -#define SRC_Y 1 -#define SRC_Z 2 -#define SRC_W 3 -#define SRC_ZERO 4 -#define SRC_ONE 5 - -#define A1_SRC0_CHANNEL_X_NEGATE (1<<31) -#define A1_SRC0_CHANNEL_X_SHIFT 28 -#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27) -#define A1_SRC0_CHANNEL_Y_SHIFT 24 -#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23) -#define A1_SRC0_CHANNEL_Z_SHIFT 20 -#define A1_SRC0_CHANNEL_W_NEGATE (1<<19) -#define A1_SRC0_CHANNEL_W_SHIFT 16 -#define A1_SRC1_TYPE_SHIFT 13 -#define A1_SRC1_NR_SHIFT 8 -#define A1_SRC1_CHANNEL_X_NEGATE (1<<7) -#define A1_SRC1_CHANNEL_X_SHIFT 4 -#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3) -#define A1_SRC1_CHANNEL_Y_SHIFT 0 - -#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31) -#define A2_SRC1_CHANNEL_Z_SHIFT 28 -#define A2_SRC1_CHANNEL_W_NEGATE (1<<27) -#define A2_SRC1_CHANNEL_W_SHIFT 24 -#define A2_SRC2_TYPE_SHIFT 21 -#define A2_SRC2_NR_SHIFT 16 -#define A2_SRC2_CHANNEL_X_NEGATE (1<<15) -#define A2_SRC2_CHANNEL_X_SHIFT 12 -#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11) -#define A2_SRC2_CHANNEL_Y_SHIFT 8 -#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7) -#define A2_SRC2_CHANNEL_Z_SHIFT 4 -#define A2_SRC2_CHANNEL_W_NEGATE (1<<3) -#define A2_SRC2_CHANNEL_W_SHIFT 0 - - - -/* Texture instructions */ -#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared - * sampler and address, and output - * filtered texel data to destination - * register */ -#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a - * perspective divide of the texture - * coordinate .xyz values by .w before - * sampling. */ -#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the - * computed LOD by w. Only S4.6 two's - * comp is used. This implies that a - * float to fixed conversion is - * done. */ -#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling - * operation. Simply kills the pixel - * if any channel of the address - * register is < 0.0. */ -#define T0_DEST_TYPE_SHIFT 19 -/* Allow: R, OC, OD, U */ -/* Note: U (unpreserved) regs do not retain their values between - * phases (cannot be used for feedback) - * - * Note: oC and OD registers can only be used as the destination of a - * texture instruction once per phase (this is an implementation - * restriction). - */ -#define T0_DEST_NR_SHIFT 14 -/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ -#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */ -#define T0_SAMPLER_NR_MASK (0xf<<0) - -#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */ -/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */ -#define T1_ADDRESS_REG_NR_SHIFT 17 -#define T2_MBZ 0 - -/* Declaration instructions */ -#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib) - * register or an s (sampler) - * register. */ -#define D0_SAMPLE_TYPE_SHIFT 22 -#define D0_SAMPLE_TYPE_2D (0x0<<22) -#define D0_SAMPLE_TYPE_CUBE (0x1<<22) -#define D0_SAMPLE_TYPE_VOLUME (0x2<<22) -#define D0_SAMPLE_TYPE_MASK (0x3<<22) - -#define D0_TYPE_SHIFT 19 -/* Allow: T, S */ -#define D0_NR_SHIFT 14 -/* Allow T: 0..10, S: 0..15 */ -#define D0_CHANNEL_X (1<<10) -#define D0_CHANNEL_Y (2<<10) -#define D0_CHANNEL_Z (4<<10) -#define D0_CHANNEL_W (8<<10) -#define D0_CHANNEL_ALL (0xf<<10) -#define D0_CHANNEL_NONE (0<<10) - -#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y) -#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z) - -/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse - * or specular declarations. - * - * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw) - * - * Must be zero for S (sampler) dcls - */ -#define D1_MBZ 0 -#define D2_MBZ 0 - - - -/* p207 */ -#define _3DSTATE_MAP_STATE (CMD_3D|(0x1d<<24)|(0x0<<16)) - -#define MS1_MAPMASK_SHIFT 0 -#define MS1_MAPMASK_MASK (0x8fff<<0) - -#define MS2_UNTRUSTED_SURFACE (1<<31) -#define MS2_ADDRESS_MASK 0xfffffffc -#define MS2_VERTICAL_LINE_STRIDE (1<<1) -#define MS2_VERTICAL_OFFSET (1<<1) - -#define MS3_HEIGHT_SHIFT 21 -#define MS3_WIDTH_SHIFT 10 -#define MS3_PALETTE_SELECT (1<<9) -#define MS3_MAPSURF_FORMAT_SHIFT 7 -#define MS3_MAPSURF_FORMAT_MASK (0x7<<7) -#define MAPSURF_8BIT (1<<7) -#define MAPSURF_16BIT (2<<7) -#define MAPSURF_32BIT (3<<7) -#define MAPSURF_422 (5<<7) -#define MAPSURF_COMPRESSED (6<<7) -#define MAPSURF_4BIT_INDEXED (7<<7) -#define MS3_MT_FORMAT_MASK (0x7 << 3) -#define MS3_MT_FORMAT_SHIFT 3 -#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */ -#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */ -#define MT_8BIT_L8 (1<<3) -#define MT_8BIT_A8 (4<<3) -#define MT_8BIT_MONO8 (5<<3) -#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */ -#define MT_16BIT_ARGB1555 (1<<3) -#define MT_16BIT_ARGB4444 (2<<3) -#define MT_16BIT_AY88 (3<<3) -#define MT_16BIT_88DVDU (5<<3) -#define MT_16BIT_BUMP_655LDVDU (6<<3) -#define MT_16BIT_I16 (7<<3) -#define MT_16BIT_L16 (8<<3) -#define MT_16BIT_A16 (9<<3) -#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */ -#define MT_32BIT_ABGR8888 (1<<3) -#define MT_32BIT_XRGB8888 (2<<3) -#define MT_32BIT_XBGR8888 (3<<3) -#define MT_32BIT_QWVU8888 (4<<3) -#define MT_32BIT_AXVU8888 (5<<3) -#define MT_32BIT_LXVU8888 (6<<3) -#define MT_32BIT_XLVU8888 (7<<3) -#define MT_32BIT_ARGB2101010 (8<<3) -#define MT_32BIT_ABGR2101010 (9<<3) -#define MT_32BIT_AWVU2101010 (0xA<<3) -#define MT_32BIT_GR1616 (0xB<<3) -#define MT_32BIT_VU1616 (0xC<<3) -#define MT_32BIT_xI824 (0xD<<3) -#define MT_32BIT_xA824 (0xE<<3) -#define MT_32BIT_xL824 (0xF<<3) -#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */ -#define MT_422_YCRCB_NORMAL (1<<3) -#define MT_422_YCRCB_SWAPUV (2<<3) -#define MT_422_YCRCB_SWAPUVY (3<<3) -#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */ -#define MT_COMPRESS_DXT2_3 (1<<3) -#define MT_COMPRESS_DXT4_5 (2<<3) -#define MT_COMPRESS_FXT1 (3<<3) -#define MT_COMPRESS_DXT1_RGB (4<<3) -#define MS3_USE_FENCE_REGS (1<<2) -#define MS3_TILED_SURFACE (1<<1) -#define MS3_TILE_WALK (1<<0) - -#define MS4_PITCH_SHIFT 21 -#define MS4_CUBE_FACE_ENA_NEGX (1<<20) -#define MS4_CUBE_FACE_ENA_POSX (1<<19) -#define MS4_CUBE_FACE_ENA_NEGY (1<<18) -#define MS4_CUBE_FACE_ENA_POSY (1<<17) -#define MS4_CUBE_FACE_ENA_NEGZ (1<<16) -#define MS4_CUBE_FACE_ENA_POSZ (1<<15) -#define MS4_CUBE_FACE_ENA_MASK (0x3f<<15) -#define MS4_MAX_LOD_SHIFT 9 -#define MS4_MAX_LOD_MASK (0x3f<<9) -#define MS4_MIP_LAYOUT_LEGACY (0<<8) -#define MS4_MIP_LAYOUT_BELOW_LPT (0<<8) -#define MS4_MIP_LAYOUT_RIGHT_LPT (1<<8) -#define MS4_VOLUME_DEPTH_SHIFT 0 -#define MS4_VOLUME_DEPTH_MASK (0xff<<0) - -/* p244 */ -#define _3DSTATE_SAMPLER_STATE (CMD_3D|(0x1d<<24)|(0x1<<16)) - -#define SS1_MAPMASK_SHIFT 0 -#define SS1_MAPMASK_MASK (0x8fff<<0) - -#define SS2_REVERSE_GAMMA_ENABLE (1<<31) -#define SS2_PACKED_TO_PLANAR_ENABLE (1<<30) -#define SS2_COLORSPACE_CONVERSION (1<<29) -#define SS2_CHROMAKEY_SHIFT 27 -#define SS2_BASE_MIP_LEVEL_SHIFT 22 -#define SS2_BASE_MIP_LEVEL_MASK (0x1f<<22) -#define SS2_MIP_FILTER_SHIFT 20 -#define SS2_MIP_FILTER_MASK (0x3<<20) -#define MIPFILTER_NONE 0 -#define MIPFILTER_NEAREST 1 -#define MIPFILTER_LINEAR 3 -#define SS2_MAG_FILTER_SHIFT 17 -#define SS2_MAG_FILTER_MASK (0x7<<17) -#define FILTER_NEAREST 0 -#define FILTER_LINEAR 1 -#define FILTER_ANISOTROPIC 2 -#define FILTER_4X4_1 3 -#define FILTER_4X4_2 4 -#define FILTER_4X4_FLAT 5 -#define FILTER_6X5_MONO 6 /* XXX - check */ -#define SS2_MIN_FILTER_SHIFT 14 -#define SS2_MIN_FILTER_MASK (0x7<<14) -#define SS2_LOD_BIAS_SHIFT 5 -#define SS2_LOD_BIAS_ONE (0x10<<5) -#define SS2_LOD_BIAS_MASK (0x1ff<<5) -/* Shadow requires: - * MT_X8{I,L,A}24 or MT_{I,L,A}16 texture format - * FILTER_4X4_x MIN and MAG filters - */ -#define SS2_SHADOW_ENABLE (1<<4) -#define SS2_MAX_ANISO_MASK (1<<3) -#define SS2_MAX_ANISO_2 (0<<3) -#define SS2_MAX_ANISO_4 (1<<3) -#define SS2_SHADOW_FUNC_SHIFT 0 -#define SS2_SHADOW_FUNC_MASK (0x7<<0) -/* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */ - -#define SS3_MIN_LOD_SHIFT 24 -#define SS3_MIN_LOD_ONE (0x10<<24) -#define SS3_MIN_LOD_MASK (0xff<<24) -#define SS3_KILL_PIXEL_ENABLE (1<<17) -#define SS3_TCX_ADDR_MODE_SHIFT 12 -#define SS3_TCX_ADDR_MODE_MASK (0x7<<12) -#define TEXCOORDMODE_WRAP 0 -#define TEXCOORDMODE_MIRROR 1 -#define TEXCOORDMODE_CLAMP_EDGE 2 -#define TEXCOORDMODE_CUBE 3 -#define TEXCOORDMODE_CLAMP_BORDER 4 -#define TEXCOORDMODE_MIRROR_ONCE 5 -#define SS3_TCY_ADDR_MODE_SHIFT 9 -#define SS3_TCY_ADDR_MODE_MASK (0x7<<9) -#define SS3_TCZ_ADDR_MODE_SHIFT 6 -#define SS3_TCZ_ADDR_MODE_MASK (0x7<<6) -#define SS3_NORMALIZED_COORDS (1<<5) -#define SS3_TEXTUREMAP_INDEX_SHIFT 1 -#define SS3_TEXTUREMAP_INDEX_MASK (0xf<<1) -#define SS3_DEINTERLACER_ENABLE (1<<0) - -#define SS4_BORDER_COLOR_MASK (~0) - -/* 3DSTATE_SPAN_STIPPLE, p258 - */ -#define _3DSTATE_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16)) -#define ST1_ENABLE (1<<16) -#define ST1_MASK (0xffff) - -#define _3DSTATE_DEFAULT_Z ((0x3<<29)|(0x1d<<24)|(0x98<<16)) -#define _3DSTATE_DEFAULT_DIFFUSE ((0x3<<29)|(0x1d<<24)|(0x99<<16)) -#define _3DSTATE_DEFAULT_SPECULAR ((0x3<<29)|(0x1d<<24)|(0x9a<<16)) - - -#define MI_FLUSH ((0<<29)|(4<<23)) -#define FLUSH_MAP_CACHE (1<<0) -#define INHIBIT_FLUSH_RENDER_CACHE (1<<2) - - -#define CMD_3D (0x3<<29) - - -#define _3DPRIMITIVE ((0x3<<29)|(0x1f<<24)) -#define PRIM_INDIRECT (1<<23) -#define PRIM_INLINE (0<<23) -#define PRIM_INDIRECT_SEQUENTIAL (0<<17) -#define PRIM_INDIRECT_ELTS (1<<17) - -#define PRIM3D_TRILIST (0x0<<18) -#define PRIM3D_TRISTRIP (0x1<<18) -#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) -#define PRIM3D_TRIFAN (0x3<<18) -#define PRIM3D_POLY (0x4<<18) -#define PRIM3D_LINELIST (0x5<<18) -#define PRIM3D_LINESTRIP (0x6<<18) -#define PRIM3D_RECTLIST (0x7<<18) -#define PRIM3D_POINTLIST (0x8<<18) -#define PRIM3D_DIB (0x9<<18) -#define PRIM3D_MASK (0x1f<<18) - -#define I915PACKCOLOR4444(r,g,b,a) \ - ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4)) - -#define I915PACKCOLOR1555(r,g,b,a) \ - ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \ - ((a) ? 0x8000 : 0)) - -#define I915PACKCOLOR565(r,g,b) \ - ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3)) - -#define I915PACKCOLOR8888(r,g,b,a) \ - ((a<<24) | (r<<16) | (g<<8) | b) - - - - -#define BR00_BITBLT_CLIENT 0x40000000 -#define BR00_OP_COLOR_BLT 0x10000000 -#define BR00_OP_SRC_COPY_BLT 0x10C00000 -#define BR13_SOLID_PATTERN 0x80000000 - -#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|0x4) -#define XY_COLOR_BLT_WRITE_ALPHA (1<<21) -#define XY_COLOR_BLT_WRITE_RGB (1<<20) - -#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) -#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21) -#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20) - -#define MI_WAIT_FOR_EVENT ((0x3<<23)) -#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) -#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) - -#define MI_BATCH_BUFFER (0x30<<23) -#define MI_BATCH_BUFFER_START (0x31<<23) -#define MI_BATCH_BUFFER_END (0xa<<23) - - - -#define COMPAREFUNC_ALWAYS 0 -#define COMPAREFUNC_NEVER 0x1 -#define COMPAREFUNC_LESS 0x2 -#define COMPAREFUNC_EQUAL 0x3 -#define COMPAREFUNC_LEQUAL 0x4 -#define COMPAREFUNC_GREATER 0x5 -#define COMPAREFUNC_NOTEQUAL 0x6 -#define COMPAREFUNC_GEQUAL 0x7 - -#define STENCILOP_KEEP 0 -#define STENCILOP_ZERO 0x1 -#define STENCILOP_REPLACE 0x2 -#define STENCILOP_INCRSAT 0x3 -#define STENCILOP_DECRSAT 0x4 -#define STENCILOP_INCR 0x5 -#define STENCILOP_DECR 0x6 -#define STENCILOP_INVERT 0x7 - -#define LOGICOP_CLEAR 0 -#define LOGICOP_NOR 0x1 -#define LOGICOP_AND_INV 0x2 -#define LOGICOP_COPY_INV 0x3 -#define LOGICOP_AND_RVRSE 0x4 -#define LOGICOP_INV 0x5 -#define LOGICOP_XOR 0x6 -#define LOGICOP_NAND 0x7 -#define LOGICOP_AND 0x8 -#define LOGICOP_EQUIV 0x9 -#define LOGICOP_NOOP 0xa -#define LOGICOP_OR_INV 0xb -#define LOGICOP_COPY 0xc -#define LOGICOP_OR_RVRSE 0xd -#define LOGICOP_OR 0xe -#define LOGICOP_SET 0xf - -#define BLENDFACT_ZERO 0x01 -#define BLENDFACT_ONE 0x02 -#define BLENDFACT_SRC_COLR 0x03 -#define BLENDFACT_INV_SRC_COLR 0x04 -#define BLENDFACT_SRC_ALPHA 0x05 -#define BLENDFACT_INV_SRC_ALPHA 0x06 -#define BLENDFACT_DST_ALPHA 0x07 -#define BLENDFACT_INV_DST_ALPHA 0x08 -#define BLENDFACT_DST_COLR 0x09 -#define BLENDFACT_INV_DST_COLR 0x0a -#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b -#define BLENDFACT_CONST_COLOR 0x0c -#define BLENDFACT_INV_CONST_COLOR 0x0d -#define BLENDFACT_CONST_ALPHA 0x0e -#define BLENDFACT_INV_CONST_ALPHA 0x0f -#define BLENDFACT_MASK 0x0f - -#define PCI_CHIP_I915_G 0x2582 -#define PCI_CHIP_I915_GM 0x2592 -#define PCI_CHIP_I945_G 0x2772 -#define PCI_CHIP_I945_GM 0x27A2 -#define PCI_CHIP_I945_GME 0x27AE -#define PCI_CHIP_G33_G 0x29C2 -#define PCI_CHIP_Q35_G 0x29B2 -#define PCI_CHIP_Q33_G 0x29D2 - - -#endif diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c deleted file mode 100644 index c66558c320..0000000000 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ /dev/null @@ -1,298 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "pipe/p_inlines.h" -#include "util/u_memory.h" -#include "util/u_string.h" - -#include "i915_reg.h" -#include "i915_context.h" -#include "i915_screen.h" -#include "i915_buffer.h" -#include "i915_texture.h" -#include "intel_winsys.h" - - -/* - * Probe functions - */ - - -static const char * -i915_get_vendor(struct pipe_screen *screen) -{ - return "VMware, Inc."; -} - -static const char * -i915_get_name(struct pipe_screen *screen) -{ - static char buffer[128]; - const char *chipset; - - switch (i915_screen(screen)->pci_id) { - case PCI_CHIP_I915_G: - chipset = "915G"; - break; - case PCI_CHIP_I915_GM: - chipset = "915GM"; - break; - case PCI_CHIP_I945_G: - chipset = "945G"; - break; - case PCI_CHIP_I945_GM: - chipset = "945GM"; - break; - case PCI_CHIP_I945_GME: - chipset = "945GME"; - break; - case PCI_CHIP_G33_G: - chipset = "G33"; - break; - case PCI_CHIP_Q35_G: - chipset = "Q35"; - break; - case PCI_CHIP_Q33_G: - chipset = "Q33"; - break; - default: - chipset = "unknown"; - break; - } - - util_snprintf(buffer, sizeof(buffer), "i915 (chipset: %s)", chipset); - return buffer; -} - -static int -i915_get_param(struct pipe_screen *screen, int param) -{ - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 8; - case PIPE_CAP_NPOT_TEXTURES: - return 1; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; - case PIPE_CAP_POINT_SPRITE: - return 0; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; - case PIPE_CAP_OCCLUSION_QUERY: - return 0; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 11; /* max 1024x1024 */ - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 8; /* max 128x128x128 */ - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 11; /* max 1024x1024 */ - default: - return 0; - } -} - -static float -i915_get_paramf(struct pipe_screen *screen, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 7.5; - - case PIPE_CAP_MAX_POINT_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 255.0; - - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 4.0; - - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0; - - default: - return 0; - } -} - -static boolean -i915_is_format_supported(struct pipe_screen *screen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned tex_usage, - unsigned geom_flags) -{ - static const enum pipe_format tex_supported[] = { - PIPE_FORMAT_R8G8B8A8_UNORM, - PIPE_FORMAT_A8R8G8B8_UNORM, - PIPE_FORMAT_R5G6B5_UNORM, - PIPE_FORMAT_L8_UNORM, - PIPE_FORMAT_A8_UNORM, - PIPE_FORMAT_I8_UNORM, - PIPE_FORMAT_A8L8_UNORM, - PIPE_FORMAT_YCBCR, - PIPE_FORMAT_YCBCR_REV, - PIPE_FORMAT_S8Z24_UNORM, - PIPE_FORMAT_NONE /* list terminator */ - }; - static const enum pipe_format surface_supported[] = { - PIPE_FORMAT_A8R8G8B8_UNORM, - PIPE_FORMAT_R5G6B5_UNORM, - PIPE_FORMAT_S8Z24_UNORM, - PIPE_FORMAT_NONE /* list terminator */ - }; - const enum pipe_format *list; - uint i; - - if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) - list = surface_supported; - else - list = tex_supported; - - for (i = 0; list[i] != PIPE_FORMAT_NONE; i++) { - if (list[i] == format) - return TRUE; - } - - return FALSE; -} - - -/* - * Fence functions - */ - - -static void -i915_fence_reference(struct pipe_screen *screen, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *fence) -{ - struct i915_screen *is = i915_screen(screen); - - is->iws->fence_reference(is->iws, ptr, fence); -} - -static int -i915_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flags) -{ - struct i915_screen *is = i915_screen(screen); - - return is->iws->fence_signalled(is->iws, fence); -} - -static int -i915_fence_finish(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flags) -{ - struct i915_screen *is = i915_screen(screen); - - return is->iws->fence_finish(is->iws, fence); -} - - -/* - * Generic functions - */ - - -static void -i915_destroy_screen(struct pipe_screen *screen) -{ - struct i915_screen *is = i915_screen(screen); - - if (is->iws) - is->iws->destroy(is->iws); - - FREE(is); -} - -/** - * Create a new i915_screen object - */ -struct pipe_screen * -i915_create_screen(struct intel_winsys *iws, uint pci_id) -{ - struct i915_screen *is = CALLOC_STRUCT(i915_screen); - - if (!is) - return NULL; - - switch (pci_id) { - case PCI_CHIP_I915_G: - case PCI_CHIP_I915_GM: - is->is_i945 = FALSE; - break; - - case PCI_CHIP_I945_G: - case PCI_CHIP_I945_GM: - case PCI_CHIP_I945_GME: - case PCI_CHIP_G33_G: - case PCI_CHIP_Q33_G: - case PCI_CHIP_Q35_G: - is->is_i945 = TRUE; - break; - - default: - debug_printf("%s: unknown pci id 0x%x, cannot create screen\n", - __FUNCTION__, pci_id); - return NULL; - } - - is->pci_id = pci_id; - is->iws = iws; - - is->base.winsys = NULL; - - is->base.destroy = i915_destroy_screen; - - is->base.get_name = i915_get_name; - is->base.get_vendor = i915_get_vendor; - is->base.get_param = i915_get_param; - is->base.get_paramf = i915_get_paramf; - is->base.is_format_supported = i915_is_format_supported; - - is->base.fence_reference = i915_fence_reference; - is->base.fence_signalled = i915_fence_signalled; - is->base.fence_finish = i915_fence_finish; - - i915_init_screen_texture_functions(is); - i915_init_screen_buffer_functions(is); - - return &is->base; -} diff --git a/src/gallium/drivers/i915simple/i915_screen.h b/src/gallium/drivers/i915simple/i915_screen.h deleted file mode 100644 index 5126485caa..0000000000 --- a/src/gallium/drivers/i915simple/i915_screen.h +++ /dev/null @@ -1,80 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef I915_SCREEN_H -#define I915_SCREEN_H - -#include "pipe/p_state.h" -#include "pipe/p_screen.h" - - -struct intel_winsys; - - -/** - * Subclass of pipe_screen - */ -struct i915_screen -{ - struct pipe_screen base; - - struct intel_winsys *iws; - - boolean is_i945; - uint pci_id; -}; - -/** - * Subclass of pipe_transfer - */ -struct i915_transfer -{ - struct pipe_transfer base; - - unsigned offset; -}; - - -/* - * Cast wrappers - */ - - -static INLINE struct i915_screen * -i915_screen(struct pipe_screen *pscreen) -{ - return (struct i915_screen *) pscreen; -} - -static INLINE struct i915_transfer * -i915_transfer(struct pipe_transfer *transfer) -{ - return (struct i915_transfer *)transfer; -} - - -#endif /* I915_SCREEN_H */ diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c deleted file mode 100644 index 7d48e6e84d..0000000000 --- a/src/gallium/drivers/i915simple/i915_state.c +++ /dev/null @@ -1,796 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell - */ - - -#include "draw/draw_context.h" -#include "pipe/internal/p_winsys_screen.h" -#include "pipe/p_inlines.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "tgsi/tgsi_parse.h" - -#include "i915_context.h" -#include "i915_reg.h" -#include "i915_state.h" -#include "i915_state_inlines.h" -#include "i915_fpc.h" - -/* The i915 (and related graphics cores) do not support GL_CLAMP. The - * Intel drivers for "other operating systems" implement GL_CLAMP as - * GL_CLAMP_TO_EDGE, so the same is done here. - */ -static unsigned -translate_wrap_mode(unsigned wrap) -{ - switch (wrap) { - case PIPE_TEX_WRAP_REPEAT: - return TEXCOORDMODE_WRAP; - case PIPE_TEX_WRAP_CLAMP: - return TEXCOORDMODE_CLAMP_EDGE; /* not quite correct */ - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - return TEXCOORDMODE_CLAMP_EDGE; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - return TEXCOORDMODE_CLAMP_BORDER; -// case PIPE_TEX_WRAP_MIRRORED_REPEAT: -// return TEXCOORDMODE_MIRROR; - default: - return TEXCOORDMODE_WRAP; - } -} - -static unsigned translate_img_filter( unsigned filter ) -{ - switch (filter) { - case PIPE_TEX_FILTER_NEAREST: - return FILTER_NEAREST; - case PIPE_TEX_FILTER_LINEAR: - return FILTER_LINEAR; - case PIPE_TEX_FILTER_ANISO: - return FILTER_ANISOTROPIC; - default: - assert(0); - return FILTER_NEAREST; - } -} - -static unsigned translate_mip_filter( unsigned filter ) -{ - switch (filter) { - case PIPE_TEX_MIPFILTER_NONE: - return MIPFILTER_NONE; - case PIPE_TEX_MIPFILTER_NEAREST: - return MIPFILTER_NEAREST; - case PIPE_TEX_MIPFILTER_LINEAR: - return MIPFILTER_LINEAR; - default: - assert(0); - return MIPFILTER_NONE; - } -} - - -/* None of this state is actually used for anything yet. - */ -static void * -i915_create_blend_state(struct pipe_context *pipe, - const struct pipe_blend_state *blend) -{ - struct i915_blend_state *cso_data = CALLOC_STRUCT( i915_blend_state ); - - { - unsigned eqRGB = blend->rgb_func; - unsigned srcRGB = blend->rgb_src_factor; - unsigned dstRGB = blend->rgb_dst_factor; - - unsigned eqA = blend->alpha_func; - unsigned srcA = blend->alpha_src_factor; - unsigned dstA = blend->alpha_dst_factor; - - /* Special handling for MIN/MAX filter modes handled at - * state_tracker level. - */ - - if (srcA != srcRGB || - dstA != dstRGB || - eqA != eqRGB) { - - cso_data->iab = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | - IAB_MODIFY_ENABLE | - IAB_ENABLE | - IAB_MODIFY_FUNC | - IAB_MODIFY_SRC_FACTOR | - IAB_MODIFY_DST_FACTOR | - SRC_ABLND_FACT(i915_translate_blend_factor(srcA)) | - DST_ABLND_FACT(i915_translate_blend_factor(dstA)) | - (i915_translate_blend_func(eqA) << IAB_FUNC_SHIFT)); - } - else { - cso_data->iab = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | - IAB_MODIFY_ENABLE | - 0); - } - } - - cso_data->modes4 |= (_3DSTATE_MODES_4_CMD | - ENABLE_LOGIC_OP_FUNC | - LOGIC_OP_FUNC(i915_translate_logic_op(blend->logicop_func))); - - if (blend->logicop_enable) - cso_data->LIS5 |= S5_LOGICOP_ENABLE; - - if (blend->dither) - cso_data->LIS5 |= S5_COLOR_DITHER_ENABLE; - - if ((blend->colormask & PIPE_MASK_R) == 0) - cso_data->LIS5 |= S5_WRITEDISABLE_RED; - - if ((blend->colormask & PIPE_MASK_G) == 0) - cso_data->LIS5 |= S5_WRITEDISABLE_GREEN; - - if ((blend->colormask & PIPE_MASK_B) == 0) - cso_data->LIS5 |= S5_WRITEDISABLE_BLUE; - - if ((blend->colormask & PIPE_MASK_A) == 0) - cso_data->LIS5 |= S5_WRITEDISABLE_ALPHA; - - if (blend->blend_enable) { - unsigned funcRGB = blend->rgb_func; - unsigned srcRGB = blend->rgb_src_factor; - unsigned dstRGB = blend->rgb_dst_factor; - - cso_data->LIS6 |= (S6_CBUF_BLEND_ENABLE | - SRC_BLND_FACT(i915_translate_blend_factor(srcRGB)) | - DST_BLND_FACT(i915_translate_blend_factor(dstRGB)) | - (i915_translate_blend_func(funcRGB) << S6_CBUF_BLEND_FUNC_SHIFT)); - } - - return cso_data; -} - -static void i915_bind_blend_state(struct pipe_context *pipe, - void *blend) -{ - struct i915_context *i915 = i915_context(pipe); - draw_flush(i915->draw); - - i915->blend = (struct i915_blend_state*)blend; - - i915->dirty |= I915_NEW_BLEND; -} - - -static void i915_delete_blend_state(struct pipe_context *pipe, void *blend) -{ - FREE(blend); -} - -static void i915_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ) -{ - struct i915_context *i915 = i915_context(pipe); - draw_flush(i915->draw); - - i915->blend_color = *blend_color; - - i915->dirty |= I915_NEW_BLEND; -} - -static void * -i915_create_sampler_state(struct pipe_context *pipe, - const struct pipe_sampler_state *sampler) -{ - struct i915_sampler_state *cso = CALLOC_STRUCT( i915_sampler_state ); - const unsigned ws = sampler->wrap_s; - const unsigned wt = sampler->wrap_t; - const unsigned wr = sampler->wrap_r; - unsigned minFilt, magFilt; - unsigned mipFilt; - - cso->templ = sampler; - - mipFilt = translate_mip_filter(sampler->min_mip_filter); - minFilt = translate_img_filter( sampler->min_img_filter ); - magFilt = translate_img_filter( sampler->mag_img_filter ); - - if (sampler->max_anisotropy > 2.0) { - cso->state[0] |= SS2_MAX_ANISO_4; - } - - { - int b = (int) (sampler->lod_bias * 16.0); - b = CLAMP(b, -256, 255); - cso->state[0] |= ((b << SS2_LOD_BIAS_SHIFT) & SS2_LOD_BIAS_MASK); - } - - /* Shadow: - */ - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) - { - cso->state[0] |= (SS2_SHADOW_ENABLE | - i915_translate_compare_func(sampler->compare_func)); - - minFilt = FILTER_4X4_FLAT; - magFilt = FILTER_4X4_FLAT; - } - - cso->state[0] |= ((minFilt << SS2_MIN_FILTER_SHIFT) | - (mipFilt << SS2_MIP_FILTER_SHIFT) | - (magFilt << SS2_MAG_FILTER_SHIFT)); - - cso->state[1] |= - ((translate_wrap_mode(ws) << SS3_TCX_ADDR_MODE_SHIFT) | - (translate_wrap_mode(wt) << SS3_TCY_ADDR_MODE_SHIFT) | - (translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT)); - - if (sampler->normalized_coords) - cso->state[1] |= SS3_NORMALIZED_COORDS; - - { - int minlod = (int) (16.0 * sampler->min_lod); - int maxlod = (int) (16.0 * sampler->max_lod); - minlod = CLAMP(minlod, 0, 16 * 11); - maxlod = CLAMP(maxlod, 0, 16 * 11); - - if (minlod > maxlod) - maxlod = minlod; - - cso->minlod = minlod; - cso->maxlod = maxlod; - } - - { - ubyte r = float_to_ubyte(sampler->border_color[0]); - ubyte g = float_to_ubyte(sampler->border_color[1]); - ubyte b = float_to_ubyte(sampler->border_color[2]); - ubyte a = float_to_ubyte(sampler->border_color[3]); - cso->state[2] = I915PACKCOLOR8888(r, g, b, a); - } - return cso; -} - -static void i915_bind_sampler_states(struct pipe_context *pipe, - unsigned num, void **sampler) -{ - struct i915_context *i915 = i915_context(pipe); - unsigned i; - - assert(num <= PIPE_MAX_SAMPLERS); - - /* Check for no-op */ - if (num == i915->num_samplers && - !memcmp(i915->sampler, sampler, num * sizeof(void *))) - return; - - draw_flush(i915->draw); - - for (i = 0; i < num; ++i) - i915->sampler[i] = sampler[i]; - for (i = num; i < PIPE_MAX_SAMPLERS; ++i) - i915->sampler[i] = NULL; - - i915->num_samplers = num; - - i915->dirty |= I915_NEW_SAMPLER; -} - -static void i915_delete_sampler_state(struct pipe_context *pipe, - void *sampler) -{ - FREE(sampler); -} - - -/** XXX move someday? Or consolidate all these simple state setters - * into one file. - */ - -static void * -i915_create_depth_stencil_state(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *depth_stencil) -{ - struct i915_depth_stencil_state *cso = CALLOC_STRUCT( i915_depth_stencil_state ); - - { - int testmask = depth_stencil->stencil[0].valuemask & 0xff; - int writemask = depth_stencil->stencil[0].writemask & 0xff; - - cso->stencil_modes4 |= (_3DSTATE_MODES_4_CMD | - ENABLE_STENCIL_TEST_MASK | - STENCIL_TEST_MASK(testmask) | - ENABLE_STENCIL_WRITE_MASK | - STENCIL_WRITE_MASK(writemask)); - } - - if (depth_stencil->stencil[0].enabled) { - int test = i915_translate_compare_func(depth_stencil->stencil[0].func); - int fop = i915_translate_stencil_op(depth_stencil->stencil[0].fail_op); - int dfop = i915_translate_stencil_op(depth_stencil->stencil[0].zfail_op); - int dpop = i915_translate_stencil_op(depth_stencil->stencil[0].zpass_op); - int ref = depth_stencil->stencil[0].ref_value & 0xff; - - cso->stencil_LIS5 |= (S5_STENCIL_TEST_ENABLE | - S5_STENCIL_WRITE_ENABLE | - (ref << S5_STENCIL_REF_SHIFT) | - (test << S5_STENCIL_TEST_FUNC_SHIFT) | - (fop << S5_STENCIL_FAIL_SHIFT) | - (dfop << S5_STENCIL_PASS_Z_FAIL_SHIFT) | - (dpop << S5_STENCIL_PASS_Z_PASS_SHIFT)); - } - - if (depth_stencil->stencil[1].enabled) { - int test = i915_translate_compare_func(depth_stencil->stencil[1].func); - int fop = i915_translate_stencil_op(depth_stencil->stencil[1].fail_op); - int dfop = i915_translate_stencil_op(depth_stencil->stencil[1].zfail_op); - int dpop = i915_translate_stencil_op(depth_stencil->stencil[1].zpass_op); - int ref = depth_stencil->stencil[1].ref_value & 0xff; - int tmask = depth_stencil->stencil[1].valuemask & 0xff; - int wmask = depth_stencil->stencil[1].writemask & 0xff; - - cso->bfo[0] = (_3DSTATE_BACKFACE_STENCIL_OPS | - BFO_ENABLE_STENCIL_FUNCS | - BFO_ENABLE_STENCIL_TWO_SIDE | - BFO_ENABLE_STENCIL_REF | - BFO_STENCIL_TWO_SIDE | - (ref << BFO_STENCIL_REF_SHIFT) | - (test << BFO_STENCIL_TEST_SHIFT) | - (fop << BFO_STENCIL_FAIL_SHIFT) | - (dfop << BFO_STENCIL_PASS_Z_FAIL_SHIFT) | - (dpop << BFO_STENCIL_PASS_Z_PASS_SHIFT)); - - cso->bfo[1] = (_3DSTATE_BACKFACE_STENCIL_MASKS | - BFM_ENABLE_STENCIL_TEST_MASK | - BFM_ENABLE_STENCIL_WRITE_MASK | - (tmask << BFM_STENCIL_TEST_MASK_SHIFT) | - (wmask << BFM_STENCIL_WRITE_MASK_SHIFT)); - } - else { - /* This actually disables two-side stencil: The bit set is a - * modify-enable bit to indicate we are changing the two-side - * setting. Then there is a symbolic zero to show that we are - * setting the flag to zero/off. - */ - cso->bfo[0] = (_3DSTATE_BACKFACE_STENCIL_OPS | - BFO_ENABLE_STENCIL_TWO_SIDE | - 0); - cso->bfo[1] = 0; - } - - if (depth_stencil->depth.enabled) { - int func = i915_translate_compare_func(depth_stencil->depth.func); - - cso->depth_LIS6 |= (S6_DEPTH_TEST_ENABLE | - (func << S6_DEPTH_TEST_FUNC_SHIFT)); - - if (depth_stencil->depth.writemask) - cso->depth_LIS6 |= S6_DEPTH_WRITE_ENABLE; - } - - if (depth_stencil->alpha.enabled) { - int test = i915_translate_compare_func(depth_stencil->alpha.func); - ubyte refByte = float_to_ubyte(depth_stencil->alpha.ref_value); - - cso->depth_LIS6 |= (S6_ALPHA_TEST_ENABLE | - (test << S6_ALPHA_TEST_FUNC_SHIFT) | - (((unsigned) refByte) << S6_ALPHA_REF_SHIFT)); - } - - return cso; -} - -static void i915_bind_depth_stencil_state(struct pipe_context *pipe, - void *depth_stencil) -{ - struct i915_context *i915 = i915_context(pipe); - draw_flush(i915->draw); - - i915->depth_stencil = (const struct i915_depth_stencil_state *)depth_stencil; - - i915->dirty |= I915_NEW_DEPTH_STENCIL; -} - -static void i915_delete_depth_stencil_state(struct pipe_context *pipe, - void *depth_stencil) -{ - FREE(depth_stencil); -} - - -static void i915_set_scissor_state( struct pipe_context *pipe, - const struct pipe_scissor_state *scissor ) -{ - struct i915_context *i915 = i915_context(pipe); - draw_flush(i915->draw); - - memcpy( &i915->scissor, scissor, sizeof(*scissor) ); - i915->dirty |= I915_NEW_SCISSOR; -} - - -static void i915_set_polygon_stipple( struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple ) -{ -} - - - -static void * -i915_create_fs_state(struct pipe_context *pipe, - const struct pipe_shader_state *templ) -{ - struct i915_context *i915 = i915_context(pipe); - struct i915_fragment_shader *ifs = CALLOC_STRUCT(i915_fragment_shader); - if (!ifs) - return NULL; - - ifs->state.tokens = tgsi_dup_tokens(templ->tokens); - - tgsi_scan_shader(templ->tokens, &ifs->info); - - /* The shader's compiled to i915 instructions here */ - i915_translate_fragment_program(i915, ifs); - - return ifs; -} - -static void -i915_bind_fs_state(struct pipe_context *pipe, void *shader) -{ - struct i915_context *i915 = i915_context(pipe); - draw_flush(i915->draw); - - i915->fs = (struct i915_fragment_shader*) shader; - - i915->dirty |= I915_NEW_FS; -} - -static -void i915_delete_fs_state(struct pipe_context *pipe, void *shader) -{ - struct i915_fragment_shader *ifs = (struct i915_fragment_shader *) shader; - - if (ifs->program) - FREE(ifs->program); - ifs->program_len = 0; - - FREE((struct tgsi_token *)ifs->state.tokens); - - FREE(ifs); -} - - -static void * -i915_create_vs_state(struct pipe_context *pipe, - const struct pipe_shader_state *templ) -{ - struct i915_context *i915 = i915_context(pipe); - - /* just pass-through to draw module */ - return draw_create_vertex_shader(i915->draw, templ); -} - -static void i915_bind_vs_state(struct pipe_context *pipe, void *shader) -{ - struct i915_context *i915 = i915_context(pipe); - - /* just pass-through to draw module */ - draw_bind_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader); - - i915->dirty |= I915_NEW_VS; -} - -static void i915_delete_vs_state(struct pipe_context *pipe, void *shader) -{ - struct i915_context *i915 = i915_context(pipe); - - /* just pass-through to draw module */ - draw_delete_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader); -} - -static void i915_set_constant_buffer(struct pipe_context *pipe, - uint shader, uint index, - const struct pipe_constant_buffer *buf) -{ - struct i915_context *i915 = i915_context(pipe); - struct pipe_screen *screen = pipe->screen; - draw_flush(i915->draw); - - assert(shader < PIPE_SHADER_TYPES); - assert(index == 0); - - /* Make a copy of shader constants. - * During fragment program translation we may add additional - * constants to the array. - * - * We want to consider the situation where some user constants - * (ex: a material color) may change frequently but the shader program - * stays the same. In that case we should only be updating the first - * N constants, leaving any extras from shader translation alone. - */ - if (buf) { - void *mapped; - if (buf->buffer && buf->buffer->size && - (mapped = pipe_buffer_map(screen, buf->buffer, - PIPE_BUFFER_USAGE_CPU_READ))) { - memcpy(i915->current.constants[shader], mapped, buf->buffer->size); - pipe_buffer_unmap(screen, buf->buffer); - i915->current.num_user_constants[shader] - = buf->buffer->size / (4 * sizeof(float)); - } - else { - i915->current.num_user_constants[shader] = 0; - } - } - - i915->dirty |= I915_NEW_CONSTANTS; -} - - -static void i915_set_sampler_textures(struct pipe_context *pipe, - unsigned num, - struct pipe_texture **texture) -{ - struct i915_context *i915 = i915_context(pipe); - uint i; - - assert(num <= PIPE_MAX_SAMPLERS); - - /* Check for no-op */ - if (num == i915->num_textures && - !memcmp(i915->texture, texture, num * sizeof(struct pipe_texture *))) - return; - - /* Fixes wrong texture in texobj with VBUF */ - draw_flush(i915->draw); - - for (i = 0; i < num; i++) - pipe_texture_reference((struct pipe_texture **) &i915->texture[i], - texture[i]); - - for (i = num; i < i915->num_textures; i++) - pipe_texture_reference((struct pipe_texture **) &i915->texture[i], - NULL); - - i915->num_textures = num; - - i915->dirty |= I915_NEW_TEXTURE; -} - - - -static void i915_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ - struct i915_context *i915 = i915_context(pipe); - int i; - - draw_flush(i915->draw); - - i915->framebuffer.width = fb->width; - i915->framebuffer.height = fb->height; - i915->framebuffer.nr_cbufs = fb->nr_cbufs; - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - pipe_surface_reference(&i915->framebuffer.cbufs[i], fb->cbufs[i]); - } - pipe_surface_reference(&i915->framebuffer.zsbuf, fb->zsbuf); - - i915->dirty |= I915_NEW_FRAMEBUFFER; -} - - - -static void i915_set_clip_state( struct pipe_context *pipe, - const struct pipe_clip_state *clip ) -{ - struct i915_context *i915 = i915_context(pipe); - draw_flush(i915->draw); - - draw_set_clip_state(i915->draw, clip); - - i915->dirty |= I915_NEW_CLIP; -} - - - -/* Called when driver state tracker notices changes to the viewport - * matrix: - */ -static void i915_set_viewport_state( struct pipe_context *pipe, - const struct pipe_viewport_state *viewport ) -{ - struct i915_context *i915 = i915_context(pipe); - - i915->viewport = *viewport; /* struct copy */ - - /* pass the viewport info to the draw module */ - draw_set_viewport_state(i915->draw, &i915->viewport); - - i915->dirty |= I915_NEW_VIEWPORT; -} - - -static void * -i915_create_rasterizer_state(struct pipe_context *pipe, - const struct pipe_rasterizer_state *rasterizer) -{ - struct i915_rasterizer_state *cso = CALLOC_STRUCT( i915_rasterizer_state ); - - cso->templ = rasterizer; - cso->color_interp = rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; - cso->light_twoside = rasterizer->light_twoside; - cso->ds[0].u = _3DSTATE_DEPTH_OFFSET_SCALE; - cso->ds[1].f = rasterizer->offset_scale; - if (rasterizer->poly_stipple_enable) { - cso->st |= ST1_ENABLE; - } - - if (rasterizer->scissor) - cso->sc[0] = _3DSTATE_SCISSOR_ENABLE_CMD | ENABLE_SCISSOR_RECT; - else - cso->sc[0] = _3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT; - - switch (rasterizer->cull_mode) { - case PIPE_WINDING_NONE: - cso->LIS4 |= S4_CULLMODE_NONE; - break; - case PIPE_WINDING_CW: - cso->LIS4 |= S4_CULLMODE_CW; - break; - case PIPE_WINDING_CCW: - cso->LIS4 |= S4_CULLMODE_CCW; - break; - case PIPE_WINDING_BOTH: - cso->LIS4 |= S4_CULLMODE_BOTH; - break; - } - - { - int line_width = CLAMP((int)(rasterizer->line_width * 2), 1, 0xf); - - cso->LIS4 |= line_width << S4_LINE_WIDTH_SHIFT; - - if (rasterizer->line_smooth) - cso->LIS4 |= S4_LINE_ANTIALIAS_ENABLE; - } - - { - int point_size = CLAMP((int) rasterizer->point_size, 1, 0xff); - - cso->LIS4 |= point_size << S4_POINT_WIDTH_SHIFT; - } - - if (rasterizer->flatshade) { - cso->LIS4 |= (S4_FLATSHADE_ALPHA | - S4_FLATSHADE_COLOR | - S4_FLATSHADE_SPECULAR); - } - - cso->LIS7 = fui( rasterizer->offset_units ); - - - return cso; -} - -static void i915_bind_rasterizer_state( struct pipe_context *pipe, - void *raster ) -{ - struct i915_context *i915 = i915_context(pipe); - - i915->rasterizer = (struct i915_rasterizer_state *)raster; - - /* pass-through to draw module */ - draw_set_rasterizer_state(i915->draw, - (i915->rasterizer ? i915->rasterizer->templ : NULL)); - - i915->dirty |= I915_NEW_RASTERIZER; -} - -static void i915_delete_rasterizer_state(struct pipe_context *pipe, - void *raster) -{ - FREE(raster); -} - -static void i915_set_vertex_buffers(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_buffer *buffers) -{ - struct i915_context *i915 = i915_context(pipe); - /* Because we change state before the draw_set_vertex_buffers call - * we need a flush here, just to be sure. - */ - draw_flush(i915->draw); - - memcpy(i915->vertex_buffer, buffers, count * sizeof(buffers[0])); - i915->num_vertex_buffers = count; - - /* pass-through to draw module */ - draw_set_vertex_buffers(i915->draw, count, buffers); -} - -static void i915_set_vertex_elements(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_element *elements) -{ - struct i915_context *i915 = i915_context(pipe); - /* Because we change state before the draw_set_vertex_buffers call - * we need a flush here, just to be sure. - */ - draw_flush(i915->draw); - - i915->num_vertex_elements = count; - /* pass-through to draw module */ - draw_set_vertex_elements(i915->draw, count, elements); -} - - -static void i915_set_edgeflags(struct pipe_context *pipe, - const unsigned *bitfield) -{ - /* TODO do something here */ -} - -void -i915_init_state_functions( struct i915_context *i915 ) -{ - i915->base.set_edgeflags = i915_set_edgeflags; - i915->base.create_blend_state = i915_create_blend_state; - i915->base.bind_blend_state = i915_bind_blend_state; - i915->base.delete_blend_state = i915_delete_blend_state; - - i915->base.create_sampler_state = i915_create_sampler_state; - i915->base.bind_sampler_states = i915_bind_sampler_states; - i915->base.delete_sampler_state = i915_delete_sampler_state; - - i915->base.create_depth_stencil_alpha_state = i915_create_depth_stencil_state; - i915->base.bind_depth_stencil_alpha_state = i915_bind_depth_stencil_state; - i915->base.delete_depth_stencil_alpha_state = i915_delete_depth_stencil_state; - - i915->base.create_rasterizer_state = i915_create_rasterizer_state; - i915->base.bind_rasterizer_state = i915_bind_rasterizer_state; - i915->base.delete_rasterizer_state = i915_delete_rasterizer_state; - i915->base.create_fs_state = i915_create_fs_state; - i915->base.bind_fs_state = i915_bind_fs_state; - i915->base.delete_fs_state = i915_delete_fs_state; - i915->base.create_vs_state = i915_create_vs_state; - i915->base.bind_vs_state = i915_bind_vs_state; - i915->base.delete_vs_state = i915_delete_vs_state; - - i915->base.set_blend_color = i915_set_blend_color; - i915->base.set_clip_state = i915_set_clip_state; - i915->base.set_constant_buffer = i915_set_constant_buffer; - i915->base.set_framebuffer_state = i915_set_framebuffer_state; - - i915->base.set_polygon_stipple = i915_set_polygon_stipple; - i915->base.set_scissor_state = i915_set_scissor_state; - i915->base.set_sampler_textures = i915_set_sampler_textures; - i915->base.set_viewport_state = i915_set_viewport_state; - i915->base.set_vertex_buffers = i915_set_vertex_buffers; - i915->base.set_vertex_elements = i915_set_vertex_elements; -} diff --git a/src/gallium/drivers/i915simple/i915_state.h b/src/gallium/drivers/i915simple/i915_state.h deleted file mode 100644 index 86c6b0027d..0000000000 --- a/src/gallium/drivers/i915simple/i915_state.h +++ /dev/null @@ -1,50 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell - */ - -#ifndef I915_STATE_H -#define I915_STATE_H - -struct i915_context; - - -struct i915_tracked_state { - unsigned dirty; - void (*update)( struct i915_context * ); -}; - -void i915_update_immediate( struct i915_context *i915 ); -void i915_update_dynamic( struct i915_context *i915 ); -void i915_update_derived( struct i915_context *i915 ); -void i915_update_samplers( struct i915_context *i915 ); -void i915_update_textures(struct i915_context *i915); - -void i915_emit_hardware_state( struct i915_context *i915 ); - -#endif diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c deleted file mode 100644 index 178d4e8781..0000000000 --- a/src/gallium/drivers/i915simple/i915_state_derived.c +++ /dev/null @@ -1,183 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "util/u_memory.h" -#include "pipe/p_shader_tokens.h" -#include "draw/draw_context.h" -#include "draw/draw_vertex.h" -#include "i915_context.h" -#include "i915_state.h" -#include "i915_reg.h" -#include "i915_fpc.h" - - - -/** - * Determine the hardware vertex layout. - * Depends on vertex/fragment shader state. - */ -static void calculate_vertex_layout( struct i915_context *i915 ) -{ - const struct i915_fragment_shader *fs = i915->fs; - const enum interp_mode colorInterp = i915->rasterizer->color_interp; - struct vertex_info vinfo; - boolean texCoords[8], colors[2], fog, needW; - uint i; - int src; - - memset(texCoords, 0, sizeof(texCoords)); - colors[0] = colors[1] = fog = needW = FALSE; - memset(&vinfo, 0, sizeof(vinfo)); - - /* Determine which fragment program inputs are needed. Setup HW vertex - * layout below, in the HW-specific attribute order. - */ - for (i = 0; i < fs->info.num_inputs; i++) { - switch (fs->info.input_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - break; - case TGSI_SEMANTIC_COLOR: - assert(fs->info.input_semantic_index[i] < 2); - colors[fs->info.input_semantic_index[i]] = TRUE; - break; - case TGSI_SEMANTIC_GENERIC: - /* usually a texcoord */ - { - const uint unit = fs->info.input_semantic_index[i]; - assert(unit < 8); - texCoords[unit] = TRUE; - needW = TRUE; - } - break; - case TGSI_SEMANTIC_FOG: - fog = TRUE; - break; - default: - assert(0); - } - } - - - /* pos */ - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_POSITION, 0); - if (needW) { - draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src); - vinfo.hwfmt[0] |= S4_VFMT_XYZW; - vinfo.attrib[0].emit = EMIT_4F; - } - else { - draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src); - vinfo.hwfmt[0] |= S4_VFMT_XYZ; - vinfo.attrib[0].emit = EMIT_3F; - } - - /* hardware point size */ - /* XXX todo */ - - /* primary color */ - if (colors[0]) { - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 0); - draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); - vinfo.hwfmt[0] |= S4_VFMT_COLOR; - } - - /* secondary color */ - if (colors[1]) { - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 1); - draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); - vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG; - } - - /* fog coord, not fog blend factor */ - if (fog) { - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_FOG, 0); - draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); - vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM; - } - - /* texcoords */ - for (i = 0; i < 8; i++) { - uint hwtc; - if (texCoords[i]) { - hwtc = TEXCOORDFMT_4D; - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_GENERIC, i); - draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); - } - else { - hwtc = TEXCOORDFMT_NOT_PRESENT; - } - vinfo.hwfmt[1] |= hwtc << (i * 4); - } - - draw_compute_vertex_size(&vinfo); - - if (memcmp(&i915->current.vertex_info, &vinfo, sizeof(vinfo))) { - /* Need to set this flag so that the LIS2/4 registers get set. - * It also means the i915_update_immediate() function must be called - * after this one, in i915_update_derived(). - */ - i915->dirty |= I915_NEW_VERTEX_FORMAT; - - memcpy(&i915->current.vertex_info, &vinfo, sizeof(vinfo)); - } -} - - - - -/* Hopefully this will remain quite simple, otherwise need to pull in - * something like the state tracker mechanism. - */ -void i915_update_derived( struct i915_context *i915 ) -{ - if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS)) - calculate_vertex_layout( i915 ); - - if (i915->dirty & (I915_NEW_SAMPLER | I915_NEW_TEXTURE)) - i915_update_samplers(i915); - - if (i915->dirty & I915_NEW_TEXTURE) - i915_update_textures(i915); - - if (i915->dirty) - i915_update_immediate( i915 ); - - if (i915->dirty) - i915_update_dynamic( i915 ); - - if (i915->dirty & I915_NEW_FS) { - i915->hardware_dirty |= I915_HW_PROGRAM; /* XXX right? */ - } - - /* HW emit currently references framebuffer state directly: - */ - if (i915->dirty & I915_NEW_FRAMEBUFFER) - i915->hardware_dirty |= I915_HW_STATIC; - - i915->dirty = 0; -} diff --git a/src/gallium/drivers/i915simple/i915_state_dynamic.c b/src/gallium/drivers/i915simple/i915_state_dynamic.c deleted file mode 100644 index 86126a5a15..0000000000 --- a/src/gallium/drivers/i915simple/i915_state_dynamic.c +++ /dev/null @@ -1,310 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "i915_batch.h" -#include "i915_state_inlines.h" -#include "i915_context.h" -#include "i915_reg.h" -#include "i915_state.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "util/u_pack_color.h" - -#define FILE_DEBUG_FLAG DEBUG_STATE - -/* State that we have chosen to store in the DYNAMIC segment of the - * i915 indirect state mechanism. - * - * Can't cache these in the way we do the static state, as there is no - * start/size in the command packet, instead an 'end' value that gets - * incremented. - * - * Additionally, there seems to be a requirement to re-issue the full - * (active) state every time a 4kb boundary is crossed. - */ - -static INLINE void set_dynamic_indirect( struct i915_context *i915, - unsigned offset, - const unsigned *src, - unsigned dwords ) -{ - unsigned i; - - for (i = 0; i < dwords; i++) - i915->current.dynamic[offset + i] = src[i]; - - i915->hardware_dirty |= I915_HW_DYNAMIC; -} - - -/*********************************************************************** - * Modes4: stencil masks and logicop - */ -static void upload_MODES4( struct i915_context *i915 ) -{ - unsigned modes4 = 0; - - /* I915_NEW_STENCIL */ - modes4 |= i915->depth_stencil->stencil_modes4; - /* I915_NEW_BLEND */ - modes4 |= i915->blend->modes4; - - /* Always, so that we know when state is in-active: - */ - set_dynamic_indirect( i915, - I915_DYNAMIC_MODES4, - &modes4, - 1 ); -} - -const struct i915_tracked_state i915_upload_MODES4 = { - I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL, - upload_MODES4 -}; - - - - -/*********************************************************************** - */ - -static void upload_BFO( struct i915_context *i915 ) -{ - set_dynamic_indirect( i915, - I915_DYNAMIC_BFO_0, - &(i915->depth_stencil->bfo[0]), - 2 ); -} - -const struct i915_tracked_state i915_upload_BFO = { - I915_NEW_DEPTH_STENCIL, - upload_BFO -}; - - -/*********************************************************************** - */ - - -static void upload_BLENDCOLOR( struct i915_context *i915 ) -{ - unsigned bc[2]; - - memset( bc, 0, sizeof(bc) ); - - /* I915_NEW_BLEND {_COLOR} - */ - { - const float *color = i915->blend_color.color; - - bc[0] = _3DSTATE_CONST_BLEND_COLOR_CMD; - bc[1] = pack_ui32_float4( color[0], - color[1], - color[2], - color[3] ); - } - - set_dynamic_indirect( i915, - I915_DYNAMIC_BC_0, - bc, - 2 ); -} - -const struct i915_tracked_state i915_upload_BLENDCOLOR = { - I915_NEW_BLEND, - upload_BLENDCOLOR -}; - -/*********************************************************************** - */ - - -static void upload_IAB( struct i915_context *i915 ) -{ - unsigned iab = i915->blend->iab; - - - set_dynamic_indirect( i915, - I915_DYNAMIC_IAB, - &iab, - 1 ); -} - -const struct i915_tracked_state i915_upload_IAB = { - I915_NEW_BLEND, - upload_IAB -}; - - -/*********************************************************************** - */ - - - -static void upload_DEPTHSCALE( struct i915_context *i915 ) -{ - set_dynamic_indirect( i915, - I915_DYNAMIC_DEPTHSCALE_0, - &(i915->rasterizer->ds[0].u), - 2 ); -} - -const struct i915_tracked_state i915_upload_DEPTHSCALE = { - I915_NEW_RASTERIZER, - upload_DEPTHSCALE -}; - - - -/*********************************************************************** - * Polygon stipple - * - * The i915 supports a 4x4 stipple natively, GL wants 32x32. - * Fortunately stipple is usually a repeating pattern. - * - * XXX: does stipple pattern need to be adjusted according to - * the window position? - * - * XXX: possibly need workaround for conform paths test. - */ - -static void upload_STIPPLE( struct i915_context *i915 ) -{ - unsigned st[2]; - - st[0] = _3DSTATE_STIPPLE; - st[1] = 0; - - /* I915_NEW_RASTERIZER - */ - st[1] |= i915->rasterizer->st; - - - /* I915_NEW_STIPPLE - */ - { - const ubyte *mask = (const ubyte *)i915->poly_stipple.stipple; - ubyte p[4]; - - p[0] = mask[12] & 0xf; - p[1] = mask[8] & 0xf; - p[2] = mask[4] & 0xf; - p[3] = mask[0] & 0xf; - - /* Not sure what to do about fallbacks, so for now just dont: - */ - st[1] |= ((p[0] << 0) | - (p[1] << 4) | - (p[2] << 8) | - (p[3] << 12)); - } - - - set_dynamic_indirect( i915, - I915_DYNAMIC_STP_0, - &st[0], - 2 ); -} - - -const struct i915_tracked_state i915_upload_STIPPLE = { - I915_NEW_RASTERIZER | I915_NEW_STIPPLE, - upload_STIPPLE -}; - - - -/*********************************************************************** - * Scissor. - */ -static void upload_SCISSOR_ENABLE( struct i915_context *i915 ) -{ - set_dynamic_indirect( i915, - I915_DYNAMIC_SC_ENA_0, - &(i915->rasterizer->sc[0]), - 1 ); -} - -const struct i915_tracked_state i915_upload_SCISSOR_ENABLE = { - I915_NEW_RASTERIZER, - upload_SCISSOR_ENABLE -}; - - - -static void upload_SCISSOR_RECT( struct i915_context *i915 ) -{ - unsigned x1 = i915->scissor.minx; - unsigned y1 = i915->scissor.miny; - unsigned x2 = i915->scissor.maxx; - unsigned y2 = i915->scissor.maxy; - unsigned sc[3]; - - sc[0] = _3DSTATE_SCISSOR_RECT_0_CMD; - sc[1] = (y1 << 16) | (x1 & 0xffff); - sc[2] = (y2 << 16) | (x2 & 0xffff); - - set_dynamic_indirect( i915, - I915_DYNAMIC_SC_RECT_0, - &sc[0], - 3 ); -} - - -const struct i915_tracked_state i915_upload_SCISSOR_RECT = { - I915_NEW_SCISSOR, - upload_SCISSOR_RECT -}; - - - - - - -static const struct i915_tracked_state *atoms[] = { - &i915_upload_MODES4, - &i915_upload_BFO, - &i915_upload_BLENDCOLOR, - &i915_upload_IAB, - &i915_upload_DEPTHSCALE, - &i915_upload_STIPPLE, - &i915_upload_SCISSOR_ENABLE, - &i915_upload_SCISSOR_RECT -}; - -/* These will be dynamic indirect state commands, but for now just end - * up on the batch buffer with everything else. - */ -void i915_update_dynamic( struct i915_context *i915 ) -{ - int i; - - for (i = 0; i < Elements(atoms); i++) - if (i915->dirty & atoms[i]->dirty) - atoms[i]->update( i915 ); -} - diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c deleted file mode 100644 index a3d4e3b04e..0000000000 --- a/src/gallium/drivers/i915simple/i915_state_emit.c +++ /dev/null @@ -1,402 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "i915_reg.h" -#include "i915_context.h" -#include "i915_batch.h" -#include "i915_reg.h" - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" - -static unsigned translate_format( enum pipe_format format ) -{ - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - return COLOR_BUF_ARGB8888; - case PIPE_FORMAT_R5G6B5_UNORM: - return COLOR_BUF_RGB565; - default: - assert(0); - return 0; - } -} - -static unsigned translate_depth_format( enum pipe_format zformat ) -{ - switch (zformat) { - case PIPE_FORMAT_S8Z24_UNORM: - return DEPTH_FRMT_24_FIXED_8_OTHER; - case PIPE_FORMAT_Z16_UNORM: - return DEPTH_FRMT_16_FIXED; - default: - assert(0); - return 0; - } -} - - -/** - * Examine framebuffer state to determine width, height. - */ -static boolean -framebuffer_size(const struct pipe_framebuffer_state *fb, - uint *width, uint *height) -{ - if (fb->cbufs[0]) { - *width = fb->cbufs[0]->width; - *height = fb->cbufs[0]->height; - return TRUE; - } - else if (fb->zsbuf) { - *width = fb->zsbuf->width; - *height = fb->zsbuf->height; - return TRUE; - } - else { - *width = *height = 0; - return FALSE; - } -} - - -/* Push the state into the sarea and/or texture memory. - */ -void -i915_emit_hardware_state(struct i915_context *i915 ) -{ - /* XXX: there must be an easier way */ - const unsigned dwords = ( 14 + - 7 + - I915_MAX_DYNAMIC + - 8 + - 2 + I915_TEX_UNITS*3 + - 2 + I915_TEX_UNITS*3 + - 2 + I915_MAX_CONSTANT*4 + -#if 0 - i915->current.program_len + -#else - i915->fs->program_len + -#endif - 6 - ) * 3/2; /* plus 50% margin */ - const unsigned relocs = ( I915_TEX_UNITS + - 3 - ) * 3/2; /* plus 50% margin */ - -#if 0 - debug_printf("i915_emit_hardware_state: %d dwords, %d relocs\n", dwords, relocs); -#endif - - if(!BEGIN_BATCH(dwords, relocs)) { - FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(dwords, relocs)); - } - - /* 14 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_INVARIENT) - { - OUT_BATCH(_3DSTATE_AA_CMD | - AA_LINE_ECAAR_WIDTH_ENABLE | - AA_LINE_ECAAR_WIDTH_1_0 | - AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0); - - OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_DFLT_Z_CMD); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | - CSB_TCB(0, 0) | - CSB_TCB(1, 1) | - CSB_TCB(2, 2) | - CSB_TCB(3, 3) | - CSB_TCB(4, 4) | - CSB_TCB(5, 5) | - CSB_TCB(6, 6) | - CSB_TCB(7, 7)); - - OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | - ENABLE_POINT_RASTER_RULE | - OGL_POINT_RASTER_RULE | - ENABLE_LINE_STRIP_PROVOKE_VRTX | - ENABLE_TRI_FAN_PROVOKE_VRTX | - LINE_STRIP_PROVOKE_VRTX(1) | - TRI_FAN_PROVOKE_VRTX(2) | - ENABLE_TEXKILL_3D_4D | - TEXKILL_4D); - - /* Need to initialize this to zero. - */ - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0)); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); - - /* disable indirect state for now - */ - OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); - OUT_BATCH(0); - } - - /* 7 dwords, 1 relocs */ - if (i915->hardware_dirty & I915_HW_IMMEDIATE) - { - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - I1_LOAD_S(0) | - I1_LOAD_S(1) | - I1_LOAD_S(2) | - I1_LOAD_S(4) | - I1_LOAD_S(5) | - I1_LOAD_S(6) | - (5)); - - if(i915->vbo) - OUT_RELOC(i915->vbo, - INTEL_USAGE_VERTEX, - i915->current.immediate[I915_IMMEDIATE_S0]); - else - /* FIXME: we should not do this */ - OUT_BATCH(0); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S1]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S2]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S4]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S5]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S6]); - } - - /* I915_MAX_DYNAMIC dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_DYNAMIC) - { - int i; - for (i = 0; i < I915_MAX_DYNAMIC; i++) { - OUT_BATCH(i915->current.dynamic[i]); - } - } - - /* 8 dwords, 2 relocs */ - if (i915->hardware_dirty & I915_HW_STATIC) - { - struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; - struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; - - if (cbuf_surface) { - unsigned ctile = BUF_3D_USE_FENCE; - struct i915_texture *tex = (struct i915_texture *) - cbuf_surface->texture; - assert(tex); - - if (tex && tex->sw_tiled) { - ctile = BUF_3D_TILED_SURFACE; - } - - OUT_BATCH(_3DSTATE_BUF_INFO_CMD); - - OUT_BATCH(BUF_3D_ID_COLOR_BACK | - BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ - ctile); - - OUT_RELOC(tex->buffer, - INTEL_USAGE_RENDER, - cbuf_surface->offset); - } - - /* What happens if no zbuf?? - */ - if (depth_surface) { - unsigned ztile = BUF_3D_USE_FENCE; - struct i915_texture *tex = (struct i915_texture *) - depth_surface->texture; - assert(tex); - - if (tex && tex->sw_tiled) { - ztile = BUF_3D_TILED_SURFACE; - } - - OUT_BATCH(_3DSTATE_BUF_INFO_CMD); - - OUT_BATCH(BUF_3D_ID_DEPTH | - BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ - ztile); - - OUT_RELOC(tex->buffer, - INTEL_USAGE_RENDER, - depth_surface->offset); - } - - { - unsigned cformat, zformat = 0; - - if (cbuf_surface) - cformat = cbuf_surface->format; - else - cformat = PIPE_FORMAT_A8R8G8B8_UNORM; /* arbitrary */ - cformat = translate_format(cformat); - - if (depth_surface) - zformat = translate_depth_format( i915->framebuffer.zsbuf->format ); - - OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); - OUT_BATCH(DSTORG_HORT_BIAS(0x8) | /* .5 */ - DSTORG_VERT_BIAS(0x8) | /* .5 */ - LOD_PRECLAMP_OGL | - TEX_DEFAULT_COLOR_OGL | - cformat | - zformat ); - } - } - -#if 01 - /* texture images */ - /* 2 + I915_TEX_UNITS*3 dwords, I915_TEX_UNITS relocs */ - if (i915->hardware_dirty & (I915_HW_MAP | I915_HW_SAMPLER)) - { - const uint nr = i915->current.sampler_enable_nr; - if (nr) { - const uint enabled = i915->current.sampler_enable_flags; - uint unit; - uint count = 0; - OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); - OUT_BATCH(enabled); - for (unit = 0; unit < I915_TEX_UNITS; unit++) { - if (enabled & (1 << unit)) { - struct intel_buffer *buf = i915->texture[unit]->buffer; - uint offset = 0; - assert(buf); - - count++; - - OUT_RELOC(buf, INTEL_USAGE_SAMPLER, offset); - OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ - OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ - } - } - assert(count == nr); - } - } -#endif - -#if 01 - /* samplers */ - /* 2 + I915_TEX_UNITS*3 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_SAMPLER) - { - if (i915->current.sampler_enable_nr) { - int i; - - OUT_BATCH( _3DSTATE_SAMPLER_STATE | - (3 * i915->current.sampler_enable_nr) ); - - OUT_BATCH( i915->current.sampler_enable_flags ); - - for (i = 0; i < I915_TEX_UNITS; i++) { - if (i915->current.sampler_enable_flags & (1<current.sampler[i][0] ); - OUT_BATCH( i915->current.sampler[i][1] ); - OUT_BATCH( i915->current.sampler[i][2] ); - } - } - } - } -#endif - - /* constants */ - /* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_PROGRAM) - { - /* Collate the user-defined constants with the fragment shader's - * immediates according to the constant_flags[] array. - */ - const uint nr = i915->fs->num_constants; - if (nr) { - uint i; - - OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); - OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) ); - - for (i = 0; i < nr; i++) { - const uint *c; - if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { - /* grab user-defined constant */ - c = (uint *) i915->current.constants[PIPE_SHADER_FRAGMENT][i]; - } - else { - /* emit program constant */ - c = (uint *) i915->fs->constants[i]; - } -#if 0 /* debug */ - { - float *f = (float *) c; - printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], - (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER - ? "user" : "immediate")); - } -#endif - OUT_BATCH(*c++); - OUT_BATCH(*c++); - OUT_BATCH(*c++); - OUT_BATCH(*c++); - } - } - } - - /* Fragment program */ - /* i915->current.program_len dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_PROGRAM) - { - uint i; - /* we should always have, at least, a pass-through program */ - assert(i915->fs->program_len > 0); - for (i = 0; i < i915->fs->program_len; i++) { - OUT_BATCH(i915->fs->program[i]); - } - } - - /* drawing surface size */ - /* 6 dwords, 0 relocs */ - { - uint w, h; - boolean k = framebuffer_size(&i915->framebuffer, &w, &h); - (void)k; - assert(k); - - OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(((w - 1) & 0xffff) | ((h - 1) << 16)); - OUT_BATCH(0); - OUT_BATCH(0); - } - - - i915->hardware_dirty = 0; -} diff --git a/src/gallium/drivers/i915simple/i915_state_immediate.c b/src/gallium/drivers/i915simple/i915_state_immediate.c deleted file mode 100644 index 8c16bb4e27..0000000000 --- a/src/gallium/drivers/i915simple/i915_state_immediate.c +++ /dev/null @@ -1,225 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "i915_state_inlines.h" -#include "i915_context.h" -#include "i915_state.h" -#include "i915_reg.h" -#include "util/u_memory.h" - - -/* All state expressable with the LOAD_STATE_IMMEDIATE_1 packet. - * Would like to opportunistically recombine all these fragments into - * a single packet containing only what has changed, but for now emit - * as multiple packets. - */ - - - - -/*********************************************************************** - * S0,S1: Vertex buffer state. - */ -static void upload_S0S1(struct i915_context *i915) -{ - unsigned LIS0, LIS1; - - /* INTEL_NEW_VBO */ - /* TODO: re-use vertex buffers here? */ - LIS0 = i915->vbo_offset; - - /* INTEL_NEW_VERTEX_SIZE -- do this where the vertex size is calculated! - */ - { - unsigned vertex_size = i915->current.vertex_info.size; - - LIS1 = ((vertex_size << 24) | - (vertex_size << 16)); - } - - /* INTEL_NEW_VBO */ - /* TODO: use a vertex generation number to track vbo changes */ - if (1 || - i915->current.immediate[I915_IMMEDIATE_S0] != LIS0 || - i915->current.immediate[I915_IMMEDIATE_S1] != LIS1) - { - i915->current.immediate[I915_IMMEDIATE_S0] = LIS0; - i915->current.immediate[I915_IMMEDIATE_S1] = LIS1; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } -} - -const struct i915_tracked_state i915_upload_S0S1 = { - I915_NEW_VBO | I915_NEW_VERTEX_FORMAT, - upload_S0S1 -}; - - - - -/*********************************************************************** - * S4: Vertex format, rasterization state - */ -static void upload_S2S4(struct i915_context *i915) -{ - unsigned LIS2, LIS4; - - /* I915_NEW_VERTEX_FORMAT */ - { - LIS2 = i915->current.vertex_info.hwfmt[1]; - LIS4 = i915->current.vertex_info.hwfmt[0]; - /* - debug_printf("LIS2: 0x%x LIS4: 0x%x\n", LIS2, LIS4); - */ - assert(LIS4); /* should never be zero? */ - } - - LIS4 |= i915->rasterizer->LIS4; - - if (LIS2 != i915->current.immediate[I915_IMMEDIATE_S2] || - LIS4 != i915->current.immediate[I915_IMMEDIATE_S4]) { - - i915->current.immediate[I915_IMMEDIATE_S2] = LIS2; - i915->current.immediate[I915_IMMEDIATE_S4] = LIS4; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } -} - - -const struct i915_tracked_state i915_upload_S2S4 = { - I915_NEW_RASTERIZER | I915_NEW_VERTEX_FORMAT, - upload_S2S4 -}; - - - -/*********************************************************************** - * - */ -static void upload_S5( struct i915_context *i915 ) -{ - unsigned LIS5 = 0; - - LIS5 |= i915->depth_stencil->stencil_LIS5; - - LIS5 |= i915->blend->LIS5; - -#if 0 - /* I915_NEW_RASTERIZER */ - if (i915->state.Polygon->OffsetFill) { - LIS5 |= S5_GLOBAL_DEPTH_OFFSET_ENABLE; - } -#endif - - - if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) { - i915->current.immediate[I915_IMMEDIATE_S5] = LIS5; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } -} - -const struct i915_tracked_state i915_upload_S5 = { - (I915_NEW_DEPTH_STENCIL | I915_NEW_BLEND | I915_NEW_RASTERIZER), - upload_S5 -}; - - -/*********************************************************************** - */ -static void upload_S6( struct i915_context *i915 ) -{ - unsigned LIS6 = (2 << S6_TRISTRIP_PV_SHIFT); - - /* I915_NEW_FRAMEBUFFER - */ - if (i915->framebuffer.cbufs[0]) - LIS6 |= S6_COLOR_WRITE_ENABLE; - - /* I915_NEW_BLEND - */ - LIS6 |= i915->blend->LIS6; - - /* I915_NEW_DEPTH - */ - LIS6 |= i915->depth_stencil->depth_LIS6; - - if (LIS6 != i915->current.immediate[I915_IMMEDIATE_S6]) { - i915->current.immediate[I915_IMMEDIATE_S6] = LIS6; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } -} - -const struct i915_tracked_state i915_upload_S6 = { - I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER, - upload_S6 -}; - - -/*********************************************************************** - */ -static void upload_S7( struct i915_context *i915 ) -{ - unsigned LIS7; - - /* I915_NEW_RASTERIZER - */ - LIS7 = i915->rasterizer->LIS7; - - if (LIS7 != i915->current.immediate[I915_IMMEDIATE_S7]) { - i915->current.immediate[I915_IMMEDIATE_S7] = LIS7; - i915->hardware_dirty |= I915_HW_IMMEDIATE; - } -} - -const struct i915_tracked_state i915_upload_S7 = { - I915_NEW_RASTERIZER, - upload_S7 -}; - - -static const struct i915_tracked_state *atoms[] = { - &i915_upload_S0S1, - &i915_upload_S2S4, - &i915_upload_S5, - &i915_upload_S6, - &i915_upload_S7 -}; - -/* - */ -void i915_update_immediate( struct i915_context *i915 ) -{ - int i; - - for (i = 0; i < Elements(atoms); i++) - if (i915->dirty & atoms[i]->dirty) - atoms[i]->update( i915 ); -} diff --git a/src/gallium/drivers/i915simple/i915_state_inlines.h b/src/gallium/drivers/i915simple/i915_state_inlines.h deleted file mode 100644 index 378de8f9c4..0000000000 --- a/src/gallium/drivers/i915simple/i915_state_inlines.h +++ /dev/null @@ -1,230 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef I915_STATE_INLINES_H -#define I915_STATE_INLINES_H - -#include "pipe/p_compiler.h" -#include "pipe/p_defines.h" -#include "i915_reg.h" - - -static INLINE unsigned -i915_translate_compare_func(unsigned func) -{ - switch (func) { - case PIPE_FUNC_NEVER: - return COMPAREFUNC_NEVER; - case PIPE_FUNC_LESS: - return COMPAREFUNC_LESS; - case PIPE_FUNC_LEQUAL: - return COMPAREFUNC_LEQUAL; - case PIPE_FUNC_GREATER: - return COMPAREFUNC_GREATER; - case PIPE_FUNC_GEQUAL: - return COMPAREFUNC_GEQUAL; - case PIPE_FUNC_NOTEQUAL: - return COMPAREFUNC_NOTEQUAL; - case PIPE_FUNC_EQUAL: - return COMPAREFUNC_EQUAL; - case PIPE_FUNC_ALWAYS: - return COMPAREFUNC_ALWAYS; - default: - return COMPAREFUNC_ALWAYS; - } -} - -static INLINE unsigned -i915_translate_stencil_op(unsigned op) -{ - switch (op) { - case PIPE_STENCIL_OP_KEEP: - return STENCILOP_KEEP; - case PIPE_STENCIL_OP_ZERO: - return STENCILOP_ZERO; - case PIPE_STENCIL_OP_REPLACE: - return STENCILOP_REPLACE; - case PIPE_STENCIL_OP_INCR: - return STENCILOP_INCRSAT; - case PIPE_STENCIL_OP_DECR: - return STENCILOP_DECRSAT; - case PIPE_STENCIL_OP_INCR_WRAP: - return STENCILOP_INCR; - case PIPE_STENCIL_OP_DECR_WRAP: - return STENCILOP_DECR; - case PIPE_STENCIL_OP_INVERT: - return STENCILOP_INVERT; - default: - return STENCILOP_ZERO; - } -} - -static INLINE unsigned -i915_translate_blend_factor(unsigned factor) -{ - switch (factor) { - case PIPE_BLENDFACTOR_ZERO: - return BLENDFACT_ZERO; - case PIPE_BLENDFACTOR_SRC_ALPHA: - return BLENDFACT_SRC_ALPHA; - case PIPE_BLENDFACTOR_ONE: - return BLENDFACT_ONE; - case PIPE_BLENDFACTOR_SRC_COLOR: - return BLENDFACT_SRC_COLR; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - return BLENDFACT_INV_SRC_COLR; - case PIPE_BLENDFACTOR_DST_COLOR: - return BLENDFACT_DST_COLR; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - return BLENDFACT_INV_DST_COLR; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - return BLENDFACT_INV_SRC_ALPHA; - case PIPE_BLENDFACTOR_DST_ALPHA: - return BLENDFACT_DST_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - return BLENDFACT_INV_DST_ALPHA; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - return BLENDFACT_SRC_ALPHA_SATURATE; - case PIPE_BLENDFACTOR_CONST_COLOR: - return BLENDFACT_CONST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - return BLENDFACT_INV_CONST_COLOR; - case PIPE_BLENDFACTOR_CONST_ALPHA: - return BLENDFACT_CONST_ALPHA; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - return BLENDFACT_INV_CONST_ALPHA; - default: - return BLENDFACT_ZERO; - } -} - -static INLINE unsigned -i915_translate_blend_func(unsigned mode) -{ - switch (mode) { - case PIPE_BLEND_ADD: - return BLENDFUNC_ADD; - case PIPE_BLEND_MIN: - return BLENDFUNC_MIN; - case PIPE_BLEND_MAX: - return BLENDFUNC_MAX; - case PIPE_BLEND_SUBTRACT: - return BLENDFUNC_SUBTRACT; - case PIPE_BLEND_REVERSE_SUBTRACT: - return BLENDFUNC_REVERSE_SUBTRACT; - default: - return 0; - } -} - - -static INLINE unsigned -i915_translate_logic_op(unsigned opcode) -{ - switch (opcode) { - case PIPE_LOGICOP_CLEAR: - return LOGICOP_CLEAR; - case PIPE_LOGICOP_AND: - return LOGICOP_AND; - case PIPE_LOGICOP_AND_REVERSE: - return LOGICOP_AND_RVRSE; - case PIPE_LOGICOP_COPY: - return LOGICOP_COPY; - case PIPE_LOGICOP_COPY_INVERTED: - return LOGICOP_COPY_INV; - case PIPE_LOGICOP_AND_INVERTED: - return LOGICOP_AND_INV; - case PIPE_LOGICOP_NOOP: - return LOGICOP_NOOP; - case PIPE_LOGICOP_XOR: - return LOGICOP_XOR; - case PIPE_LOGICOP_OR: - return LOGICOP_OR; - case PIPE_LOGICOP_OR_INVERTED: - return LOGICOP_OR_INV; - case PIPE_LOGICOP_NOR: - return LOGICOP_NOR; - case PIPE_LOGICOP_EQUIV: - return LOGICOP_EQUIV; - case PIPE_LOGICOP_INVERT: - return LOGICOP_INV; - case PIPE_LOGICOP_OR_REVERSE: - return LOGICOP_OR_RVRSE; - case PIPE_LOGICOP_NAND: - return LOGICOP_NAND; - case PIPE_LOGICOP_SET: - return LOGICOP_SET; - default: - return LOGICOP_SET; - } -} - - - -static INLINE boolean i915_validate_vertices( unsigned hw_prim, unsigned nr ) -{ - boolean ok; - - switch (hw_prim) { - case PRIM3D_POINTLIST: - ok = (nr >= 1); - assert(ok); - break; - case PRIM3D_LINELIST: - ok = (nr >= 2) && (nr % 2) == 0; - assert(ok); - break; - case PRIM3D_LINESTRIP: - ok = (nr >= 2); - assert(ok); - break; - case PRIM3D_TRILIST: - ok = (nr >= 3) && (nr % 3) == 0; - assert(ok); - break; - case PRIM3D_TRISTRIP: - ok = (nr >= 3); - assert(ok); - break; - case PRIM3D_TRIFAN: - ok = (nr >= 3); - assert(ok); - break; - case PRIM3D_POLY: - ok = (nr >= 3); - assert(ok); - break; - default: - assert(0); - ok = 0; - break; - } - - return ok; -} - -#endif diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c deleted file mode 100644 index c5e9084d12..0000000000 --- a/src/gallium/drivers/i915simple/i915_state_sampler.c +++ /dev/null @@ -1,299 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_context.h" -#include "pipe/p_state.h" -#include "util/u_memory.h" - -#include "i915_state_inlines.h" -#include "i915_context.h" -#include "i915_reg.h" -#include "i915_state.h" - - -/* - * A note about min_lod & max_lod. - * - * There is a circular dependancy between the sampler state - * and the map state to be submitted to hw. - * - * Two condition must be meet: - * min_lod =< max_lod == true - * max_lod =< last_level == true - * - * - * This is all fine and dandy if it where for the fact that max_lod - * is set on the map state instead of the sampler state. That is - * the max_lod we submit on map is: - * max_lod = MIN2(last_level, max_lod); - * - * So we need to update the map state when we change samplers and - * we need to be change the sampler state when map state is changed. - * The first part is done by calling i915_update_texture in - * i915_update_samplers and the second part is done else where in - * code tracking the state changes. - */ - -static void -i915_update_texture(struct i915_context *i915, - uint unit, - const struct i915_texture *tex, - const struct i915_sampler_state *sampler, - uint state[6]); -/** - * Compute i915 texture sampling state. - * - * Recalculate all state from scratch. Perhaps not the most - * efficient, but this has gotten complex enough that we need - * something which is understandable and reliable. - * \param state returns the 3 words of compute state - */ -static void update_sampler(struct i915_context *i915, - uint unit, - const struct i915_sampler_state *sampler, - const struct i915_texture *tex, - unsigned state[3] ) -{ - const struct pipe_texture *pt = &tex->base; - unsigned minlod, lastlod; - - /* Need to do this after updating the maps, which call the - * intel_finalize_mipmap_tree and hence can update firstLevel: - */ - state[0] = sampler->state[0]; - state[1] = sampler->state[1]; - state[2] = sampler->state[2]; - - if (pt->format == PIPE_FORMAT_YCBCR || - pt->format == PIPE_FORMAT_YCBCR_REV) - state[0] |= SS2_COLORSPACE_CONVERSION; - - /* 3D textures don't seem to respect the border color. - * Fallback if there's ever a danger that they might refer to - * it. - * - * Effectively this means fallback on 3D clamp or - * clamp_to_border. - * - * XXX: Check if this is true on i945. - * XXX: Check if this bug got fixed in release silicon. - */ -#if 0 - { - const unsigned ws = sampler->templ->wrap_s; - const unsigned wt = sampler->templ->wrap_t; - const unsigned wr = sampler->templ->wrap_r; - if (pt->target == PIPE_TEXTURE_3D && - (sampler->templ->min_img_filter != PIPE_TEX_FILTER_NEAREST || - sampler->templ->mag_img_filter != PIPE_TEX_FILTER_NEAREST) && - (ws == PIPE_TEX_WRAP_CLAMP || - wt == PIPE_TEX_WRAP_CLAMP || - wr == PIPE_TEX_WRAP_CLAMP || - ws == PIPE_TEX_WRAP_CLAMP_TO_BORDER || - wt == PIPE_TEX_WRAP_CLAMP_TO_BORDER || - wr == PIPE_TEX_WRAP_CLAMP_TO_BORDER)) { - if (i915->conformance_mode > 0) { - assert(0); - /* sampler->fallback = true; */ - /* TODO */ - } - } - } -#endif - - /* See note at the top of file */ - minlod = sampler->minlod; - lastlod = pt->last_level << 4; - - if (lastlod < minlod) { - minlod = lastlod; - } - - state[1] |= (sampler->minlod << SS3_MIN_LOD_SHIFT); - state[1] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT); -} - - -void i915_update_samplers( struct i915_context *i915 ) -{ - uint unit; - - i915->current.sampler_enable_nr = 0; - i915->current.sampler_enable_flags = 0x0; - - for (unit = 0; unit < i915->num_textures && unit < i915->num_samplers; - unit++) { - /* determine unit enable/disable by looking for a bound texture */ - /* could also examine the fragment program? */ - if (i915->texture[unit]) { - update_sampler( i915, - unit, - i915->sampler[unit], /* sampler state */ - i915->texture[unit], /* texture */ - i915->current.sampler[unit] /* the result */ - ); - i915_update_texture( i915, - unit, - i915->texture[unit], /* texture */ - i915->sampler[unit], /* sampler state */ - i915->current.texbuffer[unit] ); - - i915->current.sampler_enable_nr++; - i915->current.sampler_enable_flags |= (1 << unit); - } - } - - i915->hardware_dirty |= I915_HW_SAMPLER | I915_HW_MAP; -} - - -static uint -translate_texture_format(enum pipe_format pipeFormat) -{ - switch (pipeFormat) { - case PIPE_FORMAT_L8_UNORM: - return MAPSURF_8BIT | MT_8BIT_L8; - case PIPE_FORMAT_I8_UNORM: - return MAPSURF_8BIT | MT_8BIT_I8; - case PIPE_FORMAT_A8_UNORM: - return MAPSURF_8BIT | MT_8BIT_A8; - case PIPE_FORMAT_A8L8_UNORM: - return MAPSURF_16BIT | MT_16BIT_AY88; - case PIPE_FORMAT_R5G6B5_UNORM: - return MAPSURF_16BIT | MT_16BIT_RGB565; - case PIPE_FORMAT_A1R5G5B5_UNORM: - return MAPSURF_16BIT | MT_16BIT_ARGB1555; - case PIPE_FORMAT_A4R4G4B4_UNORM: - return MAPSURF_16BIT | MT_16BIT_ARGB4444; - case PIPE_FORMAT_A8R8G8B8_UNORM: - return MAPSURF_32BIT | MT_32BIT_ARGB8888; - case PIPE_FORMAT_YCBCR_REV: - return (MAPSURF_422 | MT_422_YCRCB_NORMAL); - case PIPE_FORMAT_YCBCR: - return (MAPSURF_422 | MT_422_YCRCB_SWAPY); -#if 0 - case PIPE_FORMAT_RGB_FXT1: - case PIPE_FORMAT_RGBA_FXT1: - return (MAPSURF_COMPRESSED | MT_COMPRESS_FXT1); -#endif - case PIPE_FORMAT_Z16_UNORM: - return (MAPSURF_16BIT | MT_16BIT_L16); -#if 0 - case PIPE_FORMAT_RGBA_DXT1: - case PIPE_FORMAT_RGB_DXT1: - return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT1); - case PIPE_FORMAT_RGBA_DXT3: - return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3); - case PIPE_FORMAT_RGBA_DXT5: - return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); -#endif - case PIPE_FORMAT_S8Z24_UNORM: - return (MAPSURF_32BIT | MT_32BIT_xI824); - default: - debug_printf("i915: translate_texture_format() bad image format %x\n", - pipeFormat); - assert(0); - return 0; - } -} - - -static void -i915_update_texture(struct i915_context *i915, - uint unit, - const struct i915_texture *tex, - const struct i915_sampler_state *sampler, - uint state[6]) -{ - const struct pipe_texture *pt = &tex->base; - uint format, pitch; - const uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; - const uint num_levels = pt->last_level; - unsigned max_lod = num_levels * 4; - unsigned tiled = MS3_USE_FENCE_REGS; - - assert(tex); - assert(width); - assert(height); - assert(depth); - - format = translate_texture_format(pt->format); - pitch = tex->stride; - - assert(format); - assert(pitch); - - if (tex->sw_tiled) { - assert(!((pitch - 1) & pitch)); - tiled = MS3_TILED_SURFACE; - } - - /* MS3 state */ - state[0] = - (((height - 1) << MS3_HEIGHT_SHIFT) - | ((width - 1) << MS3_WIDTH_SHIFT) - | format - | tiled); - - /* - * XXX When min_filter != mag_filter and there's just one mipmap level, - * set max_lod = 1 to make sure i915 chooses between min/mag filtering. - */ - - /* See note at the top of file */ - if (max_lod > (sampler->maxlod >> 2)) - max_lod = sampler->maxlod >> 2; - - /* MS4 state */ - state[1] = - ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) - | MS4_CUBE_FACE_ENA_MASK - | ((max_lod) << MS4_MAX_LOD_SHIFT) - | ((depth - 1) << MS4_VOLUME_DEPTH_SHIFT)); -} - - -void -i915_update_textures(struct i915_context *i915) -{ - uint unit; - - for (unit = 0; unit < i915->num_textures && unit < i915->num_samplers; - unit++) { - /* determine unit enable/disable by looking for a bound texture */ - /* could also examine the fragment program? */ - if (i915->texture[unit]) { - i915_update_texture( i915, - unit, - i915->texture[unit], /* texture */ - i915->sampler[unit], /* sampler state */ - i915->current.texbuffer[unit] ); - } - } - - i915->hardware_dirty |= I915_HW_MAP; -} diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c deleted file mode 100644 index ab8331f3e6..0000000000 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ /dev/null @@ -1,94 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "i915_context.h" -#include "i915_blit.h" -#include "i915_state.h" -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/p_inlines.h" -#include "pipe/internal/p_winsys_screen.h" -#include "util/u_tile.h" -#include "util/u_rect.h" - - -/* Assumes all values are within bounds -- no checking at this level - - * do it higher up if required. - */ -static void -i915_surface_copy(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - struct pipe_surface *src, - unsigned srcx, unsigned srcy, unsigned width, unsigned height) -{ - struct i915_texture *dst_tex = (struct i915_texture *)dst->texture; - struct i915_texture *src_tex = (struct i915_texture *)src->texture; - - assert( dst != src ); - assert( dst_tex->base.block.size == src_tex->base.block.size ); - assert( dst_tex->base.block.width == src_tex->base.block.height ); - assert( dst_tex->base.block.height == src_tex->base.block.height ); - assert( dst_tex->base.block.width == 1 ); - assert( dst_tex->base.block.height == 1 ); - - i915_copy_blit( i915_context(pipe), - FALSE, - dst_tex->base.block.size, - (unsigned short) src_tex->stride, src_tex->buffer, src->offset, - (unsigned short) dst_tex->stride, dst_tex->buffer, dst->offset, - (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height ); -} - - -static void -i915_surface_fill(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height, unsigned value) -{ - struct i915_texture *tex = (struct i915_texture *)dst->texture; - - assert(tex->base.block.width == 1); - assert(tex->base.block.height == 1); - - i915_fill_blit( i915_context(pipe), - tex->base.block.size, - (unsigned short) tex->stride, - tex->buffer, dst->offset, - (short) dstx, (short) dsty, - (short) width, (short) height, - value ); -} - - -void -i915_init_surface_functions(struct i915_context *i915) -{ - i915->base.surface_copy = i915_surface_copy; - i915->base.surface_fill = i915_surface_fill; -} diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c deleted file mode 100644 index 286c9ace8e..0000000000 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ /dev/null @@ -1,958 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - /* - * Authors: - * Keith Whitwell - * Michel Dänzer - */ - -#include "pipe/p_state.h" -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/internal/p_winsys_screen.h" -#include "util/u_math.h" -#include "util/u_memory.h" - -#include "i915_context.h" -#include "i915_texture.h" -#include "i915_debug.h" -#include "i915_screen.h" -#include "intel_winsys.h" - - -/* - * Helper function and arrays - */ - - -/** - * Initial offset for Cube map. - */ -static const int initial_offsets[6][2] = { - {0, 0}, - {0, 2}, - {1, 0}, - {1, 2}, - {1, 1}, - {1, 3} -}; - -/** - * Step offsets for Cube map. - */ -static const int step_offsets[6][2] = { - {0, 2}, - {0, 2}, - {-1, 2}, - {-1, 2}, - {-1, 1}, - {-1, 1} -}; - -static unsigned -power_of_two(unsigned x) -{ - unsigned value = 1; - while (value < x) - value = value << 1; - return value; -} - -static unsigned -round_up(unsigned n, unsigned multiple) -{ - return (n + multiple - 1) & ~(multiple - 1); -} - - -/* - * More advanced helper funcs - */ - - -static void -i915_miptree_set_level_info(struct i915_texture *tex, - unsigned level, - unsigned nr_images, - unsigned w, unsigned h, unsigned d) -{ - struct pipe_texture *pt = &tex->base; - - assert(level < PIPE_MAX_TEXTURE_LEVELS); - - pt->width[level] = w; - pt->height[level] = h; - pt->depth[level] = d; - - pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w); - pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h); - - tex->nr_images[level] = nr_images; - - /* - DBG("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, - level, w, h, d, x, y, tex->level_offset[level]); - */ - - /* Not sure when this would happen, but anyway: - */ - if (tex->image_offset[level]) { - FREE(tex->image_offset[level]); - tex->image_offset[level] = NULL; - } - - assert(nr_images); - assert(!tex->image_offset[level]); - - tex->image_offset[level] = (unsigned *) MALLOC(nr_images * sizeof(unsigned)); - tex->image_offset[level][0] = 0; -} - -static void -i915_miptree_set_image_offset(struct i915_texture *tex, - unsigned level, unsigned img, unsigned x, unsigned y) -{ - if (img == 0 && level == 0) - assert(x == 0 && y == 0); - - assert(img < tex->nr_images[level]); - - tex->image_offset[level][img] = y * tex->stride + x * tex->base.block.size; - - /* - printf("%s level %d img %d pos %d,%d image_offset %x\n", - __FUNCTION__, level, img, x, y, tex->image_offset[level][img]); - */ -} - - -/* - * i915 layout functions, some used by i945 - */ - - -/** - * Special case to deal with scanout textures. - */ -static boolean -i915_scanout_layout(struct i915_texture *tex) -{ - struct pipe_texture *pt = &tex->base; - - if (pt->last_level > 0 || pt->block.size != 4) - return FALSE; - - i915_miptree_set_level_info(tex, 0, 1, - tex->base.width[0], - tex->base.height[0], - 1); - i915_miptree_set_image_offset(tex, 0, 0, 0, 0); - - if (tex->base.width[0] >= 240) { - tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); - tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); - tex->hw_tiled = INTEL_TILE_X; - } else if (tex->base.width[0] == 64 && tex->base.height[0] == 64) { - tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); - tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); - } else { - return FALSE; - } - - debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, - tex->base.width[0], tex->base.height[0], pt->block.size, - tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy); - - return TRUE; -} - -/** - * Special case to deal with shared textures. - */ -static boolean -i915_display_target_layout(struct i915_texture *tex) -{ - struct pipe_texture *pt = &tex->base; - - if (pt->last_level > 0 || pt->block.size != 4) - return FALSE; - - /* fallback to normal textures for small textures */ - if (tex->base.width[0] < 240) - return FALSE; - - i915_miptree_set_level_info(tex, 0, 1, - tex->base.width[0], - tex->base.height[0], - 1); - i915_miptree_set_image_offset(tex, 0, 0, 0, 0); - - tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); - tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); - tex->hw_tiled = INTEL_TILE_X; - - debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, - tex->base.width[0], tex->base.height[0], pt->block.size, - tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy); - - return TRUE; -} - -static void -i915_miptree_layout_2d(struct i915_texture *tex) -{ - struct pipe_texture *pt = &tex->base; - unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned nblocksx = pt->nblocksx[0]; - unsigned nblocksy = pt->nblocksy[0]; - - /* used for scanouts that need special layouts */ - if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) - if (i915_scanout_layout(tex)) - return; - - /* for shared buffers we use some very like scanout */ - if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) - if (i915_display_target_layout(tex)) - return; - - tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); - tex->total_nblocksy = 0; - - for (level = 0; level <= pt->last_level; level++) { - i915_miptree_set_level_info(tex, level, 1, width, height, 1); - i915_miptree_set_image_offset(tex, level, 0, 0, tex->total_nblocksy); - - nblocksy = round_up(MAX2(2, nblocksy), 2); - - tex->total_nblocksy += nblocksy; - - width = minify(width); - height = minify(height); - nblocksx = pf_get_nblocksx(&pt->block, width); - nblocksy = pf_get_nblocksy(&pt->block, height); - } -} - -static void -i915_miptree_layout_3d(struct i915_texture *tex) -{ - struct pipe_texture *pt = &tex->base; - unsigned level; - - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; - unsigned nblocksx = pt->nblocksx[0]; - unsigned nblocksy = pt->nblocksy[0]; - unsigned stack_nblocksy = 0; - - /* Calculate the size of a single slice. - */ - tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); - - /* XXX: hardware expects/requires 9 levels at minimum. - */ - for (level = 0; level <= MAX2(8, pt->last_level); level++) { - i915_miptree_set_level_info(tex, level, depth, width, height, depth); - - stack_nblocksy += MAX2(2, nblocksy); - - width = minify(width); - height = minify(height); - depth = minify(depth); - nblocksx = pf_get_nblocksx(&pt->block, width); - nblocksy = pf_get_nblocksy(&pt->block, height); - } - - /* Fixup depth image_offsets: - */ - depth = pt->depth[0]; - for (level = 0; level <= pt->last_level; level++) { - unsigned i; - for (i = 0; i < depth; i++) - i915_miptree_set_image_offset(tex, level, i, 0, i * stack_nblocksy); - - depth = minify(depth); - } - - /* Multiply slice size by texture depth for total size. It's - * remarkable how wasteful of memory the i915 texture layouts - * are. They are largely fixed in the i945. - */ - tex->total_nblocksy = stack_nblocksy * pt->depth[0]; -} - -static void -i915_miptree_layout_cube(struct i915_texture *tex) -{ - struct pipe_texture *pt = &tex->base; - unsigned width = pt->width[0], height = pt->height[0]; - const unsigned nblocks = pt->nblocksx[0]; - unsigned level; - unsigned face; - - assert(width == height); /* cubemap images are square */ - - /* double pitch for cube layouts */ - tex->stride = round_up(nblocks * pt->block.size * 2, 4); - tex->total_nblocksy = nblocks * 4; - - for (level = 0; level <= pt->last_level; level++) { - i915_miptree_set_level_info(tex, level, 6, width, height, 1); - width /= 2; - height /= 2; - } - - for (face = 0; face < 6; face++) { - unsigned x = initial_offsets[face][0] * nblocks; - unsigned y = initial_offsets[face][1] * nblocks; - unsigned d = nblocks; - - for (level = 0; level <= pt->last_level; level++) { - i915_miptree_set_image_offset(tex, level, face, x, y); - d >>= 1; - x += step_offsets[face][0] * d; - y += step_offsets[face][1] * d; - } - } -} - -static boolean -i915_miptree_layout(struct i915_texture * tex) -{ - struct pipe_texture *pt = &tex->base; - - switch (pt->target) { - case PIPE_TEXTURE_1D: - case PIPE_TEXTURE_2D: - i915_miptree_layout_2d(tex); - break; - case PIPE_TEXTURE_3D: - i915_miptree_layout_3d(tex); - break; - case PIPE_TEXTURE_CUBE: - i915_miptree_layout_cube(tex); - break; - default: - assert(0); - return FALSE; - } - - return TRUE; -} - - -/* - * i945 layout functions - */ - - -static void -i945_miptree_layout_2d(struct i915_texture *tex) -{ - struct pipe_texture *pt = &tex->base; - const int align_x = 2, align_y = 4; - unsigned level; - unsigned x = 0; - unsigned y = 0; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned nblocksx = pt->nblocksx[0]; - unsigned nblocksy = pt->nblocksy[0]; - - /* used for scanouts that need special layouts */ - if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) - if (i915_scanout_layout(tex)) - return; - - /* for shared buffers we use some very like scanout */ - if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) - if (i915_display_target_layout(tex)) - return; - - tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); - - /* May need to adjust pitch to accomodate the placement of - * the 2nd mipmap level. This occurs when the alignment - * constraints of mipmap placement push the right edge of the - * 2nd mipmap level out past the width of its parent. - */ - if (pt->last_level > 0) { - unsigned mip1_nblocksx - = align(pf_get_nblocksx(&pt->block, minify(width)), align_x) - + pf_get_nblocksx(&pt->block, minify(minify(width))); - - if (mip1_nblocksx > nblocksx) - tex->stride = mip1_nblocksx * pt->block.size; - } - - /* Pitch must be a whole number of dwords - */ - tex->stride = align(tex->stride, 64); - tex->total_nblocksy = 0; - - for (level = 0; level <= pt->last_level; level++) { - i915_miptree_set_level_info(tex, level, 1, width, height, 1); - i915_miptree_set_image_offset(tex, level, 0, x, y); - - nblocksy = align(nblocksy, align_y); - - /* Because the images are packed better, the final offset - * might not be the maximal one: - */ - tex->total_nblocksy = MAX2(tex->total_nblocksy, y + nblocksy); - - /* Layout_below: step right after second mipmap level. - */ - if (level == 1) { - x += align(nblocksx, align_x); - } - else { - y += nblocksy; - } - - width = minify(width); - height = minify(height); - nblocksx = pf_get_nblocksx(&pt->block, width); - nblocksy = pf_get_nblocksy(&pt->block, height); - } -} - -static void -i945_miptree_layout_3d(struct i915_texture *tex) -{ - struct pipe_texture *pt = &tex->base; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; - unsigned nblocksx = pt->nblocksx[0]; - unsigned nblocksy = pt->nblocksy[0]; - unsigned pack_x_pitch, pack_x_nr; - unsigned pack_y_pitch; - unsigned level; - - tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); - tex->total_nblocksy = 0; - - pack_y_pitch = MAX2(pt->nblocksy[0], 2); - pack_x_pitch = tex->stride / pt->block.size; - pack_x_nr = 1; - - for (level = 0; level <= pt->last_level; level++) { - int x = 0; - int y = 0; - unsigned q, j; - - i915_miptree_set_level_info(tex, level, depth, width, height, depth); - - for (q = 0; q < depth;) { - for (j = 0; j < pack_x_nr && q < depth; j++, q++) { - i915_miptree_set_image_offset(tex, level, q, x, y + tex->total_nblocksy); - x += pack_x_pitch; - } - - x = 0; - y += pack_y_pitch; - } - - tex->total_nblocksy += y; - - if (pack_x_pitch > 4) { - pack_x_pitch >>= 1; - pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr * pt->block.size <= tex->stride); - } - - if (pack_y_pitch > 2) { - pack_y_pitch >>= 1; - } - - width = minify(width); - height = minify(height); - depth = minify(depth); - nblocksx = pf_get_nblocksx(&pt->block, width); - nblocksy = pf_get_nblocksy(&pt->block, height); - } -} - -static void -i945_miptree_layout_cube(struct i915_texture *tex) -{ - struct pipe_texture *pt = &tex->base; - unsigned level; - - const unsigned nblocks = pt->nblocksx[0]; - unsigned face; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - - /* - printf("%s %i, %i\n", __FUNCTION__, pt->width[0], pt->height[0]); - */ - - assert(width == height); /* cubemap images are square */ - - /* - * XXX Should only be used for compressed formats. But lets - * keep this code active just in case. - * - * Depending on the size of the largest images, pitch can be - * determined either by the old-style packing of cubemap faces, - * or the final row of 4x4, 2x2 and 1x1 faces below this. - */ - if (nblocks > 32) - tex->stride = round_up(nblocks * pt->block.size * 2, 4); - else - tex->stride = 14 * 8 * pt->block.size; - - tex->total_nblocksy = nblocks * 4; - - /* Set all the levels to effectively occupy the whole rectangular region. - */ - for (level = 0; level <= pt->last_level; level++) { - i915_miptree_set_level_info(tex, level, 6, width, height, 1); - width /= 2; - height /= 2; - } - - for (face = 0; face < 6; face++) { - unsigned x = initial_offsets[face][0] * nblocks; - unsigned y = initial_offsets[face][1] * nblocks; - unsigned d = nblocks; - -#if 0 /* Fix and enable this code for compressed formats */ - if (nblocks == 4 && face >= 4) { - y = tex->total_height - 4; - x = (face - 4) * 8; - } - else if (nblocks < 4 && (face > 0)) { - y = tex->total_height - 4; - x = face * 8; - } -#endif - - for (level = 0; level <= pt->last_level; level++) { - i915_miptree_set_image_offset(tex, level, face, x, y); - - d >>= 1; - -#if 0 /* Fix and enable this code for compressed formats */ - switch (d) { - case 4: - switch (face) { - case PIPE_TEX_FACE_POS_X: - case PIPE_TEX_FACE_NEG_X: - x += step_offsets[face][0] * d; - y += step_offsets[face][1] * d; - break; - case PIPE_TEX_FACE_POS_Y: - case PIPE_TEX_FACE_NEG_Y: - y += 12; - x -= 8; - break; - case PIPE_TEX_FACE_POS_Z: - case PIPE_TEX_FACE_NEG_Z: - y = tex->total_height - 4; - x = (face - 4) * 8; - break; - } - case 2: - y = tex->total_height - 4; - x = 16 + face * 8; - break; - - case 1: - x += 48; - break; - default: -#endif - x += step_offsets[face][0] * d; - y += step_offsets[face][1] * d; -#if 0 - break; - } -#endif - } - } -} - -static boolean -i945_miptree_layout(struct i915_texture * tex) -{ - struct pipe_texture *pt = &tex->base; - - switch (pt->target) { - case PIPE_TEXTURE_1D: - case PIPE_TEXTURE_2D: - i945_miptree_layout_2d(tex); - break; - case PIPE_TEXTURE_3D: - i945_miptree_layout_3d(tex); - break; - case PIPE_TEXTURE_CUBE: - i945_miptree_layout_cube(tex); - break; - default: - assert(0); - return FALSE; - } - - return TRUE; -} - - -/* - * Screen texture functions - */ - - -static struct pipe_texture * -i915_texture_create(struct pipe_screen *screen, - const struct pipe_texture *templat) -{ - struct i915_screen *is = i915_screen(screen); - struct intel_winsys *iws = is->iws; - struct i915_texture *tex = CALLOC_STRUCT(i915_texture); - size_t tex_size; - unsigned buf_usage = 0; - - if (!tex) - return NULL; - - tex->base = *templat; - pipe_reference_init(&tex->base.reference, 1); - tex->base.screen = screen; - - tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width[0]); - tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]); - - if (is->is_i945) { - if (!i945_miptree_layout(tex)) - goto fail; - } else { - if (!i915_miptree_layout(tex)) - goto fail; - } - - tex_size = tex->stride * tex->total_nblocksy; - - - - /* for scanouts and cursors, cursors arn't scanouts */ - if (templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY && templat->width[0] != 64) - buf_usage = INTEL_NEW_SCANOUT; - else - buf_usage = INTEL_NEW_TEXTURE; - - tex->buffer = iws->buffer_create(iws, tex_size, 64, buf_usage); - if (!tex->buffer) - goto fail; - - /* setup any hw fences */ - if (tex->hw_tiled) { - assert(tex->sw_tiled == INTEL_TILE_NONE); - iws->buffer_set_fence_reg(iws, tex->buffer, tex->stride, tex->hw_tiled); - } - - -#if 0 - void *ptr = ws->buffer_map(ws, tex->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE); - memset(ptr, 0x80, tex_size); - ws->buffer_unmap(ws, tex->buffer); -#endif - - return &tex->base; - -fail: - FREE(tex); - return NULL; -} - -static struct pipe_texture * -i915_texture_blanket(struct pipe_screen * screen, - const struct pipe_texture *base, - const unsigned *stride, - struct pipe_buffer *buffer) -{ -#if 0 - struct i915_texture *tex; - assert(screen); - - /* Only supports one type */ - if (base->target != PIPE_TEXTURE_2D || - base->last_level != 0 || - base->depth[0] != 1) { - return NULL; - } - - tex = CALLOC_STRUCT(i915_texture); - if (!tex) - return NULL; - - tex->base = *base; - pipe_reference_init(&tex->base.reference, 1); - tex->base.screen = screen; - - tex->stride = stride[0]; - - i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1); - i915_miptree_set_image_offset(tex, 0, 0, 0, 0); - - pipe_buffer_reference(&tex->buffer, buffer); - - return &tex->base; -#else - return NULL; -#endif -} - -static void -i915_texture_destroy(struct pipe_texture *pt) -{ - struct i915_texture *tex = (struct i915_texture *)pt; - struct intel_winsys *iws = i915_screen(pt->screen)->iws; - uint i; - - /* - DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); - */ - - iws->buffer_destroy(iws, tex->buffer); - - for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) - if (tex->image_offset[i]) - FREE(tex->image_offset[i]); - - FREE(tex); -} - - -/* - * Screen surface functions - */ - - -static struct pipe_surface * -i915_get_tex_surface(struct pipe_screen *screen, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned flags) -{ - struct i915_texture *tex = (struct i915_texture *)pt; - struct pipe_surface *ps; - unsigned offset; /* in bytes */ - - if (pt->target == PIPE_TEXTURE_CUBE) { - offset = tex->image_offset[level][face]; - } - else if (pt->target == PIPE_TEXTURE_3D) { - offset = tex->image_offset[level][zslice]; - } - else { - offset = tex->image_offset[level][0]; - assert(face == 0); - assert(zslice == 0); - } - - ps = CALLOC_STRUCT(pipe_surface); - if (ps) { - pipe_reference_init(&ps->reference, 1); - pipe_texture_reference(&ps->texture, pt); - ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; - ps->offset = offset; - ps->usage = flags; - } - return ps; -} - -static void -i915_tex_surface_destroy(struct pipe_surface *surf) -{ - pipe_texture_reference(&surf->texture, NULL); - FREE(surf); -} - - -/* - * Screen transfer functions - */ - - -static struct pipe_transfer* -i915_get_tex_transfer(struct pipe_screen *screen, - struct pipe_texture *texture, - unsigned face, unsigned level, unsigned zslice, - enum pipe_transfer_usage usage, unsigned x, unsigned y, - unsigned w, unsigned h) -{ - struct i915_texture *tex = (struct i915_texture *)texture; - struct i915_transfer *trans; - unsigned offset; /* in bytes */ - - if (texture->target == PIPE_TEXTURE_CUBE) { - offset = tex->image_offset[level][face]; - } - else if (texture->target == PIPE_TEXTURE_3D) { - offset = tex->image_offset[level][zslice]; - } - else { - offset = tex->image_offset[level][0]; - assert(face == 0); - assert(zslice == 0); - } - - trans = CALLOC_STRUCT(i915_transfer); - if (trans) { - pipe_texture_reference(&trans->base.texture, texture); - trans->base.format = trans->base.format; - trans->base.x = x; - trans->base.y = y; - trans->base.width = w; - trans->base.height = h; - trans->base.block = texture->block; - trans->base.nblocksx = texture->nblocksx[level]; - trans->base.nblocksy = texture->nblocksy[level]; - trans->base.stride = tex->stride; - trans->offset = offset; - trans->base.usage = usage; - } - return &trans->base; -} - -static void * -i915_transfer_map(struct pipe_screen *screen, - struct pipe_transfer *transfer) -{ - struct i915_texture *tex = (struct i915_texture *)transfer->texture; - struct intel_winsys *iws = i915_screen(tex->base.screen)->iws; - char *map; - boolean write = FALSE; - - if (transfer->usage & PIPE_TRANSFER_WRITE) - write = TRUE; - - map = iws->buffer_map(iws, tex->buffer, write); - if (map == NULL) - return NULL; - - return map + i915_transfer(transfer)->offset + - transfer->y / transfer->block.height * transfer->stride + - transfer->x / transfer->block.width * transfer->block.size; -} - -static void -i915_transfer_unmap(struct pipe_screen *screen, - struct pipe_transfer *transfer) -{ - struct i915_texture *tex = (struct i915_texture *)transfer->texture; - struct intel_winsys *iws = i915_screen(tex->base.screen)->iws; - iws->buffer_unmap(iws, tex->buffer); -} - -static void -i915_tex_transfer_destroy(struct pipe_transfer *trans) -{ - pipe_texture_reference(&trans->texture, NULL); - FREE(trans); -} - - -/* - * Other texture functions - */ - - -void -i915_init_screen_texture_functions(struct i915_screen *is) -{ - is->base.texture_create = i915_texture_create; - is->base.texture_blanket = i915_texture_blanket; - is->base.texture_destroy = i915_texture_destroy; - is->base.get_tex_surface = i915_get_tex_surface; - is->base.tex_surface_destroy = i915_tex_surface_destroy; - is->base.get_tex_transfer = i915_get_tex_transfer; - is->base.transfer_map = i915_transfer_map; - is->base.transfer_unmap = i915_transfer_unmap; - is->base.tex_transfer_destroy = i915_tex_transfer_destroy; -} - -struct pipe_texture * -i915_texture_blanket_intel(struct pipe_screen *screen, - struct pipe_texture *base, - unsigned stride, - struct intel_buffer *buffer) -{ - struct i915_texture *tex; - assert(screen); - - /* Only supports one type */ - if (base->target != PIPE_TEXTURE_2D || - base->last_level != 0 || - base->depth[0] != 1) { - return NULL; - } - - tex = CALLOC_STRUCT(i915_texture); - if (!tex) - return NULL; - - tex->base = *base; - pipe_reference_init(&tex->base.reference, 1); - tex->base.screen = screen; - - tex->stride = stride; - - i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1); - i915_miptree_set_image_offset(tex, 0, 0, 0, 0); - - tex->buffer = buffer; - - return &tex->base; -} - -boolean -i915_get_texture_buffer_intel(struct pipe_texture *texture, - struct intel_buffer **buffer, - unsigned *stride) -{ - struct i915_texture *tex = (struct i915_texture *)texture; - - if (!texture) - return FALSE; - - *stride = tex->stride; - *buffer = tex->buffer; - - return TRUE; -} diff --git a/src/gallium/drivers/i915simple/i915_texture.h b/src/gallium/drivers/i915simple/i915_texture.h deleted file mode 100644 index 51a1dd984c..0000000000 --- a/src/gallium/drivers/i915simple/i915_texture.h +++ /dev/null @@ -1,36 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef I915_TEXTURE_H -#define I915_TEXTURE_H - -struct i915_screen; - -extern void -i915_init_screen_texture_functions(struct i915_screen *is); - -#endif /* I915_TEXTURE_H */ diff --git a/src/gallium/drivers/i915simple/intel_batchbuffer.h b/src/gallium/drivers/i915simple/intel_batchbuffer.h deleted file mode 100644 index db12dfd2ac..0000000000 --- a/src/gallium/drivers/i915simple/intel_batchbuffer.h +++ /dev/null @@ -1,87 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef INTEL_BATCH_H -#define INTEL_BATCH_H - -#include "intel_winsys.h" - -static INLINE boolean -intel_batchbuffer_check(struct intel_batchbuffer *batch, - size_t dwords, - size_t relocs) -{ - return dwords * 4 <= batch->size - (batch->ptr - batch->map) && - relocs <= (batch->max_relocs - batch->relocs); -} - -static INLINE size_t -intel_batchbuffer_space(struct intel_batchbuffer *batch) -{ - return batch->size - (batch->ptr - batch->map); -} - -static INLINE void -intel_batchbuffer_dword(struct intel_batchbuffer *batch, - unsigned dword) -{ - if (intel_batchbuffer_space(batch) < 4) - return; - - *(unsigned *)batch->ptr = dword; - batch->ptr += 4; -} - -static INLINE void -intel_batchbuffer_write(struct intel_batchbuffer *batch, - void *data, - size_t size) -{ - if (intel_batchbuffer_space(batch) < size) - return; - - memcpy(data, batch->ptr, size); - batch->ptr += size; -} - -static INLINE int -intel_batchbuffer_reloc(struct intel_batchbuffer *batch, - struct intel_buffer *buffer, - enum intel_buffer_usage usage, - size_t offset) -{ - return batch->iws->batchbuffer_reloc(batch, buffer, usage, offset); -} - -static INLINE void -intel_batchbuffer_flush(struct intel_batchbuffer *batch, - struct pipe_fence_handle **fence) -{ - batch->iws->batchbuffer_flush(batch, fence); -} - -#endif diff --git a/src/gallium/drivers/i915simple/intel_winsys.h b/src/gallium/drivers/i915simple/intel_winsys.h deleted file mode 100644 index 42c5e7470e..0000000000 --- a/src/gallium/drivers/i915simple/intel_winsys.h +++ /dev/null @@ -1,230 +0,0 @@ -/************************************************************************** - * - * Copyright © 2009 Jakob Bornecrantz - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef INTEL_WINSYS_H -#define INTEL_WINSYS_H - -#include "pipe/p_compiler.h" - -struct intel_winsys; -struct intel_buffer; -struct intel_batchbuffer; -struct pipe_texture; -struct pipe_fence_handle; - -enum intel_buffer_usage -{ - /* use on textures */ - INTEL_USAGE_RENDER = 0x01, - INTEL_USAGE_SAMPLER = 0x02, - INTEL_USAGE_2D_TARGET = 0x04, - INTEL_USAGE_2D_SOURCE = 0x08, - /* use on vertex */ - INTEL_USAGE_VERTEX = 0x10, -}; - -enum intel_buffer_type -{ - INTEL_NEW_TEXTURE, - INTEL_NEW_SCANOUT, /**< a texture used for scanning out from */ - INTEL_NEW_VERTEX, -}; - -enum intel_buffer_tile -{ - INTEL_TILE_NONE, - INTEL_TILE_X, - INTEL_TILE_Y, -}; - -struct intel_batchbuffer { - - struct intel_winsys *iws; - - /** - * Values exported to speed up the writing the batchbuffer, - * instead of having to go trough a accesor function for - * each dword written. - */ - /*{@*/ - uint8_t *map; - uint8_t *ptr; - size_t size; - - size_t relocs; - size_t max_relocs; - /*@}*/ -}; - -struct intel_winsys { - - /** - * Batchbuffer functions. - */ - /*@{*/ - /** - * Create a new batchbuffer. - */ - struct intel_batchbuffer *(*batchbuffer_create)(struct intel_winsys *iws); - - /** - * Emit a relocation to a buffer. - * Target position in batchbuffer is the same as ptr. - * - * @batch - * @reloc buffer address to be inserted into target. - * @usage how is the hardware going to use the buffer. - * @offset add this to the reloc buffers address - * @target buffer where to write the address, null for batchbuffer. - */ - int (*batchbuffer_reloc)(struct intel_batchbuffer *batch, - struct intel_buffer *reloc, - enum intel_buffer_usage usage, - unsigned offset); - - /** - * Flush a bufferbatch. - */ - void (*batchbuffer_flush)(struct intel_batchbuffer *batch, - struct pipe_fence_handle **fence); - - /** - * Destroy a batchbuffer. - */ - void (*batchbuffer_destroy)(struct intel_batchbuffer *batch); - /*@}*/ - - - /** - * Buffer functions. - */ - /*@{*/ - /** - * Create a buffer. - */ - struct intel_buffer *(*buffer_create)(struct intel_winsys *iws, - unsigned size, unsigned alignment, - enum intel_buffer_type type); - - /** - * Fence a buffer with a fence reg. - * Not to be confused with pipe_fence_handle. - */ - int (*buffer_set_fence_reg)(struct intel_winsys *iws, - struct intel_buffer *buffer, - unsigned stride, - enum intel_buffer_tile tile); - - /** - * Map a buffer. - */ - void *(*buffer_map)(struct intel_winsys *iws, - struct intel_buffer *buffer, - boolean write); - - /** - * Unmap a buffer. - */ - void (*buffer_unmap)(struct intel_winsys *iws, - struct intel_buffer *buffer); - - /** - * Write to a buffer. - * - * Arguments follows pwrite(2) - */ - int (*buffer_write)(struct intel_winsys *iws, - struct intel_buffer *dst, - const void *src, - size_t size, - size_t offset); - - void (*buffer_destroy)(struct intel_winsys *iws, - struct intel_buffer *buffer); - /*@}*/ - - - /** - * Fence functions. - */ - /*@{*/ - /** - * Reference fence and set ptr to fence. - */ - void (*fence_reference)(struct intel_winsys *iws, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *fence); - - /** - * Check if a fence has finished. - */ - int (*fence_signalled)(struct intel_winsys *iws, - struct pipe_fence_handle *fence); - - /** - * Wait on a fence to finish. - */ - int (*fence_finish)(struct intel_winsys *iws, - struct pipe_fence_handle *fence); - /*@}*/ - - - /** - * Destroy the winsys. - */ - void (*destroy)(struct intel_winsys *iws); -}; - - -/** - * Create i915 pipe_screen. - */ -struct pipe_screen *i915_create_screen(struct intel_winsys *iws, unsigned pci_id); - -/** - * Create a i915 pipe_context. - */ -struct pipe_context *i915_create_context(struct pipe_screen *screen); - -/** - * Get the intel_winsys buffer backing the texture. - * - * TODO UGLY - */ -boolean i915_get_texture_buffer_intel(struct pipe_texture *texture, - struct intel_buffer **buffer, - unsigned *stride); - -/** - * Wrap a intel_winsys buffer with a texture blanket. - * - * TODO UGLY - */ -struct pipe_texture * i915_texture_blanket_intel(struct pipe_screen *screen, - struct pipe_texture *tmplt, - unsigned pitch, - struct intel_buffer *buffer); - -#endif diff --git a/src/gallium/winsys/drm/intel/dri/Makefile b/src/gallium/winsys/drm/intel/dri/Makefile index 5e212b62a4..c0ecd9680e 100644 --- a/src/gallium/winsys/drm/intel/dri/Makefile +++ b/src/gallium/winsys/drm/intel/dri/Makefile @@ -9,7 +9,7 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(TOP)/src/gallium/drivers/identity/libidentity.a \ - $(TOP)/src/gallium/drivers/i915simple/libi915simple.a + $(TOP)/src/gallium/drivers/i915/libi915.a DRIVER_SOURCES = diff --git a/src/gallium/winsys/drm/intel/dri/SConscript b/src/gallium/winsys/drm/intel/dri/SConscript index f973811072..b1b654d9f8 100644 --- a/src/gallium/winsys/drm/intel/dri/SConscript +++ b/src/gallium/winsys/drm/intel/dri/SConscript @@ -8,7 +8,7 @@ drivers = [ st_dri, inteldrm, softpipe, - i915simple, + i915, trace, ] diff --git a/src/gallium/winsys/drm/intel/egl/Makefile b/src/gallium/winsys/drm/intel/egl/Makefile index 490baded66..1397e9f729 100644 --- a/src/gallium/winsys/drm/intel/egl/Makefile +++ b/src/gallium/winsys/drm/intel/egl/Makefile @@ -9,7 +9,7 @@ PIPE_DRIVERS = \ $(GALLIUMDIR)/winsys/drm/intel/gem/libinteldrm.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ - $(TOP)/src/gallium/drivers/i915simple/libi915simple.a + $(TOP)/src/gallium/drivers/i915/libi915.a DRIVER_SOURCES = diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_api.c b/src/gallium/winsys/drm/intel/gem/intel_drm_api.c index 8b647a769b..9ed570ff6e 100644 --- a/src/gallium/winsys/drm/intel/gem/intel_drm_api.c +++ b/src/gallium/winsys/drm/intel/gem/intel_drm_api.c @@ -4,8 +4,8 @@ #include "intel_drm_winsys.h" #include "util/u_memory.h" -#include "i915simple/i915_context.h" -#include "i915simple/i915_screen.h" +#include "i915/i915_context.h" +#include "i915/i915_screen.h" #include "trace/tr_drm.h" diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_winsys.h b/src/gallium/winsys/drm/intel/gem/intel_drm_winsys.h index 415c45feea..b4a60563ef 100644 --- a/src/gallium/winsys/drm/intel/gem/intel_drm_winsys.h +++ b/src/gallium/winsys/drm/intel/gem/intel_drm_winsys.h @@ -2,7 +2,7 @@ #ifndef INTEL_DRM_WINSYS_H #define INTEL_DRM_WINSYS_H -#include "i915simple/intel_batchbuffer.h" +#include "i915/intel_batchbuffer.h" #include "drm.h" #include "intel_bufmgr.h" diff --git a/src/gallium/winsys/drm/intel/xorg/Makefile b/src/gallium/winsys/drm/intel/xorg/Makefile index 9e56853b02..14c2462524 100644 --- a/src/gallium/winsys/drm/intel/xorg/Makefile +++ b/src/gallium/winsys/drm/intel/xorg/Makefile @@ -18,7 +18,7 @@ INCLUDES = \ LIBS = \ $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ $(TOP)/src/gallium/winsys/drm/intel/gem/libinteldrm.a \ - $(TOP)/src/gallium/drivers/i915simple/libi915simple.a \ + $(TOP)/src/gallium/drivers/i915/libi915.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(GALLIUM_AUXILIARIES) -- cgit v1.2.3 From ce3c2b51a23c1f674b7a6e862d238c3935d72ca3 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Mon, 5 Oct 2009 18:11:25 -0700 Subject: i915g: Tweek vertexbuffer size --- src/gallium/drivers/i915/i915_prim_vbuf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c index 8a3e466c84..a7e1d4b45e 100644 --- a/src/gallium/drivers/i915/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915/i915_prim_vbuf.c @@ -581,9 +581,9 @@ i915_vbuf_render_create(struct i915_context *i915) int i; i915_render->i915 = i915; - - i915_render->base.max_vertex_buffer_bytes = 128*1024; - + + i915_render->base.max_vertex_buffer_bytes = 16*4096; + /* NOTE: it must be such that state and vertices indices fit in a single * batch buffer. */ @@ -605,7 +605,7 @@ i915_vbuf_render_create(struct i915_context *i915) i915_render->vbo_offset = 0; i915_render->pool_used = FALSE; - i915_render->pool_buffer_size = 128 * 4096; + i915_render->pool_buffer_size = i915_render->base.max_vertex_buffer_bytes * 4; i915_render->pool_fifo = u_fifo_create(6); for (i = 0; i < 6; i++) u_fifo_add(i915_render->pool_fifo, -- cgit v1.2.3 From 340436d8d2f5f6360c2920de3a7547af95deb8f6 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 6 Oct 2009 13:55:48 +1000 Subject: nv50: fix segfault when there's gaps in enabled texture units Tested with progs/demos/multiarb. --- src/gallium/drivers/nv50/nv50_state_validate.c | 11 +++++++---- src/gallium/drivers/nv50/nv50_tex.c | 3 +++ 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 867b1ea872..fd27620371 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -356,13 +356,16 @@ viewport_uptodate: if (nv50->dirty & NV50_NEW_SAMPLER) { int i; - so = so_new(nv50->sampler_nr * 8 + 3, 0); + so = so_new(nv50->sampler_nr * 9 + 2, 0); so_method(so, tesla, NV50TCL_CB_ADDR, 1); so_data (so, NV50_CB_TSC); - so_method(so, tesla, NV50TCL_CB_DATA(0) | 0x40000000, - nv50->sampler_nr * 8); - for (i = 0; i < nv50->sampler_nr; i++) + for (i = 0; i < nv50->sampler_nr; i++) { + if (!nv50->sampler[i]) + continue; + + so_method(so, tesla, NV50TCL_CB_DATA(0) | (2<<29), 8); so_datap (so, nv50->sampler[i]->tsc, 8); + } so_ref(so, &nv50->state.tsc_upload); so_ref(NULL, &so); } diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index 21825a0411..72d33150af 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -157,6 +157,9 @@ nv50_tex_validate(struct nv50_context *nv50) for (unit = 0; unit < nv50->miptree_nr; unit++) { struct nv50_miptree *mt = nv50->miptree[unit]; + if (!mt) + continue; + so_method(so, tesla, NV50TCL_CB_DATA(0) | 0x40000000, 8); if (nv50_tex_construct(nv50, so, mt, unit)) { NOUVEAU_ERR("failed tex validate\n"); -- cgit v1.2.3 From aec2c010f6dc2febcd5f3a10a0dc92738db68e1a Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Tue, 6 Oct 2009 22:07:47 -0400 Subject: nv04-nv40: Fix swizzle transfers for NPOT sizes. Workarounds not necessary, SIFM can handle NPOT, we just weren't setting dst dimensions properly. SIFM can't handle odd w,h though, that still needs fixing. --- src/gallium/drivers/nv04/nv04_surface_2d.c | 147 +++++++---------------------- 1 file changed, 34 insertions(+), 113 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c index f88e138c79..b2ab50ee21 100644 --- a/src/gallium/drivers/nv04/nv04_surface_2d.c +++ b/src/gallium/drivers/nv04/nv04_surface_2d.c @@ -1,5 +1,6 @@ #include "pipe/p_context.h" #include "pipe/p_format.h" +#include "util/u_math.h" #include "util/u_memory.h" #include "nouveau/nouveau_winsys.h" @@ -107,17 +108,20 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, struct nouveau_bo *src_bo = nouveau_bo(ctx->buf(src)); struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst)); const unsigned src_pitch = ((struct nv04_surface *)src)->pitch; + /* Max width & height may not be the same on all HW, but must be POT */ const unsigned max_w = 1024; const unsigned max_h = 1024; - const unsigned sub_w = w > max_w ? max_w : w; - const unsigned sub_h = h > max_h ? max_h : h; - unsigned cx; - unsigned cy; + unsigned sub_w = w > max_w ? max_w : w; + unsigned sub_h = h > max_h ? max_h : h; + unsigned x; + unsigned y; -#if 0 - /* That's the way she likes it */ - assert(src_pitch == ((struct nv04_surface *)dst)->pitch); -#endif + /* Swizzled surfaces must be POT */ + assert(util_is_pot(dst->width) && util_is_pot(dst->height)); + + /* If area is too large to copy in one shot we must copy it in POT chunks to meet alignment requirements */ + assert(sub_w == w || util_is_pot(sub_w)); + assert(sub_h == h || util_is_pot(sub_h)); BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1); OUT_RELOCo(chan, dst_bo, @@ -125,8 +129,8 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_FORMAT, 1); OUT_RING (chan, nv04_surface_format(dst->format) | - log2i(w) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT | - log2i(h) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT); + log2i(dst->width) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT | + log2i(dst->height) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT); BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1); OUT_RELOCo(chan, src_bo, @@ -134,32 +138,37 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE, 1); OUT_RING (chan, swzsurf->handle); - for (cy = 0; cy < h; cy += sub_h) { - for (cx = 0; cx < w; cx += sub_w) { + for (y = 0; y < h; y += sub_h) { + sub_h = MIN2(sub_h, h - y); + + for (x = 0; x < w; x += sub_w) { + sub_w = MIN2(sub_w, w - x); + + /* Must be 64-byte aligned */ + assert(!((dst->offset + nv04_swizzle_bits(dx+x, dy+y) * dst->texture->block.size) & 63)); + BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1); - OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(cx+dx, cy+dy) * - dst->texture->block.size, NOUVEAU_BO_GART | - NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(dx+x, dy+y) * dst->texture->block.size, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9); OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE); OUT_RING (chan, nv04_scaled_image_format(src->format)); OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY); OUT_RING (chan, 0); - OUT_RING (chan, sub_h << 16 | sub_w); + OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | sub_w); OUT_RING (chan, 0); - OUT_RING (chan, sub_h << 16 | sub_w); + OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | sub_w); OUT_RING (chan, 1 << 20); OUT_RING (chan, 1 << 20); BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SIZE, 4); - OUT_RING (chan, sub_h << 16 | sub_w); + OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_H_SHIFT | sub_w); OUT_RING (chan, src_pitch | NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER | NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE); - OUT_RELOCl(chan, src_bo, src->offset + (cy+sy) * src_pitch + - (cx+sx) * src->texture->block.size, NOUVEAU_BO_GART | - NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, src_bo, src->offset + (sy+y) * src_pitch + (sx+x) * src->texture->block.size, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); OUT_RING (chan, 0); } } @@ -213,43 +222,6 @@ nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx, return 0; } -static int -nv04_surface_copy_m2mf_swizzle(struct nv04_surface_2d *ctx, - struct pipe_surface *dst, int dx, int dy, - struct pipe_surface *src, int sx, int sy) -{ - struct nouveau_channel *chan = ctx->m2mf->channel; - struct nouveau_grobj *m2mf = ctx->m2mf; - struct nouveau_bo *src_bo = nouveau_bo(ctx->buf(src)); - struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst)); - unsigned src_pitch = ((struct nv04_surface *)src)->pitch; - unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch; - unsigned dst_offset = dst->offset + nv04_swizzle_bits(dx, dy) * - dst->texture->block.size; - unsigned src_offset = src->offset + sy * src_pitch + - sx * src->texture->block.size; - - BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2); - OUT_RELOCo(chan, src_bo, - NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCo(chan, dst_bo, - NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8); - OUT_RELOCl(chan, src_bo, src_offset, - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCl(chan, dst_bo, dst_offset, - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR); - OUT_RING (chan, src_pitch); - OUT_RING (chan, dst_pitch); - OUT_RING (chan, 1 * src->texture->block.size); - OUT_RING (chan, 1); - OUT_RING (chan, 0x0101); - OUT_RING (chan, 0); - - return 0; -} - static int nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst, int dx, int dy, struct pipe_surface *src, int sx, int sy, @@ -299,61 +271,10 @@ nv04_surface_copy(struct nv04_surface_2d *ctx, struct pipe_surface *dst, assert(src->format == dst->format); /* Setup transfer to swizzle the texture to vram if needed */ - if (src_linear && !dst_linear) { - int x,y; - - if ((w>1) && (h>1)) { - int potWidth = 1<potHeight ? potHeight : potWidth); - - /* top left is always POT, but we can only swizzle squares */ - for (y=0; y0) { - nv04_surface_copy(ctx, dst, dx+potWidth, dy, - src, sx+potWidth, sy, - remainWidth, potHeight); - } - - /* bottom left */ - if (remainHeight>0) { - nv04_surface_copy(ctx, dst, dx, dy+potHeight, - src, sx, sy+potHeight, - potWidth, remainHeight); - } - - /* bottom right */ - if ((remainWidth>0) && (remainHeight>0)) { - nv04_surface_copy(ctx, dst, dx+potWidth, dy+potHeight, - src, sx+potWidth, sy+potHeight, - remainWidth, remainHeight); - } - } else if (w==1) { - /* We have a column to copy to a swizzled texture */ - for (y=0; y 1 && h > 1) { + nv04_surface_copy_swizzle(ctx, dst, dx, dy, src, sx, sy, w, h); + return; + } /* NV_CONTEXT_SURFACES_2D has buffer alignment restrictions, fallback * to NV_MEMORY_TO_MEMORY_FORMAT in this case. -- cgit v1.2.3 From 030723fc5d3faa919cac245fc7b13430ca201826 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Wed, 7 Oct 2009 01:40:37 +0100 Subject: i915g: Disable vbuf fifo and minor commenting of vbuf code The vbuf fifo doesn't appear to help once the libdrm reuse flag has been set. --- src/gallium/drivers/i915/i915_prim_vbuf.c | 37 ++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c index a7e1d4b45e..cf065fd51b 100644 --- a/src/gallium/drivers/i915/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915/i915_prim_vbuf.c @@ -52,6 +52,8 @@ #include "i915_state.h" +#undef VBUF_USE_FIFO + /** * Primitive renderer for i915. */ @@ -74,16 +76,19 @@ struct i915_vbuf_render { /* Stuff for the vbo */ struct intel_buffer *vbo; - size_t vbo_size; + size_t vbo_size; /**< current size of allocated buffer */ + size_t vbo_alloc_size; /**< minimum buffer size to allocate */ size_t vbo_offset; void *vbo_ptr; size_t vbo_max_used; - /* stuff for the pool */ +#ifdef VBUF_USE_FIFO + /* Stuff for the pool */ struct util_fifo *pool_fifo; unsigned pool_used; unsigned pool_buffer_size; boolean pool_not_used; +#endif }; @@ -132,18 +137,23 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) struct intel_winsys *iws = i915->iws; if (i915_render->vbo) { +#ifdef VBUF_USE_FIFO if (i915_render->pool_not_used) iws->buffer_destroy(iws, i915_render->vbo); else u_fifo_add(i915_render->pool_fifo, i915_render->vbo); i915_render->vbo = NULL; +#else + iws->buffer_destroy(iws, i915_render->vbo); +#endif } i915->vbo_flushed = 0; - i915_render->vbo_size = MAX2(size, i915_render->pool_buffer_size); + i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size); i915_render->vbo_offset = 0; +#ifdef VBUF_USE_FIFO if (i915_render->vbo_size != i915_render->pool_buffer_size) { i915_render->pool_not_used = TRUE; i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, @@ -158,6 +168,10 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) } u_fifo_pop(i915_render->pool_fifo, (void**)&i915_render->vbo); } +#else + i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, + 64, INTEL_NEW_VERTEX); +#endif } static boolean @@ -173,10 +187,11 @@ i915_vbuf_render_allocate_vertices(struct vbuf_render *render, assert(!i915->vbo); if (!i915_vbuf_render_reserve(i915_render, size)) { - +#ifdef VBUF_USE_FIFO + /* incase we flushed reset the number of pool buffers used */ if (i915->vbo_flushed) i915_render->pool_used = 0; - +#endif i915_vbuf_render_new_buf(i915_render, size); } @@ -603,19 +618,19 @@ i915_vbuf_render_create(struct i915_context *i915) i915_render->vbo = NULL; i915_render->vbo_size = 0; i915_render->vbo_offset = 0; + i915_render->vbo_alloc_size = i915_render->base.max_vertex_buffer_bytes * 4; +#ifdef VBUF_USE_POOL i915_render->pool_used = FALSE; - i915_render->pool_buffer_size = i915_render->base.max_vertex_buffer_bytes * 4; + i915_render->pool_buffer_size = i915_render->vbo_alloc_size; i915_render->pool_fifo = u_fifo_create(6); for (i = 0; i < 6; i++) u_fifo_add(i915_render->pool_fifo, iws->buffer_create(iws, i915_render->pool_buffer_size, 64, INTEL_NEW_VERTEX)); - -#if 0 - /* TODO JB: is this realy needed? */ - i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE); - iws->buffer_unmap(iws, i915_render->vbo); +#else + (void)i; + (void)iws; #endif return &i915_render->base; -- cgit v1.2.3 From f8ba93aefdf23b88a945d6037cd2e672c99b314c Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Wed, 7 Oct 2009 03:26:03 +0100 Subject: i915g: Change order of buffer_write arguments They now follow the pipe_buffer_write style, its the gallium driver that sets the interface not the winsys. --- src/gallium/drivers/i915/intel_winsys.h | 6 +++--- src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/intel_winsys.h b/src/gallium/drivers/i915/intel_winsys.h index 42c5e7470e..2c8dc63f3f 100644 --- a/src/gallium/drivers/i915/intel_winsys.h +++ b/src/gallium/drivers/i915/intel_winsys.h @@ -153,13 +153,13 @@ struct intel_winsys { /** * Write to a buffer. * - * Arguments follows pwrite(2) + * Arguments follows pipe_buffer_write. */ int (*buffer_write)(struct intel_winsys *iws, struct intel_buffer *dst, - const void *src, + size_t offset, size_t size, - size_t offset); + const void *data); void (*buffer_destroy)(struct intel_winsys *iws, struct intel_buffer *buffer); diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c b/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c index 327e19fcd6..ac4dd6e00e 100644 --- a/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c +++ b/src/gallium/winsys/drm/intel/gem/intel_drm_buffer.c @@ -119,9 +119,9 @@ intel_drm_buffer_unmap(struct intel_winsys *iws, static int intel_drm_buffer_write(struct intel_winsys *iws, struct intel_buffer *buffer, - const void *data, + size_t offset, size_t size, - size_t offset) + const void *data) { struct intel_drm_buffer *buf = intel_drm_buffer(buffer); -- cgit v1.2.3 From 0f0127f6f9ee6c976c707cd406bf392aea978976 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Wed, 7 Oct 2009 03:28:04 +0100 Subject: i915g: Use buffer write instead of map for lit vertices --- src/gallium/drivers/i915/i915_prim_vbuf.c | 39 +++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c index cf065fd51b..07546c03b2 100644 --- a/src/gallium/drivers/i915/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915/i915_prim_vbuf.c @@ -53,6 +53,7 @@ #undef VBUF_USE_FIFO +#undef VBUF_MAP_BUFFER /** * Primitive renderer for i915. @@ -82,6 +83,12 @@ struct i915_vbuf_render { void *vbo_ptr; size_t vbo_max_used; +#ifndef VBUF_MAP_BUFFER + size_t map_used_start; + size_t map_used_end; + size_t map_size; +#endif + #ifdef VBUF_USE_FIFO /* Stuff for the pool */ struct util_fifo *pool_fifo; @@ -153,6 +160,14 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size); i915_render->vbo_offset = 0; +#ifndef VBUF_MAP_BUFFER + if (i915_render->vbo_size > i915_render->map_size) { + i915_render->map_size = i915_render->vbo_size; + FREE(i915_render->vbo_ptr); + i915_render->vbo_ptr = MALLOC(i915_render->map_size); + } +#endif + #ifdef VBUF_USE_FIFO if (i915_render->vbo_size != i915_render->pool_buffer_size) { i915_render->pool_not_used = TRUE; @@ -215,9 +230,13 @@ i915_vbuf_render_map_vertices(struct vbuf_render *render) if (i915->vbo_flushed) debug_printf("%s bad vbo flush occured stalling on hw\n", __FUNCTION__); +#ifdef VBUF_MAP_BUFFER i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE); - - return (unsigned char *)i915_render->vbo_ptr + i915->vbo_offset; + return (unsigned char *)i915_render->vbo_ptr + i915_render->vbo_offset; +#else + (void)iws; + return (unsigned char *)i915_render->vbo_ptr; +#endif } static void @@ -230,7 +249,17 @@ i915_vbuf_render_unmap_vertices(struct vbuf_render *render, struct intel_winsys *iws = i915->iws; i915_render->vbo_max_used = MAX2(i915_render->vbo_max_used, i915_render->vertex_size * (max_index + 1)); +#ifdef VBUF_MAP_BUFFER iws->buffer_unmap(iws, i915_render->vbo); +#else + i915_render->map_used_start = i915_render->vertex_size * min_index; + i915_render->map_used_end = i915_render->vertex_size * (max_index + 1); + iws->buffer_write(iws, i915_render->vbo, + i915_render->map_used_start + i915_render->vbo_offset, + i915_render->map_used_end - i915_render->map_used_start, + i915_render->vbo_ptr + i915_render->map_used_start); + +#endif } static boolean @@ -614,8 +643,14 @@ i915_vbuf_render_create(struct i915_context *i915) i915_render->base.release_vertices = i915_vbuf_render_release_vertices; i915_render->base.destroy = i915_vbuf_render_destroy; +#ifndef VBUF_MAP_BUFFER + i915_render->map_size = 0; + i915_render->map_used_start = 0; + i915_render->map_used_end = 0; +#endif i915_render->vbo = NULL; + i915_render->vbo_ptr = NULL; i915_render->vbo_size = 0; i915_render->vbo_offset = 0; i915_render->vbo_alloc_size = i915_render->base.max_vertex_buffer_bytes * 4; -- cgit v1.2.3 From 9a0ff33ad60cb63d430c4f93f6531f7aa2ec2ba8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 7 Oct 2009 13:17:20 -0600 Subject: softpipe: prefix non-static functions with sp_ --- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 86 ++++++++++++++--------------- src/gallium/drivers/softpipe/sp_setup.c | 12 ++-- src/gallium/drivers/softpipe/sp_setup.h | 12 ++-- 3 files changed, 55 insertions(+), 55 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index e603c20fc4..5fbac06a53 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -138,7 +138,7 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); struct setup_context *setup_ctx = cvbr->setup; - setup_prepare( setup_ctx ); + sp_setup_prepare( setup_ctx ); cvbr->softpipe->reduced_prim = u_reduced_prim(prim); cvbr->prim = prim; @@ -171,14 +171,14 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { - setup_point( setup_ctx, + sp_setup_point( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride) ); } break; case PIPE_PRIM_LINES: for (i = 1; i < nr; i += 2) { - setup_line( setup_ctx, + sp_setup_line( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); } @@ -186,7 +186,7 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_LINE_STRIP: for (i = 1; i < nr; i ++) { - setup_line( setup_ctx, + sp_setup_line( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); } @@ -194,12 +194,12 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_LINE_LOOP: for (i = 1; i < nr; i ++) { - setup_line( setup_ctx, + sp_setup_line( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); } if (nr) { - setup_line( setup_ctx, + sp_setup_line( setup_ctx, get_vert(vertex_buffer, indices[nr-1], stride), get_vert(vertex_buffer, indices[0], stride) ); } @@ -208,7 +208,7 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_TRIANGLES: if (softpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 3) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-2], stride) ); @@ -216,7 +216,7 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 2; i < nr; i += 3) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -227,7 +227,7 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_TRIANGLE_STRIP: if (softpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 1) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i+(i&1)-1], stride), get_vert(vertex_buffer, indices[i-(i&1)], stride), get_vert(vertex_buffer, indices[i-2], stride) ); @@ -235,7 +235,7 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 2; i < nr; i += 1) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i+(i&1)-2], stride), get_vert(vertex_buffer, indices[i-(i&1)-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -246,7 +246,7 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_TRIANGLE_FAN: if (softpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 1) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[0], stride), get_vert(vertex_buffer, indices[i-1], stride) ); @@ -254,7 +254,7 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 2; i < nr; i += 1) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[0], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -265,11 +265,11 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_QUADS: if (softpipe->rasterizer->flatshade_first) { for (i = 3; i < nr; i += 4) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-3], stride) ); - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-3], stride) ); @@ -277,12 +277,12 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 3; i < nr; i += 4) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-0], stride) ); - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -293,11 +293,11 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_QUAD_STRIP: if (softpipe->rasterizer->flatshade_first) { for (i = 3; i < nr; i += 2) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-3], stride)); - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-3], stride) ); @@ -305,11 +305,11 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 3; i < nr; i += 2) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-0], stride) ); - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -324,7 +324,7 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) * flatshade_first state makes no difference. */ for (i = 2; i < nr; i += 1) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[0], stride) ); @@ -355,14 +355,14 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { - setup_point( setup_ctx, + sp_setup_point( setup_ctx, get_vert(vertex_buffer, i-0, stride) ); } break; case PIPE_PRIM_LINES: for (i = 1; i < nr; i += 2) { - setup_line( setup_ctx, + sp_setup_line( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); } @@ -370,7 +370,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_LINE_STRIP: for (i = 1; i < nr; i ++) { - setup_line( setup_ctx, + sp_setup_line( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); } @@ -378,12 +378,12 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_LINE_LOOP: for (i = 1; i < nr; i ++) { - setup_line( setup_ctx, + sp_setup_line( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); } if (nr) { - setup_line( setup_ctx, + sp_setup_line( setup_ctx, get_vert(vertex_buffer, nr-1, stride), get_vert(vertex_buffer, 0, stride) ); } @@ -392,7 +392,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_TRIANGLES: if (softpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 3) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-2, stride) ); @@ -400,7 +400,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 2; i < nr; i += 3) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -411,7 +411,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_TRIANGLE_STRIP: if (softpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i++) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i+(i&1)-1, stride), get_vert(vertex_buffer, i-(i&1), stride), get_vert(vertex_buffer, i-2, stride) ); @@ -419,7 +419,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 2; i < nr; i++) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i+(i&1)-2, stride), get_vert(vertex_buffer, i-(i&1)-1, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -430,7 +430,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_TRIANGLE_FAN: if (softpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 1) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, 0, stride), get_vert(vertex_buffer, i-1, stride) ); @@ -438,7 +438,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 2; i < nr; i += 1) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, 0, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -449,11 +449,11 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_QUADS: if (softpipe->rasterizer->flatshade_first) { for (i = 3; i < nr; i += 4) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-3, stride) ); - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-3, stride) ); @@ -461,11 +461,11 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 3; i < nr; i += 4) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-0, stride) ); - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -476,11 +476,11 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_QUAD_STRIP: if (softpipe->rasterizer->flatshade_first) { for (i = 3; i < nr; i += 2) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-3, stride) ); - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-3, stride) ); @@ -488,11 +488,11 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 3; i < nr; i += 2) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-0, stride) ); - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -507,7 +507,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) * flatshade_first state makes no difference. */ for (i = 2; i < nr; i += 1) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, 0, stride) ); @@ -525,7 +525,7 @@ static void sp_vbuf_destroy(struct vbuf_render *vbr) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); - setup_destroy_context(cvbr->setup); + sp_setup_destroy_context(cvbr->setup); FREE(cvbr); } @@ -556,7 +556,7 @@ sp_create_vbuf_backend(struct softpipe_context *sp) cvbr->softpipe = sp; - cvbr->setup = setup_create_context(cvbr->softpipe); + cvbr->setup = sp_setup_create_context(cvbr->softpipe); return &cvbr->base; } diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index ade125662a..e55e209fd1 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -696,7 +696,7 @@ calc_det( const float (*v0)[4], /** * Do setup for triangle rasterization, then render the triangle. */ -void setup_tri( struct setup_context *setup, +void sp_setup_tri( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) @@ -915,7 +915,7 @@ plot(struct setup_context *setup, int x, int y) * to handle stippling and wide lines. */ void -setup_line(struct setup_context *setup, +sp_setup_line(struct setup_context *setup, const float (*v0)[4], const float (*v1)[4]) { @@ -1045,7 +1045,7 @@ point_persp_coeff(const struct setup_context *setup, * XXX could optimize a lot for 1-pixel points. */ void -setup_point( struct setup_context *setup, +sp_setup_point( struct setup_context *setup, const float (*v0)[4] ) { struct softpipe_context *softpipe = setup->softpipe; @@ -1246,7 +1246,7 @@ setup_point( struct setup_context *setup, } } -void setup_prepare( struct setup_context *setup ) +void sp_setup_prepare( struct setup_context *setup ) { struct softpipe_context *sp = setup->softpipe; @@ -1270,7 +1270,7 @@ void setup_prepare( struct setup_context *setup ) -void setup_destroy_context( struct setup_context *setup ) +void sp_setup_destroy_context( struct setup_context *setup ) { FREE( setup ); } @@ -1279,7 +1279,7 @@ void setup_destroy_context( struct setup_context *setup ) /** * Create a new primitive setup/render stage. */ -struct setup_context *setup_create_context( struct softpipe_context *softpipe ) +struct setup_context *sp_setup_create_context( struct softpipe_context *softpipe ) { struct setup_context *setup = CALLOC_STRUCT(setup_context); unsigned i; diff --git a/src/gallium/drivers/softpipe/sp_setup.h b/src/gallium/drivers/softpipe/sp_setup.h index d54f334428..9c8844d2e8 100644 --- a/src/gallium/drivers/softpipe/sp_setup.h +++ b/src/gallium/drivers/softpipe/sp_setup.h @@ -31,23 +31,23 @@ struct setup_context; struct softpipe_context; void -setup_tri( struct setup_context *setup, +sp_setup_tri( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ); void -setup_line(struct setup_context *setup, +sp_setup_line(struct setup_context *setup, const float (*v0)[4], const float (*v1)[4]); void -setup_point( struct setup_context *setup, +sp_setup_point( struct setup_context *setup, const float (*v0)[4] ); -struct setup_context *setup_create_context( struct softpipe_context *softpipe ); -void setup_prepare( struct setup_context *setup ); -void setup_destroy_context( struct setup_context *setup ); +struct setup_context *sp_setup_create_context( struct softpipe_context *softpipe ); +void sp_setup_prepare( struct setup_context *setup ); +void sp_setup_destroy_context( struct setup_context *setup ); #endif -- cgit v1.2.3 From 0fb71be2179ebf140b086682f050399809ef57b8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 7 Oct 2009 13:21:57 -0600 Subject: softpipe: whitespace and comment fixes --- src/gallium/drivers/softpipe/sp_context.h | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 43a195c8ef..a735573d6f 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -85,7 +85,7 @@ struct softpipe_context { /** Mapped vertex buffers */ ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS]; - + /** Mapped constant buffers */ void *mapped_constants[PIPE_SHADER_TYPES]; @@ -96,14 +96,13 @@ struct softpipe_context { /** Which vertex shader output slot contains point size */ int psize_slot; - /* The reduced version of the primitive supplied by the state - * tracker. - */ + /** The reduced version of the primitive supplied by the state tracker */ unsigned reduced_api_prim; - /* The reduced primitive after unfilled triangles, wide-line - * decomposition, etc, are taken into account. This is the - * primitive actually rasterized. + /** + * The reduced primitive after unfilled triangles, wide-line decomposition, + * etc, are taken into account. This is the primitive type that's actually + * rasterized. */ unsigned reduced_prim; @@ -117,7 +116,6 @@ struct softpipe_context { struct quad_stage *shade; struct quad_stage *depth_test; struct quad_stage *blend; - struct quad_stage *first; /**< points to one of the above stages */ } quad; @@ -135,10 +133,10 @@ struct softpipe_context { struct draw_stage *vbuf; boolean dirty_render_cache; - + struct softpipe_tile_cache *cbuf_cache[PIPE_MAX_COLOR_BUFS]; struct softpipe_tile_cache *zsbuf_cache; - + unsigned tex_timestamp; struct softpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; @@ -159,4 +157,3 @@ softpipe_reset_sampler_varients(struct softpipe_context *softpipe); #endif /* SP_CONTEXT_H */ - -- cgit v1.2.3 From 2b9418b2785b3f25fc44daf90436f24b4de35980 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 7 Oct 2009 13:30:48 -0600 Subject: softpipe: new comments --- src/gallium/drivers/softpipe/sp_texture.c | 39 +++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 2e6c43c7ef..7caf2928b4 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -111,6 +111,9 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, } +/** + * Create new pipe_texture given the template information. + */ static struct pipe_texture * softpipe_texture_create(struct pipe_screen *screen, const struct pipe_texture *template) @@ -145,6 +148,9 @@ softpipe_texture_create(struct pipe_screen *screen, } +/** + * Create a new pipe_texture which wraps an existing buffer. + */ static struct pipe_texture * softpipe_texture_blanket(struct pipe_screen * screen, const struct pipe_texture *base, @@ -188,6 +194,9 @@ softpipe_texture_destroy(struct pipe_texture *pt) } +/** + * Get a pipe_surface "view" into a texture. + */ static struct pipe_surface * softpipe_get_tex_surface(struct pipe_screen *screen, struct pipe_texture *pt, @@ -248,6 +257,9 @@ softpipe_get_tex_surface(struct pipe_screen *screen, } +/** + * Free a pipe_surface which was created with softpipe_get_tex_surface(). + */ static void softpipe_tex_surface_destroy(struct pipe_surface *surf) { @@ -261,6 +273,18 @@ softpipe_tex_surface_destroy(struct pipe_surface *surf) } +/** + * Geta pipe_transfer object which is used for moving data in/out of + * a texture object. + * \param face one of PIPE_TEX_FACE_x or 0 + * \param level texture mipmap level + * \param zslice 2D slice of a 3D texture + * \param usage one of PIPE_TRANSFER_READ/WRITE/READ_WRITE + * \param x X position of region to read/write + * \param y Y position of region to read/write + * \param width width of region to read/write + * \param height height of region to read/write + */ static struct pipe_transfer * softpipe_get_tex_transfer(struct pipe_screen *screen, struct pipe_texture *texture, @@ -310,6 +334,10 @@ softpipe_get_tex_transfer(struct pipe_screen *screen, } +/** + * Free a pipe_transfer object which was created with + * softpipe_get_tex_transfer(). + */ static void softpipe_tex_transfer_destroy(struct pipe_transfer *transfer) { @@ -323,6 +351,9 @@ softpipe_tex_transfer_destroy(struct pipe_transfer *transfer) } +/** + * Create memory mapping for given pipe_transfer object. + */ static void * softpipe_transfer_map( struct pipe_screen *screen, struct pipe_transfer *transfer ) @@ -355,6 +386,9 @@ softpipe_transfer_map( struct pipe_screen *screen, } +/** + * Unmap memory mapping for given pipe_transfer object. + */ static void softpipe_transfer_unmap(struct pipe_screen *screen, struct pipe_transfer *transfer) @@ -372,6 +406,7 @@ softpipe_transfer_unmap(struct pipe_screen *screen, } } + static struct pipe_video_surface* softpipe_video_surface_create(struct pipe_screen *screen, enum pipe_video_chroma_format chroma_format, @@ -445,6 +480,10 @@ softpipe_init_screen_texture_funcs(struct pipe_screen *screen) } +/** + * Return pipe_buffer handle and stride for given texture object. + * XXX used for??? + */ boolean softpipe_get_texture_buffer( struct pipe_texture *texture, struct pipe_buffer **buf, -- cgit v1.2.3 From 0083d2e40a8b0aa9ea36f98d4b6b7981d5dca0e3 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 7 Oct 2009 14:29:23 -0600 Subject: i915g: Fix MSVC build. --- src/gallium/drivers/i915/i915_prim_vbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c index 07546c03b2..6b832140a8 100644 --- a/src/gallium/drivers/i915/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915/i915_prim_vbuf.c @@ -257,7 +257,7 @@ i915_vbuf_render_unmap_vertices(struct vbuf_render *render, iws->buffer_write(iws, i915_render->vbo, i915_render->map_used_start + i915_render->vbo_offset, i915_render->map_used_end - i915_render->map_used_start, - i915_render->vbo_ptr + i915_render->map_used_start); + (unsigned char *)i915_render->vbo_ptr + i915_render->map_used_start); #endif } -- cgit v1.2.3 From 69588d7ed59a019a5272a9cc391e30c47d006aee Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 9 Oct 2009 11:29:33 +0100 Subject: llvmpipe: Eliminate constant mapping/unmapping. --- src/gallium/drivers/llvmpipe/lp_context.h | 3 -- src/gallium/drivers/llvmpipe/lp_draw_arrays.c | 50 --------------------------- src/gallium/drivers/llvmpipe/lp_state_fs.c | 20 +++++++++-- 3 files changed, 17 insertions(+), 56 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 8d5a0d4f1f..7df340554e 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -88,9 +88,6 @@ struct llvmpipe_context { /** Mapped vertex buffers */ ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS]; - /** Mapped constant buffers */ - void *mapped_constants[PIPE_SHADER_TYPES]; - /** Vertex format */ struct vertex_info vertex_info; struct vertex_info vertex_info_vbuf; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 89772e62d3..0aa13a1fc6 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -45,54 +45,6 @@ -static void -llvmpipe_map_constant_buffers(struct llvmpipe_context *lp) -{ - struct pipe_screen *screen = lp->pipe.screen; - uint i, size; - - for (i = 0; i < PIPE_SHADER_TYPES; i++) { - if (lp->constants[i].buffer && lp->constants[i].buffer->size) - lp->mapped_constants[i] = screen->buffer_map(screen, lp->constants[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); - } - - if (lp->constants[PIPE_SHADER_VERTEX].buffer) - size = lp->constants[PIPE_SHADER_VERTEX].buffer->size; - else - size = 0; - - lp->jit_context.constants = lp->mapped_constants[PIPE_SHADER_FRAGMENT]; - - draw_set_mapped_constant_buffer(lp->draw, - lp->mapped_constants[PIPE_SHADER_VERTEX], - size); -} - - -static void -llvmpipe_unmap_constant_buffers(struct llvmpipe_context *lp) -{ - struct pipe_screen *screen = lp->pipe.screen; - uint i; - - /* really need to flush all prims since the vert/frag shaders const buffers - * are going away now. - */ - draw_flush(lp->draw); - - draw_set_mapped_constant_buffer(lp->draw, NULL, 0); - - lp->jit_context.constants = NULL; - - for (i = 0; i < 2; i++) { - if (lp->constants[i].buffer && lp->constants[i].buffer->size) - screen->buffer_unmap(screen, lp->constants[i].buffer); - lp->mapped_constants[i] = NULL; - } -} - - boolean llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) @@ -124,7 +76,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, llvmpipe_update_derived( lp ); llvmpipe_map_transfers(lp); - llvmpipe_map_constant_buffers(lp); /* * Map vertex buffers @@ -163,7 +114,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, /* Note: leave drawing surfaces mapped */ - llvmpipe_unmap_constant_buffers(lp); lp->dirty_render_cache = TRUE; diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index b00be0cc32..7728ba6076 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -83,6 +83,7 @@ #include "lp_bld_debug.h" #include "lp_screen.h" #include "lp_context.h" +#include "lp_buffer.h" #include "lp_state.h" #include "lp_quad.h" #include "lp_tex_sample.h" @@ -671,16 +672,29 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) void llvmpipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + const struct pipe_constant_buffer *constants) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + struct pipe_buffer *buffer = constants ? constants->buffer : NULL; + unsigned size = buffer ? buffer->size : 0; + const void *data = buffer ? llvmpipe_buffer(buffer)->data : NULL; assert(shader < PIPE_SHADER_TYPES); assert(index == 0); + if(shader == PIPE_SHADER_VERTEX) + draw_flush(llvmpipe->draw); + /* note: reference counting */ - pipe_buffer_reference(&llvmpipe->constants[shader].buffer, - buf ? buf->buffer : NULL); + pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer); + + if(shader == PIPE_SHADER_FRAGMENT) { + llvmpipe->jit_context.constants = data; + } + + if(shader == PIPE_SHADER_VERTEX) { + draw_set_mapped_constant_buffer(llvmpipe->draw, data, size); + } llvmpipe->dirty |= LP_NEW_CONSTANTS; } -- cgit v1.2.3 From f36123323c9d696fec6e54882242cab15247ab0d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 8 Oct 2009 13:00:37 -0600 Subject: softpipe: restore/fix print_vertex() debug helper --- src/gallium/drivers/softpipe/sp_setup.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index e55e209fd1..00fb52a64f 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -106,6 +106,7 @@ struct setup_context { #endif unsigned winding; /* which winding to cull */ + unsigned nr_vertex_attrs; }; @@ -268,8 +269,8 @@ static void print_vertex(const struct setup_context *setup, const float (*v)[4]) { int i; - debug_printf(" Vertex: (%p)\n", v); - for (i = 0; i < setup->quad[0].nr_attrs; i++) { + debug_printf(" Vertex: (%p)\n", (void *) v); + for (i = 0; i < setup->nr_vertex_attrs; i++) { debug_printf(" %d: %f %f %f %f\n", i, v[i][0], v[i][1], v[i][2], v[i][3]); if (util_is_inf_or_nan(v[i][0])) { @@ -1254,6 +1255,9 @@ void sp_setup_prepare( struct setup_context *setup ) softpipe_update_derived(sp); } + /* Note: nr_attrs is only used for debugging (vertex printing) */ + setup->nr_vertex_attrs = draw_num_vs_outputs(sp->draw); + sp->quad.first->begin( sp->quad.first ); if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES && -- cgit v1.2.3 From a74e53ddba246b1f6604c6120b63a923fd9c60d5 Mon Sep 17 00:00:00 2001 From: Cooper Yuan Date: Sat, 10 Oct 2009 14:41:44 +0800 Subject: r300g: add video surface create and destroy functions --- src/gallium/drivers/r300/r300_texture.c | 52 +++++++++++++++++++++++++++++++++ src/gallium/drivers/r300/r300_texture.h | 14 ++++++++- 2 files changed, 65 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index ce60ded7ca..7ea4c33fa9 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -215,6 +215,55 @@ static struct pipe_texture* return (struct pipe_texture*)tex; } +static struct pipe_video_surface * +r300_video_surface_create(struct pipe_screen *screen, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height) +{ + struct r300_video_surface *r300_vsfc; + struct pipe_texture template; + + assert(screen); + assert(width && height); + + r300_vsfc = CALLOC_STRUCT(r300_video_surface); + if (!r300_vsfc) + return NULL; + + pipe_reference_init(&r300_vsfc->base.reference, 1); + r300_vsfc->base.screen = screen; + r300_vsfc->base.chroma_format = chroma_format; + r300_vsfc->base.width = width; + r300_vsfc->base.height = height; + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + template.format = PIPE_FORMAT_X8R8G8B8_UNORM; + template.last_level = 0; + template.width[0] = util_next_power_of_two(width); + template.height[0] = util_next_power_of_two(height); + template.depth[0] = 1; + pf_get_block(template.format, &template.block); + template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | + PIPE_TEXTURE_USAGE_RENDER_TARGET; + + r300_vsfc->tex = screen->texture_create(screen, &template); + if (!r300_vsfc->tex) + { + FREE(r300_vsfc); + return NULL; + } + + return &r300_vsfc->base; +} + +static void r300_video_surface_destroy(struct pipe_video_surface *vsfc) +{ + struct r300_video_surface *r300_vsfc = r300_video_surface(vsfc); + pipe_texture_reference(&r300_vsfc->tex, NULL); + FREE(r300_vsfc); +} + void r300_init_screen_texture_functions(struct pipe_screen* screen) { screen->texture_create = r300_texture_create; @@ -222,6 +271,9 @@ void r300_init_screen_texture_functions(struct pipe_screen* screen) screen->get_tex_surface = r300_get_tex_surface; screen->tex_surface_destroy = r300_tex_surface_destroy; screen->texture_blanket = r300_texture_blanket; + + screen->video_surface_create = r300_video_surface_create; + screen->video_surface_destroy= r300_video_surface_destroy; } boolean r300_get_texture_buffer(struct pipe_texture* texture, diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index bd87790bc3..e5182d31b4 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -24,7 +24,7 @@ #define R300_TEXTURE_H #include "pipe/p_screen.h" - +#include "pipe/p_video_state.h" #include "util/u_math.h" #include "r300_context.h" @@ -91,6 +91,18 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) return 0; } +struct r300_video_surface +{ + struct pipe_video_surface base; + struct pipe_texture *tex; +}; + +static INLINE struct r300_video_surface * +r300_video_surface(struct pipe_video_surface *pvs) +{ + return (struct r300_video_surface *)pvs; +} + #ifndef R300_WINSYS_H boolean r300_get_texture_buffer(struct pipe_texture* texture, -- cgit v1.2.3 From 768481ed40cb7530fdbadbf4d6dc00b74209adf1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Sat, 10 Oct 2009 09:18:14 -0600 Subject: softpipe: revert 564df9dc5f6335eb8dc68f3c69cf054d2142663c This change silenced valgrind warnings but broke progs/tests/drawbuffers. The problem is we don't know the surface's state when we start caching it (it may or may not be initialized/cleared/etc). So "clearing" it here was presumptuous. Leaving the code in place (but disabled) for reference and when using valgrind. Fixes bug 24401 --- src/gallium/drivers/softpipe/sp_tile_cache.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 5f7864e671..b2195ec6b5 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -131,7 +131,12 @@ sp_create_tile_cache( struct pipe_screen *screen ) tc->entries[pos].y = -1; } -#if TILE_CLEAR_OPTIMIZATION + /* XXX this code prevents valgrind warnings about use of uninitialized + * memory in programs that don't clear the surface before rendering. + * However, it breaks clearing in other situations (such as in + * progs/tests/drawbuffers, see bug 24402). + */ +#if 0 && TILE_CLEAR_OPTIMIZATION /* set flags to indicate all the tiles are cleared */ memset(tc->clear_flags, 255, sizeof(tc->clear_flags)); #endif -- cgit v1.2.3 From 39daa763b59cc80d862709e99ee3619bd0f7a14d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Sat, 10 Oct 2009 09:12:00 -0600 Subject: softpipe: fix multi-drawbuffers regression This is part of the fix for bug 24401. --- src/gallium/drivers/softpipe/sp_quad_blend.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index e243c63fa2..0ad0b98654 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -946,15 +946,15 @@ choose_blend_quad(struct quad_stage *qs, qs->run = blend_noop; } else if (!softpipe->blend->logicop_enable && - softpipe->blend->colormask == 0xf) + softpipe->blend->colormask == 0xf && + softpipe->framebuffer.nr_cbufs == 1) { if (!blend->blend_enable) { qs->run = single_output_color; } else if (blend->rgb_src_factor == blend->alpha_src_factor && blend->rgb_dst_factor == blend->alpha_dst_factor && - blend->rgb_func == blend->alpha_func && - softpipe->framebuffer.nr_cbufs == 1) + blend->rgb_func == blend->alpha_func) { if (blend->alpha_func == PIPE_BLEND_ADD) { if (blend->rgb_src_factor == PIPE_BLENDFACTOR_ONE && -- cgit v1.2.3 From 3611d01a44d5d3cd2c132e685836b1ea9c8b9922 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 11 Oct 2009 19:12:24 +1000 Subject: r300g: fix blending default state + alpha separate. this makes the default state same as r300 --- src/gallium/drivers/r300/r300_state.c | 41 +++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 88cb9af6fb..3cef285dee 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -46,23 +46,46 @@ static void* r300_create_blend_state(struct pipe_context* pipe, { struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state); + { + unsigned eqRGB = state->rgb_func; + unsigned srcRGB = state->rgb_src_factor; + unsigned dstRGB = state->rgb_dst_factor; + + unsigned eqA = state->alpha_func; + unsigned srcA = state->alpha_src_factor; + unsigned dstA = state->alpha_dst_factor; + + if (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB) { + blend->alpha_blend_control = + r300_translate_blend_function(eqA) | + (r300_translate_blend_factor(srcA) << + R300_SRC_BLEND_SHIFT) | + (r300_translate_blend_factor(dstA) << + R300_DST_BLEND_SHIFT); + blend->blend_control |= R300_ALPHA_BLEND_ENABLE | + R300_SEPARATE_ALPHA_ENABLE; + } else { + blend->alpha_blend_control = R300_COMB_FCN_ADD_CLAMP | + (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT); + } + } if (state->blend_enable) { /* XXX for now, always do separate alpha... * is it faster to do it with one reg? */ - blend->blend_control = R300_ALPHA_BLEND_ENABLE | - R300_SEPARATE_ALPHA_ENABLE | - R300_READ_ENABLE | + blend->blend_control |= R300_READ_ENABLE | r300_translate_blend_function(state->rgb_func) | (r300_translate_blend_factor(state->rgb_src_factor) << R300_SRC_BLEND_SHIFT) | (r300_translate_blend_factor(state->rgb_dst_factor) << R300_DST_BLEND_SHIFT); - blend->alpha_blend_control = - r300_translate_blend_function(state->alpha_func) | - (r300_translate_blend_factor(state->alpha_src_factor) << - R300_SRC_BLEND_SHIFT) | - (r300_translate_blend_factor(state->alpha_dst_factor) << - R300_DST_BLEND_SHIFT); + } else { + blend->blend_control = + R300_COMB_FCN_ADD_CLAMP | + (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT); } /* PIPE_LOGICOP_* don't need to be translated, fortunately. */ -- cgit v1.2.3 From f096cc7dc1cdae1698eb7a340cd8c7f5ea0b1166 Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Sun, 11 Oct 2009 12:40:07 +0200 Subject: r300g: Fix fragment program constants upload on R300 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolai Hähnle --- src/gallium/drivers/r300/r300_emit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 77ce431cdc..570b4c5ef7 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -212,7 +212,7 @@ void r300_emit_fragment_program_code(struct r300_context* r300, } if (constants->Count) { - OUT_CS_ONE_REG(R300_PFS_PARAM_0_X, constants->Count * 4); + OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, constants->Count * 4); for(i = 0; i < constants->Count; ++i) { const float * data = get_shader_constant(r300, &constants->Constants[i], externals); OUT_CS(pack_float24(data[0])); -- cgit v1.2.3 From a5348d435da7d06478adc003a07e388915a8b346 Mon Sep 17 00:00:00 2001 From: Patrice Mandin Date: Mon, 12 Oct 2009 21:03:26 +0200 Subject: Add support for more 8 and 16 bits formats --- src/gallium/drivers/nv04/nv04_surface_2d.c | 11 ++++++++++- src/gallium/drivers/nv30/nv30_miptree.c | 5 +++++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c index b2ab50ee21..8c7eb367e2 100644 --- a/src/gallium/drivers/nv04/nv04_surface_2d.c +++ b/src/gallium/drivers/nv04/nv04_surface_2d.c @@ -13,10 +13,13 @@ nv04_surface_format(enum pipe_format format) { switch (format) { case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_I8_UNORM: return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8; case PIPE_FORMAT_R16_SNORM: case PIPE_FORMAT_R5G6B5_UNORM: case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_A8L8_UNORM: return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5; case PIPE_FORMAT_X8R8G8B8_UNORM: case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -36,6 +39,7 @@ nv04_rect_format(enum pipe_format format) case PIPE_FORMAT_A8_UNORM: return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_A8L8_UNORM: case PIPE_FORMAT_Z16_UNORM: return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5; case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -51,6 +55,10 @@ static INLINE int nv04_scaled_image_format(enum pipe_format format) { switch (format) { + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_I8_UNORM: + return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8; case PIPE_FORMAT_A1R5G5B5_UNORM: return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5; case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -59,6 +67,7 @@ nv04_scaled_image_format(enum pipe_format format) return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8; case PIPE_FORMAT_R5G6B5_UNORM: case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_A8L8_UNORM: return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5; default: return -1; @@ -131,7 +140,7 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, OUT_RING (chan, nv04_surface_format(dst->format) | log2i(dst->width) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT | log2i(dst->height) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT); - + BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1); OUT_RELOCo(chan, src_bo, NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index 7f8054de73..17acca61ab 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -96,6 +96,11 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_X8R8G8B8_UNORM: case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_A8L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_I8_UNORM: { if (debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE)) mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; -- cgit v1.2.3 From 05fc9cdfdfceaf7ca1db64bf1feccf649fe4c907 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 6 Oct 2009 15:30:39 -0700 Subject: r300g: Clean up texture formats. --- src/gallium/drivers/r300/r300_screen.c | 4 +--- src/gallium/drivers/r300/r300_state_inlines.h | 1 - src/gallium/drivers/r300/r300_texture.h | 13 ++++++++----- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 81d01b1320..e8d991586f 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -215,10 +215,8 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage, PIPE_TEXTURE_USAGE_PRIMARY | PIPE_TEXTURE_USAGE_SAMPLER); - /* Z buffer */ + /* Z buffer or texture */ case PIPE_FORMAT_Z16_UNORM: - return usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL; - /* Z buffer with stencil or texture */ case PIPE_FORMAT_Z24S8_UNORM: return usage & diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 88eb66b79e..d7b57e1b22 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -295,7 +295,6 @@ static INLINE uint32_t r300_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_X8R8G8B8_UNORM: case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8X8_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: return R300_COLOR_FORMAT_ARGB8888; /* XXX Not in pipe_format case PIPE_FORMAT_A32R32G32B32: diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index e5182d31b4..992dad77ab 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -43,6 +43,14 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) /* X8 */ case PIPE_FORMAT_I8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X8); + /* X16 */ + case PIPE_FORMAT_R16_UNORM: + return R300_EASY_TX_FORMAT(X, X, X, X, X16); + case PIPE_FORMAT_R16_SNORM: + return R300_EASY_TX_FORMAT(X, X, X, X, X16) | + R300_TX_FORMAT_SIGNED; + case PIPE_FORMAT_Z16_UNORM: + return R300_EASY_TX_FORMAT(X, X, X, X, X16); /* W8Z8Y8X8 */ case PIPE_FORMAT_A8R8G8B8_UNORM: return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); @@ -76,11 +84,6 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) /* W24_FP */ case PIPE_FORMAT_Z24S8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, W24_FP); - /* Z5_Y6_X5 */ - case PIPE_FORMAT_R16_SNORM: - return R300_EASY_TX_FORMAT(X, X, X, X, Z5Y6X5); - case PIPE_FORMAT_Z16_UNORM: - return R300_EASY_TX_FORMAT(X, X, X, X, X16); default: debug_printf("r300: Implementation error: " "Got unsupported texture format %s in %s\n", -- cgit v1.2.3 From 36ccdf09b8483305c7fa1366de9df2dea2fd6985 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 6 Oct 2009 16:00:27 -0700 Subject: r300g: Prevent multiple-use textures from getting incorrectly approved. --- src/gallium/drivers/r300/r300_screen.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index e8d991586f..7d154576e0 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -182,16 +182,19 @@ static float r300_get_paramf(struct pipe_screen* pscreen, int param) static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage, boolean is_r500) { + uint32_t retval = 0; + switch (format) { /* Supported formats. */ /* Colorbuffer */ case PIPE_FORMAT_A4R4G4B4_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: case PIPE_FORMAT_A1R5G5B5_UNORM: - return usage & + retval = usage & (PIPE_TEXTURE_USAGE_RENDER_TARGET | PIPE_TEXTURE_USAGE_DISPLAY_TARGET | PIPE_TEXTURE_USAGE_PRIMARY); + break; /* Texture */ case PIPE_FORMAT_A8R8G8B8_SRGB: @@ -201,7 +204,8 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage, case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: case PIPE_FORMAT_YCBCR: - return usage & PIPE_TEXTURE_USAGE_SAMPLER; + retval = usage & PIPE_TEXTURE_USAGE_SAMPLER; + break; /* Colorbuffer or texture */ case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -209,19 +213,21 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage, case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8X8_UNORM: case PIPE_FORMAT_I8_UNORM: - return usage & + retval = usage & (PIPE_TEXTURE_USAGE_RENDER_TARGET | PIPE_TEXTURE_USAGE_DISPLAY_TARGET | PIPE_TEXTURE_USAGE_PRIMARY | PIPE_TEXTURE_USAGE_SAMPLER); + break; /* Z buffer or texture */ case PIPE_FORMAT_Z16_UNORM: /* Z buffer with stencil or texture */ case PIPE_FORMAT_Z24S8_UNORM: - return usage & + retval = usage & (PIPE_TEXTURE_USAGE_DEPTH_STENCIL | PIPE_TEXTURE_USAGE_SAMPLER); + break; /* Definitely unsupported formats. */ /* Non-usable Z buffer/stencil formats. */ @@ -259,7 +265,13 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage, break; } - return FALSE; + /* If usage was a mask that contained multiple bits, and not all of them + * are supported, this will catch that and return FALSE. + * e.g. usage = 2 | 4; retval = 4; (retval >= usage) == FALSE + * + * This also returns FALSE for any unknown formats. + */ + return (retval >= usage); } /* XXX moar targets */ -- cgit v1.2.3 From 95a05621eb750c07e5c7a5eb64b8458d202192b3 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Mon, 12 Oct 2009 20:47:00 -0700 Subject: r300g: Fallback on surfaces we can't render to or from. Still not sure why st keeps handing down things we can't render to. --- src/gallium/drivers/r300/r300_surface.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c index cc6288cb51..4d0ccd6b0f 100644 --- a/src/gallium/drivers/r300/r300_surface.c +++ b/src/gallium/drivers/r300/r300_surface.c @@ -113,9 +113,10 @@ static void r300_surface_fill(struct pipe_context* pipe, dest, x, y, w, h, pixpitch, color); /* Fallback? */ - if (FALSE) { + if (!pipe->screen->is_format_supported(pipe->screen, dest->format, + PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) { fallback: - debug_printf("r300: Falling back on surface clear..."); + debug_printf("r300: Falling back on surface clear...\n"); util_surface_fill(pipe, dest, x, y, w, h, color); return; } @@ -245,10 +246,18 @@ static void r300_surface_copy(struct pipe_context* pipe, if ((srctex->buffer == desttex->buffer) && ((destx < srcx + w) || (srcx < destx + w)) && ((desty < srcy + h) || (srcy < desty + h))) { + goto fallback; + } + + if (!pipe->screen->is_format_supported(pipe->screen, src->format, + PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_SAMPLER, 0) || + !pipe->screen->is_format_supported(pipe->screen, dest->format, + PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) { fallback: debug_printf("r300: Falling back on surface_copy\n"); util_surface_copy(pipe, FALSE, dest, destx, desty, src, srcx, srcy, w, h); + return; } /* Add our target BOs to the list. */ -- cgit v1.2.3 From a4a4f7abc2137754646a811007696321c7714f1b Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Mon, 12 Oct 2009 20:55:57 -0700 Subject: r300g: Surface debug. It gets really annoying watching r300g tell me how it's filling surfaces. Or falling back during filling surfaces. --- src/gallium/drivers/r300/r300_context.h | 1 + src/gallium/drivers/r300/r300_debug.c | 1 + src/gallium/drivers/r300/r300_surface.c | 12 ++++++------ 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 52b1c9a6b2..a817459ee3 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -312,6 +312,7 @@ void r300_init_surface_functions(struct r300_context* r300); #define DBG_VP 0x0000004 #define DBG_CS 0x0000008 #define DBG_DRAW 0x0000010 +#define DBG_SURF 0x0000020 /*@}*/ static INLINE boolean DBG_ON(struct r300_context * ctx, unsigned flags) diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 85d69c0747..4a55a0c5b1 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -37,6 +37,7 @@ static struct debug_option debug_options[] = { { "vp", DBG_VP, "Vertex program handling" }, { "cs", DBG_CS, "Command submissions" }, { "draw", DBG_DRAW, "Draw and emit" }, + { "surf", DBG_SURF, "Surface drawing" }, { "all", ~0, "Convenience option that enables all debug flags" }, diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c index 4d0ccd6b0f..a263b26512 100644 --- a/src/gallium/drivers/r300/r300_surface.c +++ b/src/gallium/drivers/r300/r300_surface.c @@ -108,7 +108,7 @@ static void r300_surface_fill(struct pipe_context* pipe, r = (float)((color >> 16) & 0xff) / 255.0f; g = (float)((color >> 8) & 0xff) / 255.0f; b = (float)((color >> 0) & 0xff) / 255.0f; - debug_printf("r300: Filling surface %p at (%d,%d)," + DBG(r300, DBG_SURF, "r300: Filling surface %p at (%d,%d)," " dimensions %dx%d (pixel pitch %d), color 0x%x\n", dest, x, y, w, h, pixpitch, color); @@ -116,7 +116,7 @@ static void r300_surface_fill(struct pipe_context* pipe, if (!pipe->screen->is_format_supported(pipe->screen, dest->format, PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) { fallback: - debug_printf("r300: Falling back on surface clear...\n"); + DBG(r300, DBG_SURF, "r300: Falling back on surface clear...\n"); util_surface_fill(pipe, dest, x, y, w, h, color); return; } @@ -131,7 +131,7 @@ validate: if (!r300->winsys->validate(r300->winsys)) { r300->context.flush(&r300->context, 0, NULL); if (invalid) { - debug_printf("r300: Stuck in validation loop, gonna fallback."); + DBG(r300, DBG_SURF, "r300: Stuck in validation loop, gonna fallback."); goto fallback; } invalid = TRUE; @@ -239,7 +239,7 @@ static void r300_surface_copy(struct pipe_context* pipe, float fsrcx = srcx, fsrcy = srcy, fdestx = destx, fdesty = desty; CS_LOCALS(r300); - debug_printf("r300: Copying surface %p at (%d,%d) to %p at (%d, %d)," + DBG(r300, DBG_SURF, "r300: Copying surface %p at (%d,%d) to %p at (%d, %d)," " dimensions %dx%d (pixel pitch %d)\n", src, srcx, srcy, dest, destx, desty, w, h, pixpitch); @@ -254,7 +254,7 @@ static void r300_surface_copy(struct pipe_context* pipe, !pipe->screen->is_format_supported(pipe->screen, dest->format, PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) { fallback: - debug_printf("r300: Falling back on surface_copy\n"); + DBG(r300, DBG_SURF, "r300: Falling back on surface_copy\n"); util_surface_copy(pipe, FALSE, dest, destx, desty, src, srcx, srcy, w, h); return; @@ -275,7 +275,7 @@ validate: if (!r300->winsys->validate(r300->winsys)) { r300->context.flush(&r300->context, 0, NULL); if (invalid) { - debug_printf("r300: Stuck in validation loop, gonna fallback."); + DBG(r300, DBG_SURF, "r300: Stuck in validation loop, gonna fallback."); goto fallback; } invalid = TRUE; -- cgit v1.2.3 From ca8cafda0b996167647d724ea3da3ec568a9e42f Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Mon, 12 Oct 2009 21:26:46 -0700 Subject: r300g: More debug flags. --- src/gallium/drivers/r300/r300_context.h | 2 ++ src/gallium/drivers/r300/r300_debug.c | 2 ++ src/gallium/drivers/r300/r300_surface.c | 9 +++++---- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index a817459ee3..086633f732 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -313,6 +313,8 @@ void r300_init_surface_functions(struct r300_context* r300); #define DBG_CS 0x0000008 #define DBG_DRAW 0x0000010 #define DBG_SURF 0x0000020 +#define DBG_TEX 0x0000040 +#define DBG_FALL 0x0000080 /*@}*/ static INLINE boolean DBG_ON(struct r300_context * ctx, unsigned flags) diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 4a55a0c5b1..bfd4ab018a 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -38,6 +38,8 @@ static struct debug_option debug_options[] = { { "cs", DBG_CS, "Command submissions" }, { "draw", DBG_DRAW, "Draw and emit" }, { "surf", DBG_SURF, "Surface drawing" }, + { "tex", DBG_TEX, "Textures" }, + { "fall", DBG_FALL, "Fallbacks" }, { "all", ~0, "Convenience option that enables all debug flags" }, diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c index a263b26512..d72e734ff0 100644 --- a/src/gallium/drivers/r300/r300_surface.c +++ b/src/gallium/drivers/r300/r300_surface.c @@ -116,7 +116,8 @@ static void r300_surface_fill(struct pipe_context* pipe, if (!pipe->screen->is_format_supported(pipe->screen, dest->format, PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) { fallback: - DBG(r300, DBG_SURF, "r300: Falling back on surface clear...\n"); + DBG(r300, DBG_SURF | DBG_FALL, + "r300: Falling back on surface clear...\n"); util_surface_fill(pipe, dest, x, y, w, h, color); return; } @@ -131,7 +132,7 @@ validate: if (!r300->winsys->validate(r300->winsys)) { r300->context.flush(&r300->context, 0, NULL); if (invalid) { - DBG(r300, DBG_SURF, "r300: Stuck in validation loop, gonna fallback."); + DBG(r300, DBG_SURF | DBG_FALL, "r300: Stuck in validation loop."); goto fallback; } invalid = TRUE; @@ -254,7 +255,7 @@ static void r300_surface_copy(struct pipe_context* pipe, !pipe->screen->is_format_supported(pipe->screen, dest->format, PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) { fallback: - DBG(r300, DBG_SURF, "r300: Falling back on surface_copy\n"); + DBG(r300, DBG_SURF | DBG_FALL, "r300: Falling back on surface_copy\n"); util_surface_copy(pipe, FALSE, dest, destx, desty, src, srcx, srcy, w, h); return; @@ -275,7 +276,7 @@ validate: if (!r300->winsys->validate(r300->winsys)) { r300->context.flush(&r300->context, 0, NULL); if (invalid) { - DBG(r300, DBG_SURF, "r300: Stuck in validation loop, gonna fallback."); + DBG(r300, DBG_SURF | DBG_FALL, "r300: Stuck in validation loop."); goto fallback; } invalid = TRUE; -- cgit v1.2.3 From cf33aaf8fe2b1d22e394f431735b76f3ab04b854 Mon Sep 17 00:00:00 2001 From: Patrice Mandin Date: Tue, 13 Oct 2009 22:53:32 +0200 Subject: nouveau: nv30: use texture width,height for render target dimensions --- src/gallium/drivers/nv30/nv30_state_fb.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c index 44b6a74715..2729dcec7c 100644 --- a/src/gallium/drivers/nv30/nv30_state_fb.c +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -40,10 +40,9 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) for (i = 1; i < fb->nr_cbufs; i++) assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); - /* FIXME: NV34TCL_RT_FORMAT_LOG2_[WIDTH/HEIGHT] */ rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | - log2i(fb->width) << 16 /*NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT*/ | - log2i(fb->height) << 24 /*NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT*/; + (log2i(rt[0]->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | + (log2i(rt[0]->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); } else rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; -- cgit v1.2.3 From 23c0c820e2767324546d450d2a7aa7bf1f70c36f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 14 Oct 2009 11:42:05 +1000 Subject: r300g: fix case where texture unit 0 is disabled but unit 1 is enabled. to reproduce, start texrect, disable 0 texture in menu. Signed-off-by: Dave Airlie --- src/gallium/drivers/r300/r300_emit.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 570b4c5ef7..99deb50400 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -683,7 +683,8 @@ validate: /* ...textures... */ for (i = 0; i < r300->texture_count; i++) { tex = r300->textures[i]; - assert(tex && tex->buffer && "texture is marked, but NULL!"); + if (!tex) + continue; if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) { r300->context.flush(&r300->context, 0, NULL); @@ -770,12 +771,13 @@ validate: if (r300->dirty_state & (R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) { for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) { - if (r300->dirty_state & - ((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i))) { - r300_emit_texture(r300, - r300->sampler_states[i], - r300->textures[i], - i); + if (r300->dirty_state & + ((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i))) { + if (r300->textures[i]) + r300_emit_texture(r300, + r300->sampler_states[i], + r300->textures[i], + i); r300->dirty_state &= ~((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i)); dirty_tex++; -- cgit v1.2.3 From 210481ae16e966865dcf9f1fd5f5dfabf4dc28bc Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 14 Oct 2009 15:13:25 +1000 Subject: r300g: attempt to make bo space check sane. This attempts to make r300g do proper bo space checking as opposed to whatever it was doing now. Signed-off-by: Dave Airlie --- src/gallium/drivers/r300/r300_context.c | 9 +++++++++ src/gallium/drivers/r300/r300_emit.c | 3 +++ src/gallium/drivers/r300/r300_winsys.h | 6 ++++++ src/gallium/winsys/drm/radeon/core/radeon_r300.c | 23 ++++++++++++++++++++--- 4 files changed, 38 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 9cc455135d..e6bc80e48f 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -136,6 +136,13 @@ r300_is_buffer_referenced( struct pipe_context *pipe, return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; } +static void r300_flush_cb(void *data) +{ + struct r300_context* const cs_context_copy = data; + + cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL); +} + struct pipe_context* r300_create_context(struct pipe_screen* screen, struct r300_winsys* r300_winsys) { @@ -190,6 +197,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_state_functions(r300); r300_emit_invariant_state(r300); + + r300->winsys->set_flush_cb(r300->winsys, r300_flush_cb, r300); r300->dirty_state = R300_NEW_KITCHEN_SINK; r300->dirty_hw++; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 99deb50400..64748ad8f8 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -658,6 +658,9 @@ void r300_emit_dirty_state(struct r300_context* r300) r300_update_derived_state(r300); + /* Clean out BOs. */ + r300->winsys->reset_bos(r300->winsys); + /* XXX check size */ validate: /* Color buffers... */ diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index f18ad75a47..540f8eca92 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -92,6 +92,12 @@ struct r300_winsys { /* Flush the CS. */ void (*flush_cs)(struct r300_winsys* winsys); + + /* winsys flush - callback from winsys when flush required */ + void (*set_flush_cb)(struct r300_winsys *winsys, + void (*flush_cb)(void *), void *data); + + void (*reset_bos)(struct r300_winsys *winsys); }; struct pipe_context* r300_create_context(struct pipe_screen* screen, diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c index d2d84f1a8f..3587892e00 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c @@ -22,6 +22,17 @@ #include "radeon_r300.h" +static void radeon_r300_set_flush_cb(struct r300_winsys *winsys, + void (*flush_cb)(void *), + void *data) +{ + struct radeon_winsys_priv* priv = + (struct radeon_winsys_priv*)winsys->radeon_winsys; + + radeon_cs_space_set_flush(priv->cs, flush_cb, + data); +} + static boolean radeon_r300_add_buffer(struct r300_winsys* winsys, struct pipe_buffer* pbuffer, uint32_t rd, @@ -95,6 +106,13 @@ static void radeon_r300_write_cs_reloc(struct r300_winsys* winsys, } } +static void radeon_r300_reset_bos(struct r300_winsys *winsys) +{ + struct radeon_winsys_priv* priv = + (struct radeon_winsys_priv*)winsys->radeon_winsys; + radeon_cs_space_reset_bos(priv->cs); +} + static void radeon_r300_end_cs(struct r300_winsys* winsys, const char* file, const char* function, @@ -119,9 +137,6 @@ static void radeon_r300_flush_cs(struct r300_winsys* winsys) radeon_cs_print(priv->cs, stderr); } - /* Clean out BOs. */ - radeon_cs_space_reset_bos(priv->cs); - /* Reset CS. * Someday, when we care about performance, we should really find a way * to rotate between two or three CS objects so that the GPU can be @@ -203,6 +218,8 @@ radeon_create_r300_winsys(int fd, struct radeon_winsys* old_winsys) winsys->write_cs_reloc = radeon_r300_write_cs_reloc; winsys->end_cs = radeon_r300_end_cs; winsys->flush_cs = radeon_r300_flush_cs; + winsys->reset_bos = radeon_r300_reset_bos; + winsys->set_flush_cb = radeon_r300_set_flush_cb; memcpy(winsys, old_winsys, sizeof(struct radeon_winsys)); -- cgit v1.2.3 From c1bee7bdea470b6b5dcebef9aacc8fe4feca687c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 14 Oct 2009 16:53:12 +1000 Subject: r300g: fixup arb occulsion query support. 1: add rv530 support - num z pipes cap - add proper start/finish query options for rv530 2: convert to use linked list properly. 3: add flushing required check. 4: initial Z top disabling support. TODO: make it actually work on my rv530. --- src/gallium/drivers/r300/r300_chipset.h | 2 + src/gallium/drivers/r300/r300_context.c | 10 ++-- src/gallium/drivers/r300/r300_context.h | 4 +- src/gallium/drivers/r300/r300_emit.c | 69 ++++++++++++++++++++---- src/gallium/drivers/r300/r300_flush.c | 10 +++- src/gallium/drivers/r300/r300_query.c | 42 ++++++++------- src/gallium/drivers/r300/r300_reg.h | 15 ++++-- src/gallium/drivers/r300/r300_screen.c | 1 + src/gallium/drivers/r300/r300_state.c | 11 ++-- src/gallium/drivers/r300/r300_winsys.h | 3 ++ src/gallium/winsys/drm/radeon/core/radeon_r300.c | 10 ++++ 11 files changed, 134 insertions(+), 43 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 322d4a57e4..f015a4243d 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -36,6 +36,8 @@ struct r300_capabilities { int num_vert_fpus; /* The number of fragment pipes */ int num_frag_pipes; + /* The number of z pipes */ + int num_z_pipes; /* Whether or not TCL is physically present */ boolean has_tcl; /* Whether or not this is an RV515 or newer; R500s have many differences diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index e6bc80e48f..7a9c098e30 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -99,11 +99,9 @@ static void r300_destroy_context(struct pipe_context* context) { context->screen->buffer_destroy(r300->oqbo); /* If there are any queries pending or not destroyed, remove them now. */ - if (r300->query_list) { - foreach_s(query, temp, r300->query_list) { - remove_from_list(query); - FREE(query); - } + foreach_s(query, temp, &r300->query_list) { + remove_from_list(query); + FREE(query); } FREE(r300->blend_color_state); @@ -201,6 +199,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->winsys->set_flush_cb(r300->winsys, r300_flush_cb, r300); r300->dirty_state = R300_NEW_KITCHEN_SINK; r300->dirty_hw++; - + make_empty_list(&r300->query_list); return &r300->context; } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 086633f732..9b0094b63c 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -172,6 +172,8 @@ struct r300_query { unsigned int count; /* The offset of this query into the query buffer, in bytes. */ unsigned offset; + /* if we've flushed the query */ + boolean flushed; /* Linked list members. */ struct r300_query* prev; struct r300_query* next; @@ -237,7 +239,7 @@ struct r300_context { /* Occlusion query buffer. */ struct pipe_buffer* oqbo; /* Query list. */ - struct r300_query* query_list; + struct r300_query query_list; /* Various CSO state objects. */ /* Blend state. */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 64748ad8f8..3d28249c16 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -323,28 +323,30 @@ void r300_emit_fb_state(struct r300_context* r300, void r300_emit_query_begin(struct r300_context* r300, struct r300_query* query) { + struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); /* XXX This will almost certainly not return good results * for overlapping queries. */ - BEGIN_CS(2); + BEGIN_CS(4); + if (caps->family == CHIP_FAMILY_RV530) { + OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + } else { + OUT_CS_REG(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL); + } OUT_CS_REG(R300_ZB_ZPASS_DATA, 0); END_CS; } -void r300_emit_query_end(struct r300_context* r300, - struct r300_query* query) + +static void r300_emit_query_finish(struct r300_context *r300, + struct r300_query *query) { struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); - if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, - 0, RADEON_GEM_DOMAIN_GTT)) { - debug_printf("r300: There wasn't room for the OQ buffer!?" - " Oh noes!\n"); - } - assert(caps->num_frag_pipes); + BEGIN_CS(6 * caps->num_frag_pipes + 2); /* I'm not so sure I like this switch, but it's hard to be elegant * when there's so many special cases... @@ -394,6 +396,55 @@ void r300_emit_query_end(struct r300_context* r300, } +static void rv530_emit_query_single(struct r300_context *r300, + struct r300_query *query) +{ + CS_LOCALS(r300); + + BEGIN_CS(8); + OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); + OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_CS_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + END_CS; +} + +static void rv530_emit_query_double(struct r300_context *r300, + struct r300_query *query) +{ + CS_LOCALS(r300); + + BEGIN_CS(14); + OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); + OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_CS_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); + OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_CS_RELOC(r300->oqbo, query->offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + END_CS; +} + +void r300_emit_query_end(struct r300_context* r300, + struct r300_query* query) +{ + struct r300_capabilities *caps = r300_screen(r300->context.screen)->caps; + + if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, + 0, RADEON_GEM_DOMAIN_GTT)) { + debug_printf("r300: There wasn't room for the OQ buffer!?" + " Oh noes!\n"); + } + + if (caps->family == CHIP_FAMILY_RV530) { + if (caps->num_z_pipes == 2) + rv530_emit_query_double(r300, query); + else + rv530_emit_query_single(r300, query); + } else + r300_emit_query_finish(r300, query); +} + void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs) { CS_LOCALS(r300); diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 0dff1c6f4f..a8ab0d7212 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -26,9 +26,10 @@ static void r300_flush(struct pipe_context* pipe, unsigned flags, struct pipe_fence_handle** fence) { - struct r300_context* r300 = r300_context(pipe); - CS_LOCALS(r300); + struct r300_context *r300 = r300_context(pipe); + struct r300_query *query; + CS_LOCALS(r300); /* We probably need to flush Draw, but we may have been called from * within Draw. This feels kludgy, but it might be the best thing. */ if (!r300->draw->flushing) { @@ -41,8 +42,13 @@ static void r300_flush(struct pipe_context* pipe, r300->dirty_state = R300_NEW_KITCHEN_SINK; r300->dirty_hw = 0; } + /* reset flushed query */ + foreach(query, &r300->query_list) { + query->flushed = TRUE; + } } + void r300_init_flush_functions(struct r300_context* r300) { r300->context.flush = r300_flush; diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 2880d34877..b01313648b 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -24,13 +24,13 @@ #include "r300_emit.h" -static struct pipe_query* r300_create_query(struct pipe_context* pipe, +static struct pipe_query *r300_create_query(struct pipe_context *pipe, unsigned query_type) { - struct r300_context* r300 = r300_context(pipe); - struct r300_screen* r300screen = r300_screen(r300->context.screen); - unsigned query_size = r300screen->caps->num_frag_pipes * 4; - struct r300_query* q, * qptr; + struct r300_context *r300 = r300_context(pipe); + struct r300_screen *r300screen = r300_screen(r300->context.screen); + unsigned query_size; + struct r300_query *q, *qptr; q = CALLOC_STRUCT(r300_query); @@ -39,13 +39,16 @@ static struct pipe_query* r300_create_query(struct pipe_context* pipe, q->active = FALSE; - if (!r300->query_list) { - r300->query_list = q; - } else if (!is_empty_list(r300->query_list)) { - qptr = last_elem(r300->query_list); + if (r300screen->caps->family == CHIP_FAMILY_RV530) + query_size = r300screen->caps->num_z_pipes * sizeof(uint32_t); + else + query_size = r300screen->caps->num_frag_pipes * sizeof(uint32_t); + + if (!is_empty_list(&r300->query_list)) { + qptr = last_elem(&r300->query_list); q->offset = qptr->offset + query_size; - insert_at_tail(r300->query_list, q); } + insert_at_tail(&r300->query_list, q); /* XXX */ if (q->offset >= 4096) { @@ -74,9 +77,10 @@ static void r300_begin_query(struct pipe_context* pipe, map = pipe->screen->buffer_map(pipe->screen, r300->oqbo, PIPE_BUFFER_USAGE_CPU_WRITE); map += q->offset / 4; - *map = ~0; + *map = ~0U; pipe->screen->buffer_unmap(pipe->screen, r300->oqbo); + q->flushed = FALSE; r300_emit_dirty_state(r300); r300_emit_query_begin(r300, q); } @@ -98,28 +102,30 @@ static boolean r300_get_query_result(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); struct r300_screen* r300screen = r300_screen(r300->context.screen); - struct r300_query* q = (struct r300_query*)query; + struct r300_query *q = (struct r300_query*)query; unsigned flags = PIPE_BUFFER_USAGE_CPU_READ; uint32_t* map; - uint32_t temp; + uint32_t temp = 0; unsigned i; - if (wait) { + if (q->flushed == FALSE) pipe->flush(pipe, 0, NULL); - } else { + if (!wait) { flags |= PIPE_BUFFER_USAGE_DONTBLOCK; } map = pipe->screen->buffer_map(pipe->screen, r300->oqbo, flags); + if (!map) + return FALSE; map += q->offset / 4; for (i = 0; i < r300screen->caps->num_frag_pipes; i++) { - if (*map == ~0) { + if (*map == ~0U) { /* Looks like our results aren't ready yet. */ if (wait) { debug_printf("r300: Despite waiting, OQ results haven't" " come in yet.\n"); } - temp = ~0; + temp = ~0U; break; } temp += *map; @@ -127,7 +133,7 @@ static boolean r300_get_query_result(struct pipe_context* pipe, } pipe->screen->buffer_unmap(pipe->screen, r300->oqbo); - if (temp == ~0) { + if (temp == ~0U) { /* Our results haven't been written yet... */ return FALSE; } diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 3abff5db62..ae94bb9b9f 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -1172,6 +1172,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* SU Depth Offset value */ #define R300_SU_DEPTH_OFFSET 0x42c4 +#define R300_SU_REG_DEST 0x42c8 +# define R300_RASTER_PIPE_SELECT_0 (1 << 0) +# define R300_RASTER_PIPE_SELECT_1 (1 << 1) +# define R300_RASTER_PIPE_SELECT_2 (1 << 2) +# define R300_RASTER_PIPE_SELECT_3 (1 << 3) +# define R300_RASTER_PIPE_SELECT_ALL 0xf + /* BEGIN: Rasterization / Interpolators - many guesses */ @@ -2095,6 +2102,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R500_FG_ALPHA_VALUE 0x4be0 # define R500_FG_ALPHA_VALUE_MASK 0x0000ffff +#define RV530_FG_ZBREG_DEST 0x4be8 +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_0 (1 << 0) +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_1 (1 << 1) +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL (3 << 0) /* gap */ /* Fragment program parameters in 7.16 floating point */ @@ -3313,10 +3324,6 @@ enum { #define R200_3D_DRAW_IMMD_2 0xC0003500 -/* XXX Oh look, stuff not brought over from docs yet */ - -#define R300_SU_REG_DEST 0x42C8 - #endif /* _R300_REG_H */ /* *INDENT-ON* */ diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 7d154576e0..5381651c77 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -395,6 +395,7 @@ struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys) caps->pci_id = r300_winsys->pci_id; caps->num_frag_pipes = r300_winsys->gb_pipes; + caps->num_z_pipes = r300_winsys->z_pipes; r300_parse_chipset(caps); diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 3cef285dee..d8533ac168 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -190,6 +190,7 @@ static void* r300_create_dsa_state(struct pipe_context* pipe, const struct pipe_depth_stencil_alpha_state* state) { + struct r300_context* r300 = r300_context(pipe); struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state); /* Depth test setup. */ @@ -247,11 +248,15 @@ static void* R300_FG_ALPHA_FUNC_ENABLE; dsa->alpha_reference = CLAMP(state->alpha.ref_value * 1023.0f, 0, 1023); - } else { - /* XXX need to fix this to be dynamically set - dsa->z_buffer_top = R300_ZTOP_ENABLE; */ } + dsa->z_buffer_top = R300_ZTOP_ENABLE; + /* XXX TODO: add frag prog rules for ztop disable */ + if (state->alpha.enabled && state->alpha.func != PIPE_FUNC_ALWAYS) + dsa->z_buffer_top = R300_ZTOP_DISABLE; + if (!is_empty_list(&r300->query_list)) + dsa->z_buffer_top = R300_ZTOP_DISABLE; + return (void*)dsa; } diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 540f8eca92..864a6146b2 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -48,6 +48,9 @@ struct r300_winsys { /* GB pipe count */ uint32_t gb_pipes; + /* Z pipe count (rv530 only) */ + uint32_t z_pipes; + /* GART size. */ uint32_t gart_size; diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c index 3587892e00..7ea5d1fb4e 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c @@ -164,6 +164,16 @@ static void do_ioctls(struct r300_winsys* winsys, int fd) } winsys->gb_pipes = target; + /* get Z pipes */ + info.request = RADEON_INFO_NUM_Z_PIPES; + retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); + if (retval) { + fprintf(stderr, "%s: Failed to get GB pipe count, " + "error number %d\n", __FUNCTION__, retval); + exit(1); + } + winsys->z_pipes = target; + /* Then, get PCI ID */ info.request = RADEON_INFO_DEVICE_ID; retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); -- cgit v1.2.3 From 47791697ab6eb6965f0ba8ba3f20373b3753ca2a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 14 Oct 2009 17:14:43 +1000 Subject: r300g: convert query to a state for emitting. This means we don't emit in the begin query but when we have to flush. Similiar to classic. TODO: make query object actually work. --- src/gallium/drivers/r300/r300_context.h | 2 ++ src/gallium/drivers/r300/r300_emit.c | 15 ++++++++++++--- src/gallium/drivers/r300/r300_query.c | 8 ++++++-- 3 files changed, 20 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 9b0094b63c..3a01869ba1 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -141,6 +141,7 @@ struct r300_viewport_state { #define R300_NEW_VERTEX_FORMAT 0x04000000 #define R300_NEW_VERTEX_SHADER 0x08000000 #define R300_NEW_VIEWPORT 0x10000000 +#define R300_NEW_QUERY 0x20000000 #define R300_NEW_KITCHEN_SINK 0x1fffffff /* The next several objects are not pure Radeon state; they inherit from @@ -239,6 +240,7 @@ struct r300_context { /* Occlusion query buffer. */ struct pipe_buffer* oqbo; /* Query list. */ + struct r300_query *query_current; struct r300_query query_list; /* Various CSO state objects. */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 3d28249c16..babbe0dd74 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -320,12 +320,16 @@ void r300_emit_fb_state(struct r300_context* r300, END_CS; } -void r300_emit_query_begin(struct r300_context* r300, - struct r300_query* query) +void r300_emit_query_start(struct r300_context *r300) + { - struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; + struct r300_capabilities *caps = r300_screen(r300->context.screen)->caps; + struct r300_query *query = r300->query_current; CS_LOCALS(r300); + if (!query) + return; + /* XXX This will almost certainly not return good results * for overlapping queries. */ BEGIN_CS(4); @@ -772,6 +776,11 @@ validate: goto validate; } + if (r300->dirty_state & R300_NEW_QUERY) { + r300_emit_query_start(r300); + r300->dirty_state &= ~R300_NEW_QUERY; + } + if (r300->dirty_state & R300_NEW_BLEND) { r300_emit_blend_state(r300, r300->blend_state); r300->dirty_state &= ~R300_NEW_BLEND; diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index b01313648b..fb4340ff3d 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -74,6 +74,8 @@ static void r300_begin_query(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); struct r300_query* q = (struct r300_query*)query; + assert(r300->query_current == NULL); + map = pipe->screen->buffer_map(pipe->screen, r300->oqbo, PIPE_BUFFER_USAGE_CPU_WRITE); map += q->offset / 4; @@ -81,8 +83,8 @@ static void r300_begin_query(struct pipe_context* pipe, pipe->screen->buffer_unmap(pipe->screen, r300->oqbo); q->flushed = FALSE; - r300_emit_dirty_state(r300); - r300_emit_query_begin(r300, q); + r300->query_current = q; + r300->dirty_state |= R300_NEW_QUERY; } static void r300_end_query(struct pipe_context* pipe, @@ -93,6 +95,8 @@ static void r300_end_query(struct pipe_context* pipe, r300_emit_dirty_state(r300); r300_emit_query_end(r300, q); + + r300->query_current = NULL; } static boolean r300_get_query_result(struct pipe_context* pipe, -- cgit v1.2.3 From 51d1cf55da6f8b8a215814589a189b6e5e537fe5 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 14 Oct 2009 17:44:19 +1000 Subject: r300g: port over last parts of oq support. Add support for begin/end in each CS so we don't get any other processes rendering in between. TODO: blame other parts of driver for this not working like Z. --- src/gallium/drivers/r300/r300_context.h | 2 ++ src/gallium/drivers/r300/r300_emit.c | 11 +++++++++-- src/gallium/drivers/r300/r300_emit.h | 3 +-- src/gallium/drivers/r300/r300_flush.c | 2 ++ src/gallium/drivers/r300/r300_query.c | 7 ++----- 5 files changed, 16 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 3a01869ba1..7826ed1452 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -175,6 +175,8 @@ struct r300_query { unsigned offset; /* if we've flushed the query */ boolean flushed; + /* if begin has been emitted */ + boolean begin_emitted; /* Linked list members. */ struct r300_query* prev; struct r300_query* next; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index babbe0dd74..6e616cd5b2 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -340,6 +340,7 @@ void r300_emit_query_start(struct r300_context *r300) } OUT_CS_REG(R300_ZB_ZPASS_DATA, 0); END_CS; + query->begin_emitted = TRUE; } @@ -429,10 +430,16 @@ static void rv530_emit_query_double(struct r300_context *r300, END_CS; } -void r300_emit_query_end(struct r300_context* r300, - struct r300_query* query) +void r300_emit_query_end(struct r300_context* r300) { struct r300_capabilities *caps = r300_screen(r300->context.screen)->caps; + struct r300_query *query = r300->query_current; + + if (!query) + return; + + if (query->begin_emitted == FALSE) + return; if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, 0, RADEON_GEM_DOMAIN_GTT)) { diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index c4002b8e5d..b62aa9fec5 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -58,8 +58,7 @@ void r300_emit_fb_state(struct r300_context* r300, void r300_emit_query_begin(struct r300_context* r300, struct r300_query* query); -void r300_emit_query_end(struct r300_context* r300, - struct r300_query* query); +void r300_emit_query_end(struct r300_context* r300); void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs); diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index a8ab0d7212..241ea71d6b 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -36,6 +36,8 @@ static void r300_flush(struct pipe_context* pipe, draw_flush(r300->draw); } + r300_emit_query_end(r300); + if (r300->dirty_hw) { FLUSH_CS; r300_emit_invariant_state(r300); diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index fb4340ff3d..2b0fbfb7d2 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -88,14 +88,11 @@ static void r300_begin_query(struct pipe_context* pipe, } static void r300_end_query(struct pipe_context* pipe, - struct pipe_query* query) + struct pipe_query* query) { struct r300_context* r300 = r300_context(pipe); - struct r300_query* q = (struct r300_query*)query; - - r300_emit_dirty_state(r300); - r300_emit_query_end(r300, q); + r300_emit_query_end(r300); r300->query_current = NULL; } -- cgit v1.2.3 From ce5cba040c34a1a70186c29a5055e9be3c85a54a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 14 Oct 2009 18:05:14 +1000 Subject: r300g: add one more ZTOP disable bit. Still missing the frag uses kill support, hopefully nha can point that out. --- src/gallium/drivers/r300/r300_fs.h | 6 ++++++ src/gallium/drivers/r300/r300_state.c | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h index 967e9f697e..04453274aa 100644 --- a/src/gallium/drivers/r300/r300_fs.h +++ b/src/gallium/drivers/r300/r300_fs.h @@ -48,4 +48,10 @@ struct r300_fragment_shader { void r300_translate_fragment_shader(struct r300_context* r300, struct r300_fragment_shader* fs); +static inline boolean r300_fragment_shader_writes_depth(struct r300_fragment_shader *fs) +{ + if (!fs) + return FALSE; + return (fs->code.writes_depth) ? TRUE : FALSE; +} #endif /* R300_FS_H */ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index d8533ac168..95e2943baa 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -252,9 +252,11 @@ static void* dsa->z_buffer_top = R300_ZTOP_ENABLE; /* XXX TODO: add frag prog rules for ztop disable */ + if (r300_fragment_shader_writes_depth(r300->fs)) + dsa->z_buffer_top = R300_ZTOP_DISABLE; if (state->alpha.enabled && state->alpha.func != PIPE_FUNC_ALWAYS) dsa->z_buffer_top = R300_ZTOP_DISABLE; - if (!is_empty_list(&r300->query_list)) + if (r300->query_current) dsa->z_buffer_top = R300_ZTOP_DISABLE; return (void*)dsa; -- cgit v1.2.3 From fa581580b18d530b849299c38604ab0804290e49 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 14 Oct 2009 18:24:05 +1000 Subject: r300g: add QUERY to KITCHEN_SINK I missed this, thanks to Corbin for pointing it out. --- src/gallium/drivers/r300/r300_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 7826ed1452..d2e8875503 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -142,7 +142,7 @@ struct r300_viewport_state { #define R300_NEW_VERTEX_SHADER 0x08000000 #define R300_NEW_VIEWPORT 0x10000000 #define R300_NEW_QUERY 0x20000000 -#define R300_NEW_KITCHEN_SINK 0x1fffffff +#define R300_NEW_KITCHEN_SINK 0x3fffffff /* The next several objects are not pure Radeon state; they inherit from * various Gallium classes. */ -- cgit v1.2.3 From 88b697fb0aaaab8479716763510f56b1053ddb37 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 14 Oct 2009 18:24:34 +1000 Subject: r300g: remove buffer add that should be unnecessary. This should be handled in the emit fine --- src/gallium/drivers/r300/r300_emit.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 6e616cd5b2..feffadd0ee 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -441,12 +441,6 @@ void r300_emit_query_end(struct r300_context* r300) if (query->begin_emitted == FALSE) return; - if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, - 0, RADEON_GEM_DOMAIN_GTT)) { - debug_printf("r300: There wasn't room for the OQ buffer!?" - " Oh noes!\n"); - } - if (caps->family == CHIP_FAMILY_RV530) { if (caps->num_z_pipes == 2) rv530_emit_query_double(r300, query); -- cgit v1.2.3 From f13e507798cdbbe2fad5df33dcd581d49d6fa7ab Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 14 Oct 2009 01:58:18 -0700 Subject: r300g: Compiler warning cleanup. --- src/gallium/drivers/r300/r300_context.c | 2 +- src/gallium/drivers/r300/r300_render.c | 7 ------- src/gallium/drivers/r300/r300_state_derived.c | 2 +- src/gallium/drivers/r300/r300_surface.c | 3 +-- 4 files changed, 3 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 7a9c098e30..b243f88bb5 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -152,7 +152,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->winsys = r300_winsys; r300->context.winsys = (struct pipe_winsys*)r300_winsys; - r300->context.screen = r300_screen(screen); + r300->context.screen = screen; r300_init_debug(r300); diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index ca44e0f661..b56f7a3d1e 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -127,7 +127,6 @@ static void r300_render_unmap_vertices(struct vbuf_render* render, static void r300_render_release_vertices(struct vbuf_render* render) { struct r300_render* r300render = r300_render(render); - struct r300_context* r300 = r300render->r300; r300render->vbo_offset += r300render->vbo_max_used; r300render->vbo_max_used = 0; @@ -182,8 +181,6 @@ static void r300_prepare_render(struct r300_render* render, unsigned count) { struct r300_context* r300 = render->r300; - CS_LOCALS(r300); - r300_emit_dirty_state(r300); } @@ -213,11 +210,7 @@ static void r300_render_draw(struct vbuf_render* render, { struct r300_render* r300render = r300_render(render); struct r300_context* r300 = r300render->r300; - struct pipe_screen* screen = r300->context.screen; - struct pipe_buffer* index_buffer; - void* index_map; int i; - uint32_t index; CS_LOCALS(r300); diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 02b7ab9107..335b54820a 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -193,7 +193,7 @@ static void r300_vertex_psc(struct r300_context* r300, struct vertex_info* vinfo = &vformat->vinfo; int* tab = vformat->vs_tab; uint32_t temp; - int i, attrib_count; + unsigned i, attrib_count; /* Vertex shaders have no semantics on their inputs, * so PSC should just route stuff based on their info, diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c index d72e734ff0..5cf49d20aa 100644 --- a/src/gallium/drivers/r300/r300_surface.c +++ b/src/gallium/drivers/r300/r300_surface.c @@ -95,8 +95,7 @@ static void r300_surface_fill(struct pipe_context* pipe, unsigned w, unsigned h, unsigned color) { - int i; - float r, g, b, a, depth; + float r, g, b, a; struct r300_context* r300 = r300_context(pipe); struct r300_capabilities* caps = r300_screen(pipe->screen)->caps; struct r300_texture* tex = (struct r300_texture*)dest->texture; -- cgit v1.2.3 From fd63f89e95342d7d5921d6369346e356b505b584 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 14 Oct 2009 03:09:41 -0700 Subject: r300g: Move ztop to derived state. Need to get it into its own atom instead of piggybacking on DSA. --- src/gallium/drivers/r300/r300_state.c | 10 -------- src/gallium/drivers/r300/r300_state_derived.c | 36 +++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 95e2943baa..8359850966 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -190,7 +190,6 @@ static void* r300_create_dsa_state(struct pipe_context* pipe, const struct pipe_depth_stencil_alpha_state* state) { - struct r300_context* r300 = r300_context(pipe); struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state); /* Depth test setup. */ @@ -250,15 +249,6 @@ static void* 0, 1023); } - dsa->z_buffer_top = R300_ZTOP_ENABLE; - /* XXX TODO: add frag prog rules for ztop disable */ - if (r300_fragment_shader_writes_depth(r300->fs)) - dsa->z_buffer_top = R300_ZTOP_DISABLE; - if (state->alpha.enabled && state->alpha.func != PIPE_FUNC_ALWAYS) - dsa->z_buffer_top = R300_ZTOP_DISABLE; - if (r300->query_current) - dsa->z_buffer_top = R300_ZTOP_DISABLE; - return (void*)dsa; } diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 335b54820a..5d323a26b1 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -444,6 +444,37 @@ static void r300_update_rs_block(struct r300_context* r300) rs->inst_count = MAX2(MAX2(col_count - 1, tex_count - 1), 0); } +static void r300_update_ztop(struct r300_context* r300) +{ + r300->dsa_state->z_buffer_top = R300_ZTOP_ENABLE; + + /* This is important enough that I felt it warranted a comment. + * + * According to the docs, these are the conditions where ZTOP must be + * disabled: + * 1) Alpha testing enabled + * 2) Texture kill instructions in fragment shader + * 3) Chroma key culling enabled + * 4) W-buffering enabled + * + * The docs claim that for the first three cases, if no ZS writes happen, + * then ZTOP can be used. + * + * Additionally, the following conditions require disabled ZTOP: + * ~) Depth writes in fragment shader + * ~) Outstanding occlusion queries + * + * ~C. + */ + if (r300->dsa_state->alpha_function) { + r300->dsa_state->z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300_fragment_shader_writes_depth(r300->fs)) { + r300->dsa_state->z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300->query_current) { + r300->dsa_state->z_buffer_top = R300_ZTOP_DISABLE; + } +} + void r300_update_derived_state(struct r300_context* r300) { if (r300->dirty_state & @@ -455,4 +486,9 @@ void r300_update_derived_state(struct r300_context* r300) r300_update_fs_tab(r300); r300_update_rs_block(r300); } + + if (r300->dirty_state & + (R300_NEW_DSA | R300_NEW_FRAGMENT_SHADER | R300_NEW_QUERY)) { + r300_update_ztop(r300); + } } -- cgit v1.2.3 From 4046c3bab4dde95d4096f26637adaa6ce6d310a9 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 14 Oct 2009 17:11:30 +0100 Subject: llvmpipe: Use ALIGN_STACK. --- src/gallium/drivers/llvmpipe/lp_setup.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 60107214df..b14c265b7f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -115,6 +115,7 @@ struct setup_context { /** * Execute fragment shader for the four fragments in the quad. */ +ALIGN_STACK static void shade_quads(struct llvmpipe_context *llvmpipe, struct quad_header *quads[], -- cgit v1.2.3 From 96c9b39a6a9553573fcbdb5fd6db0e9d59768442 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Tue, 13 Oct 2009 15:32:04 +0100 Subject: i915g: Fix warnings --- src/gallium/drivers/i915/i915_debug.c | 2 +- src/gallium/drivers/i915/i915_fpc_translate.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c index ce92d1af9a..e6640e587b 100644 --- a/src/gallium/drivers/i915/i915_debug.c +++ b/src/gallium/drivers/i915/i915_debug.c @@ -880,7 +880,7 @@ i915_dump_batchbuffer( struct intel_batchbuffer *batch ) return; } - debug_printf( "\n\nBATCH: (%d)\n", bytes / 4); + debug_printf( "\n\nBATCH: (%d)\n", (int)bytes / 4); while (!done && stream.offset < bytes) diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 89504ced27..1fe5cda956 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -127,7 +127,7 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...) va_start( args, msg ); util_vsnprintf( buffer, sizeof(buffer), msg, args ); va_end( args ); - debug_printf(buffer); + debug_printf("%s", buffer); debug_printf("\n"); p->error = 1; -- cgit v1.2.3 From a82fc97c643c4309a10cfefb108c4c0f11a2e55a Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 14 Oct 2009 20:06:38 -0700 Subject: r300g: Move ZTOP to its own state atom. It may seem pointless, but this avoids a fair amount of predicted CSO pain. --- src/gallium/drivers/r300/r300_context.h | 8 +++++++- src/gallium/drivers/r300/r300_emit.c | 2 +- src/gallium/drivers/r300/r300_state_derived.c | 8 ++++---- src/gallium/drivers/r300/r300_surface.h | 1 - 4 files changed, 12 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index d2e8875503..2acce0fd4a 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -62,7 +62,6 @@ struct r300_dsa_state { uint32_t z_buffer_control; /* R300_ZB_CNTL: 0x4f00 */ uint32_t z_stencil_control; /* R300_ZB_ZSTENCILCNTL: 0x4f04 */ uint32_t stencil_ref_mask; /* R300_ZB_STENCILREFMASK: 0x4f08 */ - uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */ uint32_t stencil_ref_bf; /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */ }; @@ -124,6 +123,10 @@ struct r300_viewport_state { uint32_t vte_control; /* R300_VAP_VTE_CNTL: 0x20b0 */ }; +struct r300_ztop_state { + uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */ +}; + #define R300_NEW_BLEND 0x00000001 #define R300_NEW_BLEND_COLOR 0x00000002 #define R300_NEW_CLIP 0x00000004 @@ -281,6 +284,9 @@ struct r300_context { struct r300_vertex_shader* vs; /* Viewport state. */ struct r300_viewport_state* viewport_state; + /* ZTOP state. */ + struct r300_ztop_state ztop_state; + /* Bitmask of dirty state objects. */ uint32_t dirty_state; /* Flag indicating whether or not the HW is dirty. */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index feffadd0ee..2c3bba952d 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -106,7 +106,7 @@ void r300_emit_dsa_state(struct r300_context* r300, OUT_CS(dsa->z_buffer_control); OUT_CS(dsa->z_stencil_control); OUT_CS(dsa->stencil_ref_mask); - OUT_CS_REG(R300_ZB_ZTOP, dsa->z_buffer_top); + OUT_CS_REG(R300_ZB_ZTOP, r300->ztop_state.z_buffer_top); if (r300screen->caps->is_r500) { /* OUT_CS_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf); */ } diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 5d323a26b1..f0861a9cf1 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -446,7 +446,7 @@ static void r300_update_rs_block(struct r300_context* r300) static void r300_update_ztop(struct r300_context* r300) { - r300->dsa_state->z_buffer_top = R300_ZTOP_ENABLE; + r300->ztop_state.z_buffer_top = R300_ZTOP_ENABLE; /* This is important enough that I felt it warranted a comment. * @@ -467,11 +467,11 @@ static void r300_update_ztop(struct r300_context* r300) * ~C. */ if (r300->dsa_state->alpha_function) { - r300->dsa_state->z_buffer_top = R300_ZTOP_DISABLE; + r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; } else if (r300_fragment_shader_writes_depth(r300->fs)) { - r300->dsa_state->z_buffer_top = R300_ZTOP_DISABLE; + r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; } else if (r300->query_current) { - r300->dsa_state->z_buffer_top = R300_ZTOP_DISABLE; + r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; } } diff --git a/src/gallium/drivers/r300/r300_surface.h b/src/gallium/drivers/r300/r300_surface.h index f9e98b2ec9..d5998e6e6d 100644 --- a/src/gallium/drivers/r300/r300_surface.h +++ b/src/gallium/drivers/r300/r300_surface.h @@ -54,7 +54,6 @@ static struct r300_dsa_state dsa_clear_state = { .z_buffer_control = 0x0, .z_stencil_control = 0x0, .stencil_ref_mask = R300_STENCILWRITEMASK_MASK, - .z_buffer_top = R300_ZTOP_ENABLE, .stencil_ref_bf = 0x0, }; -- cgit v1.2.3 From 72fd1cf292937565a182b400595816c6ad88836a Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Wed, 14 Oct 2009 11:44:09 +0200 Subject: trace: Handle transfer returning null --- src/gallium/drivers/trace/tr_screen.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index ab605c7fc8..7da9bd3866 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -366,7 +366,8 @@ trace_screen_get_tex_transfer(struct pipe_screen *_screen, trace_dump_call_end(); - result = trace_transfer_create(tr_tex, result); + if (result) + result = trace_transfer_create(tr_tex, result); return result; } -- cgit v1.2.3 From 13580aa3d142b17f936e517daf949ae228f9f14e Mon Sep 17 00:00:00 2001 From: Patrice Mandin Date: Thu, 15 Oct 2009 21:58:44 +0200 Subject: nouveau: nv30: refuse binding a colour buffer with a zeta buffer with different bits, till the backend can tell Mesa not to do that. --- src/gallium/drivers/nv30/nv30_state_fb.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c index 2729dcec7c..9b0266fba5 100644 --- a/src/gallium/drivers/nv30/nv30_state_fb.c +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -15,6 +15,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) unsigned w = fb->width; unsigned h = fb->height; struct nv30_miptree *nv30mt; + int colour_bits = 32, zeta_bits = 32; rt_enable = 0; for (i = 0; i < fb->nr_cbufs; i++) { @@ -54,6 +55,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) break; case PIPE_FORMAT_R5G6B5_UNORM: rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5; + colour_bits = 16; break; default: assert(0); @@ -62,6 +64,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) switch (zeta_format) { case PIPE_FORMAT_Z16_UNORM: rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16; + zeta_bits = 16; break; case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_Z24X8_UNORM: @@ -72,6 +75,10 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) assert(0); } + if (colour_bits != zeta_bits) { + return FALSE; + } + if (rt_enable & NV34TCL_RT_ENABLE_COLOR0) { uint32_t pitch = rt[0]->pitch; if (zeta) { -- cgit v1.2.3 From a5a05fd782bf7bc3843e475df7b12fe6784c1b9e Mon Sep 17 00:00:00 2001 From: Patrice Mandin Date: Thu, 15 Oct 2009 22:41:09 +0200 Subject: nouveau: nv30: Hack to enforce same number of bits as front buffer, for render targets --- src/gallium/drivers/nv30/nv30_screen.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 41af38450b..5b1e5cab2d 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -10,6 +10,22 @@ #define NV34TCL_CHIPSET_3X_MASK 0x00000010 #define NV35TCL_CHIPSET_3X_MASK 0x000001e0 +/* FIXME: It seems I should not include directly ../../winsys/drm/nouveau/drm/nouveau_drm_api.h + * to get the pointer to the context front buffer, so I copied nouveau_winsys here. + * nv30_screen_surface_format_supported() can then use it to enforce creating fbo + * with same number of bits everywhere. + */ +struct nouveau_winsys { + struct pipe_winsys base; + + struct pipe_screen *pscreen; + + unsigned nr_pctx; + struct pipe_context **pctx; + + struct pipe_surface *front; +}; + static int nv30_screen_get_param(struct pipe_screen *pscreen, int param) { @@ -83,21 +99,19 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen, enum pipe_texture_target target, unsigned tex_usage, unsigned geom_flags) { + struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front; + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - return TRUE; - default: - break; - } + return (format == front->format); } else if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { switch (format) { case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_Z24X8_UNORM: + return (front->format == PIPE_FORMAT_A8R8G8B8_UNORM) + || (front->format == PIPE_FORMAT_A8R8G8B8_UNORM); case PIPE_FORMAT_Z16_UNORM: - return TRUE; + return (front->format == PIPE_FORMAT_R5G6B5_UNORM); default: break; } -- cgit v1.2.3 From e4f21be13ac14edd89e865138a9e36b266425b39 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Thu, 15 Oct 2009 16:58:26 -0700 Subject: r300g: Set logical ID for each emitted texture/sampler. multitexarray works on my r300, but texrect doesn't. --- src/gallium/drivers/r300/r300_emit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 2c3bba952d..f3adc0968e 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -531,7 +531,8 @@ void r300_emit_texture(struct r300_context* r300, CS_LOCALS(r300); BEGIN_CS(16); - OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), sampler->filter0); + OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), sampler->filter0 | + (offset << 28)); OUT_CS_REG(R300_TX_FILTER1_0 + (offset * 4), sampler->filter1); OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (offset * 4), sampler->border_color); -- cgit v1.2.3 From fc8a156cfc539b9c04dc3527e4fc61cb4b0b688e Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 16 Oct 2009 08:39:59 -0700 Subject: r300g: Use a hash table to look up vertex info. Need to move rs_block to this, too. Also, I'm getting massive amounts of flicker for some reason; I bet we've gotta re-re-examine PSC and friends. :C --- src/gallium/drivers/r300/r300_context.c | 18 ++++++- src/gallium/drivers/r300/r300_context.h | 10 +++- src/gallium/drivers/r300/r300_emit.c | 26 +++++----- src/gallium/drivers/r300/r300_render.c | 2 +- src/gallium/drivers/r300/r300_state_derived.c | 72 ++++++++++++++++++++------- src/gallium/drivers/r300/r300_state_derived.h | 4 ++ 6 files changed, 97 insertions(+), 35 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index b243f88bb5..a1156d2de6 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -89,10 +89,23 @@ static boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, return r300_draw_elements(pipe, NULL, 0, mode, start, count); } -static void r300_destroy_context(struct pipe_context* context) { +static enum pipe_error r300_clear_hash_table(void* key, void* value, + void* data) +{ + FREE(key); + FREE(value); + return PIPE_OK; +} + +static void r300_destroy_context(struct pipe_context* context) +{ struct r300_context* r300 = r300_context(context); struct r300_query* query, * temp; + u_hash_table_foreach(r300->shader_hash_table, r300_clear_hash_table, + NULL); + u_hash_table_destroy(r300->shader_hash_table); + draw_destroy(r300->draw); /* Free the OQ BO. */ @@ -167,6 +180,9 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.is_texture_referenced = r300_is_texture_referenced; r300->context.is_buffer_referenced = r300_is_buffer_referenced; + r300->shader_hash_table = u_hash_table_create(r300_shader_key_hash, + r300_shader_key_compare); + r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state); r300->rs_block = CALLOC_STRUCT(r300_rs_block); r300->scissor_state = CALLOC_STRUCT(r300_scissor_state); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 2acce0fd4a..2a62c67fc7 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -30,12 +30,14 @@ #include "tgsi/tgsi_scan.h" +#include "util/u_hash_table.h" #include "util/u_memory.h" #include "util/u_simple_list.h" #include "r300_clear.h" #include "r300_query.h" #include "r300_screen.h" +#include "r300_state_derived.h" #include "r300_winsys.h" struct r300_fragment_shader; @@ -248,6 +250,12 @@ struct r300_context { struct r300_query *query_current; struct r300_query query_list; + /* Shader hash table. Used to store vertex formatting information, which + * depends on the combination of both currently loaded shaders. */ + struct u_hash_table* shader_hash_table; + /* Vertex formatting information. */ + struct r300_vertex_format* vertex_info; + /* Various CSO state objects. */ /* Blend state. */ struct r300_blend_state* blend_state; @@ -278,8 +286,6 @@ struct r300_context { /* Vertex buffers for Gallium. */ struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; int vertex_buffer_count; - /* Vertex information. */ - struct r300_vertex_format vertex_info; /* Vertex shader. */ struct r300_vertex_shader* vs; /* Viewport state. */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index f3adc0968e..e6092cda9b 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -551,7 +551,7 @@ void r300_emit_vertex_buffer(struct r300_context* r300) DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, " "vertex size %d\n", r300->vbo, - r300->vertex_info.vinfo.size); + r300->vertex_info->vinfo.size); /* Set the pointer to our vertex buffer. The emitted values are this: * PACKET3 [3D_LOAD_VBPNTR] * COUNT [1] @@ -562,8 +562,8 @@ void r300_emit_vertex_buffer(struct r300_context* r300) BEGIN_CS(7); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); OUT_CS(1); - OUT_CS(r300->vertex_info.vinfo.size | - (r300->vertex_info.vinfo.size << 8)); + OUT_CS(r300->vertex_info->vinfo.size | + (r300->vertex_info->vinfo.size << 8)); OUT_CS(r300->vbo_offset); OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); END_CS; @@ -575,30 +575,30 @@ void r300_emit_vertex_format_state(struct r300_context* r300) CS_LOCALS(r300); BEGIN_CS(26); - OUT_CS_REG(R300_VAP_VTX_SIZE, r300->vertex_info.vinfo.size); + OUT_CS_REG(R300_VAP_VTX_SIZE, r300->vertex_info->vinfo.size); OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); - OUT_CS(r300->vertex_info.vinfo.hwfmt[0]); - OUT_CS(r300->vertex_info.vinfo.hwfmt[1]); + OUT_CS(r300->vertex_info->vinfo.hwfmt[0]); + OUT_CS(r300->vertex_info->vinfo.hwfmt[1]); OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); - OUT_CS(r300->vertex_info.vinfo.hwfmt[2]); - OUT_CS(r300->vertex_info.vinfo.hwfmt[3]); + OUT_CS(r300->vertex_info->vinfo.hwfmt[2]); + OUT_CS(r300->vertex_info->vinfo.hwfmt[3]); /* for (i = 0; i < 4; i++) { * debug_printf("hwfmt%d: 0x%08x\n", i, - * r300->vertex_info.vinfo.hwfmt[i]); + * r300->vertex_info->vinfo.hwfmt[i]); * } */ OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, 8); for (i = 0; i < 8; i++) { - OUT_CS(r300->vertex_info.vap_prog_stream_cntl[i]); + OUT_CS(r300->vertex_info->vap_prog_stream_cntl[i]); /* debug_printf("prog_stream_cntl%d: 0x%08x\n", i, - * r300->vertex_info.vap_prog_stream_cntl[i]); */ + * r300->vertex_info->vap_prog_stream_cntl[i]); */ } OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, 8); for (i = 0; i < 8; i++) { - OUT_CS(r300->vertex_info.vap_prog_stream_cntl_ext[i]); + OUT_CS(r300->vertex_info->vap_prog_stream_cntl_ext[i]); /* debug_printf("prog_stream_cntl_ext%d: 0x%08x\n", i, - * r300->vertex_info.vap_prog_stream_cntl_ext[i]); */ + * r300->vertex_info->vap_prog_stream_cntl_ext[i]); */ } END_CS; } diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index b56f7a3d1e..4e778e1e57 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -67,7 +67,7 @@ r300_render_get_vertex_info(struct vbuf_render* render) r300_update_derived_state(r300); - return &r300->vertex_info.vinfo; + return &r300->vertex_info->vinfo; } static boolean r300_render_allocate_vertices(struct vbuf_render* render, diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index f0861a9cf1..53027777d6 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -29,6 +29,27 @@ /* r300_state_derived: Various bits of state which are dependent upon * currently bound CSO data. */ +struct r300_shader_key { + struct r300_vertex_shader* vs; + struct r300_fragment_shader* fs; +}; + +unsigned r300_shader_key_hash(void* key) { + struct r300_shader_key* shader_key = (struct r300_shader_key*)key; + unsigned vs = (unsigned)shader_key->vs; + unsigned fs = (unsigned)shader_key->fs; + + return (vs << 16) | (fs & 0xffff); +} + +int r300_shader_key_compare(void* key1, void* key2) { + struct r300_shader_key* shader_key1 = (struct r300_shader_key*)key1; + struct r300_shader_key* shader_key2 = (struct r300_shader_key*)key2; + + return (shader_key1->vs == shader_key2->vs) && + (shader_key1->fs == shader_key2->fs); +} + /* Set up the vs_tab and routes. */ static void r300_vs_tab_routes(struct r300_context* r300, struct r300_vertex_format* vformat) @@ -247,23 +268,41 @@ static void r300_vertex_psc(struct r300_context* r300, /* Update the vertex format. */ static void r300_update_vertex_format(struct r300_context* r300) { - struct r300_vertex_format vformat; + struct r300_shader_key* key; + struct r300_vertex_format* vformat; + void* value; int i; - memset(&vformat, 0, sizeof(struct r300_vertex_format)); - for (i = 0; i < 16; i++) { - vformat.vs_tab[i] = -1; - vformat.fs_tab[i] = -1; - } + key = CALLOC_STRUCT(r300_shader_key); + key->vs = r300->vs; + key->fs = r300->fs; - r300_vs_tab_routes(r300, &vformat); + value = u_hash_table_get(r300->shader_hash_table, (void*)key); + if (value) { + debug_printf("r300: Hash table hit! vs: %p fs: %p\n", key->vs, + key->fs); + vformat = (struct r300_vertex_format*)value; + } else { + debug_printf("r300: Hash table miss... vs: %p fs: %p\n", key->vs, + key->fs); + vformat = CALLOC_STRUCT(r300_vertex_format); + + for (i = 0; i < 16; i++) { + vformat->vs_tab[i] = -1; + vformat->fs_tab[i] = -1; + } + + r300_vs_tab_routes(r300, vformat); + r300_vertex_psc(r300, vformat); - r300_vertex_psc(r300, &vformat); + if (u_hash_table_set(r300->shader_hash_table, (void*)key, + (void*)vformat) != PIPE_OK) { + debug_printf("r300: Hash table insertion error!\n"); + } + } - if (memcmp(&r300->vertex_info, &vformat, - sizeof(struct r300_vertex_format))) { - memcpy(&r300->vertex_info, &vformat, - sizeof(struct r300_vertex_format)); + if (r300->vertex_info != vformat) { + r300->vertex_info = vformat; r300->dirty_state |= R300_NEW_VERTEX_FORMAT; } } @@ -271,7 +310,7 @@ static void r300_update_vertex_format(struct r300_context* r300) /* Set up the mappings from GB to US, for RS block. */ static void r300_update_fs_tab(struct r300_context* r300) { - struct r300_vertex_format* vformat = &r300->vertex_info; + struct r300_vertex_format* vformat = r300->vertex_info; struct tgsi_shader_info* info = &r300->fs->info; int i, cols = 0, texs = 0, cols_emitted = 0; int* tab = vformat->fs_tab; @@ -337,7 +376,7 @@ static void r300_update_rs_block(struct r300_context* r300) { struct r300_rs_block* rs = r300->rs_block; struct tgsi_shader_info* info = &r300->fs->info; - int* tab = r300->vertex_info.fs_tab; + int* tab = r300->vertex_info->fs_tab; int col_count = 0, fp_offset = 0, i, tex_count = 0; int rs_tex_comp = 0; memset(rs, 0, sizeof(struct r300_rs_block)); @@ -477,10 +516,7 @@ static void r300_update_ztop(struct r300_context* r300) void r300_update_derived_state(struct r300_context* r300) { - if (r300->dirty_state & - (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER)) { - r300_update_vertex_format(r300); - } + r300_update_vertex_format(r300); if (r300->dirty_state & R300_NEW_VERTEX_FORMAT) { r300_update_fs_tab(r300); diff --git a/src/gallium/drivers/r300/r300_state_derived.h b/src/gallium/drivers/r300/r300_state_derived.h index 71a4a47b00..05ad535e2d 100644 --- a/src/gallium/drivers/r300/r300_state_derived.h +++ b/src/gallium/drivers/r300/r300_state_derived.h @@ -25,6 +25,10 @@ struct r300_context; +unsigned r300_shader_key_hash(void* key); + +int r300_shader_key_compare(void* key1, void* key2); + void r300_update_derived_state(struct r300_context* r300); #endif /* R300_STATE_DERIVED_H */ -- cgit v1.2.3 From 67356ae04743da3137e950503ffd4a1f8fa36400 Mon Sep 17 00:00:00 2001 From: Patrice Mandin Date: Sat, 17 Oct 2009 20:27:24 +0200 Subject: nouveau: nv30: Use same workaround as i915 for segfault related to vbo --- src/gallium/drivers/nv30/nv30_context.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index f827bdc78b..a3e65b96f7 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -10,7 +10,7 @@ nv30_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv30_context *nv30 = nv30_context(pipe); - + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { BEGIN_RING(rankine, 0x1fd8, 1); OUT_RING (2); @@ -37,10 +37,14 @@ nv30_is_texture_referenced( struct pipe_context *pipe, unsigned face, unsigned level) { /** - * FIXME: Optimize. + * FIXME: Return the corrent result. We can't alays return referenced + * since it causes a double flush within the vbo module. */ - +#if 0 return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; +#else + return 0; +#endif } static unsigned int @@ -48,10 +52,14 @@ nv30_is_buffer_referenced( struct pipe_context *pipe, struct pipe_buffer *buf) { /** - * FIXME: Optimize. + * FIXME: Return the corrent result. We can't alays return referenced + * since it causes a double flush within the vbo module. */ - +#if 0 return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; +#else + return 0; +#endif } struct pipe_context * @@ -95,4 +103,3 @@ nv30_create(struct pipe_screen *pscreen, unsigned pctx_id) return &nv30->pipe; } - -- cgit v1.2.3 From 66aab9a1f6de241687a14f7aed45226061c1b84b Mon Sep 17 00:00:00 2001 From: Patrice Mandin Date: Sat, 17 Oct 2009 20:46:19 +0200 Subject: nouveau: nv30: Remove duplicate case. Was a typo for X8R8G8B8, but that will never be use for front buffer. --- src/gallium/drivers/nv30/nv30_screen.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 5b1e5cab2d..bb40e1803d 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -108,8 +108,7 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen, switch (format) { case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_Z24X8_UNORM: - return (front->format == PIPE_FORMAT_A8R8G8B8_UNORM) - || (front->format == PIPE_FORMAT_A8R8G8B8_UNORM); + return (front->format == PIPE_FORMAT_A8R8G8B8_UNORM); case PIPE_FORMAT_Z16_UNORM: return (front->format == PIPE_FORMAT_R5G6B5_UNORM); default: -- cgit v1.2.3 From 114417a2f52ab463f37fcabb5e9b0636574623dc Mon Sep 17 00:00:00 2001 From: Patrice Mandin Date: Sat, 17 Oct 2009 20:49:18 +0200 Subject: nouveau: nv40: Use same workaround as i915 for segfault related to vbo --- src/gallium/drivers/nv40/nv40_context.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index 8eba6a43ef..4e23671202 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -10,7 +10,7 @@ nv40_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv40_context *nv40 = nv40_context(pipe); - + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { BEGIN_RING(curie, 0x1fd8, 1); OUT_RING (2); @@ -37,10 +37,14 @@ nv40_is_texture_referenced( struct pipe_context *pipe, unsigned face, unsigned level) { /** - * FIXME: Optimize. + * FIXME: Return the correct result. We can't always return referenced + * since it causes a double flush within the vbo module. */ - +#if 0 return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; +#else + return 0; +#endif } static unsigned int @@ -48,10 +52,14 @@ nv40_is_buffer_referenced( struct pipe_context *pipe, struct pipe_buffer *buf) { /** - * FIXME: Optimize. + * FIXME: Return the correct result. We can't always return referenced + * since it causes a double flush within the vbo module. */ - +#if 0 return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; +#else + return 0; +#endif } struct pipe_context * @@ -95,4 +103,3 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) return &nv40->pipe; } - -- cgit v1.2.3 From ce9ae4a483e7c85a9046a87005232aa09de782aa Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 17 Oct 2009 20:05:23 -0700 Subject: r300g: Fix u_hash_table rename. --- src/gallium/drivers/r300/r300_context.c | 6 +++--- src/gallium/drivers/r300/r300_context.h | 2 +- src/gallium/drivers/r300/r300_state_derived.c | 8 +++----- 3 files changed, 7 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index a1156d2de6..0518685200 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -102,9 +102,9 @@ static void r300_destroy_context(struct pipe_context* context) struct r300_context* r300 = r300_context(context); struct r300_query* query, * temp; - u_hash_table_foreach(r300->shader_hash_table, r300_clear_hash_table, + util_hash_table_foreach(r300->shader_hash_table, r300_clear_hash_table, NULL); - u_hash_table_destroy(r300->shader_hash_table); + util_hash_table_destroy(r300->shader_hash_table); draw_destroy(r300->draw); @@ -180,7 +180,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.is_texture_referenced = r300_is_texture_referenced; r300->context.is_buffer_referenced = r300_is_buffer_referenced; - r300->shader_hash_table = u_hash_table_create(r300_shader_key_hash, + r300->shader_hash_table = util_hash_table_create(r300_shader_key_hash, r300_shader_key_compare); r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 2a62c67fc7..2d608d6afc 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -252,7 +252,7 @@ struct r300_context { /* Shader hash table. Used to store vertex formatting information, which * depends on the combination of both currently loaded shaders. */ - struct u_hash_table* shader_hash_table; + struct util_hash_table* shader_hash_table; /* Vertex formatting information. */ struct r300_vertex_format* vertex_info; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 53027777d6..0210d97914 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -277,7 +277,7 @@ static void r300_update_vertex_format(struct r300_context* r300) key->vs = r300->vs; key->fs = r300->fs; - value = u_hash_table_get(r300->shader_hash_table, (void*)key); + value = util_hash_table_get(r300->shader_hash_table, (void*)key); if (value) { debug_printf("r300: Hash table hit! vs: %p fs: %p\n", key->vs, key->fs); @@ -295,10 +295,8 @@ static void r300_update_vertex_format(struct r300_context* r300) r300_vs_tab_routes(r300, vformat); r300_vertex_psc(r300, vformat); - if (u_hash_table_set(r300->shader_hash_table, (void*)key, - (void*)vformat) != PIPE_OK) { - debug_printf("r300: Hash table insertion error!\n"); - } + util_hash_table_set(r300->shader_hash_table, + (void*)key, (void*)vformat); } if (r300->vertex_info != vformat) { -- cgit v1.2.3 From 51173e4e53a64465d1498ffd6454687b7629eb59 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 17 Oct 2009 20:29:27 -0700 Subject: r300g: Also have rs_block keyed to the current shader combo. Eliminates part of the glxgears corruption here. Need to clean up PSC more, to get rid of the rest of it. --- src/gallium/drivers/r300/r300_state_derived.c | 109 ++++++++++++++------------ 1 file changed, 58 insertions(+), 51 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 0210d97914..da8c366f30 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -34,6 +34,11 @@ struct r300_shader_key { struct r300_fragment_shader* fs; }; +struct r300_shader_derived_value { + struct r300_vertex_format* vformat; + struct r300_rs_block* rs_block; +}; + unsigned r300_shader_key_hash(void* key) { struct r300_shader_key* shader_key = (struct r300_shader_key*)key; unsigned vs = (unsigned)shader_key->vs; @@ -265,50 +270,10 @@ static void r300_vertex_psc(struct r300_context* r300, (R300_LAST_VEC << (i & 1 ? 16 : 0)); } -/* Update the vertex format. */ -static void r300_update_vertex_format(struct r300_context* r300) -{ - struct r300_shader_key* key; - struct r300_vertex_format* vformat; - void* value; - int i; - - key = CALLOC_STRUCT(r300_shader_key); - key->vs = r300->vs; - key->fs = r300->fs; - - value = util_hash_table_get(r300->shader_hash_table, (void*)key); - if (value) { - debug_printf("r300: Hash table hit! vs: %p fs: %p\n", key->vs, - key->fs); - vformat = (struct r300_vertex_format*)value; - } else { - debug_printf("r300: Hash table miss... vs: %p fs: %p\n", key->vs, - key->fs); - vformat = CALLOC_STRUCT(r300_vertex_format); - - for (i = 0; i < 16; i++) { - vformat->vs_tab[i] = -1; - vformat->fs_tab[i] = -1; - } - - r300_vs_tab_routes(r300, vformat); - r300_vertex_psc(r300, vformat); - - util_hash_table_set(r300->shader_hash_table, - (void*)key, (void*)vformat); - } - - if (r300->vertex_info != vformat) { - r300->vertex_info = vformat; - r300->dirty_state |= R300_NEW_VERTEX_FORMAT; - } -} - /* Set up the mappings from GB to US, for RS block. */ -static void r300_update_fs_tab(struct r300_context* r300) +static void r300_update_fs_tab(struct r300_context* r300, + struct r300_vertex_format* vformat) { - struct r300_vertex_format* vformat = r300->vertex_info; struct tgsi_shader_info* info = &r300->fs->info; int i, cols = 0, texs = 0, cols_emitted = 0; int* tab = vformat->fs_tab; @@ -370,14 +335,14 @@ static void r300_update_fs_tab(struct r300_context* r300) /* Set up the RS block. This is the part of the chipset that actually does * the rasterization of vertices into fragments. This is also the part of the * chipset that locks up if any part of it is even slightly wrong. */ -static void r300_update_rs_block(struct r300_context* r300) +static void r300_update_rs_block(struct r300_context* r300, + struct r300_vertex_format* vformat, + struct r300_rs_block* rs) { - struct r300_rs_block* rs = r300->rs_block; struct tgsi_shader_info* info = &r300->fs->info; - int* tab = r300->vertex_info->fs_tab; + int* tab = vformat->fs_tab; int col_count = 0, fp_offset = 0, i, tex_count = 0; int rs_tex_comp = 0; - memset(rs, 0, sizeof(struct r300_rs_block)); if (r300_screen(r300->context.screen)->caps->is_r500) { for (i = 0; i < info->num_inputs; i++) { @@ -481,6 +446,53 @@ static void r300_update_rs_block(struct r300_context* r300) rs->inst_count = MAX2(MAX2(col_count - 1, tex_count - 1), 0); } +/* Update the vertex format. */ +static void r300_update_vertex_format(struct r300_context* r300) +{ + struct r300_shader_key* key; + struct r300_vertex_format* vformat; + struct r300_rs_block* rs_block; + struct r300_shader_derived_value* value; + int i; + + key = CALLOC_STRUCT(r300_shader_key); + key->vs = r300->vs; + key->fs = r300->fs; + + value = (struct r300_shader_derived_value*) + util_hash_table_get(r300->shader_hash_table, (void*)key); + if (value) { + vformat = value->vformat; + rs_block = value->rs_block; + + FREE(key); + } else { + vformat = CALLOC_STRUCT(r300_vertex_format); + rs_block = CALLOC_STRUCT(r300_rs_block); + value = CALLOC_STRUCT(r300_shader_derived_value); + + for (i = 0; i < 16; i++) { + vformat->vs_tab[i] = -1; + vformat->fs_tab[i] = -1; + } + + r300_vs_tab_routes(r300, vformat); + r300_vertex_psc(r300, vformat); + r300_update_fs_tab(r300, vformat); + + r300_update_rs_block(r300, vformat, rs_block); + + value->vformat = vformat; + value->rs_block = rs_block; + util_hash_table_set(r300->shader_hash_table, + (void*)key, (void*)value); + } + + r300->vertex_info = vformat; + r300->rs_block = rs_block; + r300->dirty_state |= (R300_NEW_VERTEX_FORMAT | R300_NEW_RS_BLOCK); +} + static void r300_update_ztop(struct r300_context* r300) { r300->ztop_state.z_buffer_top = R300_ZTOP_ENABLE; @@ -516,11 +528,6 @@ void r300_update_derived_state(struct r300_context* r300) { r300_update_vertex_format(r300); - if (r300->dirty_state & R300_NEW_VERTEX_FORMAT) { - r300_update_fs_tab(r300); - r300_update_rs_block(r300); - } - if (r300->dirty_state & (R300_NEW_DSA | R300_NEW_FRAGMENT_SHADER | R300_NEW_QUERY)) { r300_update_ztop(r300); -- cgit v1.2.3 From 11056ca86fce64209b7d21c87070c419a1968d28 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 17 Oct 2009 20:47:45 -0700 Subject: r300g: Use a dirty test to bring framerate back up. This is just split out from the next commit, that's all. --- src/gallium/drivers/r300/r300_state_derived.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index da8c366f30..c59d446e93 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -447,7 +447,7 @@ static void r300_update_rs_block(struct r300_context* r300, } /* Update the vertex format. */ -static void r300_update_vertex_format(struct r300_context* r300) +static void r300_update_derived_shader_state(struct r300_context* r300) { struct r300_shader_key* key; struct r300_vertex_format* vformat; @@ -526,7 +526,10 @@ static void r300_update_ztop(struct r300_context* r300) void r300_update_derived_state(struct r300_context* r300) { - r300_update_vertex_format(r300); + if (r300->dirty_state & + (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER)) { + r300_update_derived_shader_state(r300); + } if (r300->dirty_state & (R300_NEW_DSA | R300_NEW_FRAGMENT_SHADER | R300_NEW_QUERY)) { -- cgit v1.2.3 From bfd877e4705002d97ee8dba6fe0c1f8676582ab3 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 17 Oct 2009 20:53:19 -0700 Subject: r300g: Squash format warning. Won't ever be supported. --- src/gallium/drivers/r300/r300_screen.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 5381651c77..44770d1aca 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -231,6 +231,7 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage, /* Definitely unsupported formats. */ /* Non-usable Z buffer/stencil formats. */ + case PIPE_FORMAT_Z32_UNORM: case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: -- cgit v1.2.3 From bb567357bc1366df7115e0daa68c2470e3bf6ba6 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 17 Oct 2009 21:32:56 -0700 Subject: gallium: Permit surface_copy and surface_fill to be NULL. Uf. Lots of files touched. Would people with working vega, xorg, dri1, etc. please make sure you are not broken, and fix yourself up if you are. There were only two or three places where the code did not have painful fallbacks, so I would advise st maintainers to find less painful workarounds, or consider overhauling util_surface_copy and util_surface_fill. Per ymanton, darktama, and Dr_Jakob's suggestions, clear has been left as-is. I will not add PIPE_CAP_BLITTER unless it is deemed necessary. --- src/gallium/auxiliary/util/u_blit.c | 19 ++++++++++++++----- src/gallium/auxiliary/util/u_clear.h | 16 +++++++++++++--- src/gallium/drivers/r300/r300_context.c | 2 +- src/gallium/include/pipe/p_context.h | 3 +++ src/gallium/state_trackers/dri/dri_drawable.c | 22 ++++++++++++++++------ src/gallium/state_trackers/egl/egl_surface.c | 23 +++++++++++++++++------ src/gallium/state_trackers/vega/renderer.c | 16 ++++++++++++---- src/gallium/state_trackers/vega/vg_tracker.c | 24 +++++++++++++++++------- src/gallium/state_trackers/xorg/xorg_exa.c | 12 +++++++++--- src/gallium/state_trackers/xorg/xorg_renderer.c | 19 ++++++++++++++----- src/mesa/state_tracker/st_atom_framebuffer.c | 16 ++++++++++++---- src/mesa/state_tracker/st_cb_drawpixels.c | 19 ++++++++++++++----- src/mesa/state_tracker/st_cb_fbo.c | 16 ++++++++++++---- src/mesa/state_tracker/st_cb_texture.c | 3 ++- 14 files changed, 156 insertions(+), 54 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index fb00c3abe8..5038642599 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -46,6 +46,7 @@ #include "util/u_memory.h" #include "util/u_simple_shaders.h" #include "util/u_surface.h" +#include "util/u_rect.h" #include "cso_cache/cso_context.h" @@ -301,7 +302,8 @@ util_blit_pixels_writemask(struct blit_state *ctx, * no overlapping. * Filter mode should not matter since there's no stretching. */ - if (dst->format == src->format && + if (pipe->surface_copy && + dst->format == src->format && srcX0 < srcX1 && dstX0 < dstX1 && srcY0 < srcY1 && @@ -365,10 +367,17 @@ util_blit_pixels_writemask(struct blit_state *ctx, PIPE_BUFFER_USAGE_GPU_WRITE); /* load temp texture */ - pipe->surface_copy(pipe, - texSurf, 0, 0, /* dest */ - src, srcLeft, srcTop, /* src */ - srcW, srcH); /* size */ + if (pipe->surface_copy) { + pipe->surface_copy(pipe, + texSurf, 0, 0, /* dest */ + src, srcLeft, srcTop, /* src */ + srcW, srcH); /* size */ + } else { + util_surface_copy(pipe, FALSE, + texSurf, 0, 0, /* dest */ + src, srcLeft, srcTop, /* src */ + srcW, srcH); /* size */ + } /* free the surface, update the texture if necessary. */ diff --git a/src/gallium/auxiliary/util/u_clear.h b/src/gallium/auxiliary/util/u_clear.h index 7c16b32cf9..1e65a035ae 100644 --- a/src/gallium/auxiliary/util/u_clear.h +++ b/src/gallium/auxiliary/util/u_clear.h @@ -32,6 +32,7 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" #include "util/u_pack_color.h" +#include "util/u_rect.h" /** @@ -48,13 +49,22 @@ util_clear(struct pipe_context *pipe, unsigned color; util_pack_color(rgba, ps->format, &color); - pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, color); + if (pipe->surface_fill) { + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, color); + } else { + util_surface_fill(pipe, ps, 0, 0, ps->width, ps->height, color); + } } if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { struct pipe_surface *ps = framebuffer->zsbuf; - pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, - util_pack_z_stencil(ps->format, depth, stencil)); + if (pipe->surface_fill) { + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, + util_pack_z_stencil(ps->format, depth, stencil)); + } else { + util_surface_fill(pipe, ps, 0, 0, ps->width, ps->height, + util_pack_z_stencil(ps->format, depth, stencil)); + } } } diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 0518685200..7b370b3e95 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -206,7 +206,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_query_functions(r300); - r300_init_surface_functions(r300); + /* r300_init_surface_functions(r300); */ r300_init_state_functions(r300); diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 39620a7198..5569001e60 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -189,6 +189,9 @@ struct pipe_context { /** * Surface functions + * + * The pipe driver is allowed to set these functions to NULL, and in that + * case, they will not be available. */ /*@{*/ diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c index c67cc8dacb..5625ff53cf 100644 --- a/src/gallium/state_trackers/dri/dri_drawable.c +++ b/src/gallium/state_trackers/dri/dri_drawable.c @@ -45,6 +45,7 @@ #include "state_tracker/st_cb_fbo.h" #include "util/u_memory.h" +#include "util/u_rect.h" static struct pipe_surface * dri_surface_from_handle(struct drm_api *api, @@ -541,12 +542,21 @@ dri1_swap_copy(struct dri_context *ctx, cur = dPriv->pClipRects; for (i = 0; i < dPriv->numClipRects; ++i) { - if (dri1_intersect_src_bbox(&clip, dPriv->x, dPriv->y, cur++, bbox)) - pipe->surface_copy(pipe, dst, clip.x1, clip.y1, - src, - (int)clip.x1 - dPriv->x, - (int)clip.y1 - dPriv->y, - clip.x2 - clip.x1, clip.y2 - clip.y1); + if (dri1_intersect_src_bbox(&clip, dPriv->x, dPriv->y, cur++, bbox)) { + if (pipe->surface_copy) { + pipe->surface_copy(pipe, dst, clip.x1, clip.y1, + src, + (int)clip.x1 - dPriv->x, + (int)clip.y1 - dPriv->y, + clip.x2 - clip.x1, clip.y2 - clip.y1); + } else { + util_surface_copy(pipe, FALSE, dst, clip.x1, clip.y1, + src, + (int)clip.x1 - dPriv->x, + (int)clip.y1 - dPriv->y, + clip.x2 - clip.x1, clip.y2 - clip.y1); + } + } } } diff --git a/src/gallium/state_trackers/egl/egl_surface.c b/src/gallium/state_trackers/egl/egl_surface.c index 7911a8834e..71c013756d 100644 --- a/src/gallium/state_trackers/egl/egl_surface.c +++ b/src/gallium/state_trackers/egl/egl_surface.c @@ -12,6 +12,8 @@ #include "state_tracker/drm_api.h" +#include "util/u_rect.h" + /* * Util functions */ @@ -360,12 +362,21 @@ drm_swap_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *draw) st_notify_swapbuffers(surf->stfb); if (ctx && surf->screen) { - ctx->pipe->surface_copy(ctx->pipe, - surf->screen->surface, - 0, 0, - back_surf, - 0, 0, - surf->w, surf->h); + if (ctx->pipe->surface_copy) { + ctx->pipe->surface_copy(ctx->pipe, + surf->screen->surface, + 0, 0, + back_surf, + 0, 0, + surf->w, surf->h); + } else { + util_surface_copy(ctx->pipe, FALSE, + surf->screen->surface, + 0, 0, + back_surf, + 0, 0, + surf->w, surf->h); + } ctx->pipe->flush(ctx->pipe, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE, NULL); #ifdef DRM_MODE_FEATURE_DIRTYFB diff --git a/src/gallium/state_trackers/vega/renderer.c b/src/gallium/state_trackers/vega/renderer.c index f7c5f2f0cd..396c88aa3d 100644 --- a/src/gallium/state_trackers/vega/renderer.c +++ b/src/gallium/state_trackers/vega/renderer.c @@ -37,6 +37,7 @@ #include "util/u_draw_quad.h" #include "util/u_simple_shaders.h" #include "util/u_memory.h" +#include "util/u_rect.h" #include "cso_cache/cso_context.h" @@ -457,10 +458,17 @@ void renderer_copy_surface(struct renderer *ctx, PIPE_BUFFER_USAGE_GPU_WRITE); /* load temp texture */ - pipe->surface_copy(pipe, - texSurf, 0, 0, /* dest */ - src, srcLeft, srcTop, /* src */ - srcW, srcH); /* size */ + if (pipe->surface_copy) { + pipe->surface_copy(pipe, + texSurf, 0, 0, /* dest */ + src, srcLeft, srcTop, /* src */ + srcW, srcH); /* size */ + } else { + util_surface_copy(pipe, FALSE, + texSurf, 0, 0, /* dest */ + src, srcLeft, srcTop, /* src */ + srcW, srcH); /* size */ + } /* free the surface, update the texture if necessary.*/ screen->tex_surface_destroy(texSurf); diff --git a/src/gallium/state_trackers/vega/vg_tracker.c b/src/gallium/state_trackers/vega/vg_tracker.c index 56cc60aebe..c4da01e52c 100644 --- a/src/gallium/state_trackers/vega/vg_tracker.c +++ b/src/gallium/state_trackers/vega/vg_tracker.c @@ -235,13 +235,23 @@ static void setup_new_alpha_mask(struct vg_context *ctx, old_texture, 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ); - pipe->surface_copy(pipe, - surface, - 0, 0, - old_surface, - 0, 0, - MIN2(old_surface->width, width), - MIN2(old_surface->height, height)); + if (pipe->surface_copy) { + pipe->surface_copy(pipe, + surface, + 0, 0, + old_surface, + 0, 0, + MIN2(old_surface->width, width), + MIN2(old_surface->height, height)); + } else { + util_surface_copy(pipe, FALSE, + surface, + 0, 0, + old_surface, + 0, 0, + MIN2(old_surface->width, width), + MIN2(old_surface->height, height)); + } if (surface) pipe_surface_reference(&surface, NULL); if (old_surface) diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c index af76d6690f..4988af4864 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.c +++ b/src/gallium/state_trackers/xorg/xorg_exa.c @@ -693,9 +693,15 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, dst_surf = exa->scrn->get_tex_surface( exa->scrn, texture, 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE); src_surf = xorg_gpu_surface(exa->pipe->screen, priv); - exa->pipe->surface_copy(exa->pipe, dst_surf, 0, 0, src_surf, - 0, 0, min(width, texture->width[0]), - min(height, texture->height[0])); + if (exa->pipe->surface_copy) { + exa->pipe->surface_copy(exa->pipe, dst_surf, 0, 0, src_surf, + 0, 0, min(width, texture->width[0]), + min(height, texture->height[0])); + } else { + util_surface_copy(exa->pipe, FALSE, dst_surf, 0, 0, src_surf, + 0, 0, min(width, texture->width[0]), + min(height, texture->height[0])); + } exa->scrn->tex_surface_destroy(dst_surf); exa->scrn->tex_surface_destroy(src_surf); } diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c index 81b209cb59..ca69e1e0e9 100644 --- a/src/gallium/state_trackers/xorg/xorg_renderer.c +++ b/src/gallium/state_trackers/xorg/xorg_renderer.c @@ -7,6 +7,7 @@ #include "util/u_draw_quad.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_rect.h" #include "pipe/p_inlines.h" @@ -586,11 +587,19 @@ create_sampler_texture(struct xorg_renderer *r, screen, src, 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ); struct pipe_surface *ps_tex = screen->get_tex_surface( screen, pt, 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE ); - pipe->surface_copy(pipe, - ps_tex, /* dest */ - 0, 0, /* destx/y */ - ps_read, - 0, 0, src->width[0], src->height[0]); + if (pipe->surface_copy) { + pipe->surface_copy(pipe, + ps_tex, /* dest */ + 0, 0, /* destx/y */ + ps_read, + 0, 0, src->width[0], src->height[0]); + } else { + util_surface_copy(pipe, FALSE, + ps_tex, /* dest */ + 0, 0, /* destx/y */ + ps_read, + 0, 0, src->width[0], src->height[0]); + } pipe_surface_reference(&ps_read, NULL); pipe_surface_reference(&ps_tex, NULL); } diff --git a/src/mesa/state_tracker/st_atom_framebuffer.c b/src/mesa/state_tracker/st_atom_framebuffer.c index 5209a6a0c9..e18c0f6e0a 100644 --- a/src/mesa/state_tracker/st_atom_framebuffer.c +++ b/src/mesa/state_tracker/st_atom_framebuffer.c @@ -39,6 +39,7 @@ #include "pipe/p_context.h" #include "pipe/p_inlines.h" #include "cso_cache/cso_context.h" +#include "util/u_rect.h" @@ -162,10 +163,17 @@ update_framebuffer_state( struct st_context *st ) (void) st_get_framebuffer_surface(stfb, ST_SURFACE_FRONT_LEFT, &surf_front); (void) st_get_framebuffer_surface(stfb, ST_SURFACE_BACK_LEFT, &surf_back); - st->pipe->surface_copy(st->pipe, - surf_front, 0, 0, /* dest */ - surf_back, 0, 0, /* src */ - fb->Width, fb->Height); + if (st->pipe->surface_copy) { + st->pipe->surface_copy(st->pipe, + surf_front, 0, 0, /* dest */ + surf_back, 0, 0, /* src */ + fb->Width, fb->Height); + } else { + util_surface_copy(st->pipe, FALSE, + surf_front, 0, 0, + surf_back, 0, 0, + fb->Width, fb->Height); + } } /* we're assuming we'll really draw to the front buffer */ st->frontbuffer_status = FRONT_STATUS_DIRTY; diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 5c3413f905..be44577117 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -62,6 +62,7 @@ #include "util/u_tile.h" #include "util/u_draw_quad.h" #include "util/u_math.h" +#include "util/u_rect.h" #include "shader/prog_instruction.h" #include "cso_cache/cso_context.h" @@ -1075,11 +1076,19 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, PIPE_BUFFER_USAGE_GPU_READ); struct pipe_surface *psTex = screen->get_tex_surface(screen, pt, 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE ); - pipe->surface_copy(pipe, - psTex, /* dest */ - 0, 0, /* destx/y */ - psRead, - srcx, srcy, width, height); + if (pipe->surface_copy) { + pipe->surface_copy(pipe, + psTex, /* dest */ + 0, 0, /* destx/y */ + psRead, + srcx, srcy, width, height); + } else { + util_surface_copy(pipe, FALSE, + psTex, + 0, 0, + psRead, + srcx, srcy, width, height); + } pipe_surface_reference(&psRead, NULL); pipe_surface_reference(&psTex, NULL); } diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 864f5d3ca3..73aa65955b 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -49,6 +49,7 @@ #include "st_public.h" #include "st_texture.h" +#include "util/u_rect.h" /** @@ -538,10 +539,17 @@ copy_back_to_front(struct st_context *st, (void) st_get_framebuffer_surface(stfb, backIndex, &surf_back); if (surf_front && surf_back) { - st->pipe->surface_copy(st->pipe, - surf_front, 0, 0, /* dest */ - surf_back, 0, 0, /* src */ - fb->Width, fb->Height); + if (st->pipe->surface_copy) { + st->pipe->surface_copy(st->pipe, + surf_front, 0, 0, /* dest */ + surf_back, 0, 0, /* src */ + fb->Width, fb->Height); + } else { + util_surface_copy(st->pipe, FALSE, + surf_front, 0, 0, + surf_back, 0, 0, + fb->Width, fb->Height); + } } } diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index b943787106..a1953342b4 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -1546,7 +1546,8 @@ st_copy_texsubimage(GLcontext *ctx, if (ctx->_ImageTransferState == 0x0) { - if (matching_base_formats && + if (pipe->surface_copy && + matching_base_formats && src_format == dest_format && !do_flip) { -- cgit v1.2.3 From 838da1d4ae11aa8b5eab4f35713709714e337cbe Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 18 Oct 2009 14:31:58 +0100 Subject: llvmpipe: Allocate texture storage for whole quads. --- src/gallium/drivers/llvmpipe/lp_texture.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 08f0950d47..a00f2495df 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -66,16 +66,24 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, pf_get_block(lpt->base.format, &lpt->base.block); for (level = 0; level <= pt->last_level; level++) { + unsigned nblocksx, nblocksy; + pt->width[level] = width; pt->height[level] = height; pt->depth[level] = depth; pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); - lpt->stride[level] = align(pt->nblocksx[level]*pt->block.size, 16); + pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); + + /* Allocate storage for whole quads. This is particularly important + * for depth surfaces, which are currently stored in a swizzled format. */ + nblocksx = pf_get_nblocksx(&pt->block, align(width, 2)); + nblocksy = pf_get_nblocksy(&pt->block, align(height, 2)); + + lpt->stride[level] = align(nblocksx*pt->block.size, 16); lpt->level_offset[level] = buffer_size; - buffer_size += (pt->nblocksy[level] * + buffer_size += (nblocksy * ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * lpt->stride[level]); -- cgit v1.2.3 From d2e29b502e5f777551ff057f08e54d82542863cf Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 18 Oct 2009 10:30:18 -0700 Subject: r300g: Add another ZTOP condition. I don't even know if texkill works right now. --- src/gallium/drivers/r300/r300_state_derived.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index c59d446e93..2c624766bb 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -517,6 +517,8 @@ static void r300_update_ztop(struct r300_context* r300) */ if (r300->dsa_state->alpha_function) { r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300->fs->info.uses_kill) { + r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; } else if (r300_fragment_shader_writes_depth(r300->fs)) { r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; } else if (r300->query_current) { -- cgit v1.2.3 From 16a06fea73b1e6e8857f7568762bfc56dcfe2940 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 18 Oct 2009 15:54:39 -0700 Subject: r300g: Fix up a bunch of warnings. --- src/gallium/drivers/r300/r300_emit.c | 24 +++++++++++------------- src/gallium/drivers/r300/r300_emit.h | 1 + src/gallium/drivers/r300/r300_flush.c | 2 ++ src/gallium/drivers/r300/r300_screen.c | 2 +- src/gallium/drivers/r300/r300_state.c | 4 ++-- src/gallium/drivers/r300/r300_state_derived.c | 10 ++++------ 6 files changed, 21 insertions(+), 22 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index e6092cda9b..df2046bd0c 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -320,8 +320,7 @@ void r300_emit_fb_state(struct r300_context* r300, END_CS; } -void r300_emit_query_start(struct r300_context *r300) - +static void r300_emit_query_start(struct r300_context *r300) { struct r300_capabilities *caps = r300_screen(r300->context.screen)->caps; struct r300_query *query = r300->query_current; @@ -334,9 +333,9 @@ void r300_emit_query_start(struct r300_context *r300) * for overlapping queries. */ BEGIN_CS(4); if (caps->family == CHIP_FAMILY_RV530) { - OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); } else { - OUT_CS_REG(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL); + OUT_CS_REG(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL); } OUT_CS_REG(R300_ZB_ZPASS_DATA, 0); END_CS; @@ -345,7 +344,7 @@ void r300_emit_query_start(struct r300_context *r300) static void r300_emit_query_finish(struct r300_context *r300, - struct r300_query *query) + struct r300_query *query) { struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); @@ -388,7 +387,7 @@ static void r300_emit_query_finish(struct r300_context *r300, OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 0), 0, RADEON_GEM_DOMAIN_GTT, 0); - break; + break; default: debug_printf("r300: Implementation error: Chipset reports %d" " pixel pipes!\n", caps->num_frag_pipes); @@ -398,11 +397,10 @@ static void r300_emit_query_finish(struct r300_context *r300, /* And, finally, reset it to normal... */ OUT_CS_REG(R300_SU_REG_DEST, 0xF); END_CS; - } static void rv530_emit_query_single(struct r300_context *r300, - struct r300_query *query) + struct r300_query *query) { CS_LOCALS(r300); @@ -415,7 +413,7 @@ static void rv530_emit_query_single(struct r300_context *r300, } static void rv530_emit_query_double(struct r300_context *r300, - struct r300_query *query) + struct r300_query *query) { CS_LOCALS(r300); @@ -442,10 +440,10 @@ void r300_emit_query_end(struct r300_context* r300) return; if (caps->family == CHIP_FAMILY_RV530) { - if (caps->num_z_pipes == 2) - rv530_emit_query_double(r300, query); - else - rv530_emit_query_single(r300, query); + if (caps->num_z_pipes == 2) + rv530_emit_query_double(r300, query); + else + rv530_emit_query_single(r300, query); } else r300_emit_query_finish(r300, query); } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index b62aa9fec5..7e469ea0c7 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -58,6 +58,7 @@ void r300_emit_fb_state(struct r300_context* r300, void r300_emit_query_begin(struct r300_context* r300, struct r300_query* query); + void r300_emit_query_end(struct r300_context* r300); void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs); diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 241ea71d6b..d60652a021 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -20,7 +20,9 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "r300_emit.h" #include "r300_flush.h" +#include "r300_state_invariant.h" static void r300_flush(struct pipe_context* pipe, unsigned flags, diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 44770d1aca..cc499d400a 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -311,7 +311,7 @@ r300_get_tex_transfer(struct pipe_screen *screen, { struct r300_texture *tex = (struct r300_texture *)texture; struct r300_transfer *trans; - unsigned offset; /* in bytes */ + unsigned offset = 0; /* in bytes */ /* XXX Add support for these things */ if (texture->target == PIPE_TEXTURE_CUBE) { diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 8359850966..0a982a9d5d 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -329,7 +329,7 @@ static void r300_delete_fs_state(struct pipe_context* pipe, void* shader) { struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader; rc_constants_destroy(&fs->code.constants); - FREE(fs->state.tokens); + FREE((void*)fs->state.tokens); FREE(shader); } @@ -697,7 +697,7 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader) rc_constants_destroy(&vs->code.constants); draw_delete_vertex_shader(r300->draw, vs->draw); - FREE(vs->state.tokens); + FREE((void*)vs->state.tokens); FREE(shader); } else { draw_delete_vertex_shader(r300->draw, diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 2c624766bb..1468b9d36e 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -265,7 +265,9 @@ static void r300_vertex_psc(struct r300_context* r300, } /* Set the last vector in the PSC. */ - i--; + if (i) { + i -= 1; + } vformat->vap_prog_stream_cntl[i >> 1] |= (R300_LAST_VEC << (i & 1 ? 16 : 0)); } @@ -336,17 +338,14 @@ static void r300_update_fs_tab(struct r300_context* r300, * the rasterization of vertices into fragments. This is also the part of the * chipset that locks up if any part of it is even slightly wrong. */ static void r300_update_rs_block(struct r300_context* r300, - struct r300_vertex_format* vformat, struct r300_rs_block* rs) { struct tgsi_shader_info* info = &r300->fs->info; - int* tab = vformat->fs_tab; int col_count = 0, fp_offset = 0, i, tex_count = 0; int rs_tex_comp = 0; if (r300_screen(r300->context.screen)->caps->is_r500) { for (i = 0; i < info->num_inputs; i++) { - assert(tab[i] != -1); switch (info->input_semantic_name[i]) { case TGSI_SEMANTIC_COLOR: rs->ip[col_count] |= @@ -387,7 +386,6 @@ static void r300_update_rs_block(struct r300_context* r300, } } else { for (i = 0; i < info->num_inputs; i++) { - assert(tab[i] != -1); switch (info->input_semantic_name[i]) { case TGSI_SEMANTIC_COLOR: rs->ip[col_count] |= @@ -480,7 +478,7 @@ static void r300_update_derived_shader_state(struct r300_context* r300) r300_vertex_psc(r300, vformat); r300_update_fs_tab(r300, vformat); - r300_update_rs_block(r300, vformat, rs_block); + r300_update_rs_block(r300, rs_block); value->vformat = vformat; value->rs_block = rs_block; -- cgit v1.2.3 From 869d3eea37ee060d62cd5b7f6031ef5a93e328a1 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 7 Oct 2009 16:07:34 +1000 Subject: drm/nv50: write tic/tsc setup to correct slots when skipping units --- src/gallium/drivers/nv50/nv50_state_validate.c | 7 ++++--- src/gallium/drivers/nv50/nv50_tex.c | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index fd27620371..9079de918d 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -356,13 +356,14 @@ viewport_uptodate: if (nv50->dirty & NV50_NEW_SAMPLER) { int i; - so = so_new(nv50->sampler_nr * 9 + 2, 0); - so_method(so, tesla, NV50TCL_CB_ADDR, 1); - so_data (so, NV50_CB_TSC); + so = so_new(nv50->sampler_nr * 11, 0); for (i = 0; i < nv50->sampler_nr; i++) { if (!nv50->sampler[i]) continue; + so_method(so, tesla, NV50TCL_CB_ADDR, 1); + so_data (so, ((i * 8) << NV50TCL_CB_ADDR_ID_SHIFT) | + NV50_CB_TSC); so_method(so, tesla, NV50TCL_CB_DATA(0) | (2<<29), 8); so_datap (so, nv50->sampler[i]->tsc, 8); } diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index 72d33150af..ca2b883e9b 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -148,18 +148,19 @@ nv50_tex_validate(struct nv50_context *nv50) struct nouveau_stateobj *so; int unit, push; - push = nv50->miptree_nr * 9 + 2; + push = nv50->miptree_nr * 11; push += MAX2(nv50->miptree_nr, nv50->state.miptree_nr) * 2; so = so_new(push, nv50->miptree_nr * 2); - so_method(so, tesla, NV50TCL_CB_ADDR, 1); - so_data (so, NV50_CB_TIC); for (unit = 0; unit < nv50->miptree_nr; unit++) { struct nv50_miptree *mt = nv50->miptree[unit]; if (!mt) continue; + so_method(so, tesla, NV50TCL_CB_ADDR, 1); + so_data (so, ((unit * 8) << NV50TCL_CB_ADDR_ID_SHIFT) | + NV50_CB_TIC); so_method(so, tesla, NV50TCL_CB_DATA(0) | 0x40000000, 8); if (nv50_tex_construct(nv50, so, mt, unit)) { NOUVEAU_ERR("failed tex validate\n"); -- cgit v1.2.3 From 35b98e2884bd7c76c43fa08d5bb0a8f1396d3298 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 19 Oct 2009 09:28:59 +1000 Subject: nouveau: implement is_{texture,buffer}_referenced properly --- src/gallium/drivers/nouveau/Makefile | 3 +- src/gallium/drivers/nouveau/nouveau_context.c | 41 +++++++++++++++++++++++++++ src/gallium/drivers/nouveau/nouveau_context.h | 11 +++++++ src/gallium/drivers/nv04/nv04_context.c | 28 ++---------------- src/gallium/drivers/nv04/nv04_context.h | 1 + src/gallium/drivers/nv10/nv10_context.c | 27 ++---------------- src/gallium/drivers/nv10/nv10_context.h | 1 + src/gallium/drivers/nv20/nv20_context.c | 28 ++---------------- src/gallium/drivers/nv20/nv20_context.h | 1 + src/gallium/drivers/nv30/nv30_context.c | 35 ++--------------------- src/gallium/drivers/nv30/nv30_context.h | 1 + src/gallium/drivers/nv40/nv40_context.c | 35 ++--------------------- src/gallium/drivers/nv40/nv40_context.h | 1 + src/gallium/drivers/nv50/nv50_context.c | 27 ++---------------- src/gallium/drivers/nv50/nv50_context.h | 1 + 15 files changed, 72 insertions(+), 169 deletions(-) create mode 100644 src/gallium/drivers/nouveau/nouveau_context.c create mode 100644 src/gallium/drivers/nouveau/nouveau_context.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/Makefile b/src/gallium/drivers/nouveau/Makefile index dbe8a6e7bf..0cb66041d5 100644 --- a/src/gallium/drivers/nouveau/Makefile +++ b/src/gallium/drivers/nouveau/Makefile @@ -3,6 +3,7 @@ include $(TOP)/configs/current LIBNAME = nouveau -C_SOURCES = nouveau_screen.c +C_SOURCES = nouveau_screen.c \ + nouveau_context.c include ../../Makefile.template diff --git a/src/gallium/drivers/nouveau/nouveau_context.c b/src/gallium/drivers/nouveau/nouveau_context.c new file mode 100644 index 0000000000..23443869e6 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_context.c @@ -0,0 +1,41 @@ +#include +#include + +#include "nouveau/nouveau_screen.h" +#include "nouveau/nouveau_context.h" + +#include "nouveau/nouveau_bo.h" + +static unsigned int +nouveau_reference_flags(struct nouveau_bo *bo) +{ + uint32_t bo_flags; + int flags = 0; + + bo_flags = nouveau_bo_pending(bo); + if (bo_flags & NOUVEAU_BO_RD) + flags |= PIPE_REFERENCED_FOR_READ; + if (bo_flags & NOUVEAU_BO_WR) + flags |= PIPE_REFERENCED_FOR_WRITE; + + return flags; +} + +unsigned int +nouveau_is_texture_referenced(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned face, unsigned level) +{ + struct nouveau_miptree *mt = nouveau_miptree(pt); + + return nouveau_reference_flags(mt->bo); +} + +unsigned int +nouveau_is_buffer_referenced(struct pipe_context *pipe, struct pipe_buffer *pb) +{ + struct nouveau_bo *bo = nouveau_bo(pb); + + return nouveau_reference_flags(bo); +} + diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h new file mode 100644 index 0000000000..6a28d40da7 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_context.h @@ -0,0 +1,11 @@ +#ifndef __NOUVEAU_CONTEXT_H__ +#define __NOUVEAU_CONTEXT_H__ + +unsigned int +nouveau_is_texture_referenced(struct pipe_context *, struct pipe_texture *, + unsigned face, unsigned level); + +unsigned int +nouveau_is_buffer_referenced(struct pipe_context *, struct pipe_buffer *); + +#endif diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c index 17166c9f51..10d984ace9 100644 --- a/src/gallium/drivers/nv04/nv04_context.c +++ b/src/gallium/drivers/nv04/nv04_context.c @@ -64,30 +64,6 @@ nv04_init_hwctx(struct nv04_context *nv04) return TRUE; } -static unsigned int -nv04_is_texture_referenced( struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - -static unsigned int -nv04_is_buffer_referenced( struct pipe_context *pipe, - struct pipe_buffer *buf) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - - struct pipe_context * nv04_create(struct pipe_screen *pscreen, unsigned pctx_id) { @@ -113,8 +89,8 @@ nv04_create(struct pipe_screen *pscreen, unsigned pctx_id) nv04->pipe.clear = nv04_clear; nv04->pipe.flush = nv04_flush; - nv04->pipe.is_texture_referenced = nv04_is_texture_referenced; - nv04->pipe.is_buffer_referenced = nv04_is_buffer_referenced; + nv04->pipe.is_texture_referenced = nouveau_is_texture_referenced; + nv04->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; nv04_init_surface_functions(nv04); nv04_init_state_functions(nv04); diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h index 2842b2c90d..55326c787a 100644 --- a/src/gallium/drivers/nv04/nv04_context.h +++ b/src/gallium/drivers/nv04/nv04_context.h @@ -13,6 +13,7 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" +#include "nouveau/nouveau_context.h" #define NOUVEAU_PUSH_CONTEXT(ctx) \ struct nv04_screen *ctx = nv04->screen diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c index a127b134ec..933176fc32 100644 --- a/src/gallium/drivers/nv10/nv10_context.c +++ b/src/gallium/drivers/nv10/nv10_context.c @@ -257,29 +257,6 @@ nv10_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) { } -static unsigned int -nv10_is_texture_referenced( struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - -static unsigned int -nv10_is_buffer_referenced( struct pipe_context *pipe, - struct pipe_buffer *buf) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - struct pipe_context * nv10_create(struct pipe_screen *pscreen, unsigned pctx_id) { @@ -305,8 +282,8 @@ nv10_create(struct pipe_screen *pscreen, unsigned pctx_id) nv10->pipe.clear = nv10_clear; nv10->pipe.flush = nv10_flush; - nv10->pipe.is_texture_referenced = nv10_is_texture_referenced; - nv10->pipe.is_buffer_referenced = nv10_is_buffer_referenced; + nv10->pipe.is_texture_referenced = nouveau_is_texture_referenced; + nv10->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; nv10_init_surface_functions(nv10); nv10_init_state_functions(nv10); diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h index f1e003c953..36a6aa7a74 100644 --- a/src/gallium/drivers/nv10/nv10_context.h +++ b/src/gallium/drivers/nv10/nv10_context.h @@ -13,6 +13,7 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" +#include "nouveau/nouveau_context.h" #define NOUVEAU_PUSH_CONTEXT(ctx) \ struct nv10_screen *ctx = nv10->screen diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c index b32d0d83ba..9a48739661 100644 --- a/src/gallium/drivers/nv20/nv20_context.c +++ b/src/gallium/drivers/nv20/nv20_context.c @@ -380,30 +380,6 @@ nv20_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) { } - -static unsigned int -nv20_is_texture_referenced( struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - -static unsigned int -nv20_is_buffer_referenced( struct pipe_context *pipe, - struct pipe_buffer *buf) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - struct pipe_context * nv20_create(struct pipe_screen *pscreen, unsigned pctx_id) { @@ -429,8 +405,8 @@ nv20_create(struct pipe_screen *pscreen, unsigned pctx_id) nv20->pipe.clear = nv20_clear; nv20->pipe.flush = nv20_flush; - nv20->pipe.is_texture_referenced = nv20_is_texture_referenced; - nv20->pipe.is_buffer_referenced = nv20_is_buffer_referenced; + nv20->pipe.is_texture_referenced = nouveau_is_texture_referenced; + nv20->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; nv20_init_surface_functions(nv20); nv20_init_state_functions(nv20); diff --git a/src/gallium/drivers/nv20/nv20_context.h b/src/gallium/drivers/nv20/nv20_context.h index fc932f1f90..a4eaa95660 100644 --- a/src/gallium/drivers/nv20/nv20_context.h +++ b/src/gallium/drivers/nv20/nv20_context.h @@ -13,6 +13,7 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" +#include "nouveau/nouveau_context.h" #define NOUVEAU_PUSH_CONTEXT(ctx) \ struct nv20_screen *ctx = nv20->screen diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index a3e65b96f7..d8300fd69f 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -31,37 +31,6 @@ nv30_destroy(struct pipe_context *pipe) FREE(nv30); } -static unsigned int -nv30_is_texture_referenced( struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level) -{ - /** - * FIXME: Return the corrent result. We can't alays return referenced - * since it causes a double flush within the vbo module. - */ -#if 0 - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -#else - return 0; -#endif -} - -static unsigned int -nv30_is_buffer_referenced( struct pipe_context *pipe, - struct pipe_buffer *buf) -{ - /** - * FIXME: Return the corrent result. We can't alays return referenced - * since it causes a double flush within the vbo module. - */ -#if 0 - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -#else - return 0; -#endif -} - struct pipe_context * nv30_create(struct pipe_screen *pscreen, unsigned pctx_id) { @@ -86,8 +55,8 @@ nv30_create(struct pipe_screen *pscreen, unsigned pctx_id) nv30->pipe.clear = nv30_clear; nv30->pipe.flush = nv30_flush; - nv30->pipe.is_texture_referenced = nv30_is_texture_referenced; - nv30->pipe.is_buffer_referenced = nv30_is_buffer_referenced; + nv30->pipe.is_texture_referenced = nouveau_is_texture_referenced; + nv30->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; nv30_init_query_functions(nv30); nv30_init_surface_functions(nv30); diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 4229c0a0e1..8d49366dfc 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -13,6 +13,7 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" +#include "nouveau/nouveau_context.h" #define NOUVEAU_PUSH_CONTEXT(ctx) \ struct nv30_screen *ctx = nv30->screen diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index 4e23671202..7f008274a4 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -31,37 +31,6 @@ nv40_destroy(struct pipe_context *pipe) FREE(nv40); } -static unsigned int -nv40_is_texture_referenced( struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level) -{ - /** - * FIXME: Return the correct result. We can't always return referenced - * since it causes a double flush within the vbo module. - */ -#if 0 - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -#else - return 0; -#endif -} - -static unsigned int -nv40_is_buffer_referenced( struct pipe_context *pipe, - struct pipe_buffer *buf) -{ - /** - * FIXME: Return the correct result. We can't always return referenced - * since it causes a double flush within the vbo module. - */ -#if 0 - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -#else - return 0; -#endif -} - struct pipe_context * nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) { @@ -86,8 +55,8 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) nv40->pipe.clear = nv40_clear; nv40->pipe.flush = nv40_flush; - nv40->pipe.is_texture_referenced = nv40_is_texture_referenced; - nv40->pipe.is_buffer_referenced = nv40_is_buffer_referenced; + nv40->pipe.is_texture_referenced = nouveau_is_texture_referenced; + nv40->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; nv40_init_query_functions(nv40); nv40_init_surface_functions(nv40); diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 97bc83292d..a3d594167a 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -13,6 +13,7 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" +#include "nouveau/nouveau_context.h" #define NOUVEAU_PUSH_CONTEXT(ctx) \ struct nv40_screen *ctx = nv40->screen diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index fca078b174..7ef27bb671 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -60,29 +60,6 @@ nv50_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) { } -static unsigned int -nv50_is_texture_referenced( struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - -static unsigned int -nv50_is_buffer_referenced( struct pipe_context *pipe, - struct pipe_buffer *buf) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - struct pipe_context * nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) { @@ -108,8 +85,8 @@ nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) nv50->pipe.flush = nv50_flush; - nv50->pipe.is_texture_referenced = nv50_is_texture_referenced; - nv50->pipe.is_buffer_referenced = nv50_is_buffer_referenced; + nv50->pipe.is_texture_referenced = nouveau_is_texture_referenced; + nv50->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; screen->base.channel->user_private = nv50; screen->base.channel->flush_notify = nv50_state_flush_notify; diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 4608854d71..fd2dab856d 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -14,6 +14,7 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_stateobj.h" +#include "nouveau/nouveau_context.h" #include "nv50_screen.h" #include "nv50_program.h" -- cgit v1.2.3 From 6ab2fcca9d40ed65ab8d88c0253969c5311b7320 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 10 Oct 2009 13:18:07 +0200 Subject: nv50: nicer texture format switch Similar to nv40. --- src/gallium/drivers/nv50/nv50_tex.c | 144 ++++++++++++------------------------ 1 file changed, 49 insertions(+), 95 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index ca2b883e9b..81e04327e8 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -25,106 +25,60 @@ #include "nouveau/nouveau_stateobj.h" +#define _(pf, tt, r, g, b, a, tf) \ +{ \ + PIPE_FORMAT_##pf, \ + NV50TIC_0_0_MAPR_##r | NV50TIC_0_0_TYPER_##tt | \ + NV50TIC_0_0_MAPG_##g | NV50TIC_0_0_TYPEG_##tt | \ + NV50TIC_0_0_MAPB_##b | NV50TIC_0_0_TYPEB_##tt | \ + NV50TIC_0_0_MAPA_##a | NV50TIC_0_0_TYPEA_##tt | \ + NV50TIC_0_0_FMT_##tf \ +} + +struct nv50_texture_format { + enum pipe_format pf; + uint32_t hw; +}; + +#define NV50_TEX_FORMAT_LIST_SIZE \ + (sizeof(nv50_tex_format_list) / sizeof(struct nv50_texture_format)) + +static const struct nv50_texture_format nv50_tex_format_list[] = +{ + _(A8R8G8B8_UNORM, UNORM, C2, C1, C0, C3, 8_8_8_8), + _(X8R8G8B8_UNORM, UNORM, C2, C1, C0, ONE, 8_8_8_8), + _(A1R5G5B5_UNORM, UNORM, C2, C1, C0, C3, 1_5_5_5), + _(A4R4G4B4_UNORM, UNORM, C2, C1, C0, C3, 4_4_4_4), + + _(R5G6B5_UNORM, UNORM, C2, C1, C0, ONE, 5_6_5), + + _(L8_UNORM, UNORM, C0, C0, C0, ONE, 8), + _(A8_UNORM, UNORM, ZERO, ZERO, ZERO, C0, 8), + _(I8_UNORM, UNORM, C0, C0, C0, C0, 8), + + _(A8L8_UNORM, UNORM, C0, C0, C0, C1, 8_8), + + _(DXT1_RGB, UNORM, C0, C1, C2, ONE, DXT1), + _(DXT1_RGBA, UNORM, C0, C1, C2, C3, DXT1), + _(DXT3_RGBA, UNORM, C0, C1, C2, C3, DXT3), + _(DXT5_RGBA, UNORM, C0, C1, C2, C3, DXT5) +}; + +#undef _ + static int nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so, struct nv50_miptree *mt, int unit) { - switch (mt->base.base.format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_8_8_8_8); - break; - case PIPE_FORMAT_X8R8G8B8_UNORM: - so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_8_8_8_8); - break; - case PIPE_FORMAT_A1R5G5B5_UNORM: - so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_1_5_5_5); - break; - case PIPE_FORMAT_A4R4G4B4_UNORM: - so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_4_4_4_4); - break; - case PIPE_FORMAT_R5G6B5_UNORM: - so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_5_6_5); - break; - case PIPE_FORMAT_L8_UNORM: - so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_8); - break; - case PIPE_FORMAT_A8_UNORM: - so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_ZERO | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_ZERO | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_ZERO | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_8); - break; - case PIPE_FORMAT_I8_UNORM: - so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_8); - break; - case PIPE_FORMAT_A8L8_UNORM: - so_data(so, NV50TIC_0_0_MAPA_C1 | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_8_8); - break; - case PIPE_FORMAT_DXT1_RGB: - so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_DXT1); - break; - case PIPE_FORMAT_DXT1_RGBA: - so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_DXT1); - break; - case PIPE_FORMAT_DXT3_RGBA: - so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_DXT3); - break; - case PIPE_FORMAT_DXT5_RGBA: - so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_DXT5); - break; - default: - return 1; - } + unsigned i; + + for (i = 0; i < NV50_TEX_FORMAT_LIST_SIZE; i++) + if (nv50_tex_format_list[i].pf == mt->base.base.format) + break; + if (i == NV50_TEX_FORMAT_LIST_SIZE) + return 1; + so_data (so, nv50_tex_format_list[i].hw); so_reloc(so, mt->base.bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0); if (nv50->sampler[unit]->normalized) -- cgit v1.2.3 From fba2eabe13b8a3f8c1396c5949db3daab0192156 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 10 Oct 2009 13:13:16 +0200 Subject: nv50: use SIFC for TIC, TSC upload Add proper flushes for TIC and TSC and remove the costly 2D.0110 flush in nv50_flush. Correct TIC and TSC bo sizes. --- src/gallium/drivers/nv50/nv50_context.c | 7 ---- src/gallium/drivers/nv50/nv50_context.h | 5 +++ src/gallium/drivers/nv50/nv50_screen.c | 25 ++---------- src/gallium/drivers/nv50/nv50_state_validate.c | 54 ++++++++++++++++++++++---- src/gallium/drivers/nv50/nv50_tex.c | 34 +++++++++------- src/gallium/drivers/nv50/nv50_vbo.c | 4 -- 6 files changed, 77 insertions(+), 52 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 7ef27bb671..219e7a7862 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -33,13 +33,6 @@ nv50_flush(struct pipe_context *pipe, unsigned flags, { struct nv50_context *nv50 = nv50_context(pipe); struct nouveau_channel *chan = nv50->screen->base.channel; - struct nouveau_grobj *eng2d = nv50->screen->eng2d; - - /* We need this in the ddx for reliable composite, not sure what we're - * actually flushing. We generate all our own flushes with flags = 0. */ - WAIT_RING(chan, 2); - BEGIN_RING(chan, eng2d, 0x0110, 1); - OUT_RING (chan, 0); if (flags & PIPE_FLUSH_FRAME) FIRE_RING(chan); diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index fd2dab856d..75cb65d9a2 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -199,6 +199,11 @@ extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program extern boolean nv50_state_validate(struct nv50_context *nv50); extern void nv50_state_flush_notify(struct nouveau_channel *chan); +extern void nv50_so_init_sifc(struct nv50_context *nv50, + struct nouveau_stateobj *so, + struct nouveau_bo *bo, unsigned reloc, + unsigned size); + /* nv50_tex.c */ extern void nv50_tex_validate(struct nv50_context *); diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index dd7baecba7..66361dc3ba 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -364,48 +364,31 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000131 | (NV50_CB_PFP << 12)); - /* Texture sampler/image unit setup - we abuse the constant buffer - * upload mechanism for the moment to upload data to the tex config - * blocks. At some point we *may* want to go the NVIDIA way of doing - * things? - */ - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 32*8*4, &screen->tic); + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tic); if (ret) { nv50_screen_destroy(pscreen); return NULL; } - so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); - so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, (NV50_CB_TIC << 16) | 0x0800); so_method(so, screen->tesla, NV50TCL_TIC_ADDRESS_HIGH, 3); so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0x00000800); + so_data (so, 0x000007ff); - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 32*8*4, &screen->tsc); + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tsc); if (ret) { nv50_screen_destroy(pscreen); return NULL; } - so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); - so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, (NV50_CB_TSC << 16) | 0x0800); so_method(so, screen->tesla, NV50TCL_TSC_ADDRESS_HIGH, 3); so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0x00000800); + so_data (so, 0x00000000); /* Vertex array limits - max them out */ diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 9079de918d..012911f41b 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -222,6 +222,9 @@ nv50_state_flush_notify(struct nouveau_channel *chan) { struct nv50_context *nv50 = chan->user_private; + if (nv50->state.tic_upload && !(nv50->dirty & NV50_NEW_TEXTURE)) + so_emit(chan, nv50->state.tic_upload); + so_emit_reloc_markers(chan, nv50->state.fb); so_emit_reloc_markers(chan, nv50->state.vertprog); so_emit_reloc_markers(chan, nv50->state.fragprog); @@ -233,6 +236,7 @@ boolean nv50_state_validate(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_grobj *eng2d = nv50->screen->eng2d; struct nouveau_stateobj *so; unsigned i; @@ -354,19 +358,25 @@ scissor_uptodate: viewport_uptodate: if (nv50->dirty & NV50_NEW_SAMPLER) { - int i; + unsigned i; + + so = so_new(nv50->sampler_nr * 9 + 23 + 4, 2); + + nv50_so_init_sifc(nv50, so, nv50->screen->tsc, NOUVEAU_BO_VRAM, + nv50->sampler_nr * 8 * 4); - so = so_new(nv50->sampler_nr * 11, 0); for (i = 0; i < nv50->sampler_nr; i++) { if (!nv50->sampler[i]) continue; - - so_method(so, tesla, NV50TCL_CB_ADDR, 1); - so_data (so, ((i * 8) << NV50TCL_CB_ADDR_ID_SHIFT) | - NV50_CB_TSC); - so_method(so, tesla, NV50TCL_CB_DATA(0) | (2<<29), 8); + so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8); so_datap (so, nv50->sampler[i]->tsc, 8); } + + so_method(so, tesla, 0x1440, 1); /* sync SIFC */ + so_data (so, 0); + so_method(so, tesla, 0x1334, 1); /* flush TSC */ + so_data (so, 0); + so_ref(so, &nv50->state.tsc_upload); so_ref(NULL, &so); } @@ -384,3 +394,33 @@ viewport_uptodate: return TRUE; } +void nv50_so_init_sifc(struct nv50_context *nv50, + struct nouveau_stateobj *so, + struct nouveau_bo *bo, unsigned reloc, unsigned size) +{ + struct nouveau_grobj *eng2d = nv50->screen->eng2d; + + so_method(so, eng2d, NV50_2D_DST_FORMAT, 2); + so_data (so, NV50_2D_DST_FORMAT_R8_UNORM); + so_data (so, 1); + so_method(so, eng2d, NV50_2D_DST_PITCH, 5); + so_data (so, 262144); + so_data (so, 65536); + so_data (so, 1); + so_reloc (so, bo, 0, reloc | NOUVEAU_BO_WR | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, bo, 0, reloc | NOUVEAU_BO_WR | NOUVEAU_BO_LOW, 0, 0); + so_method(so, eng2d, NV50_2D_SIFC_UNK0800, 2); + so_data (so, 0); + so_data (so, NV50_2D_SIFC_FORMAT_R8_UNORM); + so_method(so, eng2d, NV50_2D_SIFC_WIDTH, 10); + so_data (so, size); + so_data (so, 1); + so_data (so, 0); + so_data (so, 1); + so_data (so, 0); + so_data (so, 1); + so_data (so, 0); + so_data (so, 0); + so_data (so, 0); + so_data (so, 0); +} diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index 81e04327e8..e12a6ad648 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -98,24 +98,24 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so, void nv50_tex_validate(struct nv50_context *nv50) { + struct nouveau_grobj *eng2d = nv50->screen->eng2d; struct nouveau_grobj *tesla = nv50->screen->tesla; struct nouveau_stateobj *so; - int unit, push; + unsigned i, unit, push; - push = nv50->miptree_nr * 11; - push += MAX2(nv50->miptree_nr, nv50->state.miptree_nr) * 2; + push = MAX2(nv50->miptree_nr, nv50->state.miptree_nr) * 2 + 23 + 6; + so = so_new(nv50->miptree_nr * 9 + push, nv50->miptree_nr + 2); - so = so_new(push, nv50->miptree_nr * 2); - for (unit = 0; unit < nv50->miptree_nr; unit++) { + nv50_so_init_sifc(nv50, so, nv50->screen->tic, NOUVEAU_BO_VRAM, + nv50->miptree_nr * 8 * 4); + + for (i = 0, unit = 0; unit < nv50->miptree_nr; ++unit) { struct nv50_miptree *mt = nv50->miptree[unit]; if (!mt) continue; - so_method(so, tesla, NV50TCL_CB_ADDR, 1); - so_data (so, ((unit * 8) << NV50TCL_CB_ADDR_ID_SHIFT) | - NV50_CB_TIC); - so_method(so, tesla, NV50TCL_CB_DATA(0) | 0x40000000, 8); + so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8); if (nv50_tex_construct(nv50, so, mt, unit)) { NOUVEAU_ERR("failed tex validate\n"); so_ref(NULL, &so); @@ -123,17 +123,25 @@ nv50_tex_validate(struct nv50_context *nv50) } so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1); - so_data (so, (unit << NV50TCL_SET_SAMPLER_TEX_TIC_SHIFT) | - (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) | - NV50TCL_SET_SAMPLER_TEX_VALID); + so_data (so, (i++ << NV50TCL_SET_SAMPLER_TEX_TIC_SHIFT) | + (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) | + NV50TCL_SET_SAMPLER_TEX_VALID); } for (; unit < nv50->state.miptree_nr; unit++) { so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1); so_data (so, - (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) | 0); + (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) | 0); } + /* not sure if the following really do what I think: */ + so_method(so, tesla, 0x1440, 1); /* sync SIFC */ + so_data (so, 0); + so_method(so, tesla, 0x1330, 1); /* flush TIC */ + so_data (so, 0); + so_method(so, tesla, 0x1338, 1); /* flush texture caches */ + so_data (so, 0x20); + so_ref(so, &nv50->state.tic_upload); so_ref(NULL, &so); nv50->state.miptree_nr = nv50->miptree_nr; diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index eeed148c7b..8b0fbf0e76 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -139,10 +139,6 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, OUT_RING (chan, 0); BEGIN_RING(chan, tesla, 0x142c, 1); OUT_RING (chan, 0); - BEGIN_RING(chan, tesla, 0x1440, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, tesla, 0x1334, 1); - OUT_RING (chan, 0); BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); OUT_RING (chan, nv50_prim(mode)); -- cgit v1.2.3 From c0e80cf0e97cec526bb2ff0f94d9142e33374c20 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 14 Oct 2009 21:23:29 +0200 Subject: nv50: submit user vbo data through the fifo Requesting a new real buffer from the kernel and copying all the data is wasteful e.g. if only a few (but widely spread) vertices are accessed. --- src/gallium/drivers/nv50/nv50_context.h | 3 + src/gallium/drivers/nv50/nv50_vbo.c | 409 ++++++++++++++++++++++++++++++-- 2 files changed, 394 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 75cb65d9a2..33667e8765 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -121,6 +121,7 @@ struct nv50_state { struct nouveau_stateobj *vtxfmt; struct nouveau_stateobj *vtxbuf; struct nouveau_stateobj *vtxattr; + unsigned vtxelt_nr; }; struct nv50_context { @@ -153,6 +154,8 @@ struct nv50_context { unsigned sampler_nr; struct nv50_miptree *miptree[PIPE_MAX_SAMPLERS]; unsigned miptree_nr; + + uint16_t vbo_fifo; }; static INLINE struct nv50_context * diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 8b0fbf0e76..db54380241 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -26,6 +26,18 @@ #include "nv50_context.h" +static boolean +nv50_push_elements_u08(struct nv50_context *, uint8_t *, unsigned); + +static boolean +nv50_push_elements_u16(struct nv50_context *, uint16_t *, unsigned); + +static boolean +nv50_push_elements_u32(struct nv50_context *, uint32_t *, unsigned); + +static boolean +nv50_push_arrays(struct nv50_context *, unsigned, unsigned); + static INLINE unsigned nv50_prim(unsigned mode) { @@ -132,6 +144,7 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, struct nv50_context *nv50 = nv50_context(pipe); struct nouveau_channel *chan = nv50->screen->tesla->channel; struct nouveau_grobj *tesla = nv50->screen->tesla; + boolean ret; nv50_state_validate(nv50); @@ -142,17 +155,22 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); OUT_RING (chan, nv50_prim(mode)); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); - OUT_RING (chan, start); - OUT_RING (chan, count); + + if (nv50->vbo_fifo) + ret = nv50_push_arrays(nv50, start, count); + else { + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); + OUT_RING (chan, start); + OUT_RING (chan, count); + ret = TRUE; + } BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); - pipe->flush(pipe, 0, NULL); - return TRUE; + return ret; } -static INLINE void +static INLINE boolean nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, unsigned start, unsigned count) { @@ -161,6 +179,9 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, map += start; + if (nv50->vbo_fifo) + return nv50_push_elements_u08(nv50, map, count); + if (count & 1) { BEGIN_RING(chan, tesla, 0x15e8, 1); OUT_RING (chan, map[0]); @@ -179,9 +200,10 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, count -= nr; map += nr; } + return TRUE; } -static INLINE void +static INLINE boolean nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, unsigned start, unsigned count) { @@ -190,6 +212,9 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, map += start; + if (nv50->vbo_fifo) + return nv50_push_elements_u16(nv50, map, count); + if (count & 1) { BEGIN_RING(chan, tesla, 0x15e8, 1); OUT_RING (chan, map[0]); @@ -208,9 +233,10 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, count -= nr; map += nr; } + return TRUE; } -static INLINE void +static INLINE boolean nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, unsigned start, unsigned count) { @@ -219,6 +245,9 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, map += start; + if (nv50->vbo_fifo) + return nv50_push_elements_u32(nv50, map, count); + while (count) { unsigned nr = count > 2047 ? 2047 : count; @@ -228,6 +257,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, count -= nr; map += nr; } + return TRUE; } boolean @@ -240,6 +270,7 @@ nv50_draw_elements(struct pipe_context *pipe, struct nouveau_grobj *tesla = nv50->screen->tesla; struct pipe_screen *pscreen = pipe->screen; void *map; + boolean ret; map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); @@ -254,23 +285,25 @@ nv50_draw_elements(struct pipe_context *pipe, OUT_RING (chan, nv50_prim(mode)); switch (indexSize) { case 1: - nv50_draw_elements_inline_u08(nv50, map, start, count); + ret = nv50_draw_elements_inline_u08(nv50, map, start, count); break; case 2: - nv50_draw_elements_inline_u16(nv50, map, start, count); + ret = nv50_draw_elements_inline_u16(nv50, map, start, count); break; case 4: - nv50_draw_elements_inline_u32(nv50, map, start, count); + ret = nv50_draw_elements_inline_u32(nv50, map, start, count); break; default: assert(0); + ret = FALSE; + break; } BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); pipe_buffer_unmap(pscreen, indexBuffer); - pipe->flush(pipe, 0, NULL); - return TRUE; + + return ret; } static INLINE boolean @@ -337,17 +370,24 @@ nv50_vbo_validate(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; struct nouveau_stateobj *vtxbuf, *vtxfmt, *vtxattr; - unsigned i; + unsigned i, n_ve; /* don't validate if Gallium took away our buffers */ if (nv50->vtxbuf_nr == 0) return; + nv50->vbo_fifo = 0; + + for (i = 0; i < nv50->vtxbuf_nr; ++i) + if (nv50->vtxbuf[i].stride && + !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)) + nv50->vbo_fifo = 0xffff; + + n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr); vtxattr = NULL; - vtxbuf = so_new(nv50->vtxelt_nr * 7, nv50->vtxelt_nr * 4); - vtxfmt = so_new(nv50->vtxelt_nr + 1, 0); - so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), - nv50->vtxelt_nr); + vtxbuf = so_new(n_ve * 7, nv50->vtxelt_nr * 4); + vtxfmt = so_new(n_ve + 1, 0); + so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve); for (i = 0; i < nv50->vtxelt_nr; i++) { struct pipe_vertex_element *ve = &nv50->vtxelt[i]; @@ -363,10 +403,19 @@ nv50_vbo_validate(struct nv50_context *nv50) so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1); so_data (vtxbuf, 0); + + nv50->vbo_fifo &= ~(1 << i); continue; } so_data(vtxfmt, hw | i); + if (nv50->vbo_fifo) { + so_method(vtxbuf, tesla, + NV50TCL_VERTEX_ARRAY_FORMAT(i), 1); + so_data (vtxbuf, 0); + continue; + } + so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3); so_data (vtxbuf, 0x20000000 | vb->stride); so_reloc (vtxbuf, bo, vb->buffer_offset + @@ -385,6 +434,13 @@ nv50_vbo_validate(struct nv50_context *nv50) NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); } + for (; i < n_ve; ++i) { + so_data (vtxfmt, 0x7e080010); + + so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1); + so_data (vtxbuf, 0); + } + nv50->state.vtxelt_nr = nv50->vtxelt_nr; so_ref (vtxfmt, &nv50->state.vtxfmt); so_ref (vtxbuf, &nv50->state.vtxbuf); @@ -394,3 +450,320 @@ nv50_vbo_validate(struct nv50_context *nv50) so_ref (NULL, &vtxattr); } +typedef void (*pfn_push)(struct nouveau_channel *, void *); + +struct nv50_vbo_emitctx +{ + pfn_push push[16]; + void *map[16]; + unsigned stride[16]; + unsigned nr_ve; + unsigned vtx_dwords; + unsigned vtx_max; +}; + +static INLINE void +emit_vtx_next(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit) +{ + unsigned i; + + for (i = 0; i < emit->nr_ve; ++i) { + emit->push[i](chan, emit->map[i]); + emit->map[i] += emit->stride[i]; + } +} + +static INLINE void +emit_vtx(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit, + uint32_t vi) +{ + unsigned i; + + for (i = 0; i < emit->nr_ve; ++i) + emit->push[i](chan, emit->map[i] + emit->stride[i] * vi); +} + +static INLINE boolean +nv50_map_vbufs(struct nv50_context *nv50) +{ + int i; + + for (i = 0; i < nv50->vtxbuf_nr; ++i) { + struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i]; + unsigned size, delta; + + if (nouveau_bo(vb->buffer)->map) + continue; + + size = vb->stride * (vb->max_index + 1); + delta = vb->buffer_offset; + + if (!size) + size = vb->buffer->size - vb->buffer_offset; + + if (nouveau_bo_map_range(nouveau_bo(vb->buffer), + delta, size, NOUVEAU_BO_RD)) + break; + } + + if (i == nv50->vtxbuf_nr) + return TRUE; + for (; i >= 0; --i) + nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer)); + return FALSE; +} + +static INLINE void +nv50_unmap_vbufs(struct nv50_context *nv50) +{ + unsigned i; + + for (i = 0; i < nv50->vtxbuf_nr; ++i) + if (nouveau_bo(nv50->vtxbuf[i].buffer)->map) + nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer)); +} + +static void +emit_b32_1(struct nouveau_channel *chan, void *data) +{ + uint32_t *v = data; + + OUT_RING(chan, v[0]); +} + +static void +emit_b32_2(struct nouveau_channel *chan, void *data) +{ + uint32_t *v = data; + + OUT_RING(chan, v[0]); + OUT_RING(chan, v[1]); +} + +static void +emit_b32_3(struct nouveau_channel *chan, void *data) +{ + uint32_t *v = data; + + OUT_RING(chan, v[0]); + OUT_RING(chan, v[1]); + OUT_RING(chan, v[2]); +} + +static void +emit_b32_4(struct nouveau_channel *chan, void *data) +{ + uint32_t *v = data; + + OUT_RING(chan, v[0]); + OUT_RING(chan, v[1]); + OUT_RING(chan, v[2]); + OUT_RING(chan, v[3]); +} + +static void +emit_b16_1(struct nouveau_channel *chan, void *data) +{ + uint16_t *v = data; + + OUT_RING(chan, v[0]); +} + +static void +emit_b16_3(struct nouveau_channel *chan, void *data) +{ + uint16_t *v = data; + + OUT_RING(chan, (v[1] << 16) | v[0]); + OUT_RING(chan, v[2]); +} + +static void +emit_b08_1(struct nouveau_channel *chan, void *data) +{ + uint8_t *v = data; + + OUT_RING(chan, v[0]); +} + +static void +emit_b08_3(struct nouveau_channel *chan, void *data) +{ + uint8_t *v = data; + + OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]); +} + +static boolean +emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, + unsigned start) +{ + unsigned i; + + if (nv50_map_vbufs(nv50) == FALSE) + return FALSE; + + emit->nr_ve = 0; + emit->vtx_dwords = 0; + + for (i = 0; i < nv50->vtxelt_nr; ++i) { + struct pipe_vertex_element *ve; + struct pipe_vertex_buffer *vb; + unsigned n, type, size; + + ve = &nv50->vtxelt[i]; + vb = &nv50->vtxbuf[ve->vertex_buffer_index]; + if (!(nv50->vbo_fifo & (1 << i))) + continue; + n = emit->nr_ve++; + + emit->stride[n] = vb->stride; + emit->map[n] = nouveau_bo(vb->buffer)->map + + (start * vb->stride + ve->src_offset); + + type = pf_type(ve->src_format); + size = pf_size_x(ve->src_format) << pf_exp2(ve->src_format); + + assert(ve->nr_components > 0 && ve->nr_components <= 4); + + /* It shouldn't be necessary to push the implicit 1s + * for case 3 and size 8 cases 1, 2, 3. + */ + switch (size) { + default: + NOUVEAU_ERR("unsupported vtxelt size: %u\n", size); + return FALSE; + case 32: + switch (ve->nr_components) { + case 1: emit->push[n] = emit_b32_1; break; + case 2: emit->push[n] = emit_b32_2; break; + case 3: emit->push[n] = emit_b32_3; break; + case 4: emit->push[n] = emit_b32_4; break; + } + emit->vtx_dwords += ve->nr_components; + break; + case 16: + switch (ve->nr_components) { + case 1: emit->push[n] = emit_b16_1; break; + case 2: emit->push[n] = emit_b32_1; break; + case 3: emit->push[n] = emit_b16_3; break; + case 4: emit->push[n] = emit_b32_2; break; + } + emit->vtx_dwords += (ve->nr_components + 1) >> 1; + break; + case 8: + switch (ve->nr_components) { + case 1: emit->push[n] = emit_b08_1; break; + case 2: emit->push[n] = emit_b16_1; break; + case 3: emit->push[n] = emit_b08_3; break; + case 4: emit->push[n] = emit_b32_1; break; + } + emit->vtx_dwords += 1; + break; + } + } + + emit->vtx_max = 512 / emit->vtx_dwords; + + return TRUE; +} + +static boolean +nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_vbo_emitctx emit; + + if (emit_prepare(nv50, &emit, start) == FALSE) + return FALSE; + + while (count) { + unsigned i, dw, nr = MIN2(count, emit.vtx_max); + dw = nr * emit.vtx_dwords; + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + for (i = 0; i < nr; ++i) + emit_vtx_next(chan, &emit); + + count -= nr; + } + nv50_unmap_vbufs(nv50); + + return TRUE; +} + +static boolean +nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count) +{ + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_vbo_emitctx emit; + + if (emit_prepare(nv50, &emit, 0) == FALSE) + return FALSE; + + while (count) { + unsigned i, dw, nr = MIN2(count, emit.vtx_max); + dw = nr * emit.vtx_dwords; + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + for (i = 0; i < nr; ++i) + emit_vtx(chan, &emit, *map++); + + count -= nr; + } + nv50_unmap_vbufs(nv50); + + return TRUE; +} + +static boolean +nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count) +{ + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_vbo_emitctx emit; + + if (emit_prepare(nv50, &emit, 0) == FALSE) + return FALSE; + + while (count) { + unsigned i, dw, nr = MIN2(count, emit.vtx_max); + dw = nr * emit.vtx_dwords; + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + for (i = 0; i < nr; ++i) + emit_vtx(chan, &emit, *map++); + + count -= nr; + } + nv50_unmap_vbufs(nv50); + + return TRUE; +} + +static boolean +nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count) +{ + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_vbo_emitctx emit; + + if (emit_prepare(nv50, &emit, 0) == FALSE) + return FALSE; + + while (count) { + unsigned i, dw, nr = MIN2(count, emit.vtx_max); + dw = nr * emit.vtx_dwords; + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + for (i = 0; i < nr; ++i) + emit_vtx(chan, &emit, *map++); + + count -= nr; + } + nv50_unmap_vbufs(nv50); + + return TRUE; +} -- cgit v1.2.3 From 1635e8d6f4b96e691746e8c8c5a273089bae6843 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 14 Oct 2009 21:27:35 +0200 Subject: nv50: add support for DDX and DDY opcodes --- src/gallium/drivers/nv50/nv50_program.c | 70 ++++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 576d075318..89e0ac8db9 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -837,7 +837,7 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) #define CVTOP_SAT 0x08 #define CVTOP_ABS 0x10 -/* 0x04 == 32 bit */ +/* 0x04 == 32 bit dst */ /* 0x40 == dst is float */ /* 0x80 == src is float */ #define CVT_F32_F32 0xc4 @@ -858,7 +858,7 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, set_long(pc, e); e->inst[0] |= 0xa0000000; - e->inst[1] |= 0x00004000; + e->inst[1] |= 0x00004000; /* 32 bit src */ e->inst[1] |= (cvn << 16); e->inst[1] |= (fmt << 24); set_src_0(pc, src, e); @@ -1037,20 +1037,10 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, FREE(one); } -static void +static INLINE void emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - struct nv50_program_exec *e = exec(pc); - - set_long(pc, e); - e->inst[0] |= 0xa0000000; /* delta */ - e->inst[1] |= (7 << 29); /* delta */ - e->inst[1] |= 0x04000000; /* negate arg0? probably not */ - e->inst[1] |= (1 << 14); /* src .f32 */ - set_dst(pc, dst, e); - set_src_0(pc, src, e); - - emit(pc, e); + emit_cvt(pc, dst, src, -1, CVTOP_RN, CVT_F32_F32 | CVT_NEG); } static void @@ -1218,6 +1208,43 @@ emit_nop(struct nv50_pc *pc) emit(pc, e); } +static void +emit_ddx(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + assert(src->type == P_TEMP); + + e->inst[0] = 0xc0140000; + e->inst[1] = 0x89800000; + set_long(pc, e); + set_dst(pc, dst, e); + set_src_0(pc, src, e); + set_src_2(pc, src, e); + + emit(pc, e); +} + +static void +emit_ddy(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + assert(src->type == P_TEMP); + + if (!src->neg) /* ! double negation */ + emit_neg(pc, src, src); + + e->inst[0] = 0xc0150000; + e->inst[1] = 0x8a400000; + set_long(pc, e); + set_dst(pc, dst, e); + set_src_0(pc, src, e); + set_src_2(pc, src, e); + + emit(pc, e); +} + static void convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e) { @@ -1270,6 +1297,7 @@ static boolean negate_supported(const struct tgsi_full_instruction *insn, int i) { switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_DDY: case TGSI_OPCODE_DP3: case TGSI_OPCODE_DP4: case TGSI_OPCODE_MUL: @@ -1660,6 +1688,20 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_precossin(pc, temp, src[0][0]); emit_flop(pc, 5, brdc, temp); break; + case TGSI_OPCODE_DDX: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_ddx(pc, dst[c], src[0][c]); + } + break; + case TGSI_OPCODE_DDY: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_ddy(pc, dst[c], src[0][c]); + } + break; case TGSI_OPCODE_DP3: emit_mul(pc, temp, src[0][0], src[1][0]); emit_mad(pc, temp, src[0][1], src[1][1], temp); -- cgit v1.2.3 From f204eb184237b387432413212a3a20d83c87594b Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Tue, 13 Oct 2009 15:09:13 +0200 Subject: nv50: quick fix for insn src negation We only have a per nv50_reg negation flag, if an nv50_reg is used more than once in a TGSI op with different sign modes, we'd generate wrong code. We probably can't do much better without more invasive changes. --- src/gallium/drivers/nv50/nv50_program.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 89e0ac8db9..1bd6f717d1 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1293,9 +1293,12 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e) e->inst[1] |= q; } +/* Some operations support an optional negation flag. */ static boolean negate_supported(const struct tgsi_full_instruction *insn, int i) { + int s; + switch (insn->Instruction.Opcode) { case TGSI_OPCODE_DDY: case TGSI_OPCODE_DP3: @@ -1305,12 +1308,29 @@ negate_supported(const struct tgsi_full_instruction *insn, int i) case TGSI_OPCODE_ADD: case TGSI_OPCODE_SUB: case TGSI_OPCODE_MAD: - return TRUE; + break; case TGSI_OPCODE_POW: - return (i == 1) ? TRUE : FALSE; + if (i == 1) + break; + return FALSE; default: return FALSE; } + + /* Watch out for possible multiple uses of an nv50_reg, we + * can't use nv50_reg::neg in these cases. + */ + for (s = 0; s < insn->Instruction.NumSrcRegs; ++s) { + if (s == i) + continue; + if ((insn->FullSrcRegisters[s].SrcRegister.Index == + insn->FullSrcRegisters[i].SrcRegister.Index) && + (insn->FullSrcRegisters[s].SrcRegister.File == + insn->FullSrcRegisters[i].SrcRegister.File)) + return FALSE; + } + + return TRUE; } /* Return a read mask for source registers deduced from opcode & write mask. */ @@ -1956,6 +1976,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!src[i][c]) continue; + src[i][c]->neg = 0; if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD) FREE(src[i][c]); } -- cgit v1.2.3 From 2eef2017acbbb617c559555648c7745141f3aedb Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 19 Oct 2009 17:47:29 +0200 Subject: nv50: implement TGSI_OPCODE_CMP --- src/gallium/drivers/nv50/nv50_program.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 1bd6f717d1..3b7033b518 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -506,11 +506,13 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { struct nv50_program_exec *e = exec(pc); - e->inst[0] |= 0x10000000; + e->inst[0] = 0x10000000; + if (!pc->allow32) + set_long(pc, e); set_dst(pc, dst, e); - if (pc->allow32 && dst->type != P_RESULT && src->type == P_IMMD) { + if (!is_long(e) && src->type == P_IMMD) { set_immd(pc, src, e); /*XXX: 32-bit, but steals part of "half" reg space - need to * catch and handle this case if/when we do half-regs @@ -1696,6 +1698,18 @@ nv50_program_tx_insn(struct nv50_pc *pc, CVTOP_CEIL, CVT_F32_F32 | CVT_RI); } break; + case TGSI_OPCODE_CMP: + pc->allow32 = FALSE; + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, NULL, src[0][c], 1, CVTOP_RN, CVT_F32_F32); + emit_mov(pc, dst[c], src[1][c]); + set_pred(pc, 0x1, 1, pc->p->exec_tail); /* @SF */ + emit_mov(pc, dst[c], src[2][c]); + set_pred(pc, 0x6, 1, pc->p->exec_tail); /* @NSF */ + } + break; case TGSI_OPCODE_COS: if (mask & 8) { emit_precossin(pc, temp, src[0][3]); -- cgit v1.2.3 From eb7ea97e7fff1ee39921ad81294c4963b5b3ded8 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 19 Oct 2009 17:53:31 +0200 Subject: nv50: cleanup emit_kil --- src/gallium/drivers/nv50/nv50_program.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 3b7033b518..bfd979ce0f 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1050,22 +1050,18 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src) { struct nv50_program_exec *e; const int r_pred = 1; + unsigned cvn = CVT_F32_F32; - /* Sets predicate reg ? */ - e = exec(pc); - e->inst[0] = 0xa00001fd; - e->inst[1] = 0xc4014788; - set_src_0(pc, src, e); - set_pred_wr(pc, 1, r_pred, e); if (src->neg) - e->inst[1] |= 0x20000000; - emit(pc, e); + cvn |= CVT_NEG; + /* write predicate reg */ + emit_cvt(pc, NULL, src, r_pred, CVTOP_RN, cvn); - /* This is probably KILP */ + /* conditional discard */ e = exec(pc); - e->inst[0] = 0x000001fe; + e->inst[0] = 0x00000002; set_long(pc, e); - set_pred(pc, 1 /* LT? */, r_pred, e); + set_pred(pc, 0x1 /* LT */, r_pred, e); emit(pc, e); } -- cgit v1.2.3 From ec5c23551cdb4c369d8f8f392208f4d4bf29911b Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 19 Oct 2009 18:17:45 +0200 Subject: nv50: add support for address regs Allow indirect uniform access and increase the limit on parameters from 128 to 512. --- src/gallium/drivers/nv50/nv50_program.c | 178 ++++++++++++++++++++++++++++++-- src/gallium/drivers/nv50/nv50_screen.c | 10 +- 2 files changed, 175 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index bfd979ce0f..c7145bb9be 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -32,6 +32,7 @@ #include "nv50_context.h" #define NV50_SU_MAX_TEMP 64 +#define NV50_SU_MAX_ADDR 7 //#define NV50_PROGRAM_DUMP /* ARL - gallium craps itself on progs/vp/arl.txt @@ -79,7 +80,8 @@ struct nv50_reg { P_ATTR, P_RESULT, P_CONST, - P_IMMD + P_IMMD, + P_ADDR } type; int index; @@ -99,6 +101,7 @@ struct nv50_pc { /* hw resources */ struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; + struct nv50_reg r_addr[NV50_SU_MAX_ADDR]; /* tgsi resources */ struct nv50_reg *temp; @@ -112,6 +115,8 @@ struct nv50_pc { struct nv50_reg *immd; float *immd_buf; int immd_nr; + struct nv50_reg **addr; + int addr_nr; struct nv50_reg *temp_temp[16]; unsigned temp_temp_nr; @@ -158,6 +163,17 @@ popcnt4(uint32_t val) return cnt[val & 0xf]; } +static void +terminate_mbb(struct nv50_pc *pc) +{ + int i; + + /* remove records of temporary address register values */ + for (i = 0; i < NV50_SU_MAX_ADDR; ++i) + if (pc->r_addr[i].index < 0) + pc->r_addr[i].rhw = -1; +} + static void alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) { @@ -454,9 +470,68 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) e->inst[1] |= (val >> 6) << 2; } +static void +emit_set_addr(struct nv50_pc *pc, struct nv50_reg *dst, unsigned val) +{ + struct nv50_program_exec *e = exec(pc); + + assert(val <= 0xffff); + e->inst[0] = 0xd0000000 | ((val & 0xffff) << 9); + e->inst[1] = 0x20000000; + e->inst[0] |= dst->hw << 2; + set_long(pc, e); + + emit(pc, e); +} + +static struct nv50_reg * +alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref) +{ + int i; + struct nv50_reg *a = NULL; + + if (!ref) { + for (i = 0; i < NV50_SU_MAX_ADDR; ++i) { + if (pc->r_addr[i].index >= 0) + continue; + if (pc->r_addr[i].rhw >= 0 && + pc->r_addr[i].acc == pc->insn_cur) + continue; + + pc->r_addr[i].rhw = -1; + pc->r_addr[i].index = i; + return &pc->r_addr[i]; + } + assert(0); + return NULL; + } + + for (i = NV50_SU_MAX_ADDR - 1; i >= 0; --i) { + if (pc->r_addr[i].index >= 0) /* occupied for TGSI */ + continue; + if (pc->r_addr[i].rhw < 0) { /* unused */ + a = &pc->r_addr[i]; + continue; + } + if (!a && pc->r_addr[i].acc != pc->insn_cur) + a = &pc->r_addr[i]; + + if (ref->hw - pc->r_addr[i].rhw < 128) { + /* alloc'd & suitable */ + pc->r_addr[i].acc = pc->insn_cur; + return &pc->r_addr[i]; + } + } + assert(a); + emit_set_addr(pc, a, ref->hw * 4); + + a->rhw = ref->hw % 128; + a->acc = pc->insn_cur; + return a; +} #define INTERP_LINEAR 0 -#define INTERP_FLAT 1 +#define INTERP_FLAT 1 #define INTERP_PERSPECTIVE 2 #define INTERP_CENTROID 4 @@ -488,6 +563,16 @@ emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *iv, emit(pc, e); } +static INLINE void +set_addr(struct nv50_program_exec *e, struct nv50_reg *a) +{ + assert(!(e->inst[0] & 0x0c000000)); + assert(!(e->inst[1] & 0x00000004)); + + e->inst[0] |= (a->hw & 3) << 26; + e->inst[1] |= (a->hw >> 2) << 2; +} + static void set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, struct nv50_program_exec *e) @@ -498,6 +583,14 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, e->param.shift = s; e->param.mask = m << (s % 32); + if (src->hw > 127) + set_addr(e, alloc_addr(pc, src)); + else + if (src->acc < 0) { + assert(src->type == P_CONST); + set_addr(e, pc->addr[src->index]); + } + e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22); } @@ -632,7 +725,7 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) } alloc_reg(pc, src); - e->inst[0] |= (src->hw << 16); + e->inst[0] |= ((src->hw & 127) << 16); } static void @@ -660,7 +753,7 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) } alloc_reg(pc, src); - e->inst[1] |= (src->hw << 14); + e->inst[1] |= ((src->hw & 127) << 14); } static void @@ -722,6 +815,22 @@ emit_add(struct nv50_pc *pc, struct nv50_reg *dst, emit(pc, e); } +static void +emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, + uint8_t s) +{ + struct nv50_program_exec *e = exec(pc); + + set_long(pc, e); + e->inst[1] |= 0xc0000000; + + e->inst[0] |= dst->hw << 2; + e->inst[0] |= s << 16; /* shift left */ + set_src_0_restricted(pc, src, e); + + emit(pc, e); +} + static void emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) @@ -1403,6 +1512,16 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) return &pc->temp[dst->DstRegister.Index * 4 + c]; case TGSI_FILE_OUTPUT: return &pc->result[dst->DstRegister.Index * 4 + c]; + case TGSI_FILE_ADDRESS: + { + struct nv50_reg *r = pc->addr[dst->DstRegister.Index * 4 + c]; + if (!r) { + r = alloc_addr(pc, NULL); + pc->addr[dst->DstRegister.Index * 4 + c] = r; + } + assert(r); + return r; + } case TGSI_FILE_NULL: return NULL; default: @@ -1418,7 +1537,10 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, { struct nv50_reg *r = NULL; struct nv50_reg *temp; - unsigned sgn, c; + unsigned sgn, c, swz; + + if (src->SrcRegister.File != TGSI_FILE_CONSTANT) + assert(!src->SrcRegister.Indirect); sgn = tgsi_util_get_full_src_register_sign_mode(src, chan); @@ -1436,13 +1558,29 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, r = &pc->temp[src->SrcRegister.Index * 4 + c]; break; case TGSI_FILE_CONSTANT: - r = &pc->param[src->SrcRegister.Index * 4 + c]; + if (!src->SrcRegister.Indirect) { + r = &pc->param[src->SrcRegister.Index * 4 + c]; + break; + } + /* Indicate indirection by setting r->acc < 0 and + * use the index field to select the address reg. + */ + r = MALLOC_STRUCT(nv50_reg); + swz = tgsi_util_get_src_register_swizzle( + &src->SrcRegisterInd, 0); + ctor_reg(r, P_CONST, + src->SrcRegisterInd.Index * 4 + swz, c); + r->acc = -1; break; case TGSI_FILE_IMMEDIATE: r = &pc->immd[src->SrcRegister.Index * 4 + c]; break; case TGSI_FILE_SAMPLER: break; + case TGSI_FILE_ADDRESS: + r = pc->addr[src->SrcRegister.Index * 4 + c]; + assert(r); + break; default: assert(0); break; @@ -1678,8 +1816,15 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_add(pc, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_ARL: + assert(src[0][0]); + temp = temp_temp(pc); + emit_cvt(pc, temp, src[0][0], -1, CVTOP_FLOOR, CVT_S32_F32); + emit_arl(pc, dst[0], temp, 4); + break; case TGSI_OPCODE_BGNLOOP: pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size; + terminate_mbb(pc); break; case TGSI_OPCODE_BRK: emit_branch(pc, -1, 0, NULL); @@ -1763,6 +1908,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_branch(pc, -1, 0, NULL); pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size; pc->if_insn[pc->if_lvl++] = pc->p->exec_tail; + terminate_mbb(pc); break; case TGSI_OPCODE_ENDIF: pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size; @@ -1775,6 +1921,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->br_join[pc->if_lvl]->param.index = pc->p->exec_size; pc->br_join[pc->if_lvl] = NULL; } + terminate_mbb(pc); /* emit a NOP as join point, we could set it on the next * one, but would have to make sure it is long and !immd */ @@ -1785,6 +1932,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_branch(pc, -1, 0, NULL); pc->p->exec_tail->param.index = pc->loop_pos[--pc->loop_lvl]; pc->br_loop[pc->loop_lvl]->param.index = pc->p->exec_size; + terminate_mbb(pc); break; case TGSI_OPCODE_EX2: emit_preex2(pc, temp, src[0][0]); @@ -1812,6 +1960,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, set_pred_wr(pc, 1, 0, pc->if_cond); emit_branch(pc, 0, 2, &pc->br_join[pc->if_lvl]); pc->if_insn[pc->if_lvl++] = pc->p->exec_tail; + terminate_mbb(pc); break; case TGSI_OPCODE_KIL: emit_kil(pc, src[0][0]); @@ -1989,6 +2138,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, src[i][c]->neg = 0; if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD) FREE(src[i][c]); + else + if (src[i][c]->acc < 0 && src[i][c]->type == P_CONST) + FREE(src[i][c]); /* indirect constant */ } } @@ -2332,8 +2484,8 @@ nv50_program_tx_prep(struct nv50_pc *pc) pc->interp_mode[i] = mode; } break; + case TGSI_FILE_ADDRESS: case TGSI_FILE_CONSTANT: - break; case TGSI_FILE_SAMPLER: break; default: @@ -2527,6 +2679,8 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) pc->attr_nr = p->info.file_max[TGSI_FILE_INPUT] + 1; pc->result_nr = p->info.file_max[TGSI_FILE_OUTPUT] + 1; pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1; + pc->addr_nr = p->info.file_max[TGSI_FILE_ADDRESS] + 1; + assert(pc->addr_nr <= 2); p->cfg.high_temp = 4; @@ -2595,6 +2749,14 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) ctor_reg(&pc->param[rid], P_CONST, i, rid); } + if (pc->addr_nr) { + pc->addr = CALLOC(pc->addr_nr * 4, sizeof(struct nv50_reg *)); + if (!pc->addr) + return FALSE; + } + for (i = 0; i < NV50_SU_MAX_ADDR; ++i) + ctor_reg(&pc->r_addr[i], P_ADDR, -1, i + 1); + return TRUE; } @@ -2774,7 +2936,7 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) p->immd_nr, NV50_CB_PMISC); } - assert(p->param_nr <= 128); + assert(p->param_nr <= 512); if (p->param_nr) { unsigned cb; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 66361dc3ba..0bd5487695 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -301,7 +301,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_data (so, 8); /* constant buffers for immediates and VP/FP parameters */ - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 128*4*4, + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (32 * 4) * 4, &screen->constbuf_misc[0]); if (ret) { nv50_screen_destroy(pscreen); @@ -309,7 +309,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } for (i = 0; i < 2; i++) { - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 128*4*4, + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (128 * 4) * 4, &screen->constbuf_parm[i]); if (ret) { nv50_screen_destroy(pscreen); @@ -318,8 +318,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } if (nouveau_resource_init(&screen->immd_heap[0], 0, 128) || - nouveau_resource_init(&screen->parm_heap[0], 0, 128) || - nouveau_resource_init(&screen->parm_heap[1], 0, 128)) + nouveau_resource_init(&screen->parm_heap[0], 0, 512) || + nouveau_resource_init(&screen->parm_heap[1], 0, 512)) { NOUVEAU_ERR("Error initialising constant buffers.\n"); nv50_screen_destroy(pscreen); @@ -340,7 +340,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->constbuf_misc[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, (NV50_CB_PMISC << 16) | 0x00000800); + so_data (so, (NV50_CB_PMISC << 16) | 0x00000200); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000001 | (NV50_CB_PMISC << 12)); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); -- cgit v1.2.3 From e5f1f6a0bece3d035bf5ac1685b5335af4862cea Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Mon, 19 Oct 2009 13:51:28 -0700 Subject: r300g: Demonstratory kludge to unbreak glxgears. We *must* recalculate something in vformat every rebind; let's see if we can't narrow it down a bit. --- src/gallium/drivers/r300/r300_state_derived.c | 28 +++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 1468b9d36e..5df1a0cd63 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -460,32 +460,36 @@ static void r300_update_derived_shader_state(struct r300_context* r300) value = (struct r300_shader_derived_value*) util_hash_table_get(r300->shader_hash_table, (void*)key); if (value) { - vformat = value->vformat; + //vformat = value->vformat; rs_block = value->rs_block; FREE(key); } else { - vformat = CALLOC_STRUCT(r300_vertex_format); rs_block = CALLOC_STRUCT(r300_rs_block); value = CALLOC_STRUCT(r300_shader_derived_value); - for (i = 0; i < 16; i++) { - vformat->vs_tab[i] = -1; - vformat->fs_tab[i] = -1; - } - - r300_vs_tab_routes(r300, vformat); - r300_vertex_psc(r300, vformat); - r300_update_fs_tab(r300, vformat); - r300_update_rs_block(r300, rs_block); - value->vformat = vformat; + //value->vformat = vformat; value->rs_block = rs_block; util_hash_table_set(r300->shader_hash_table, (void*)key, (void*)value); } + /* XXX This will be refactored ASAP. */ + vformat = CALLOC_STRUCT(r300_vertex_format); + + for (i = 0; i < 16; i++) { + vformat->vs_tab[i] = -1; + vformat->fs_tab[i] = -1; + } + + r300_vs_tab_routes(r300, vformat); + r300_vertex_psc(r300, vformat); + r300_update_fs_tab(r300, vformat); + + FREE(r300->vertex_info); + r300->vertex_info = vformat; r300->rs_block = rs_block; r300->dirty_state |= (R300_NEW_VERTEX_FORMAT | R300_NEW_RS_BLOCK); -- cgit v1.2.3 From fc07ca004aaa338217c49e95f51b072b32c4f8c6 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Tue, 20 Oct 2009 17:17:41 +0200 Subject: trace: Check for destroy before calling it --- src/gallium/drivers/trace/tr_drm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_drm.c b/src/gallium/drivers/trace/tr_drm.c index 781ca5d3bc..48d1c4051c 100644 --- a/src/gallium/drivers/trace/tr_drm.c +++ b/src/gallium/drivers/trace/tr_drm.c @@ -150,7 +150,9 @@ trace_drm_destroy(struct drm_api *_api) { struct trace_drm_api *tr_api = trace_drm_api(_api); struct drm_api *api = tr_api->api; - api->destroy(api); + + if (api->destroy) + api->destroy(api); free(tr_api); } -- cgit v1.2.3 From 4b2cf92ad9caa384869371534c1f2154625a755a Mon Sep 17 00:00:00 2001 From: Marc Dietrich Date: Sun, 18 Oct 2009 08:28:34 -0700 Subject: cell: fix compilation on cell s/LERP/LRP/ --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 312621fd53..b6b2f885af 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -674,7 +674,7 @@ emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst) * Emit linear interpolate. See emit_ADD for comments. */ static boolean -emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst) +emit_LRP(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4]; @@ -1766,7 +1766,7 @@ emit_instruction(struct codegen *gen, return emit_binop(gen, inst); case TGSI_OPCODE_MAD: return emit_MAD(gen, inst); - case TGSI_OPCODE_LERP: + case TGSI_OPCODE_LRP: return emit_LRP(gen, inst); case TGSI_OPCODE_DP3: return emit_DP3(gen, inst); -- cgit v1.2.3 From 58abfebaad80b72c4a4bedad2d96a3959651eea3 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 21 Oct 2009 06:44:16 -0700 Subject: r300g: Kill r300_surface with fire. If you really want to see it again, check the history. --- src/gallium/drivers/r300/Makefile | 1 - src/gallium/drivers/r300/r300_surface.c | 381 -------------------------------- src/gallium/drivers/r300/r300_surface.h | 123 ----------- 3 files changed, 505 deletions(-) delete mode 100644 src/gallium/drivers/r300/r300_surface.c delete mode 100644 src/gallium/drivers/r300/r300_surface.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 69e4724790..c4f2c021c4 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -20,7 +20,6 @@ C_SOURCES = \ r300_state_derived.c \ r300_state_invariant.c \ r300_vs.c \ - r300_surface.c \ r300_texture.c \ r300_tgsi_to_rc.c diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c deleted file mode 100644 index 5cf49d20aa..0000000000 --- a/src/gallium/drivers/r300/r300_surface.c +++ /dev/null @@ -1,381 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson - * Joakim Sindholt - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#include "r300_surface.h" - -static void r300_surface_setup(struct r300_context* r300, - struct r300_texture* dest, - unsigned x, unsigned y, - unsigned w, unsigned h) -{ - struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; - unsigned pixpitch = r300_texture_get_stride(dest, 0) / dest->tex.block.size; - CS_LOCALS(r300); - - r300_emit_blend_state(r300, &blend_clear_state); - r300_emit_blend_color_state(r300, &blend_color_clear_state); - r300_emit_dsa_state(r300, &dsa_clear_state); - r300_emit_rs_state(r300, &rs_clear_state); - - BEGIN_CS(26); - - /* Viewport setup */ - OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); - OUT_CS_32F((float)w); - OUT_CS_32F((float)x); - OUT_CS_32F((float)h); - OUT_CS_32F((float)y); - OUT_CS_32F(1.0); - OUT_CS_32F(0.0); - - OUT_CS_REG(R300_VAP_VTE_CNTL, R300_VPORT_X_SCALE_ENA | - R300_VPORT_X_OFFSET_ENA | - R300_VPORT_Y_SCALE_ENA | - R300_VPORT_Y_OFFSET_ENA | - R300_VTX_XY_FMT | R300_VTX_Z_FMT); - - /* Pixel scissors. */ - OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); - if (caps->is_r500) { - OUT_CS((x << R300_SCISSORS_X_SHIFT) | (y << R300_SCISSORS_Y_SHIFT)); - OUT_CS(((w - 1) << R300_SCISSORS_X_SHIFT) | ((h - 1) << R300_SCISSORS_Y_SHIFT)); - } else { - /* Non-R500 chipsets have an offset of 1440 in their scissors. */ - OUT_CS(((x + 1440) << R300_SCISSORS_X_SHIFT) | - ((y + 1440) << R300_SCISSORS_Y_SHIFT)); - OUT_CS((((w - 1) + 1440) << R300_SCISSORS_X_SHIFT) | - (((h - 1) + 1440) << R300_SCISSORS_Y_SHIFT)); - } - - /* Flush colorbuffer and blend caches. */ - OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, - R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D | - R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL); - OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, - R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | - R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); - - /* Setup colorbuffer. */ - OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1); - OUT_CS_RELOC(dest->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); - OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0, 1); - OUT_CS_RELOC(dest->buffer, pixpitch | - r300_translate_colorformat(dest->tex.format), 0, - RADEON_GEM_DOMAIN_VRAM, 0); - OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0xf); - - END_CS; -} - -/* Provides pipe_context's "surface_fill". Commonly used for clearing - * buffers. */ -static void r300_surface_fill(struct pipe_context* pipe, - struct pipe_surface* dest, - unsigned x, unsigned y, - unsigned w, unsigned h, - unsigned color) -{ - float r, g, b, a; - struct r300_context* r300 = r300_context(pipe); - struct r300_capabilities* caps = r300_screen(pipe->screen)->caps; - struct r300_texture* tex = (struct r300_texture*)dest->texture; - unsigned pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size; - boolean invalid = FALSE; - CS_LOCALS(r300); - - a = (float)((color >> 24) & 0xff) / 255.0f; - r = (float)((color >> 16) & 0xff) / 255.0f; - g = (float)((color >> 8) & 0xff) / 255.0f; - b = (float)((color >> 0) & 0xff) / 255.0f; - DBG(r300, DBG_SURF, "r300: Filling surface %p at (%d,%d)," - " dimensions %dx%d (pixel pitch %d), color 0x%x\n", - dest, x, y, w, h, pixpitch, color); - - /* Fallback? */ - if (!pipe->screen->is_format_supported(pipe->screen, dest->format, - PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) { -fallback: - DBG(r300, DBG_SURF | DBG_FALL, - "r300: Falling back on surface clear...\n"); - util_surface_fill(pipe, dest, x, y, w, h, color); - return; - } - - /* Make sure our target BO is okay. */ -validate: - if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, - 0, RADEON_GEM_DOMAIN_VRAM)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } - if (!r300->winsys->validate(r300->winsys)) { - r300->context.flush(&r300->context, 0, NULL); - if (invalid) { - DBG(r300, DBG_SURF | DBG_FALL, "r300: Stuck in validation loop."); - goto fallback; - } - invalid = TRUE; - goto validate; - } - - r300_surface_setup(r300, tex, x, y, w, h); - - /* Vertex shader setup */ - if (caps->has_tcl) { - r300_emit_vertex_program_code(r300, &r300_passthrough_vertex_shader, 0); - } else { - BEGIN_CS(4); - OUT_CS_REG(R300_VAP_CNTL_STATUS, -#ifdef PIPE_ARCH_BIG_ENDIAN - R300_VC_32BIT_SWAP | -#endif - R300_VAP_TCL_BYPASS); - OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(5) | - R300_PVS_NUM_CNTLRS(5) | - R300_PVS_NUM_FPUS(caps->num_vert_fpus) | - R300_PVS_VF_MAX_VTX_NUM(12)); - END_CS; - } - - /* Fragment shader setup */ - if (caps->is_r500) { - r500_emit_fragment_program_code(r300, &r5xx_passthrough_fragment_shader, 0); - r300_emit_rs_block_state(r300, &r5xx_rs_block_clear_state); - } else { - r300_emit_fragment_program_code(r300, &r3xx_passthrough_fragment_shader, 0); - r300_emit_rs_block_state(r300, &r3xx_rs_block_clear_state); - } - - BEGIN_CS(26); - - /* VAP stream control, mapping from input memory to PVS/RS memory */ - if (caps->has_tcl) { - OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0, - (R300_DATA_TYPE_FLOAT_4 << R300_DATA_TYPE_0_SHIFT) | - ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | - R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)); - } else { - OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0, - (R300_DATA_TYPE_FLOAT_4 << R300_DATA_TYPE_0_SHIFT) | - ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | - R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)); - } - OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0, - (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE0_SHIFT) | - (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE1_SHIFT)); - - /* VAP format controls */ - OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_0, - R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT | - R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT); - OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_1, 0x0); - - /* Disable textures */ - OUT_CS_REG(R300_TX_ENABLE, 0x0); - - /* The size of the point we're about to draw, in sixths of pixels */ - OUT_CS_REG(R300_GA_POINT_SIZE, - ((h * 6) & R300_POINTSIZE_Y_MASK) | - ((w * 6) << R300_POINTSIZE_X_SHIFT)); - - /* Vertex size. */ - OUT_CS_REG(R300_VAP_VTX_SIZE, 0x8); - - /* Packet3 with our point vertex */ - OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 8); - OUT_CS(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING | - (1 << R300_PRIM_NUM_VERTICES_SHIFT)); - /* Position */ - OUT_CS_32F(0.5); - OUT_CS_32F(0.5); - OUT_CS_32F(1.0); - OUT_CS_32F(1.0); - /* Color */ - OUT_CS_32F(r); - OUT_CS_32F(g); - OUT_CS_32F(b); - OUT_CS_32F(a); - - OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA); - - END_CS; - - r300->dirty_hw++; -} - -static void r300_surface_copy(struct pipe_context* pipe, - struct pipe_surface* dest, - unsigned destx, unsigned desty, - struct pipe_surface* src, - unsigned srcx, unsigned srcy, - unsigned w, unsigned h) -{ - struct r300_context* r300 = r300_context(pipe); - struct r300_capabilities* caps = r300_screen(pipe->screen)->caps; - struct r300_texture* srctex = (struct r300_texture*)src->texture; - struct r300_texture* desttex = (struct r300_texture*)dest->texture; - unsigned pixpitch = r300_texture_get_stride(srctex, 0) / srctex->tex.block.size; - boolean invalid = FALSE; - float fsrcx = srcx, fsrcy = srcy, fdestx = destx, fdesty = desty; - CS_LOCALS(r300); - - DBG(r300, DBG_SURF, "r300: Copying surface %p at (%d,%d) to %p at (%d, %d)," - " dimensions %dx%d (pixel pitch %d)\n", - src, srcx, srcy, dest, destx, desty, w, h, pixpitch); - - if ((srctex->buffer == desttex->buffer) && - ((destx < srcx + w) || (srcx < destx + w)) && - ((desty < srcy + h) || (srcy < desty + h))) { - goto fallback; - } - - if (!pipe->screen->is_format_supported(pipe->screen, src->format, - PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_SAMPLER, 0) || - !pipe->screen->is_format_supported(pipe->screen, dest->format, - PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) { -fallback: - DBG(r300, DBG_SURF | DBG_FALL, "r300: Falling back on surface_copy\n"); - util_surface_copy(pipe, FALSE, dest, destx, desty, src, - srcx, srcy, w, h); - return; - } - - /* Add our target BOs to the list. */ -validate: - if (!r300->winsys->add_buffer(r300->winsys, srctex->buffer, - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } - if (!r300->winsys->add_buffer(r300->winsys, desttex->buffer, - 0, RADEON_GEM_DOMAIN_VRAM)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } - if (!r300->winsys->validate(r300->winsys)) { - r300->context.flush(&r300->context, 0, NULL); - if (invalid) { - DBG(r300, DBG_SURF | DBG_FALL, "r300: Stuck in validation loop."); - goto fallback; - } - invalid = TRUE; - goto validate; - } - - r300_surface_setup(r300, desttex, destx, desty, w, h); - - /* Setup the texture. */ - r300_emit_texture(r300, &r300_sampler_copy_state, srctex, 0); - - /* Flush and enable. */ - r300_flush_textures(r300); - - /* Vertex shader setup */ - if (caps->has_tcl) { - r300_emit_vertex_program_code(r300, &r300_passthrough_vertex_shader, 0); - } else { - BEGIN_CS(4); - OUT_CS_REG(R300_VAP_CNTL_STATUS, -#ifdef PIPE_ARCH_BIG_ENDIAN - R300_VC_32BIT_SWAP | -#endif - R300_VAP_TCL_BYPASS); - OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(5) | - R300_PVS_NUM_CNTLRS(5) | - R300_PVS_NUM_FPUS(caps->num_vert_fpus) | - R300_PVS_VF_MAX_VTX_NUM(12)); - END_CS; - } - - /* Fragment shader setup */ - if (caps->is_r500) { - r500_emit_fragment_program_code(r300, &r5xx_texture_fragment_shader, 0); - r300_emit_rs_block_state(r300, &r5xx_rs_block_copy_state); - } else { - r300_emit_fragment_program_code(r300, &r3xx_texture_fragment_shader, 0); - r300_emit_rs_block_state(r300, &r3xx_rs_block_copy_state); - } - - BEGIN_CS(30); - /* VAP stream control, mapping from input memory to PVS/RS memory */ - if (caps->has_tcl) { - OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0, - (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | - ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | - R300_DATA_TYPE_FLOAT_2) << R300_DATA_TYPE_1_SHIFT)); - } else { - OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0, - (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | - ((R300_LAST_VEC | (6 << R300_DST_VEC_LOC_SHIFT) | - R300_DATA_TYPE_FLOAT_2) << R300_DATA_TYPE_1_SHIFT)); - } - OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0, - (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE0_SHIFT) | - (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE1_SHIFT)); - - /* VAP format controls */ - OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_0, - R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT); - /* Two components of texture 0 */ - OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_1, 0x2); - - /* Vertex size. */ - OUT_CS_REG(R300_VAP_VTX_SIZE, 0x4); - - /* Packet3 with our texcoords */ - OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 16); - OUT_CS(R300_PRIM_TYPE_QUADS | R300_PRIM_WALK_RING | - (4 << R300_PRIM_NUM_VERTICES_SHIFT)); - /* (x , y ) */ - OUT_CS_32F(fdestx / dest->width); - OUT_CS_32F(fdesty / dest->height); - OUT_CS_32F(fsrcx / src->width); - OUT_CS_32F(fsrcy / src->height); - /* (x , y + h) */ - OUT_CS_32F(fdestx / dest->width); - OUT_CS_32F((fdesty + h) / dest->height); - OUT_CS_32F(fsrcx / src->width); - OUT_CS_32F((fsrcy + h) / src->height); - /* (x + w, y + h) */ - OUT_CS_32F((fdestx + w) / dest->width); - OUT_CS_32F((fdesty + h) / dest->height); - OUT_CS_32F((fsrcx + w) / src->width); - OUT_CS_32F((fsrcy + h) / src->height); - /* (x + w, y ) */ - OUT_CS_32F((fdestx + w) / dest->width); - OUT_CS_32F(fdesty / dest->height); - OUT_CS_32F((fsrcx + w) / src->width); - OUT_CS_32F(fsrcy / src->height); - - OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA); - - END_CS; - - r300->dirty_hw++; -} - -void r300_init_surface_functions(struct r300_context* r300) -{ - r300->context.surface_fill = r300_surface_fill; - r300->context.surface_copy = r300_surface_copy; -} diff --git a/src/gallium/drivers/r300/r300_surface.h b/src/gallium/drivers/r300/r300_surface.h deleted file mode 100644 index d5998e6e6d..0000000000 --- a/src/gallium/drivers/r300/r300_surface.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef R300_SURFACE_H -#define R300_SURFACE_H - -#include "pipe/p_context.h" -#include "pipe/p_screen.h" - -#include "util/u_rect.h" - -#include "r300_context.h" -#include "r300_cs.h" -#include "r300_emit.h" -#include "r300_fs.h" -#include "r300_vs.h" -#include "r300_state_inlines.h" - -static struct r300_blend_state blend_clear_state = { - .blend_control = 0x0, - .alpha_blend_control = 0x0, - .rop = 0x0, - .dither = 0x0, -}; - -static struct r300_blend_color_state blend_color_clear_state = { - .blend_color = 0x0, - .blend_color_red_alpha = 0x0, - .blend_color_green_blue = 0x0, -}; - -static struct r300_dsa_state dsa_clear_state = { - .alpha_function = 0x0, - .alpha_reference = 0x0, - .z_buffer_control = 0x0, - .z_stencil_control = 0x0, - .stencil_ref_mask = R300_STENCILWRITEMASK_MASK, - .stencil_ref_bf = 0x0, -}; - -static struct r300_rs_state rs_clear_state = { - .point_minmax = 0x36000006, - .line_control = 0x00030006, - .depth_scale_front = 0x0, - .depth_offset_front = 0x0, - .depth_scale_back = 0x0, - .depth_offset_back = 0x0, - .polygon_offset_enable = 0x0, - .cull_mode = 0x0, - .line_stipple_config = 0x3BAAAAAB, - .line_stipple_value = 0x0, - .color_control = R300_SHADE_MODEL_FLAT, -}; - -static struct r300_rs_block r3xx_rs_block_clear_state = { - .ip[0] = R500_RS_SEL_S(R300_RS_SEL_C0) | - R500_RS_SEL_T(R300_RS_SEL_C0) | - R500_RS_SEL_R(R300_RS_SEL_C0) | - R500_RS_SEL_Q(R300_RS_SEL_K1), - .inst[0] = R300_RS_INST_COL_CN_WRITE, - .count = R300_IT_COUNT(0) | R300_IC_COUNT(1) | R300_HIRES_EN, - .inst_count = 0, -}; - -static struct r300_rs_block r5xx_rs_block_clear_state = { - .ip[0] = R500_RS_SEL_S(R500_RS_IP_PTR_K0) | - R500_RS_SEL_T(R500_RS_IP_PTR_K0) | - R500_RS_SEL_R(R500_RS_IP_PTR_K0) | - R500_RS_SEL_Q(R500_RS_IP_PTR_K1), - .inst[0] = R500_RS_INST_COL_CN_WRITE, - .count = R300_IT_COUNT(0) | R300_IC_COUNT(1) | R300_HIRES_EN, - .inst_count = 0, -}; - -/* The following state is used for surface_copy only. */ - -static struct r300_rs_block r3xx_rs_block_copy_state = { - .ip[0] = R500_RS_SEL_S(R300_RS_SEL_K0) | - R500_RS_SEL_T(R300_RS_SEL_K0) | - R500_RS_SEL_R(R300_RS_SEL_K0) | - R500_RS_SEL_Q(R300_RS_SEL_K1), - .inst[0] = R300_RS_INST_COL_CN_WRITE, - .count = R300_IT_COUNT(2) | R300_IC_COUNT(0) | R300_HIRES_EN, - .inst_count = R300_RS_TX_OFFSET(0), -}; - -static struct r300_rs_block r5xx_rs_block_copy_state = { - .ip[0] = R500_RS_SEL_S(0) | - R500_RS_SEL_T(1) | - R500_RS_SEL_R(R500_RS_IP_PTR_K0) | - R500_RS_SEL_Q(R500_RS_IP_PTR_K1), - .inst[0] = R500_RS_INST_TEX_CN_WRITE, - .count = R300_IT_COUNT(2) | R300_IC_COUNT(0) | R300_HIRES_EN, - .inst_count = R300_RS_TX_OFFSET(0), -}; - -static struct r300_sampler_state r300_sampler_copy_state = { - .filter0 = R300_TX_WRAP_S(R300_TX_CLAMP) | - R300_TX_WRAP_T(R300_TX_CLAMP) | - R300_TX_MAG_FILTER_NEAREST | - R300_TX_MIN_FILTER_NEAREST, -}; - -#endif /* R300_SURFACE_H */ -- cgit v1.2.3 From b21df2620ef970daa306e06c6ca69a9b66280cd6 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 21 Oct 2009 06:47:05 -0700 Subject: r300g: Also kill r300_shader_inlines with fire. --- src/gallium/drivers/r300/r300_shader_inlines.h | 47 -------------------------- 1 file changed, 47 deletions(-) delete mode 100644 src/gallium/drivers/r300/r300_shader_inlines.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_shader_inlines.h b/src/gallium/drivers/r300/r300_shader_inlines.h deleted file mode 100644 index a04f45b03e..0000000000 --- a/src/gallium/drivers/r300/r300_shader_inlines.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright 2009 Corbin Simpson - * Joakim Sindholt - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef R300_SHADER_INLINES_H -#define R300_SHADER_INLINES_H - -/* TGSI constants. TGSI is like XML: If it can't solve your problems, you're - * not using enough of it. */ -static const struct tgsi_full_src_register r300_constant_zero = { - .SrcRegister.Extended = TRUE, - .SrcRegister.File = TGSI_FILE_NULL, - .SrcRegisterExtSwz.ExtSwizzleX = TGSI_EXTSWIZZLE_ZERO, - .SrcRegisterExtSwz.ExtSwizzleY = TGSI_EXTSWIZZLE_ZERO, - .SrcRegisterExtSwz.ExtSwizzleZ = TGSI_EXTSWIZZLE_ZERO, - .SrcRegisterExtSwz.ExtSwizzleW = TGSI_EXTSWIZZLE_ZERO, -}; - -static const struct tgsi_full_src_register r300_constant_one = { - .SrcRegister.Extended = TRUE, - .SrcRegister.File = TGSI_FILE_NULL, - .SrcRegisterExtSwz.ExtSwizzleX = TGSI_EXTSWIZZLE_ONE, - .SrcRegisterExtSwz.ExtSwizzleY = TGSI_EXTSWIZZLE_ONE, - .SrcRegisterExtSwz.ExtSwizzleZ = TGSI_EXTSWIZZLE_ONE, - .SrcRegisterExtSwz.ExtSwizzleW = TGSI_EXTSWIZZLE_ONE, -}; - -#endif /* R300_SHADER_INLINES_H */ -- cgit v1.2.3 From b589e39809fa9d0b24a708d792b70ae5b120ffb8 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 21 Oct 2009 05:45:05 -0700 Subject: r300g: Examine vertex attribute type on HW TCL too. --- src/gallium/drivers/r300/r300_state_derived.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 5df1a0cd63..7297f9c653 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -244,10 +244,8 @@ static void r300_vertex_psc(struct r300_context* r300, assert(tab[i] != -1); /* Add the attribute to the PSC table. */ - temp = r300screen->caps->has_tcl ? - R300_DATA_TYPE_FLOAT_4 : - translate_vertex_data_type(vinfo->attrib[i].emit); - temp |= tab[i] << R300_DST_VEC_LOC_SHIFT; + temp = translate_vertex_data_type(vinfo->attrib[i].emit) | + tab[i] << R300_DST_VEC_LOC_SHIFT; if (i & 1) { vformat->vap_prog_stream_cntl[i >> 1] &= 0x0000ffff; -- cgit v1.2.3 From 5a0598f23569314a4ad72eda59e250ab9c43b46d Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 21 Oct 2009 05:48:45 -0700 Subject: r300g: Don't use the hashtable internally. As osiris pointed out, glxgears slowly gets slower for some reason when it's enabled, and it's not helping at the moment, so just turn it off. --- src/gallium/drivers/r300/r300_state_derived.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 7297f9c653..6e7679c39f 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -451,6 +451,7 @@ static void r300_update_derived_shader_state(struct r300_context* r300) struct r300_shader_derived_value* value; int i; + /* key = CALLOC_STRUCT(r300_shader_key); key->vs = r300->vs; key->fs = r300->fs; @@ -472,10 +473,11 @@ static void r300_update_derived_shader_state(struct r300_context* r300) value->rs_block = rs_block; util_hash_table_set(r300->shader_hash_table, (void*)key, (void*)value); - } + } */ /* XXX This will be refactored ASAP. */ vformat = CALLOC_STRUCT(r300_vertex_format); + rs_block = CALLOC_STRUCT(r300_rs_block); for (i = 0; i < 16; i++) { vformat->vs_tab[i] = -1; @@ -486,7 +488,10 @@ static void r300_update_derived_shader_state(struct r300_context* r300) r300_vertex_psc(r300, vformat); r300_update_fs_tab(r300, vformat); + r300_update_rs_block(r300, rs_block); + FREE(r300->vertex_info); + FREE(r300->rs_block); r300->vertex_info = vformat; r300->rs_block = rs_block; -- cgit v1.2.3 From babadb8bb9d68f3687a9c9cb80f98c732b1120c7 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 21 Oct 2009 05:54:49 -0700 Subject: r300g: Don't use getenv; use debug_get_*_option instead. --- src/gallium/drivers/r300/r300_chipset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index d138866d33..9b763e2cb5 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -31,7 +31,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) { /* Reasonable defaults */ caps->num_vert_fpus = 4; - caps->has_tcl = getenv("RADEON_NO_TCL") ? FALSE : TRUE; + caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE; caps->is_r500 = FALSE; caps->high_second_pipe = FALSE; -- cgit v1.2.3 From 6a448a525baf81173f92ee8c3074b98baa54397b Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 21 Oct 2009 06:31:36 -0700 Subject: r300g: Cleanup header includes. --- src/gallium/drivers/r300/r300_chipset.c | 1 + src/gallium/drivers/r300/r300_chipset.h | 6 +++--- src/gallium/drivers/r300/r300_clear.c | 3 +++ src/gallium/drivers/r300/r300_clear.h | 4 +--- src/gallium/drivers/r300/r300_context.c | 16 +++++++++++++++- src/gallium/drivers/r300/r300_context.h | 13 ------------- src/gallium/drivers/r300/r300_emit.c | 7 ++++++- src/gallium/drivers/r300/r300_emit.h | 7 ------- src/gallium/drivers/r300/r300_flush.c | 7 +++++++ src/gallium/drivers/r300/r300_flush.h | 7 ------- src/gallium/drivers/r300/r300_fs.c | 6 +++++- src/gallium/drivers/r300/r300_fs.h | 7 +++---- src/gallium/drivers/r300/r300_query.c | 8 +++++++- src/gallium/drivers/r300/r300_query.h | 4 ---- src/gallium/drivers/r300/r300_render.c | 5 ++++- src/gallium/drivers/r300/r300_screen.c | 7 +++++++ src/gallium/drivers/r300/r300_screen.h | 5 ----- src/gallium/drivers/r300/r300_state.c | 4 ++++ src/gallium/drivers/r300/r300_state_derived.c | 8 +++++++- src/gallium/drivers/r300/r300_state_inlines.h | 2 ++ src/gallium/drivers/r300/r300_state_invariant.c | 5 ++++- src/gallium/drivers/r300/r300_state_invariant.h | 6 +----- src/gallium/drivers/r300/r300_texture.c | 6 ++++++ src/gallium/drivers/r300/r300_texture.h | 3 --- 24 files changed, 86 insertions(+), 61 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 9b763e2cb5..51fdb82ff3 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -21,6 +21,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_chipset.h" + #include "util/u_debug.h" /* r300_chipset: A file all to itself for deducing the various properties of diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index f015a4243d..0633a8b8a7 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -33,11 +33,11 @@ struct r300_capabilities { /* Chipset family */ int family; /* The number of vertex floating-point units */ - int num_vert_fpus; + unsigned num_vert_fpus; /* The number of fragment pipes */ - int num_frag_pipes; + unsigned num_frag_pipes; /* The number of z pipes */ - int num_z_pipes; + unsigned num_z_pipes; /* Whether or not TCL is physically present */ boolean has_tcl; /* Whether or not this is an RV515 or newer; R500s have many differences diff --git a/src/gallium/drivers/r300/r300_clear.c b/src/gallium/drivers/r300/r300_clear.c index 8b9cb819ae..02d6d504fc 100644 --- a/src/gallium/drivers/r300/r300_clear.c +++ b/src/gallium/drivers/r300/r300_clear.c @@ -21,6 +21,9 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_clear.h" +#include "r300_context.h" + +#include "util/u_clear.h" /* Clears currently bound buffers. */ void r300_clear(struct pipe_context* pipe, diff --git a/src/gallium/drivers/r300/r300_clear.h b/src/gallium/drivers/r300/r300_clear.h index cd5900565e..b8fcdf273c 100644 --- a/src/gallium/drivers/r300/r300_clear.h +++ b/src/gallium/drivers/r300/r300_clear.h @@ -23,9 +23,7 @@ #ifndef R300_CLEAR_H #define R300_CLEAR_H -#include "util/u_clear.h" - -#include "r300_context.h" +struct pipe_context; void r300_clear(struct pipe_context* pipe, unsigned buffers, diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 7b370b3e95..4ba11a026e 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -20,10 +20,24 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "r300_context.h" +#include "draw/draw_context.h" + +#include "pipe/p_inlines.h" + +#include "tgsi/tgsi_scan.h" +#include "util/u_hash_table.h" +#include "util/u_memory.h" +#include "util/u_simple_list.h" + +#include "r300_clear.h" +#include "r300_context.h" #include "r300_flush.h" +#include "r300_query.h" +#include "r300_screen.h" +#include "r300_state_derived.h" #include "r300_state_invariant.h" +#include "r300_winsys.h" static boolean r300_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 2d608d6afc..61a57df35e 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -23,23 +23,10 @@ #ifndef R300_CONTEXT_H #define R300_CONTEXT_H -#include "draw/draw_context.h" #include "draw/draw_vertex.h" #include "pipe/p_context.h" -#include "tgsi/tgsi_scan.h" - -#include "util/u_hash_table.h" -#include "util/u_memory.h" -#include "util/u_simple_list.h" - -#include "r300_clear.h" -#include "r300_query.h" -#include "r300_screen.h" -#include "r300_state_derived.h" -#include "r300_winsys.h" - struct r300_fragment_shader; struct r300_vertex_shader; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index df2046bd0c..258c38fefd 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -22,10 +22,15 @@ /* r300_emit: Functions for emitting state. */ -#include "r300_emit.h" +#include "util/u_math.h" +#include "r300_context.h" +#include "r300_cs.h" +#include "r300_emit.h" #include "r300_fs.h" +#include "r300_screen.h" #include "r300_state_derived.h" +#include "r300_state_inlines.h" #include "r300_vs.h" void r300_emit_blend_state(struct r300_context* r300, diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 7e469ea0c7..02ac5bebbd 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -23,13 +23,6 @@ #ifndef R300_EMIT_H #define R300_EMIT_H -#include "util/u_math.h" - -#include "r300_context.h" -#include "r300_cs.h" -#include "r300_screen.h" -#include "r300_state_inlines.h" - struct rX00_fragment_program_code; struct r300_vertex_program_code; diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index d60652a021..c222ea09b1 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -20,6 +20,13 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "draw/draw_context.h" +#include "draw/draw_private.h" + +#include "util/u_simple_list.h" + +#include "r300_context.h" +#include "r300_cs.h" #include "r300_emit.h" #include "r300_flush.h" #include "r300_state_invariant.h" diff --git a/src/gallium/drivers/r300/r300_flush.h b/src/gallium/drivers/r300/r300_flush.h index 9a83d89daa..0e9e6106bb 100644 --- a/src/gallium/drivers/r300/r300_flush.h +++ b/src/gallium/drivers/r300/r300_flush.h @@ -23,13 +23,6 @@ #ifndef R300_FLUSH_H #define R300_FLUSH_H -#include "draw/draw_private.h" - -#include "pipe/p_context.h" - -#include "r300_context.h" -#include "r300_cs.h" - void r300_init_flush_functions(struct r300_context* r300); #endif /* R300_FLUSH_H */ diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 546ad545a5..2db185fd80 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -21,10 +21,14 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "r300_fs.h" +#include "tgsi/tgsi_dump.h" +#include "r300_context.h" +#include "r300_screen.h" +#include "r300_fs.h" #include "r300_tgsi_to_rc.h" +#include "radeon_code.h" #include "radeon_compiler.h" static void find_output_registers(struct r300_fragment_program_compiler * compiler, diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h index 04453274aa..8dfb139738 100644 --- a/src/gallium/drivers/r300/r300_fs.h +++ b/src/gallium/drivers/r300/r300_fs.h @@ -24,14 +24,13 @@ #ifndef R300_FS_H #define R300_FS_H -#include "tgsi/tgsi_dump.h" +#include "pipe/p_state.h" + +#include "tgsi/tgsi_scan.h" -#include "r300_context.h" #include "r3xx_fs.h" #include "r5xx_fs.h" -#include "radeon_code.h" - struct r300_fragment_shader { /* Parent class */ struct pipe_shader_state state; diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 2b0fbfb7d2..007f11efae 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -20,9 +20,15 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "r300_query.h" +#include "util/u_memory.h" +#include "util/u_simple_list.h" +#include "r300_context.h" +#include "r300_screen.h" +#include "r300_cs.h" #include "r300_emit.h" +#include "r300_query.h" +#include "r300_reg.h" static struct pipe_query *r300_create_query(struct pipe_context *pipe, unsigned query_type) diff --git a/src/gallium/drivers/r300/r300_query.h b/src/gallium/drivers/r300/r300_query.h index 4f50e8f844..48876da312 100644 --- a/src/gallium/drivers/r300/r300_query.h +++ b/src/gallium/drivers/r300/r300_query.h @@ -23,10 +23,6 @@ #ifndef R300_QUERY_H #define R300_QUERY_H -#include "r300_context.h" -#include "r300_cs.h" -#include "r300_reg.h" - struct r300_context; static INLINE struct r300_query* r300_query(struct pipe_query* q) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 4e778e1e57..79a33b53cb 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -20,8 +20,11 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "draw/draw_pipe.h" +#include "draw/draw_context.h" #include "draw/draw_vbuf.h" + +#include "pipe/p_inlines.h" + #include "util/u_memory.h" #include "r300_cs.h" diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index cc499d400a..a9058a25e5 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -20,7 +20,14 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "util/u_simple_screen.h" + +#include "r300_context.h" #include "r300_screen.h" +#include "r300_texture.h" +#include "r300_winsys.h" /* Return the identifier behind whom the brave coders responsible for this * amalgamation of code, sweat, and duct tape, routinely obscure their names. diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 2a0e41fbc3..41df31f670 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -23,14 +23,9 @@ #ifndef R300_SCREEN_H #define R300_SCREEN_H -#include "pipe/p_inlines.h" #include "pipe/p_screen.h" -#include "util/u_memory.h" -#include "util/u_simple_screen.h" #include "r300_chipset.h" -#include "r300_texture.h" -#include "r300_winsys.h" struct r300_screen { /* Parent class */ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 0a982a9d5d..c4c9e5d7c2 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -20,8 +20,11 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "draw/draw_context.h" + #include "util/u_debug.h" #include "util/u_math.h" +#include "util/u_memory.h" #include "util/u_pack_color.h" #include "tgsi/tgsi_parse.h" @@ -31,6 +34,7 @@ #include "r300_context.h" #include "r300_reg.h" +#include "r300_screen.h" #include "r300_state_inlines.h" #include "r300_fs.h" #include "r300_vs.h" diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 6e7679c39f..e749bd9cb9 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -20,9 +20,15 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "r300_state_derived.h" +#include "draw/draw_context.h" + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "r300_context.h" #include "r300_fs.h" +#include "r300_screen.h" +#include "r300_state_derived.h" #include "r300_state_inlines.h" #include "r300_vs.h" diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index d7b57e1b22..9135c55ec4 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -24,6 +24,8 @@ #ifndef R300_STATE_INLINES_H #define R300_STATE_INLINES_H +#include "draw/draw_vertex.h" + #include "pipe/p_format.h" #include "r300_reg.h" diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 3865730d63..4865f16058 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -21,9 +21,12 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "r300_context.h" +#include "r300_cs.h" +#include "r300_reg.h" +#include "r300_screen.h" #include "r300_state_invariant.h" - struct pipe_viewport_state r300_viewport_identity = { .scale = {1.0, 1.0, 1.0, 1.0}, .translate = {0.0, 0.0, 0.0, 0.0}, diff --git a/src/gallium/drivers/r300/r300_state_invariant.h b/src/gallium/drivers/r300/r300_state_invariant.h index 5bea6779fe..05cff0d6df 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.h +++ b/src/gallium/drivers/r300/r300_state_invariant.h @@ -23,11 +23,7 @@ #ifndef R300_STATE_INVARIANT_H #define R300_STATE_INVARIANT_H -#include "r300_chipset.h" -#include "r300_context.h" -#include "r300_cs.h" -#include "r300_reg.h" -#include "r300_state_inlines.h" +struct r300_context; void r300_emit_invariant_state(struct r300_context* r300); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 7ea4c33fa9..339fbb6242 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -20,6 +20,12 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "pipe/p_screen.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "r300_context.h" #include "r300_texture.h" static void r300_setup_texture_state(struct r300_texture* tex) diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 992dad77ab..2e58bda716 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -23,11 +23,8 @@ #ifndef R300_TEXTURE_H #define R300_TEXTURE_H -#include "pipe/p_screen.h" #include "pipe/p_video_state.h" -#include "util/u_math.h" -#include "r300_context.h" #include "r300_reg.h" struct r300_texture; -- cgit v1.2.3 From 3b8dad47f816667aa4166d6e27361d274fc2cf4d Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 21 Oct 2009 06:49:16 -0700 Subject: r300g: No debug in r300_state. --- src/gallium/drivers/r300/r300_state.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index c4c9e5d7c2..a3e1bc621a 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -22,7 +22,6 @@ #include "draw/draw_context.h" -#include "util/u_debug.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_pack_color.h" -- cgit v1.2.3 From ce98860012b10cc6cc124fd1ed6fa3a5e28712bb Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 20 Oct 2009 10:54:21 +0100 Subject: llvmpipe: Remove extraneous name in lp_type pre-declaration. --- src/gallium/drivers/llvmpipe/lp_bld_arit.h | 2 +- src/gallium/drivers/llvmpipe/lp_bld_const.h | 2 +- src/gallium/drivers/llvmpipe/lp_bld_conv.h | 2 +- src/gallium/drivers/llvmpipe/lp_bld_logic.h | 2 +- src/gallium/drivers/llvmpipe/lp_bld_swizzle.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h index 095a8e1cab..4e568c055e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h @@ -40,7 +40,7 @@ #include -struct lp_type type; +struct lp_type; struct lp_build_context; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.h b/src/gallium/drivers/llvmpipe/lp_bld_const.h index ffb302f736..cb8e1c7b00 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_const.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_const.h @@ -42,7 +42,7 @@ #include -struct lp_type type; +struct lp_type; unsigned diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.h b/src/gallium/drivers/llvmpipe/lp_bld_conv.h index ca378804d2..948e68fae4 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.h @@ -40,7 +40,7 @@ #include -struct lp_type type; +struct lp_type; LLVMValueRef diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h index a4ee7723b5..d67500ef70 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h @@ -42,7 +42,7 @@ #include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */ -struct lp_type type; +struct lp_type; struct lp_build_context; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h index 1f6da80448..b9472127a6 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h @@ -40,7 +40,7 @@ #include -struct lp_type type; +struct lp_type; struct lp_build_context; -- cgit v1.2.3 From cb351bdd6e09b40fe719c548c48ea40c6c4c3d11 Mon Sep 17 00:00:00 2001 From: Patrice Mandin Date: Wed, 21 Oct 2009 21:56:09 +0200 Subject: nouveau: nv30: check number of colour buffers to bind --- src/gallium/drivers/nv30/nv30_state_fb.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c index 9b0266fba5..197de82886 100644 --- a/src/gallium/drivers/nv30/nv30_state_fb.c +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -17,6 +17,10 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) struct nv30_miptree *nv30mt; int colour_bits = 32, zeta_bits = 32; + if (fb->nr_cbufs == 0) { + return FALSE; + } + rt_enable = 0; for (i = 0; i < fb->nr_cbufs; i++) { if (colour_format) { -- cgit v1.2.3 From d364f662c685ba0f28aa865fbd7e1f0acc3c469e Mon Sep 17 00:00:00 2001 From: Patrice Mandin Date: Wed, 21 Oct 2009 22:01:03 +0200 Subject: nouveau: nv30: Do not use assert to return NULL --- src/gallium/drivers/nv30/nv30_fragtex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index 822e1d8def..a2ce947a72 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -69,7 +69,7 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) tf = nv30_fragtex_format(pt->format); if (!tf) - assert(0); + return NULL; txf = tf->format; txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0); -- cgit v1.2.3 From 40247d87d215d0f1b6370b2888548544eedf0d89 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 21 Oct 2009 22:53:37 -0700 Subject: r300g: Cleanup old static shader state. --- src/gallium/drivers/r300/Makefile | 2 - src/gallium/drivers/r300/r300_fs.c | 11 +--- src/gallium/drivers/r300/r300_fs.h | 3 +- src/gallium/drivers/r300/r300_vs.c | 86 ------------------------- src/gallium/drivers/r300/r3xx_fs.c | 74 ---------------------- src/gallium/drivers/r300/r3xx_fs.h | 32 ---------- src/gallium/drivers/r300/r5xx_fs.c | 125 ------------------------------------- src/gallium/drivers/r300/r5xx_fs.h | 32 ---------- 8 files changed, 4 insertions(+), 361 deletions(-) delete mode 100644 src/gallium/drivers/r300/r3xx_fs.c delete mode 100644 src/gallium/drivers/r300/r3xx_fs.h delete mode 100644 src/gallium/drivers/r300/r5xx_fs.c delete mode 100644 src/gallium/drivers/r300/r5xx_fs.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index c4f2c021c4..f73d80de88 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -4,8 +4,6 @@ include $(TOP)/configs/current LIBNAME = r300 C_SOURCES = \ - r3xx_fs.c \ - r5xx_fs.c \ r300_chipset.c \ r300_clear.c \ r300_context.c \ diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 2db185fd80..29ddc84c41 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -130,14 +130,9 @@ void r300_translate_fragment_shader(struct r300_context* r300, /* Invoke the compiler */ r3xx_compile_fragment_program(&compiler); if (compiler.Base.Error) { - /* Todo: Fallback to software rendering gracefully? */ - fprintf(stderr, "r300 FP: Compiler error: %s\n", compiler.Base.ErrorMsg); - - if (compiler.is_r500) { - memcpy(compiler.code, &r5xx_passthrough_fragment_shader, sizeof(r5xx_passthrough_fragment_shader)); - } else { - memcpy(compiler.code, &r3xx_passthrough_fragment_shader, sizeof(r3xx_passthrough_fragment_shader)); - } + /* XXX failover maybe? */ + DBG(r300, DBG_FP, "r300: Error compiling fragment program: %s\n", + compiler.Base.ErrorMsg); } /* And, finally... */ diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h index 8dfb139738..e831c30301 100644 --- a/src/gallium/drivers/r300/r300_fs.h +++ b/src/gallium/drivers/r300/r300_fs.h @@ -28,8 +28,7 @@ #include "tgsi/tgsi_scan.h" -#include "r3xx_fs.h" -#include "r5xx_fs.h" +#include "radeon_code.h" struct r300_fragment_shader { /* Parent class */ diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 8460cfaf51..eca85879a7 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -146,89 +146,3 @@ void r300_translate_vertex_shader(struct r300_context* r300, rc_destroy(&compiler.Base); vs->translated = TRUE; } - - -/* XXX get these to r300_reg */ -#define R300_PVS_DST_OPCODE(x) ((x) << 0) -# define R300_VE_DOT_PRODUCT 1 -# define R300_VE_MULTIPLY 2 -# define R300_VE_ADD 3 -# define R300_VE_MAXIMUM 7 -# define R300_VE_SET_LESS_THAN 10 -#define R300_PVS_DST_MATH_INST (1 << 6) -# define R300_ME_RECIP_DX 6 -#define R300_PVS_DST_MACRO_INST (1 << 7) -# define R300_PVS_MACRO_OP_2CLK_MADD 0 -#define R300_PVS_DST_REG_TYPE(x) ((x) << 8) -# define R300_PVS_DST_REG_TEMPORARY 0 -# define R300_PVS_DST_REG_A0 1 -# define R300_PVS_DST_REG_OUT 2 -# define R300_PVS_DST_REG_OUT_REPL_X 3 -# define R300_PVS_DST_REG_ALT_TEMPORARY 4 -# define R300_PVS_DST_REG_INPUT 5 -#define R300_PVS_DST_OFFSET(x) ((x) << 13) -#define R300_PVS_DST_WE(x) ((x) << 20) -#define R300_PVS_DST_WE_XYZW (0xf << 20) - -#define R300_PVS_SRC_REG_TYPE(x) ((x) << 0) -# define R300_PVS_SRC_REG_TEMPORARY 0 -# define R300_PVS_SRC_REG_INPUT 1 -# define R300_PVS_SRC_REG_CONSTANT 2 -# define R300_PVS_SRC_REG_ALT_TEMPORARY 3 -#define R300_PVS_SRC_OFFSET(x) ((x) << 5) -#define R300_PVS_SRC_SWIZZLE(x) ((x) << 13) -# define R300_PVS_SRC_SELECT_X 0 -# define R300_PVS_SRC_SELECT_Y 1 -# define R300_PVS_SRC_SELECT_Z 2 -# define R300_PVS_SRC_SELECT_W 3 -# define R300_PVS_SRC_SELECT_FORCE_0 4 -# define R300_PVS_SRC_SELECT_FORCE_1 5 -# define R300_PVS_SRC_SWIZZLE_XYZW \ - ((R300_PVS_SRC_SELECT_X | (R300_PVS_SRC_SELECT_Y << 3) | \ - (R300_PVS_SRC_SELECT_Z << 6) | (R300_PVS_SRC_SELECT_W << 9)) << 13) -# define R300_PVS_SRC_SWIZZLE_ZERO \ - ((R300_PVS_SRC_SELECT_FORCE_0 | (R300_PVS_SRC_SELECT_FORCE_0 << 3) | \ - (R300_PVS_SRC_SELECT_FORCE_0 << 6) | \ - (R300_PVS_SRC_SELECT_FORCE_0 << 9)) << 13) -# define R300_PVS_SRC_SWIZZLE_ONE \ - ((R300_PVS_SRC_SELECT_FORCE_1 | (R300_PVS_SRC_SELECT_FORCE_1 << 3) | \ - (R300_PVS_SRC_SELECT_FORCE_1 << 6) | \ - (R300_PVS_SRC_SELECT_FORCE_1 << 9)) << 13) -#define R300_PVS_MODIFIER_X (1 << 25) -#define R300_PVS_MODIFIER_Y (1 << 26) -#define R300_PVS_MODIFIER_Z (1 << 27) -#define R300_PVS_MODIFIER_W (1 << 28) -#define R300_PVS_NEGATE_XYZW \ - (R300_PVS_MODIFIER_X | R300_PVS_MODIFIER_Y | \ - R300_PVS_MODIFIER_Z | R300_PVS_MODIFIER_W) - -struct r300_vertex_program_code r300_passthrough_vertex_shader = { - .length = 8, /* two instructions */ - - /* MOV out[0], in[0] */ - .body.d[0] = R300_PVS_DST_OPCODE(R300_VE_ADD) | - R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | - R300_PVS_DST_OFFSET(0) | R300_PVS_DST_WE_XYZW, - .body.d[1] = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | - R300_PVS_SRC_OFFSET(0) | R300_PVS_SRC_SWIZZLE_XYZW, - .body.d[2] = R300_PVS_SRC_SWIZZLE_ZERO, - .body.d[3] = 0x0, - - /* MOV out[1], in[1] */ - .body.d[4] = R300_PVS_DST_OPCODE(R300_VE_ADD) | - R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | - R300_PVS_DST_OFFSET(1) | R300_PVS_DST_WE_XYZW, - .body.d[5] = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | - R300_PVS_SRC_OFFSET(1) | R300_PVS_SRC_SWIZZLE_XYZW, - .body.d[6] = R300_PVS_SRC_SWIZZLE_ZERO, - .body.d[7] = 0x0, - - .inputs[0] = 0, - .inputs[1] = 1, - .outputs[0] = 0, - .outputs[1] = 1, - - .InputsRead = 3, - .OutputsWritten = 3 -}; - diff --git a/src/gallium/drivers/r300/r3xx_fs.c b/src/gallium/drivers/r300/r3xx_fs.c deleted file mode 100644 index c1c1194d58..0000000000 --- a/src/gallium/drivers/r300/r3xx_fs.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson - * Joakim Sindholt - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#include "r3xx_fs.h" - -#include "r300_reg.h" - -struct rX00_fragment_program_code r3xx_passthrough_fragment_shader = { - .code.r300.alu.length = 1, - .code.r300.tex.length = 0, - - .code.r300.config = 0, - .code.r300.pixsize = 0, - .code.r300.code_offset = 0, - .code.r300.code_addr[3] = R300_RGBA_OUT, - - .code.r300.alu.inst[0].rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) | - R300_ALU_OUTC_CMP, - .code.r300.alu.inst[0].rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) | - R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ, - .code.r300.alu.inst[0].alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) | - R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) | - R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) | - R300_ALU_OUTA_CMP, - .code.r300.alu.inst[0].alpha_addr = R300_ALPHA_ADDR0(0) | - R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT, -}; - -struct rX00_fragment_program_code r3xx_texture_fragment_shader = { - .code.r300.alu.length = 1, - .code.r300.tex.length = 1, - - .code.r300.config = R300_PFS_CNTL_FIRST_NODE_HAS_TEX, - .code.r300.pixsize = 0, - .code.r300.code_offset = 0, - .code.r300.code_addr[3] = R300_RGBA_OUT, - - .code.r300.tex.inst[0] = R300_TEX_OP_LD << R300_TEX_INST_SHIFT, - - .code.r300.alu.inst[0].rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) | - R300_ALU_OUTC_CMP, - .code.r300.alu.inst[0].rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) | - R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ, - .code.r300.alu.inst[0].alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) | - R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) | - R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) | - R300_ALU_OUTA_CMP, - .code.r300.alu.inst[0].alpha_addr = R300_ALPHA_ADDR0(0) | - R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT, -}; diff --git a/src/gallium/drivers/r300/r3xx_fs.h b/src/gallium/drivers/r300/r3xx_fs.h deleted file mode 100644 index 51cd245724..0000000000 --- a/src/gallium/drivers/r300/r3xx_fs.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson - * Joakim Sindholt - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef R3XX_FS_H -#define R3XX_FS_H - -#include "radeon_code.h" - -struct rX00_fragment_program_code r3xx_passthrough_fragment_shader; -struct rX00_fragment_program_code r3xx_texture_fragment_shader; - -#endif /* R3XX_FS_H */ diff --git a/src/gallium/drivers/r300/r5xx_fs.c b/src/gallium/drivers/r300/r5xx_fs.c deleted file mode 100644 index f072deab0d..0000000000 --- a/src/gallium/drivers/r300/r5xx_fs.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson - * Joakim Sindholt - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#include "r5xx_fs.h" - -#include "r300_reg.h" - -/* XXX this all should find its way back to r300_reg */ -/* Swizzle tools */ -#define R500_SWIZZLE_ZERO 4 -#define R500_SWIZZLE_HALF 5 -#define R500_SWIZZLE_ONE 6 -#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6)) -#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6)) -#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6)) -#define R500_SWIZ_MOD_NEG 1 -#define R500_SWIZ_MOD_ABS 2 -#define R500_SWIZ_MOD_NEG_ABS 3 -/* Swizzles for inst2 */ -#define R500_SWIZ_TEX_STRQ(x) ((x) << 8) -#define R500_SWIZ_TEX_RGBA(x) ((x) << 24) -/* Swizzles for inst3 */ -#define R500_SWIZ_RGB_A(x) ((x) << 2) -#define R500_SWIZ_RGB_B(x) ((x) << 15) -/* Swizzles for inst4 */ -#define R500_SWIZ_ALPHA_A(x) ((x) << 14) -#define R500_SWIZ_ALPHA_B(x) ((x) << 21) -/* Swizzle for inst5 */ -#define R500_SWIZ_RGBA_C(x) ((x) << 14) -#define R500_SWIZ_ALPHA_C(x) ((x) << 27) -/* Writemasks */ -#define R500_TEX_WMASK(x) ((x) << 11) -#define R500_ALU_WMASK(x) ((x) << 11) -#define R500_ALU_OMASK(x) ((x) << 15) -#define R500_W_OMASK (1 << 31) - -struct rX00_fragment_program_code r5xx_passthrough_fragment_shader = { - .code.r500.max_temp_idx = 0, - .code.r500.inst_end = 0, - - .code.r500.inst[0].inst0 = R500_INST_TYPE_OUT | - R500_INST_TEX_SEM_WAIT | R500_INST_LAST | - R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK | - R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP, - .code.r500.inst[0].inst1 = - R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST | - R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST, - .code.r500.inst[0].inst2 = - R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST | - R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST, - .code.r500.inst[0].inst3 = - R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R | - R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B, - .code.r500.inst[0].inst4 = - R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A, - .code.r500.inst[0].inst5 = - R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 | - R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 | - R500_ALU_RGBA_A_SWIZ_0, -}; - -struct rX00_fragment_program_code r5xx_texture_fragment_shader = { - .code.r500.max_temp_idx = 0, - .code.r500.inst_end = 1, - - .code.r500.inst[0].inst0 = R500_INST_TYPE_TEX | - R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_WMASK_RGB | R500_INST_ALPHA_WMASK | - R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP, - .code.r500.inst[0].inst1 = R500_TEX_ID(0) | R500_TEX_INST_LD | - R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED, - .code.r500.inst[0].inst2 = R500_TEX_SRC_ADDR(0) | - R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G | - R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A | - R500_TEX_DST_ADDR(0) | - R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | - R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A, - .code.r500.inst[0].inst3 = 0x0, - .code.r500.inst[0].inst4 = 0x0, - .code.r500.inst[0].inst5 = 0x0, - - .code.r500.inst[1].inst0 = R500_INST_TYPE_OUT | - R500_INST_TEX_SEM_WAIT | R500_INST_LAST | - R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK | - R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP, - .code.r500.inst[1].inst1 = - R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST | - R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST, - .code.r500.inst[1].inst2 = - R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST | - R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST, - .code.r500.inst[1].inst3 = - R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R | - R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B, - .code.r500.inst[1].inst4 = - R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A, - .code.r500.inst[1].inst5 = - R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 | - R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 | - R500_ALU_RGBA_A_SWIZ_0, -}; diff --git a/src/gallium/drivers/r300/r5xx_fs.h b/src/gallium/drivers/r300/r5xx_fs.h deleted file mode 100644 index a4addde32b..0000000000 --- a/src/gallium/drivers/r300/r5xx_fs.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson - * Joakim Sindholt - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef R5XX_FS_H -#define R5XX_FS_H - -#include "radeon_code.h" - -struct rX00_fragment_program_code r5xx_passthrough_fragment_shader; -struct rX00_fragment_program_code r5xx_texture_fragment_shader; - -#endif /* R5XX_FS_H */ -- cgit v1.2.3 From 5a653ada4143c24b00b0ca12b4898064afd59c29 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 21 Oct 2009 22:54:52 -0700 Subject: r300g: Remove unused debug flag. --- src/gallium/drivers/r300/r300_context.h | 5 ++--- src/gallium/drivers/r300/r300_debug.c | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 61a57df35e..4e2c0ec34e 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -317,9 +317,8 @@ void r300_init_surface_functions(struct r300_context* r300); #define DBG_VP 0x0000004 #define DBG_CS 0x0000008 #define DBG_DRAW 0x0000010 -#define DBG_SURF 0x0000020 -#define DBG_TEX 0x0000040 -#define DBG_FALL 0x0000080 +#define DBG_TEX 0x0000020 +#define DBG_FALL 0x0000040 /*@}*/ static INLINE boolean DBG_ON(struct r300_context * ctx, unsigned flags) diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index bfd4ab018a..421253ca72 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -37,7 +37,6 @@ static struct debug_option debug_options[] = { { "vp", DBG_VP, "Vertex program handling" }, { "cs", DBG_CS, "Command submissions" }, { "draw", DBG_DRAW, "Draw and emit" }, - { "surf", DBG_SURF, "Surface drawing" }, { "tex", DBG_TEX, "Textures" }, { "fall", DBG_FALL, "Fallbacks" }, -- cgit v1.2.3 From 034db65f08b943ee9940947db69e4e190f751061 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 21 Oct 2009 23:23:37 -0700 Subject: r300g: Update comments, asserts, indents in r300_texture. I wish I knew enough about textures to really really REALLY fix that file. --- src/gallium/drivers/r300/r300_texture.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 339fbb6242..3c8ff24e17 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -42,16 +42,16 @@ static void r300_setup_texture_state(struct r300_texture* tex) /* XXX */ state->format1 = r300_translate_texformat(pt->format); if (pt->target == PIPE_TEXTURE_CUBE) { - state->format1 |= R300_TX_FORMAT_CUBIC_MAP; + state->format1 |= R300_TX_FORMAT_CUBIC_MAP; } if (pt->target == PIPE_TEXTURE_3D) { - state->format1 |= R300_TX_FORMAT_3D; + state->format1 |= R300_TX_FORMAT_3D; } state->format2 = (r300_texture_get_stride(tex, 0) / pt->block.size) - 1; - /* Assume (somewhat foolishly) that oversized textures will - * not be permitted by the state tracker. */ + /* Don't worry about accidentally setting this bit on non-r500; + * the kernel should catch it. */ if (pt->width[0] > 2048) { state->format2 |= R500_TXWIDTH_BIT11; } @@ -73,7 +73,8 @@ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) return tex->stride_override; if (level > tex->tex.last_level) { - debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__, level, tex->tex.last_level); + debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__, + level, tex->tex.last_level); return 0; } @@ -96,11 +97,6 @@ static void r300_setup_miptree(struct r300_texture* tex) base->nblocksx[i] = pf_get_nblocksx(&base->block, base->width[i]); base->nblocksy[i] = pf_get_nblocksy(&base->block, base->height[i]); - /* Radeons enjoy things in multiples of 64. - * - * XXX - * POT, uncompressed, unmippmapped textures can be aligned to 32, - * instead of 64. */ stride = r300_texture_get_stride(tex, i); size = stride * base->nblocksy[i] * base->depth[i]; @@ -195,9 +191,7 @@ static struct pipe_texture* { struct r300_texture* tex; - /* XXX we should start doing mips now... */ if (base->target != PIPE_TEXTURE_2D || - base->last_level != 0 || base->depth[0] != 1) { return NULL; } @@ -213,7 +207,6 @@ static struct pipe_texture* tex->stride_override = *stride; - /* XXX */ r300_setup_texture_state(tex); pipe_buffer_reference(&tex->buffer, buffer); -- cgit v1.2.3 From 0a8cd4862c4f04308ab818077bab94417ffbf50b Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 21 Oct 2009 23:26:02 -0700 Subject: r300g: Update comments, function names in r300_state_inlines. --- src/gallium/drivers/r300/r300_state_derived.c | 2 +- src/gallium/drivers/r300/r300_state_inlines.h | 24 +++++++++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index e749bd9cb9..42aee7231e 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -250,7 +250,7 @@ static void r300_vertex_psc(struct r300_context* r300, assert(tab[i] != -1); /* Add the attribute to the PSC table. */ - temp = translate_vertex_data_type(vinfo->attrib[i].emit) | + temp = translate_draw_vertex_data_type(vinfo->attrib[i].emit) | tab[i] << R300_DST_VEC_LOC_SHIFT; if (i & 1) { diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 9135c55ec4..c82d8e5f08 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -53,6 +53,7 @@ static INLINE uint32_t r300_translate_blend_function(int blend_func) return R300_COMB_FCN_MAX; default: debug_printf("r300: Unknown blend function %d\n", blend_func); + assert(0); break; } return 0; @@ -100,6 +101,7 @@ static INLINE uint32_t r300_translate_blend_factor(int blend_fact) case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: */ default: debug_printf("r300: Unknown blend factor %d\n", blend_fact); + assert(0); break; } return 0; @@ -129,6 +131,7 @@ static INLINE uint32_t r300_translate_depth_stencil_function(int zs_func) default: debug_printf("r300: Unknown depth/stencil function %d\n", zs_func); + assert(0); break; } return 0; @@ -155,6 +158,7 @@ static INLINE uint32_t r300_translate_stencil_op(int s_op) return R300_ZS_INVERT; default: debug_printf("r300: Unknown stencil op %d", s_op); + assert(0); break; } return 0; @@ -181,6 +185,7 @@ static INLINE uint32_t r300_translate_alpha_function(int alpha_func) return R300_FG_ALPHA_FUNC_ALWAYS; default: debug_printf("r300: Unknown alpha function %d", alpha_func); + assert(0); break; } return 0; @@ -209,6 +214,7 @@ static INLINE uint32_t r300_translate_wrap(int wrap) return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED; default: debug_printf("r300: Unknown texture wrap %d", wrap); + assert(0); return 0; } } @@ -228,6 +234,7 @@ static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip) break; default: debug_printf("r300: Unknown texture filter %d\n", min); + assert(0); break; } switch (mag) { @@ -242,6 +249,7 @@ static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip) break; default: debug_printf("r300: Unknown texture filter %d\n", mag); + assert(0); break; } switch (mip) { @@ -256,6 +264,7 @@ static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip) break; default: debug_printf("r300: Unknown texture filter %d\n", mip); + assert(0); break; } @@ -279,6 +288,8 @@ static INLINE uint32_t r300_anisotropy(float max_aniso) /* Buffer formats. */ +/* Colorbuffer formats. This is the unswizzled format of the RB3D block's + * output. For the swizzling of the targets, check the shader's format. */ static INLINE uint32_t r300_translate_colorformat(enum pipe_format format) { switch (format) { @@ -313,11 +324,13 @@ static INLINE uint32_t r300_translate_colorformat(enum pipe_format format) debug_printf("r300: Implementation error: " "Got unsupported color format %s in %s\n", pf_name(format), __FUNCTION__); + assert(0); break; } return 0; } +/* Depthbuffer and stencilbuffer. Thankfully, we only support two kinds. */ static INLINE uint32_t r300_translate_zsformat(enum pipe_format format) { switch (format) { @@ -331,18 +344,22 @@ static INLINE uint32_t r300_translate_zsformat(enum pipe_format format) debug_printf("r300: Implementation error: " "Got unsupported ZS format %s in %s\n", pf_name(format), __FUNCTION__); + assert(0); break; } return 0; } -/* Translate pipe_format into US_OUT_FMT. +/* Shader output formats. This is essentially the swizzle from the shader + * to the RB3D block. + * * Note that formats are stored from C3 to C0. */ static INLINE uint32_t r300_translate_out_fmt(enum pipe_format format) { switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_X8R8G8B8_UNORM: + /* XXX */ case PIPE_FORMAT_Z24S8_UNORM: return R300_US_OUT_FMT_C4_8 | R300_C0_SEL_B | R300_C1_SEL_G | @@ -356,6 +373,7 @@ static INLINE uint32_t r300_translate_out_fmt(enum pipe_format format) debug_printf("r300: Implementation error: " "Got unsupported output format %s in %s\n", pf_name(format), __FUNCTION__); + assert(0); return R300_US_OUT_FMT_UNUSED; } return 0; @@ -382,7 +400,8 @@ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) return 0; } -static INLINE uint32_t translate_vertex_data_type(int type) { +/* Translate Draw vertex types into PSC vertex types. */ +static INLINE uint32_t translate_draw_vertex_data_type(int type) { switch (type) { case EMIT_1F: case EMIT_1F_PSIZE: @@ -406,7 +425,6 @@ static INLINE uint32_t translate_vertex_data_type(int type) { assert(0); break; } - return 0; } -- cgit v1.2.3 From 511bd5f32b67f903b590f00f7ccf8132127ef2e4 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Thu, 22 Oct 2009 00:21:08 -0700 Subject: r300g: Check for NULL Draw during flush. Split from the fastpath WIP. --- src/gallium/drivers/r300/r300_flush.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index c222ea09b1..14a08241fc 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -40,8 +40,10 @@ static void r300_flush(struct pipe_context* pipe, CS_LOCALS(r300); /* We probably need to flush Draw, but we may have been called from - * within Draw. This feels kludgy, but it might be the best thing. */ - if (!r300->draw->flushing) { + * within Draw. This feels kludgy, but it might be the best thing. + * + * Of course, the best thing is to kill Draw with fire. :3 */ + if (r300->draw && !r300->draw->flushing) { draw_flush(r300->draw); } -- cgit v1.2.3 From eebf4b5299a880f4cdf8a916b4e1ca0bd79a6f07 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 22 Oct 2009 21:55:22 +1000 Subject: nv50: support 3D class 0x8597, remove redundant unknown chipset detection --- src/gallium/drivers/nv50/nv50_screen.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 0bd5487695..63dce0f4c2 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -218,7 +218,16 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) tesla_class = NV54TCL; break; case 0xa0: - tesla_class = NVA0TCL; + switch (chipset) { + case 0xa0: + case 0xaa: + case 0xac: + tesla_class = NVA0TCL; + break; + default: + tesla_class = 0x8597; + break; + } break; default: NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", chipset); @@ -226,12 +235,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) return NULL; } - if (tesla_class == 0) { - NOUVEAU_ERR("Unknown G8x chipset: NV%02x\n", chipset); - nv50_screen_destroy(pscreen); - return NULL; - } - ret = nouveau_grobj_alloc(chan, 0xbeef5097, tesla_class, &screen->tesla); if (ret) { -- cgit v1.2.3 From a07437f8a6a863654487c5586cbd02bfc20f0a3b Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 22 Oct 2009 18:10:19 +0100 Subject: llvmpipe: Call util_cpu_detect() from the unit tests. --- src/gallium/drivers/llvmpipe/lp_test_format.c | 3 +++ src/gallium/drivers/llvmpipe/lp_test_main.c | 4 ++++ 2 files changed, 7 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index ab80c0143f..5dc8297fe9 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -35,6 +35,7 @@ #include #include +#include "util/u_cpu_detect.h" #include "util/u_format.h" #include "lp_bld_format.h" @@ -263,6 +264,8 @@ int main(int argc, char **argv) LLVMInitializeNativeTarget(); #endif + util_cpu_detect(); + for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i) if(!test_format(&test_cases[i])) ret = 1; diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index f07fa256f1..d4767ff52b 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -34,6 +34,8 @@ */ +#include "util/u_cpu_detect.h" + #include "lp_bld_const.h" #include "lp_test.h" @@ -370,6 +372,8 @@ int main(int argc, char **argv) LLVMInitializeNativeTarget(); #endif + util_cpu_detect(); + if(fp) { /* Warm up the caches */ test_some(0, NULL, 100); -- cgit v1.2.3 From 421507de06bd42a322c5864d887e67e385eb458c Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 22 Oct 2009 18:28:17 +0100 Subject: llvmpipe: Factor vector packing/unpacking to a separate source file. These functions will be needed to implement many of the 8bit operations, and they are quite complex on its own. --- src/gallium/drivers/llvmpipe/Makefile | 1 + src/gallium/drivers/llvmpipe/SConscript | 1 + src/gallium/drivers/llvmpipe/lp_bld_conv.c | 240 +---------------- src/gallium/drivers/llvmpipe/lp_bld_pack.c | 419 +++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_bld_pack.h | 95 +++++++ 5 files changed, 519 insertions(+), 237 deletions(-) create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_pack.c create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_pack.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 21aff1967a..b96ee23a99 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -21,6 +21,7 @@ C_SOURCES = \ lp_bld_interp.c \ lp_bld_intr.c \ lp_bld_logic.c \ + lp_bld_pack.c \ lp_bld_sample_soa.c \ lp_bld_swizzle.c \ lp_bld_struct.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 13cd465838..403e4daa43 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -33,6 +33,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_format_soa.c', 'lp_bld_interp.c', 'lp_bld_intr.c', + 'lp_bld_pack.c', 'lp_bld_sample_soa.c', 'lp_bld_struct.c', 'lp_bld_logic.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c index 20c8710214..9935209437 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c @@ -69,6 +69,7 @@ #include "lp_bld_const.h" #include "lp_bld_intr.h" #include "lp_bld_arit.h" +#include "lp_bld_pack.h" #include "lp_bld_conv.h" @@ -198,241 +199,6 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, } -/** - * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions. - */ -static LLVMValueRef -lp_build_const_unpack_shuffle(unsigned n, unsigned lo_hi) -{ - LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; - unsigned i, j; - - assert(n <= LP_MAX_VECTOR_LENGTH); - assert(lo_hi < 2); - - /* TODO: cache results in a static table */ - - for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) { - elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0); - elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0); - } - - return LLVMConstVector(elems, n); -} - - -/** - * Build shuffle vectors that match PACKxx instructions. - */ -static LLVMValueRef -lp_build_const_pack_shuffle(unsigned n) -{ - LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; - unsigned i; - - assert(n <= LP_MAX_VECTOR_LENGTH); - - /* TODO: cache results in a static table */ - - for(i = 0; i < n; ++i) - elems[i] = LLVMConstInt(LLVMInt32Type(), 2*i, 0); - - return LLVMConstVector(elems, n); -} - - -/** - * Expand the bit width. - * - * This will only change the number of bits the values are represented, not the - * values themselves. - */ -static void -lp_build_expand(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - LLVMValueRef src, - LLVMValueRef *dst, unsigned num_dsts) -{ - unsigned num_tmps; - unsigned i; - - /* Register width must remain constant */ - assert(src_type.width * src_type.length == dst_type.width * dst_type.length); - - /* We must not loose or gain channels. Only precision */ - assert(src_type.length == dst_type.length * num_dsts); - - num_tmps = 1; - dst[0] = src; - - while(src_type.width < dst_type.width) { - struct lp_type new_type = src_type; - LLVMTypeRef new_vec_type; - - new_type.width *= 2; - new_type.length /= 2; - new_vec_type = lp_build_vec_type(new_type); - - for(i = num_tmps; i--; ) { - LLVMValueRef zero; - LLVMValueRef shuffle_lo; - LLVMValueRef shuffle_hi; - LLVMValueRef lo; - LLVMValueRef hi; - - zero = lp_build_zero(src_type); - shuffle_lo = lp_build_const_unpack_shuffle(src_type.length, 0); - shuffle_hi = lp_build_const_unpack_shuffle(src_type.length, 1); - - /* PUNPCKLBW, PUNPCKHBW */ - lo = LLVMBuildShuffleVector(builder, dst[i], zero, shuffle_lo, ""); - hi = LLVMBuildShuffleVector(builder, dst[i], zero, shuffle_hi, ""); - - dst[2*i + 0] = LLVMBuildBitCast(builder, lo, new_vec_type, ""); - dst[2*i + 1] = LLVMBuildBitCast(builder, hi, new_vec_type, ""); - } - - src_type = new_type; - - num_tmps *= 2; - } - - assert(num_tmps == num_dsts); -} - - -/** - * Non-interleaved pack. - * - * This will move values as - * - * lo = __ l0 __ l1 __ l2 __.. __ ln - * hi = __ h0 __ h1 __ h2 __.. __ hn - * res = l0 l1 l2 .. ln h0 h1 h2 .. hn - * - * TODO: handle saturation consistently. - */ -static LLVMValueRef -lp_build_pack2(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - boolean clamped, - LLVMValueRef lo, - LLVMValueRef hi) -{ - LLVMTypeRef src_vec_type = lp_build_vec_type(src_type); - LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type); - LLVMValueRef shuffle; - LLVMValueRef res; - - /* Register width must remain constant */ - assert(src_type.width * src_type.length == dst_type.width * dst_type.length); - - /* We must not loose or gain channels. Only precision */ - assert(src_type.length * 2 == dst_type.length); - - assert(!src_type.floating); - assert(!dst_type.floating); - - if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) { - /* All X86 non-interleaved pack instructions all take signed inputs and - * saturate them, so saturate beforehand. */ - if(!src_type.sign && !clamped) { - struct lp_build_context bld; - unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width; - LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1); - lp_build_context_init(&bld, builder, src_type); - lo = lp_build_min(&bld, lo, dst_max); - hi = lp_build_min(&bld, hi, dst_max); - } - - switch(src_type.width) { - case 32: - if(dst_type.sign || !util_cpu_caps.has_sse4_1) - res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi); - else - /* PACKUSDW is the only instrinsic with a consistent signature */ - return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi); - break; - - case 16: - if(dst_type.sign) - res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi); - else - res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi); - break; - - default: - assert(0); - return LLVMGetUndef(dst_vec_type); - break; - } - - res = LLVMBuildBitCast(builder, res, dst_vec_type, ""); - return res; - } - - lo = LLVMBuildBitCast(builder, lo, dst_vec_type, ""); - hi = LLVMBuildBitCast(builder, hi, dst_vec_type, ""); - - shuffle = lp_build_const_pack_shuffle(dst_type.length); - - res = LLVMBuildShuffleVector(builder, lo, hi, shuffle, ""); - - return res; -} - - -/** - * Truncate the bit width. - * - * TODO: Handle saturation consistently. - */ -static LLVMValueRef -lp_build_pack(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - boolean clamped, - const LLVMValueRef *src, unsigned num_srcs) -{ - LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; - unsigned i; - - /* Register width must remain constant */ - assert(src_type.width * src_type.length == dst_type.width * dst_type.length); - - /* We must not loose or gain channels. Only precision */ - assert(src_type.length * num_srcs == dst_type.length); - - for(i = 0; i < num_srcs; ++i) - tmp[i] = src[i]; - - while(src_type.width > dst_type.width) { - struct lp_type new_type = src_type; - - new_type.width /= 2; - new_type.length *= 2; - - /* Take in consideration the sign changes only in the last step */ - if(new_type.width == dst_type.width) - new_type.sign = dst_type.sign; - - num_srcs /= 2; - - for(i = 0; i < num_srcs; ++i) - tmp[i] = lp_build_pack2(builder, src_type, new_type, clamped, - tmp[2*i + 0], tmp[2*i + 1]); - - src_type = new_type; - } - - assert(num_srcs == 1); - - return tmp[0]; -} - - /** * Generic type conversion. * @@ -572,7 +338,7 @@ lp_build_conv(LLVMBuilderRef builder, if(tmp_type.width < dst_type.width) { assert(num_tmps == 1); - lp_build_expand(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts); + lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts); tmp_type.width = dst_type.width; tmp_type.length = dst_type.length; num_tmps = num_dsts; @@ -692,7 +458,7 @@ lp_build_conv_mask(LLVMBuilderRef builder, } else if(src_type.width < dst_type.width) { assert(num_srcs == 1); - lp_build_expand(builder, src_type, dst_type, src[0], dst, num_dsts); + lp_build_unpack(builder, src_type, dst_type, src[0], dst, num_dsts); } else { assert(num_srcs == num_dsts); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.c b/src/gallium/drivers/llvmpipe/lp_bld_pack.c new file mode 100644 index 0000000000..fe82fda039 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.c @@ -0,0 +1,419 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Helper functions for packing/unpacking. + * + * Pack/unpacking is necessary for conversion between types of different + * bit width. + * + * They are also commonly used when an computation needs higher + * precision for the intermediate values. For example, if one needs the + * function: + * + * c = compute(a, b); + * + * to use more precision for intermediate results then one should implement it + * as: + * + * LLVMValueRef + * compute(LLVMBuilderRef builder struct lp_type type, LLVMValueRef a, LLVMValueRef b) + * { + * struct lp_type wide_type = lp_wider_type(type); + * LLVMValueRef al, ah, bl, bh, cl, ch, c; + * + * lp_build_unpack2(builder, type, wide_type, a, &al, &ah); + * lp_build_unpack2(builder, type, wide_type, b, &bl, &bh); + * + * cl = compute_half(al, bl); + * ch = compute_half(ah, bh); + * + * c = lp_build_pack2(bld->builder, wide_type, type, cl, ch); + * + * return c; + * } + * + * where compute_half() would do the computation for half the elements with + * twice the precision. + * + * @author Jose Fonseca + */ + + +#include "util/u_debug.h" +#include "util/u_math.h" +#include "util/u_cpu_detect.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_intr.h" +#include "lp_bld_arit.h" +#include "lp_bld_pack.h" + + +/** + * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions. + */ +static LLVMValueRef +lp_build_const_unpack_shuffle(unsigned n, unsigned lo_hi) +{ + LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; + unsigned i, j; + + assert(n <= LP_MAX_VECTOR_LENGTH); + assert(lo_hi < 2); + + /* TODO: cache results in a static table */ + + for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) { + elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0); + elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0); + } + + return LLVMConstVector(elems, n); +} + + +/** + * Build shuffle vectors that match PACKxx instructions. + */ +static LLVMValueRef +lp_build_const_pack_shuffle(unsigned n) +{ + LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + assert(n <= LP_MAX_VECTOR_LENGTH); + + /* TODO: cache results in a static table */ + + for(i = 0; i < n; ++i) + elems[i] = LLVMConstInt(LLVMInt32Type(), 2*i, 0); + + return LLVMConstVector(elems, n); +} + + +/** + * Interleave vector elements. + * + * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions. + */ +LLVMValueRef +lp_build_interleave2(LLVMBuilderRef builder, + struct lp_type type, + LLVMValueRef a, + LLVMValueRef b, + unsigned lo_hi) +{ + LLVMValueRef shuffle; + + shuffle = lp_build_const_unpack_shuffle(type.length, lo_hi); + + return LLVMBuildShuffleVector(builder, a, b, shuffle, ""); +} + + +/** + * Double the bit width. + * + * This will only change the number of bits the values are represented, not the + * values themselves. + */ +void +lp_build_unpack2(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef src, + LLVMValueRef *dst_lo, + LLVMValueRef *dst_hi) +{ + LLVMValueRef msb; + LLVMTypeRef dst_vec_type; + + assert(!src_type.floating); + assert(!dst_type.floating); + assert(dst_type.sign == src_type.sign); + assert(dst_type.width == src_type.width * 2); + assert(dst_type.length * 2 == src_type.length); + + if(src_type.sign) { + /* Replicate the sign bit in the most significant bits */ + msb = LLVMBuildAShr(builder, src, lp_build_int_const_scalar(src_type, src_type.width - 1), ""); + } + else + /* Most significant bits always zero */ + msb = lp_build_zero(src_type); + + /* Interleave bits */ + if(util_cpu_caps.little_endian) { + *dst_lo = lp_build_interleave2(builder, src_type, src, msb, 0); + *dst_hi = lp_build_interleave2(builder, src_type, src, msb, 1); + } + else { + *dst_lo = lp_build_interleave2(builder, src_type, msb, src, 0); + *dst_hi = lp_build_interleave2(builder, src_type, msb, src, 1); + } + + /* Cast the result into the new type (twice as wide) */ + + dst_vec_type = lp_build_vec_type(dst_type); + + *dst_lo = LLVMBuildBitCast(builder, *dst_lo, dst_vec_type, ""); + *dst_hi = LLVMBuildBitCast(builder, *dst_hi, dst_vec_type, ""); +} + + +/** + * Expand the bit width. + * + * This will only change the number of bits the values are represented, not the + * values themselves. + */ +void +lp_build_unpack(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef src, + LLVMValueRef *dst, unsigned num_dsts) +{ + unsigned num_tmps; + unsigned i; + + /* Register width must remain constant */ + assert(src_type.width * src_type.length == dst_type.width * dst_type.length); + + /* We must not loose or gain channels. Only precision */ + assert(src_type.length == dst_type.length * num_dsts); + + num_tmps = 1; + dst[0] = src; + + while(src_type.width < dst_type.width) { + struct lp_type tmp_type = src_type; + + tmp_type.width *= 2; + tmp_type.length /= 2; + + for(i = num_tmps; i--; ) { + lp_build_unpack2(builder, src_type, tmp_type, dst[i], &dst[2*i + 0], &dst[2*i + 1]); + } + + src_type = tmp_type; + + num_tmps *= 2; + } + + assert(num_tmps == num_dsts); +} + + +/** + * Non-interleaved pack. + * + * This will move values as + * + * lo = __ l0 __ l1 __ l2 __.. __ ln + * hi = __ h0 __ h1 __ h2 __.. __ hn + * res = l0 l1 l2 .. ln h0 h1 h2 .. hn + * + * This will only change the number of bits the values are represented, not the + * values themselves. + * + * It is assumed the values are already clamped into the destination type range. + * Values outside that range will produce undefined results. Use + * lp_build_packs2 instead. + */ +LLVMValueRef +lp_build_pack2(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef lo, + LLVMValueRef hi) +{ + LLVMTypeRef src_vec_type = lp_build_vec_type(src_type); + LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type); + LLVMValueRef shuffle; + LLVMValueRef res; + + dst_vec_type = lp_build_vec_type(dst_type); + + assert(!src_type.floating); + assert(!dst_type.floating); + assert(src_type.width == dst_type.width * 2); + assert(src_type.length * 2 == dst_type.length); + + if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) { + switch(src_type.width) { + case 32: + if(dst_type.sign) { + res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi); + } + else { + if (util_cpu_caps.has_sse4_1) { + /* PACKUSDW is the only instrinsic with a consistent signature */ + return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi); + } + else { + assert(0); + return LLVMGetUndef(dst_vec_type); + } + } + break; + + case 16: + if(dst_type.sign) + res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi); + else + res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi); + break; + + default: + assert(0); + return LLVMGetUndef(dst_vec_type); + break; + } + + res = LLVMBuildBitCast(builder, res, dst_vec_type, ""); + return res; + } + + lo = LLVMBuildBitCast(builder, lo, dst_vec_type, ""); + hi = LLVMBuildBitCast(builder, hi, dst_vec_type, ""); + + shuffle = lp_build_const_pack_shuffle(dst_type.length); + + res = LLVMBuildShuffleVector(builder, lo, hi, shuffle, ""); + + return res; +} + + + +/** + * Non-interleaved pack and saturate. + * + * Same as lp_build_pack2 but will saturate values so that they fit into the + * destination type. + */ +LLVMValueRef +lp_build_packs2(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef lo, + LLVMValueRef hi) +{ + boolean clamp; + + assert(!src_type.floating); + assert(!dst_type.floating); + assert(src_type.sign == dst_type.sign); + assert(src_type.width == dst_type.width * 2); + assert(src_type.length * 2 == dst_type.length); + + clamp = TRUE; + + /* All X86 SSE non-interleaved pack instructions take signed inputs and + * saturate them, so no need to clamp for those cases. */ + if(util_cpu_caps.has_sse2 && + src_type.width * src_type.length == 128 && + src_type.sign) + clamp = FALSE; + + if(clamp) { + struct lp_build_context bld; + unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width; + LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1); + lp_build_context_init(&bld, builder, src_type); + lo = lp_build_min(&bld, lo, dst_max); + hi = lp_build_min(&bld, hi, dst_max); + /* FIXME: What about lower bound? */ + } + + return lp_build_pack2(builder, src_type, dst_type, lo, hi); +} + + +/** + * Truncate the bit width. + * + * TODO: Handle saturation consistently. + */ +LLVMValueRef +lp_build_pack(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + boolean clamped, + const LLVMValueRef *src, unsigned num_srcs) +{ + LLVMValueRef (*pack2)(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef lo, + LLVMValueRef hi); + LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + + /* Register width must remain constant */ + assert(src_type.width * src_type.length == dst_type.width * dst_type.length); + + /* We must not loose or gain channels. Only precision */ + assert(src_type.length * num_srcs == dst_type.length); + + if(clamped) + pack2 = &lp_build_pack2; + else + pack2 = &lp_build_packs2; + + for(i = 0; i < num_srcs; ++i) + tmp[i] = src[i]; + + while(src_type.width > dst_type.width) { + struct lp_type tmp_type = src_type; + + tmp_type.width /= 2; + tmp_type.length *= 2; + + /* Take in consideration the sign changes only in the last step */ + if(tmp_type.width == dst_type.width) + tmp_type.sign = dst_type.sign; + + num_srcs /= 2; + + for(i = 0; i < num_srcs; ++i) + tmp[i] = pack2(builder, src_type, tmp_type, tmp[2*i + 0], tmp[2*i + 1]); + + src_type = tmp_type; + } + + assert(num_srcs == 1); + + return tmp[0]; +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.h b/src/gallium/drivers/llvmpipe/lp_bld_pack.h new file mode 100644 index 0000000000..fb2a34984a --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.h @@ -0,0 +1,95 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Helper functions for packing/unpacking conversions. + * + * @author Jose Fonseca + */ + + +#ifndef LP_BLD_PACK_H +#define LP_BLD_PACK_H + + +#include + + +struct lp_type; + + +LLVMValueRef +lp_build_interleave2(LLVMBuilderRef builder, + struct lp_type type, + LLVMValueRef a, + LLVMValueRef b, + unsigned lo_hi); + + +void +lp_build_unpack2(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef src, + LLVMValueRef *dst_lo, + LLVMValueRef *dst_hi); + + +void +lp_build_unpack(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef src, + LLVMValueRef *dst, unsigned num_dsts); + + +LLVMValueRef +lp_build_packs2(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef lo, + LLVMValueRef hi); + + +LLVMValueRef +lp_build_pack2(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef lo, + LLVMValueRef hi); + + +LLVMValueRef +lp_build_pack(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + boolean clamped, + const LLVMValueRef *src, unsigned num_srcs); + + +#endif /* !LP_BLD_PACK_H */ -- cgit v1.2.3 From 4458695bdafb13eba639d986e2f20953f0f7445c Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 22 Oct 2009 18:28:37 +0100 Subject: llvmpipe: Utility function to double the bit width of a type. --- src/gallium/drivers/llvmpipe/lp_bld_type.c | 29 ++++++++++++++++++++++++----- src/gallium/drivers/llvmpipe/lp_bld_type.h | 4 ++++ 2 files changed, 28 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c index 606243d6c5..1320a26721 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c @@ -160,12 +160,31 @@ lp_build_int_vec_type(struct lp_type type) struct lp_type lp_int_type(struct lp_type type) { - struct lp_type int_type; + struct lp_type res_type; - memset(&int_type, 0, sizeof int_type); - int_type.width = type.width; - int_type.length = type.length; - return int_type; + memset(&res_type, 0, sizeof res_type); + res_type.width = type.width; + res_type.length = type.length; + + return res_type; +} + + +/** + * Return the type with twice the bit width (hence half the number of elements). + */ +struct lp_type +lp_wider_type(struct lp_type type) +{ + struct lp_type res_type; + + memcpy(&res_type, &type, sizeof res_type); + res_type.width *= 2; + res_type.length /= 2; + + assert(res_type.length); + + return res_type; } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h index ee5ca3483c..46c298fa20 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h @@ -166,6 +166,10 @@ struct lp_type lp_int_type(struct lp_type type); +struct lp_type +lp_wider_type(struct lp_type type); + + void lp_build_context_init(struct lp_build_context *bld, LLVMBuilderRef builder, -- cgit v1.2.3 From 01b85e292352d710586344348fff5a81459e5486 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 22 Oct 2009 18:28:57 +0100 Subject: llvmpipe: Use the pack/unpack functions for 8bit unsigned norm multiplication. --- src/gallium/drivers/llvmpipe/lp_bld_arit.c | 96 ++++++------------------------ 1 file changed, 17 insertions(+), 79 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index d27ef0de04..83ca06acf8 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -54,6 +54,7 @@ #include "lp_bld_const.h" #include "lp_bld_intr.h" #include "lp_bld_logic.h" +#include "lp_bld_pack.h" #include "lp_bld_debug.h" #include "lp_bld_arit.h" @@ -279,45 +280,6 @@ lp_build_sub(struct lp_build_context *bld, } -/** - * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions. - */ -static LLVMValueRef -lp_build_unpack_shuffle(unsigned n, unsigned lo_hi) -{ - LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; - unsigned i, j; - - assert(n <= LP_MAX_VECTOR_LENGTH); - assert(lo_hi < 2); - - for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) { - elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0); - elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0); - } - - return LLVMConstVector(elems, n); -} - - -/** - * Build constant int vector of width 'n' and value 'c'. - */ -static LLVMValueRef -lp_build_const_vec(LLVMTypeRef type, unsigned n, long long c) -{ - LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; - unsigned i; - - assert(n <= LP_MAX_VECTOR_LENGTH); - - for(i = 0; i < n; ++i) - elems[i] = LLVMConstInt(type, c, 0); - - return LLVMConstVector(elems, n); -} - - /** * Normalized 8bit multiplication. * @@ -361,33 +323,30 @@ lp_build_const_vec(LLVMTypeRef type, unsigned n, long long c) */ static LLVMValueRef lp_build_mul_u8n(LLVMBuilderRef builder, + struct lp_type i16_type, LLVMValueRef a, LLVMValueRef b) { - static LLVMValueRef c01 = NULL; - static LLVMValueRef c08 = NULL; - static LLVMValueRef c80 = NULL; + LLVMValueRef c8; LLVMValueRef ab; - if(!c01) c01 = lp_build_const_vec(LLVMInt16Type(), 8, 0x01); - if(!c08) c08 = lp_build_const_vec(LLVMInt16Type(), 8, 0x08); - if(!c80) c80 = lp_build_const_vec(LLVMInt16Type(), 8, 0x80); + c8 = lp_build_int_const_scalar(i16_type, 8); #if 0 /* a*b/255 ~= (a*(b + 1)) >> 256 */ - b = LLVMBuildAdd(builder, b, c01, ""); + b = LLVMBuildAdd(builder, b, lp_build_int_const_scalar(i16_type, 1), ""); ab = LLVMBuildMul(builder, a, b, ""); #else - /* t/255 ~= (t + (t >> 8) + 0x80) >> 8 */ + /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */ ab = LLVMBuildMul(builder, a, b, ""); - ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c08, ""), ""); - ab = LLVMBuildAdd(builder, ab, c80, ""); + ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c8, ""), ""); + ab = LLVMBuildAdd(builder, ab, lp_build_int_const_scalar(i16_type, 0x80), ""); #endif - ab = LLVMBuildLShr(builder, ab, c08, ""); + ab = LLVMBuildLShr(builder, ab, c8, ""); return ab; } @@ -415,39 +374,18 @@ lp_build_mul(struct lp_build_context *bld, return bld->undef; if(!type.floating && !type.fixed && type.norm) { - if(util_cpu_caps.has_sse2 && type.width == 8 && type.length == 16) { - LLVMTypeRef i16x8 = LLVMVectorType(LLVMInt16Type(), 8); - LLVMTypeRef i8x16 = LLVMVectorType(LLVMInt8Type(), 16); - static LLVMValueRef ml = NULL; - static LLVMValueRef mh = NULL; - LLVMValueRef al, ah, bl, bh; - LLVMValueRef abl, abh; - LLVMValueRef ab; - - if(!ml) ml = lp_build_unpack_shuffle(16, 0); - if(!mh) mh = lp_build_unpack_shuffle(16, 1); + if(type.width == 8) { + struct lp_type i16_type = lp_wider_type(type); + LLVMValueRef al, ah, bl, bh, abl, abh, ab; - /* PUNPCKLBW, PUNPCKHBW */ - al = LLVMBuildShuffleVector(bld->builder, a, bld->zero, ml, ""); - bl = LLVMBuildShuffleVector(bld->builder, b, bld->zero, ml, ""); - ah = LLVMBuildShuffleVector(bld->builder, a, bld->zero, mh, ""); - bh = LLVMBuildShuffleVector(bld->builder, b, bld->zero, mh, ""); - - /* NOP */ - al = LLVMBuildBitCast(bld->builder, al, i16x8, ""); - bl = LLVMBuildBitCast(bld->builder, bl, i16x8, ""); - ah = LLVMBuildBitCast(bld->builder, ah, i16x8, ""); - bh = LLVMBuildBitCast(bld->builder, bh, i16x8, ""); + lp_build_unpack2(bld->builder, type, i16_type, a, &al, &ah); + lp_build_unpack2(bld->builder, type, i16_type, b, &bl, &bh); /* PMULLW, PSRLW, PADDW */ - abl = lp_build_mul_u8n(bld->builder, al, bl); - abh = lp_build_mul_u8n(bld->builder, ah, bh); - - /* PACKUSWB */ - ab = lp_build_intrinsic_binary(bld->builder, "llvm.x86.sse2.packuswb.128" , i16x8, abl, abh); + abl = lp_build_mul_u8n(bld->builder, i16_type, al, bl); + abh = lp_build_mul_u8n(bld->builder, i16_type, ah, bh); - /* NOP */ - ab = LLVMBuildBitCast(bld->builder, ab, i8x16, ""); + ab = lp_build_pack2(bld->builder, i16_type, type, abl, abh); return ab; } -- cgit v1.2.3 From 9aafa1fbd247cd6d1bb0ab47bc5b318bd0d67bc5 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 22 Oct 2009 19:02:04 +0100 Subject: llvmpipe: Avoid variable size arrays. Not really variable size, but MSVC still doesn't like them. --- src/gallium/drivers/llvmpipe/lp_test.h | 3 +++ src/gallium/drivers/llvmpipe/lp_test_blend.c | 4 ++-- src/gallium/drivers/llvmpipe/lp_test_conv.c | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h index a88e110c66..21016fefe3 100644 --- a/src/gallium/drivers/llvmpipe/lp_test.h +++ b/src/gallium/drivers/llvmpipe/lp_test.h @@ -56,6 +56,9 @@ #include "lp_bld_type.h" +#define LP_TEST_NUM_SAMPLES 32 + + void write_tsv_header(FILE *fp); diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 94b661dcba..e3af81cffb 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -477,8 +477,8 @@ test_one(unsigned verbose, char *error = NULL; blend_test_ptr_t blend_test_ptr; boolean success; - const unsigned n = 32; - int64_t cycles[n]; + const unsigned n = LP_TEST_NUM_SAMPLES; + int64_t cycles[LP_TEST_NUM_SAMPLES]; double cycles_avg = 0.0; unsigned i, j; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index 9dcf58e5dc..ac2a6d05e3 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -156,8 +156,8 @@ test_one(unsigned verbose, char *error = NULL; conv_test_ptr_t conv_test_ptr; boolean success; - const unsigned n = 32; - int64_t cycles[n]; + const unsigned n = LP_TEST_NUM_SAMPLES; + int64_t cycles[LP_TEST_NUM_SAMPLES]; double cycles_avg = 0.0; unsigned num_srcs; unsigned num_dsts; -- cgit v1.2.3 From ba8c11923a13bdec53128988ffc26ceb5c4f7310 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 22 Oct 2009 19:02:42 +0100 Subject: llvmpipe: Define rdtsc for MSVC. --- src/gallium/drivers/llvmpipe/lp_test.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h index 21016fefe3..39d80726e6 100644 --- a/src/gallium/drivers/llvmpipe/lp_test.h +++ b/src/gallium/drivers/llvmpipe/lp_test.h @@ -71,17 +71,28 @@ boolean test_all(unsigned verbose, FILE *fp); +#if defined(PIPE_CC_MSVC) + +unsigned __int64 __rdtsc(); +#pragma intrinsic(__rdtsc) +#define rdtsc() __rdtsc() + +#elif defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)) + static INLINE uint64_t rdtsc(void) { -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) uint32_t hi, lo; __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); return ((uint64_t)lo) | (((uint64_t)hi) << 32); +} + #else - return 0; + +#define rdtsc() 0 + #endif -} + float -- cgit v1.2.3 From 719984afca3864cfe86ca734f3e2bd6eb5834bd1 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 22 Oct 2009 19:03:04 +0100 Subject: llvmpipe: Avoid yet another variable size array. --- src/gallium/drivers/llvmpipe/lp_setup.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index b14c265b7f..c43b3da450 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -278,11 +278,13 @@ clip_emit_quad( struct setup_context *setup, struct quad_header *quad ) * until we codegenerate single-quad variants of the fragment pipeline * we need this hack. */ const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE; - struct quad_header quads[nr_quads]; - struct quad_header *quad_ptrs[nr_quads]; + struct quad_header quads[4]; + struct quad_header *quad_ptrs[4]; int x0 = block_x(quad->input.x0); unsigned i; + assert(nr_quads == 4); + for(i = 0; i < nr_quads; ++i) { int x = x0 + 2*i; if(x == quad->input.x0) -- cgit v1.2.3 From 1f7f9bab8139681e1dcbc6c10fb42965059d1395 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Thu, 22 Oct 2009 11:39:59 -0700 Subject: r300g: Move render functions to r300_render. Part of the fastpath cleanup. --- src/gallium/drivers/r300/r300_context.c | 67 +---------- src/gallium/drivers/r300/r300_context.h | 3 + src/gallium/drivers/r300/r300_render.c | 192 ++++++++++++++++++++++++++++++++ src/gallium/drivers/r300/r300_render.h | 52 +++++++++ 4 files changed, 249 insertions(+), 65 deletions(-) create mode 100644 src/gallium/drivers/r300/r300_render.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 4ba11a026e..c34fbb1123 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -34,75 +34,12 @@ #include "r300_context.h" #include "r300_flush.h" #include "r300_query.h" +#include "r300_render.h" #include "r300_screen.h" #include "r300_state_derived.h" #include "r300_state_invariant.h" #include "r300_winsys.h" -static boolean r300_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) -{ - struct r300_context* r300 = r300_context(pipe); - int i; - - for (i = 0; i < r300->vertex_buffer_count; i++) { - void* buf = pipe_buffer_map(pipe->screen, - r300->vertex_buffers[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_vertex_buffer(r300->draw, i, buf); - } - - if (indexBuffer) { - void* indices = pipe_buffer_map(pipe->screen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer_range(r300->draw, indexSize, - minIndex, maxIndex, indices); - } else { - draw_set_mapped_element_buffer(r300->draw, 0, NULL); - } - - draw_set_mapped_constant_buffer(r300->draw, - r300->shader_constants[PIPE_SHADER_VERTEX].constants, - r300->shader_constants[PIPE_SHADER_VERTEX].count * - (sizeof(float) * 4)); - - draw_arrays(r300->draw, mode, start, count); - - for (i = 0; i < r300->vertex_buffer_count; i++) { - pipe_buffer_unmap(pipe->screen, r300->vertex_buffers[i].buffer); - draw_set_mapped_vertex_buffer(r300->draw, i, NULL); - } - - if (indexBuffer) { - pipe_buffer_unmap(pipe->screen, indexBuffer); - draw_set_mapped_element_buffer_range(r300->draw, 0, start, - start + count - 1, NULL); - } - - return TRUE; -} - -static boolean r300_draw_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, unsigned mode, - unsigned start, unsigned count) -{ - return r300_draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0, - mode, start, count); -} - -static boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, - unsigned start, unsigned count) -{ - return r300_draw_elements(pipe, NULL, 0, mode, start, count); -} - static enum pipe_error r300_clear_hash_table(void* key, void* value, void* data) { @@ -189,7 +126,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.draw_arrays = r300_draw_arrays; r300->context.draw_elements = r300_draw_elements; - r300->context.draw_range_elements = r300_draw_range_elements; + r300->context.draw_range_elements = r300_swtcl_draw_range_elements; r300->context.is_texture_referenced = r300_is_texture_referenced; r300->context.is_buffer_referenced = r300_is_buffer_referenced; diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 4e2c0ec34e..30b80fa9db 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -273,6 +273,9 @@ struct r300_context { /* Vertex buffers for Gallium. */ struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; int vertex_buffer_count; + /* Vertex elements for Gallium. */ + struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS]; + int vertex_element_count; /* Vertex shader. */ struct r300_vertex_shader* vs; /* Viewport state. */ diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 79a33b53cb..4916152bd6 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -31,11 +31,203 @@ #include "r300_context.h" #include "r300_emit.h" #include "r300_reg.h" +#include "r300_render.h" #include "r300_state_derived.h" /* r300_render: Vertex and index buffer primitive emission. */ #define R300_MAX_VBO_SIZE (1024 * 1024) +static uint32_t r300_translate_primitive(unsigned prim) +{ + switch (prim) { + case PIPE_PRIM_POINTS: + return R300_VAP_VF_CNTL__PRIM_POINTS; + case PIPE_PRIM_LINES: + return R300_VAP_VF_CNTL__PRIM_LINES; + case PIPE_PRIM_LINE_LOOP: + return R300_VAP_VF_CNTL__PRIM_LINE_LOOP; + case PIPE_PRIM_LINE_STRIP: + return R300_VAP_VF_CNTL__PRIM_LINE_STRIP; + case PIPE_PRIM_TRIANGLES: + return R300_VAP_VF_CNTL__PRIM_TRIANGLES; + case PIPE_PRIM_TRIANGLE_STRIP: + return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP; + case PIPE_PRIM_TRIANGLE_FAN: + return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN; + case PIPE_PRIM_QUADS: + return R300_VAP_VF_CNTL__PRIM_QUADS; + case PIPE_PRIM_QUAD_STRIP: + return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP; + case PIPE_PRIM_POLYGON: + return R300_VAP_VF_CNTL__PRIM_POLYGON; + default: + return 0; + } +} + +/* This is the fast-path drawing & emission for HW TCL. */ +boolean r300_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) +{ + struct r300_context* r300 = r300_context(pipe); + CS_LOCALS(r300); + uint32_t prim = r300_translate_primitive(mode); + struct pipe_vertex_buffer* aos = r300->vertex_buffers; + unsigned aos_count = r300->vertex_buffer_count; + short* indices; + unsigned packet_size; + unsigned i; + bool invalid = FALSE; + +validate: + for (i = 0; i < aos_count; i++) { + if (!r300->winsys->add_buffer(r300->winsys, aos[i].buffer, + RADEON_GEM_DOMAIN_GTT, 0)) { + pipe->flush(pipe, 0, NULL); + goto validate; + } + } + if (!r300->winsys->validate(r300->winsys)) { + pipe->flush(pipe, 0, NULL); + if (invalid) { + /* Well, hell. */ + debug_printf("r300: Stuck in validation loop, gonna quit now."); + exit(1); + } + invalid = TRUE; + goto validate; + } + + r300_emit_dirty_state(r300); + + packet_size = (aos_count >> 1) * 3 + (aos_count & 1) * 2; + + BEGIN_CS(3 + packet_size + (aos_count * 2)); + OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); + OUT_CS(aos_count); + for (i = 0; i < aos_count - 1; i += 2) { + OUT_CS(aos[i].stride | + (aos[i].stride << 8) | + (aos[i + 1].stride << 16) | + (aos[i + 1].stride << 24)); + OUT_CS(aos[i].buffer_offset + start * 4 * aos[i].stride); + OUT_CS(aos[i + 1].buffer_offset + start * 4 * aos[i + 1].stride); + } + if (aos_count & 1) { + OUT_CS(aos[i].stride | (aos[i].stride << 8)); + OUT_CS(aos[i].buffer_offset + start * 4 * aos[i].stride); + } + for (i = 0; i < aos_count; i++) { + OUT_CS_RELOC(aos[i].buffer, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); + } + END_CS; + + if (indexBuffer) { + indices = (short*)pipe_buffer_map(pipe->screen, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + + /* Set the starting point. */ + indices += start; + + BEGIN_CS(2 + (count+1)/2); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count + 1)/2); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | prim); + for (i = 0; i < count - 1; i += 2) { + OUT_CS(indices[i + 1] << 16 | indices[i]); + } + if (count % 2) { + OUT_CS(indices[count - 1]); + } + END_CS; + } else { + BEGIN_CS(2); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | + prim); + END_CS; + } + + return TRUE; +} + +/* Simple helpers for context setup. Should probably be moved to util. */ +boolean r300_draw_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, unsigned mode, + unsigned start, unsigned count) +{ + return pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0, + mode, start, count); +} + +boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, + unsigned start, unsigned count) +{ + return pipe->draw_elements(pipe, NULL, 0, mode, start, count); +} + +/**************************************************************************** + * The rest of this file is for SW TCL rendering only. Please be polite and * + * keep these functions separated so that they are easier to locate. ~C. * + ***************************************************************************/ + +/* Draw-based drawing for SW TCL chipsets. */ +boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) +{ + struct r300_context* r300 = r300_context(pipe); + int i; + + for (i = 0; i < r300->vertex_buffer_count; i++) { + void* buf = pipe_buffer_map(pipe->screen, + r300->vertex_buffers[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(r300->draw, i, buf); + } + + if (indexBuffer) { + void* indices = pipe_buffer_map(pipe->screen, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer_range(r300->draw, indexSize, + minIndex, maxIndex, indices); + } else { + draw_set_mapped_element_buffer(r300->draw, 0, NULL); + } + + draw_set_mapped_constant_buffer(r300->draw, + r300->shader_constants[PIPE_SHADER_VERTEX].constants, + r300->shader_constants[PIPE_SHADER_VERTEX].count * + (sizeof(float) * 4)); + + draw_arrays(r300->draw, mode, start, count); + + for (i = 0; i < r300->vertex_buffer_count; i++) { + pipe_buffer_unmap(pipe->screen, r300->vertex_buffers[i].buffer); + draw_set_mapped_vertex_buffer(r300->draw, i, NULL); + } + + if (indexBuffer) { + pipe_buffer_unmap(pipe->screen, indexBuffer); + draw_set_mapped_element_buffer_range(r300->draw, 0, start, + start + count - 1, NULL); + } + + return TRUE; +} + +/* Object for rendering using Draw. */ struct r300_render { /* Parent class */ struct vbuf_render base; diff --git a/src/gallium/drivers/r300/r300_render.h b/src/gallium/drivers/r300/r300_render.h new file mode 100644 index 0000000000..3d8f47ba75 --- /dev/null +++ b/src/gallium/drivers/r300/r300_render.h @@ -0,0 +1,52 @@ +/* + * Copyright 2009 Corbin Simpson + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_RENDER_H +#define R300_RENDER_H + +boolean r300_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); + +boolean r300_draw_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, unsigned mode, + unsigned start, unsigned count); + +boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, + unsigned start, unsigned count); + +boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); + +#endif /* R300_RENDER_H */ -- cgit v1.2.3 From 06e464c2d57552d5ccde2b98885aeef953d8b2a1 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Thu, 22 Oct 2009 11:45:36 -0700 Subject: r300g: Clean up duplicate code in r300_render. --- src/gallium/drivers/r300/r300_render.c | 49 +++------------------------------- 1 file changed, 4 insertions(+), 45 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 4916152bd6..6e2bcc62da 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -331,54 +331,13 @@ static boolean r300_render_set_primitive(struct vbuf_render* render, unsigned prim) { struct r300_render* r300render = r300_render(render); - r300render->prim = prim; - switch (prim) { - case PIPE_PRIM_POINTS: - r300render->hwprim = R300_VAP_VF_CNTL__PRIM_POINTS; - break; - case PIPE_PRIM_LINES: - r300render->hwprim = R300_VAP_VF_CNTL__PRIM_LINES; - break; - case PIPE_PRIM_LINE_LOOP: - r300render->hwprim = R300_VAP_VF_CNTL__PRIM_LINE_LOOP; - break; - case PIPE_PRIM_LINE_STRIP: - r300render->hwprim = R300_VAP_VF_CNTL__PRIM_LINE_STRIP; - break; - case PIPE_PRIM_TRIANGLES: - r300render->hwprim = R300_VAP_VF_CNTL__PRIM_TRIANGLES; - break; - case PIPE_PRIM_TRIANGLE_STRIP: - r300render->hwprim = R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP; - break; - case PIPE_PRIM_TRIANGLE_FAN: - r300render->hwprim = R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN; - break; - case PIPE_PRIM_QUADS: - r300render->hwprim = R300_VAP_VF_CNTL__PRIM_QUADS; - break; - case PIPE_PRIM_QUAD_STRIP: - r300render->hwprim = R300_VAP_VF_CNTL__PRIM_QUAD_STRIP; - break; - case PIPE_PRIM_POLYGON: - r300render->hwprim = R300_VAP_VF_CNTL__PRIM_POLYGON; - break; - default: - return FALSE; - break; - } + r300render->prim = prim; + r300render->hwprim = r300_translate_primitive(prim); return TRUE; } -static void r300_prepare_render(struct r300_render* render, unsigned count) -{ - struct r300_context* r300 = render->r300; - - r300_emit_dirty_state(r300); -} - static void r300_render_draw_arrays(struct vbuf_render* render, unsigned start, unsigned count) @@ -388,7 +347,7 @@ static void r300_render_draw_arrays(struct vbuf_render* render, CS_LOCALS(r300); - r300_prepare_render(r300render, count); + r300_emit_dirty_state(r300); DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count); @@ -409,7 +368,7 @@ static void r300_render_draw(struct vbuf_render* render, CS_LOCALS(r300); - r300_prepare_render(r300render, count); + r300_emit_dirty_state(r300); BEGIN_CS(2 + (count+1)/2); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2); -- cgit v1.2.3 From f9a69c0f040171cffa63c9c68264c1cf847aa1cd Mon Sep 17 00:00:00 2001 From: Patrice Mandin Date: Thu, 22 Oct 2009 21:55:09 +0200 Subject: nouveau: nv30: use a8r8g8b8 as depth texture format for z24s8 --- src/gallium/drivers/nv30/nv30_fragtex.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index a2ce947a72..f5f17d4071 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -30,7 +30,7 @@ nv30_texture_formats[] = { _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X), _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y), // _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X), -// _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X), + _(Z24S8_UNORM , A8R8G8B8, S1, S1, S1, ONE, X, X, X, X), _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W), _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W), _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W), @@ -73,9 +73,9 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) txf = tf->format; txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0); - txf |= log2i(pt->width[0]) << 20; - txf |= log2i(pt->height[0]) << 24; - txf |= log2i(pt->depth[0]) << 28; + txf |= log2i(pt->width[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT; + txf |= log2i(pt->height[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT; + txf |= log2i(pt->depth[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT; txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000; switch (pt->target) { -- cgit v1.2.3 From 4b8de9bd7c6f77fcf3f1f2b939bab980e074e8bf Mon Sep 17 00:00:00 2001 From: Patrice Mandin Date: Thu, 22 Oct 2009 22:01:53 +0200 Subject: nouveau: nv30: rewrite so we can render only in depth buffer --- src/gallium/drivers/nv30/nv30_state_fb.c | 55 ++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 20 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c index 197de82886..f90681b0f9 100644 --- a/src/gallium/drivers/nv30/nv30_state_fb.c +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -8,8 +8,8 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) struct nouveau_channel *chan = nv30->screen->base.channel; struct nouveau_grobj *rankine = nv30->screen->rankine; struct nv04_surface *rt[2], *zeta = NULL; - uint32_t rt_enable, rt_format; - int i, colour_format = 0, zeta_format = 0; + uint32_t rt_enable = 0, rt_format = 0; + int i, colour_format = 0, zeta_format = 0, depth_only = 0; struct nouveau_stateobj *so = so_new(64, 10); unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; unsigned w = fb->width; @@ -17,11 +17,6 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) struct nv30_miptree *nv30mt; int colour_bits = 32, zeta_bits = 32; - if (fb->nr_cbufs == 0) { - return FALSE; - } - - rt_enable = 0; for (i = 0; i < fb->nr_cbufs; i++) { if (colour_format) { assert(colour_format == fb->cbufs[i]->format); @@ -40,17 +35,35 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) zeta = (struct nv04_surface *)fb->zsbuf; } - if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { - assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); - for (i = 1; i < fb->nr_cbufs; i++) - assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); + if (rt_enable & (NV34TCL_RT_ENABLE_COLOR0|NV34TCL_RT_ENABLE_COLOR1)) { + /* Render to at least a colour buffer */ + if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); + for (i = 1; i < fb->nr_cbufs; i++) + assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); - rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | - (log2i(rt[0]->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | - (log2i(rt[0]->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); + rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | + (log2i(rt[0]->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | + (log2i(rt[0]->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); + } + else + rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; + } else if (fb->zsbuf) { + depth_only = 1; + + /* Render to depth buffer only */ + if (!(zeta->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); + + rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | + (log2i(zeta->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | + (log2i(zeta->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); + } + else + rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; + } else { + return FALSE; } - else - rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; switch (colour_format) { case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -83,21 +96,23 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) return FALSE; } - if (rt_enable & NV34TCL_RT_ENABLE_COLOR0) { - uint32_t pitch = rt[0]->pitch; + if (depth_only || (rt_enable & NV34TCL_RT_ENABLE_COLOR0)) { + struct nv04_surface *rt0 = (depth_only ? zeta : rt[0]); + uint32_t pitch = rt0->pitch; + if (zeta) { pitch |= (zeta->pitch << 16); } else { pitch |= (pitch << 16); } - nv30mt = (struct nv30_miptree *)rt[0]->base.texture; + nv30mt = (struct nv30_miptree *) rt0->base.texture; so_method(so, rankine, NV34TCL_DMA_COLOR0, 1); so_reloc (so, nouveau_bo(nv30mt->buffer), 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); so_method(so, rankine, NV34TCL_COLOR0_PITCH, 2); so_data (so, pitch); - so_reloc (so, nouveau_bo(nv30mt->buffer), rt[0]->base.offset, + so_reloc (so, nouveau_bo(nv30mt->buffer), rt0->base.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); } -- cgit v1.2.3 From 198925caa18526e5aa908ab50482aff814207dc2 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 22 Oct 2009 22:57:30 +0200 Subject: nv50: handle PIPE_TEX_FILTER_ANISO case Set the same bits as for linear filtering (in addition to max anisotropy), and 2 unknown bits I've seen set. --- src/gallium/drivers/nv50/nv50_state.c | 26 +++++++++++--------------- src/gallium/drivers/nv50/nv50_texture.h | 2 ++ 2 files changed, 13 insertions(+), 15 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 81fa3e34c5..ffaa5e29d1 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -146,6 +146,7 @@ nv50_sampler_state_create(struct pipe_context *pipe, (wrap_mode(cso->wrap_r) << 6)); switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_ANISO: case PIPE_TEX_FILTER_LINEAR: tsc[1] |= NV50TSC_1_1_MAGF_LINEAR; break; @@ -156,6 +157,7 @@ nv50_sampler_state_create(struct pipe_context *pipe, } switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_ANISO: case PIPE_TEX_FILTER_LINEAR: tsc[1] |= NV50TSC_1_1_MINF_LINEAR; break; @@ -183,21 +185,15 @@ nv50_sampler_state_create(struct pipe_context *pipe, else if (cso->max_anisotropy >= 12.0) tsc[0] |= (6 << 20); - else - if (cso->max_anisotropy >= 10.0) - tsc[0] |= (5 << 20); - else - if (cso->max_anisotropy >= 8.0) - tsc[0] |= (4 << 20); - else - if (cso->max_anisotropy >= 6.0) - tsc[0] |= (3 << 20); - else - if (cso->max_anisotropy >= 4.0) - tsc[0] |= (2 << 20); - else - if (cso->max_anisotropy >= 2.0) - tsc[0] |= (1 << 20); + else { + tsc[0] |= (int)(cso->max_anisotropy * 0.5f) << 20; + + if (cso->max_anisotropy >= 4.0) + tsc[1] |= NV50TSC_1_1_UNKN_ANISO_35; + else + if (cso->max_anisotropy >= 2.0) + tsc[1] |= NV50TSC_1_1_UNKN_ANISO_15; + } if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { tsc[0] |= (1 << 8); diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h index 207fb039f7..13f74c11c6 100644 --- a/src/gallium/drivers/nv50/nv50_texture.h +++ b/src/gallium/drivers/nv50/nv50_texture.h @@ -133,6 +133,8 @@ #define NV50TSC_1_1_MIPF_NEAREST 0x00000080 #define NV50TSC_1_1_MIPF_LINEAR 0x000000c0 #define NV50TSC_1_1_LOD_BIAS_MASK 0x01fff000 +#define NV50TSC_1_1_UNKN_ANISO_15 0x10000000 +#define NV50TSC_1_1_UNKN_ANISO_35 0x18000000 #define NV50TSC_1_2_MIN_LOD_MASK 0x00000f00 #define NV50TSC_1_2_MAX_LOD_MASK 0x00f00000 -- cgit v1.2.3 From ff9e1c01989fc80f07cdc69e3e373bdfe1a384ef Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Thu, 22 Oct 2009 13:42:03 -0700 Subject: r300g: Cleanup PSC setup math a bit and stop using Draw formats. --- src/gallium/drivers/r300/r300_reg.h | 21 ++++++++++++ src/gallium/drivers/r300/r300_state_derived.c | 28 +++++++++------- src/gallium/drivers/r300/r300_state_inlines.h | 48 +++++++++++++++++++++------ 3 files changed, 74 insertions(+), 23 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index ae94bb9b9f..e920b2a5e7 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -348,6 +348,27 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_WRITE_ENA_W 8 # define R300_SWIZZLE1_SHIFT 16 +# define R300_VAP_SWIZZLE_X001 \ + ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \ + (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Y_SHIFT) | \ + (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) | \ + (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | \ + (0xf << R300_WRITE_ENA_SHIFT)) + +# define R300_VAP_SWIZZLE_XY01 \ + ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \ + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \ + (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) | \ + (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | \ + (0xf << R300_WRITE_ENA_SHIFT)) + +# define R300_VAP_SWIZZLE_XYZ1 \ + ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \ + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \ + (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | \ + (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | \ + (0xf << R300_WRITE_ENA_SHIFT)) + # define R300_VAP_SWIZZLE_XYZW \ ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \ (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \ diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 42aee7231e..7d000e9e2d 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -224,7 +224,8 @@ static void r300_vertex_psc(struct r300_context* r300, struct r300_screen* r300screen = r300_screen(r300->context.screen); struct vertex_info* vinfo = &vformat->vinfo; int* tab = vformat->vs_tab; - uint32_t temp; + uint16_t type, swizzle; + enum pipe_format format; unsigned i, attrib_count; /* Vertex shaders have no semantics on their inputs, @@ -246,25 +247,28 @@ static void r300_vertex_psc(struct r300_context* r300, } for (i = 0; i < attrib_count; i++) { - /* Make sure we have a proper destination for our attribute */ + /* Make sure we have a proper destination for our attribute. */ assert(tab[i] != -1); - /* Add the attribute to the PSC table. */ - temp = translate_draw_vertex_data_type(vinfo->attrib[i].emit) | + format = draw_translate_vinfo_format(vinfo->attrib[i].emit); + + /* Obtain the type of data in this attribute. */ + type = r300_translate_vertex_data_type(format) | tab[i] << R300_DST_VEC_LOC_SHIFT; + /* Obtain the swizzle for this attribute. Note that the default + * swizzle in the hardware is not XYZW! */ + swizzle = r300_translate_vertex_data_swizzle(format); + + /* Add the attribute to the PSC table. */ if (i & 1) { - vformat->vap_prog_stream_cntl[i >> 1] &= 0x0000ffff; - vformat->vap_prog_stream_cntl[i >> 1] |= temp << 16; + vformat->vap_prog_stream_cntl[i >> 1] |= type << 16; - vformat->vap_prog_stream_cntl_ext[i >> 1] |= - (R300_VAP_SWIZZLE_XYZW << 16); + vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; } else { - vformat->vap_prog_stream_cntl[i >> 1] &= 0xffff0000; - vformat->vap_prog_stream_cntl[i >> 1] |= temp << 0; + vformat->vap_prog_stream_cntl[i >> 1] |= type << 0; - vformat->vap_prog_stream_cntl_ext[i >> 1] |= - (R300_VAP_SWIZZLE_XYZW << 0); + vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 0; } } diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index c82d8e5f08..2431b75a51 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -400,28 +400,54 @@ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) return 0; } -/* Translate Draw vertex types into PSC vertex types. */ -static INLINE uint32_t translate_draw_vertex_data_type(int type) { - switch (type) { - case EMIT_1F: - case EMIT_1F_PSIZE: +/* Translate pipe_formats into PSC vertex types. */ +static INLINE uint16_t +r300_translate_vertex_data_type(enum pipe_format format) { + switch (format) { + case PIPE_FORMAT_R32_FLOAT: return R300_DATA_TYPE_FLOAT_1; break; - case EMIT_2F: + case PIPE_FORMAT_R32G32_FLOAT: return R300_DATA_TYPE_FLOAT_2; break; - case EMIT_3F: + case PIPE_FORMAT_R32G32B32_FLOAT: return R300_DATA_TYPE_FLOAT_3; break; - case EMIT_4F: + case PIPE_FORMAT_R32G32B32A32_FLOAT: return R300_DATA_TYPE_FLOAT_4; break; - case EMIT_4UB: - return R300_DATA_TYPE_BYTE; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return R300_DATA_TYPE_BYTE | + R300_NORMALIZE; + break; + default: + debug_printf("r300: Implementation error: " + "Bad vertex data format %s!\n", pf_name(format)); + assert(0); + break; + } + return 0; +} + +static INLINE uint16_t +r300_translate_vertex_data_swizzle(enum pipe_format format) { + switch (format) { + case PIPE_FORMAT_R32_FLOAT: + return R300_VAP_SWIZZLE_X001; + break; + case PIPE_FORMAT_R32G32_FLOAT: + return R300_VAP_SWIZZLE_XY01; + break; + case PIPE_FORMAT_R32G32B32_FLOAT: + return R300_VAP_SWIZZLE_XYZ1; + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R8G8B8A8_UNORM: + return R300_VAP_SWIZZLE_XYZW; break; default: debug_printf("r300: Implementation error: " - "Bad vertex data type!\n"); + "Bad vertex data format %s!\n", pf_name(format)); assert(0); break; } -- cgit v1.2.3 From 04ec113e09f6287f2c6b39bf0247e06839eaf7a8 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Thu, 22 Oct 2009 14:28:47 -0700 Subject: r300g: Enable more stuff in r300_screen, cleanup comments. Also enable 24-bit depth buffers without stencil. --- src/gallium/drivers/r300/r300_screen.c | 34 +++++++++++++++------------ src/gallium/drivers/r300/r300_state_inlines.h | 4 +++- src/gallium/drivers/r300/r300_texture.h | 1 + 3 files changed, 23 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index a9058a25e5..1d9f91d0f7 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -80,13 +80,10 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) struct r300_screen* r300screen = r300_screen(pscreen); switch (param) { - /* XXX cases marked "IN THEORY" are possible on the hardware, - * but haven't been implemented yet. */ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: /* XXX I'm told this goes up to 16 */ return 8; case PIPE_CAP_NPOT_TEXTURES: - /* IN THEORY */ return 0; case PIPE_CAP_TWO_SIDED_STENCIL: if (r300screen->caps->is_r500) { @@ -95,16 +92,26 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) return 0; } case PIPE_CAP_GLSL: - if (r300screen->caps->is_r500) { - return 1; - } else { - return 0; - } + /* I'll be frank. This is a lie. + * + * We don't truly support GLSL on any of this driver's chipsets. + * To be fair, no chipset supports the full GLSL specification + * to the best of our knowledge, but some of the less esoteric + * features are still missing here. + * + * Rather than cripple ourselves intentionally, I'm going to set + * this flag, and as Gallium's interface continues to change, I + * hope that this single monolithic GLSL enable can slowly get + * split down into many different pieces and the state tracker + * will handle fallbacks transparently, like it should. + * + * ~ C. + */ + return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: - /* IN THEORY */ - return 0; + return 1; case PIPE_CAP_MAX_RENDER_TARGETS: return 4; case PIPE_CAP_OCCLUSION_QUERY: @@ -145,10 +152,8 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) case PIPE_CAP_TEXTURE_MIRROR_REPEAT: return 1; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - /* XXX guessing (what a terrible guess) */ - return 2; + return 0; case PIPE_CAP_TGSI_CONT_SUPPORTED: - /* XXX */ return 0; case PIPE_CAP_BLEND_EQUATION_SEPARATE: return 1; @@ -229,6 +234,7 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage, /* Z buffer or texture */ case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: /* Z buffer with stencil or texture */ case PIPE_FORMAT_Z24S8_UNORM: retval = usage & @@ -239,7 +245,6 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage, /* Definitely unsupported formats. */ /* Non-usable Z buffer/stencil formats. */ case PIPE_FORMAT_Z32_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: debug_printf("r300: Note: Got unsupported format: %s in %s\n", @@ -249,7 +254,6 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage, /* XXX These don't even exist case PIPE_FORMAT_A32R32G32B32: case PIPE_FORMAT_A16R16G16B16: */ - /* XXX Insert YUV422 packed VYUY and YVYU here */ /* XXX What the deuce is UV88? (r3xx accel page 14) debug_printf("r300: Warning: Got unimplemented format: %s in %s\n", pf_name(format), __FUNCTION__); diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 2431b75a51..ec11a41253 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -330,13 +330,15 @@ static INLINE uint32_t r300_translate_colorformat(enum pipe_format format) return 0; } -/* Depthbuffer and stencilbuffer. Thankfully, we only support two kinds. */ +/* Depthbuffer and stencilbuffer. Thankfully, we only support two flavors. */ static INLINE uint32_t r300_translate_zsformat(enum pipe_format format) { switch (format) { /* 16-bit depth, no stencil */ case PIPE_FORMAT_Z16_UNORM: return R300_DEPTHFORMAT_16BIT_INT_Z; + /* 24-bit depth, ignored stencil */ + case PIPE_FORMAT_Z24X8_UNORM: /* 24-bit depth, 8-bit stencil */ case PIPE_FORMAT_Z24S8_UNORM: return R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 2e58bda716..55d1a0ac5c 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -80,6 +80,7 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) R300_TX_FORMAT_YUV_TO_RGB; /* W24_FP */ case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, W24_FP); default: debug_printf("r300: Implementation error: " -- cgit v1.2.3 From 4e1d51786e0657c7430d731ac464f2a73e32eddf Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 23 Oct 2009 13:49:04 +0100 Subject: gallium: remove noise opcodes Provide a dummy implementation in the GL state tracker (move 0.5 to the destination regs). At some point, a motivated person could add a better implementation of noise. Currently not even the nvidia binary drivers do anything more than this. In any case, the place to do this is in the GL state tracker, not the poor driver. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 16 ---------------- src/gallium/auxiliary/tgsi/tgsi_info.c | 8 ++++---- src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h | 4 ---- src/gallium/drivers/cell/spu/spu_exec.c | 16 ---------------- src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 9 --------- src/gallium/drivers/nv30/nv30_fragprog.c | 6 ------ src/gallium/drivers/nv40/nv40_fragprog.c | 6 ------ src/gallium/drivers/r300/r300_tgsi_to_rc.c | 4 ---- src/gallium/include/pipe/p_shader_tokens.h | 5 +---- src/mesa/state_tracker/st_mesa_to_tgsi.c | 23 +++++++++++++++-------- 10 files changed, 20 insertions(+), 77 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index c79c56debd..d9661c75a0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -3223,22 +3223,6 @@ exec_instruction( /* no-op */ break; - case TGSI_OPCODE_NOISE1: - assert( 0 ); - break; - - case TGSI_OPCODE_NOISE2: - assert( 0 ); - break; - - case TGSI_OPCODE_NOISE3: - assert( 0 ); - break; - - case TGSI_OPCODE_NOISE4: - assert( 0 ); - break; - case TGSI_OPCODE_NOP: break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 17af4cb7ad..fe8b0bdce3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -134,10 +134,10 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 0, 0, 0, 0, 0, 1, "BGNSUB", TGSI_OPCODE_BGNSUB }, { 0, 0, 0, 1, 1, 0, "ENDLOOP", TGSI_OPCODE_ENDLOOP }, { 0, 0, 0, 0, 1, 0, "ENDSUB", TGSI_OPCODE_ENDSUB }, - { 1, 1, 0, 0, 0, 0, "NOISE1", TGSI_OPCODE_NOISE1 }, - { 1, 1, 0, 0, 0, 0, "NOISE2", TGSI_OPCODE_NOISE2 }, - { 1, 1, 0, 0, 0, 0, "NOISE3", TGSI_OPCODE_NOISE3 }, - { 1, 1, 0, 0, 0, 0, "NOISE4", TGSI_OPCODE_NOISE4 }, + { 0, 0, 0, 0, 0, 0, "", 103 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 104 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 105 }, /* removed */ + { 0, 0, 0, 0, 0, 0, "", 106 }, /* removed */ { 0, 0, 0, 0, 0, 0, "NOP", TGSI_OPCODE_NOP }, { 0, 0, 0, 0, 0, 0, "", 108 }, /* removed */ { 0, 0, 0, 0, 0, 0, "", 109 }, /* removed */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index e7bcf4bf75..d321f013d0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -139,10 +139,6 @@ OP00_LBL(BGNLOOP) OP00(BGNSUB) OP00_LBL(ENDLOOP) OP00(ENDSUB) -OP11(NOISE1) -OP11(NOISE2) -OP11(NOISE3) -OP11(NOISE4) OP00(NOP) OP11(NRM4) OP01(CALLNZ) diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 0eaae2e451..725a72b326 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -1807,22 +1807,6 @@ exec_instruction( /* no-op */ break; - case TGSI_OPCODE_NOISE1: - ASSERT( 0 ); - break; - - case TGSI_OPCODE_NOISE2: - ASSERT( 0 ); - break; - - case TGSI_OPCODE_NOISE3: - ASSERT( 0 ); - break; - - case TGSI_OPCODE_NOISE4: - ASSERT( 0 ); - break; - case TGSI_OPCODE_NOP: break; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index adc81569ed..926cc1cc9f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -1386,15 +1386,6 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_NOISE1: - case TGSI_OPCODE_NOISE2: - case TGSI_OPCODE_NOISE3: - case TGSI_OPCODE_NOISE4: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = bld->base.zero; - } - break; - case TGSI_OPCODE_NOP: break; diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index a48ba9782b..62c6c76cee 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -527,12 +527,6 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, case TGSI_OPCODE_MUL: arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); break; - case TGSI_OPCODE_NOISE1: - case TGSI_OPCODE_NOISE2: - case TGSI_OPCODE_NOISE3: - case TGSI_OPCODE_NOISE4: - arith(fpc, sat, SFL, dst, mask, none, none, none); - break; case TGSI_OPCODE_POW: arith(fpc, sat, POW, dst, mask, src[0], src[1], none); break; diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 32d9ed1a7f..e3550baa63 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -568,12 +568,6 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, case TGSI_OPCODE_MUL: arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); break; - case TGSI_OPCODE_NOISE1: - case TGSI_OPCODE_NOISE2: - case TGSI_OPCODE_NOISE3: - case TGSI_OPCODE_NOISE4: - arith(fpc, sat, SFL, dst, mask, none, none, none); - break; case TGSI_OPCODE_POW: tmp = temp(fpc); arith(fpc, 0, LG2, tmp, MASK_X, diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 74d4fb5087..3d2f6cafee 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -135,10 +135,6 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_BGNSUB: return RC_OPCODE_BGNSUB; */ /* case TGSI_OPCODE_ENDLOOP2: return RC_OPCODE_ENDLOOP2; */ /* case TGSI_OPCODE_ENDSUB: return RC_OPCODE_ENDSUB; */ - /* case TGSI_OPCODE_NOISE1: return RC_OPCODE_NOISE1; */ - /* case TGSI_OPCODE_NOISE2: return RC_OPCODE_NOISE2; */ - /* case TGSI_OPCODE_NOISE3: return RC_OPCODE_NOISE3; */ - /* case TGSI_OPCODE_NOISE4: return RC_OPCODE_NOISE4; */ case TGSI_OPCODE_NOP: return RC_OPCODE_NOP; /* gap */ /* case TGSI_OPCODE_NRM4: return RC_OPCODE_NRM4; */ diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 5fa6c9af30..48e6583ada 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -266,10 +266,7 @@ union tgsi_immediate_data #define TGSI_OPCODE_BGNSUB 100 #define TGSI_OPCODE_ENDLOOP 101 #define TGSI_OPCODE_ENDSUB 102 -#define TGSI_OPCODE_NOISE1 103 -#define TGSI_OPCODE_NOISE2 104 -#define TGSI_OPCODE_NOISE3 105 -#define TGSI_OPCODE_NOISE4 106 + /* gap */ #define TGSI_OPCODE_NOP 107 /* gap */ #define TGSI_OPCODE_NRM4 112 diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 70d7c4fee2..1b9d35d353 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -481,14 +481,6 @@ translate_opcode( unsigned op ) return TGSI_OPCODE_MOV; case OPCODE_MUL: return TGSI_OPCODE_MUL; - case OPCODE_NOISE1: - return TGSI_OPCODE_NOISE1; - case OPCODE_NOISE2: - return TGSI_OPCODE_NOISE2; - case OPCODE_NOISE3: - return TGSI_OPCODE_NOISE3; - case OPCODE_NOISE4: - return TGSI_OPCODE_NOISE4; case OPCODE_NOP: return TGSI_OPCODE_NOP; case OPCODE_NRM3: @@ -616,6 +608,21 @@ compile_instruction( src, num_src ); break; + case OPCODE_NOISE1: + case OPCODE_NOISE2: + case OPCODE_NOISE3: + case OPCODE_NOISE4: + /* At some point, a motivated person could add a better + * implementation of noise. Currently not even the nvidia + * binary drivers do anything more than this. In any case, the + * place to do this is in the GL state tracker, not the poor + * driver. + */ + ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) ); + break; + + + default: ureg_insn( ureg, translate_opcode( inst->Opcode ), -- cgit v1.2.3 From b9cb74c7f826dfd320f5e5b54aa933898f7ddd3d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 23 Oct 2009 14:31:24 +0100 Subject: gallium: remove the swizzling parts of ExtSwizzle These haven't been used by the mesa state tracker since the conversion to tgsi_ureg, and it seems that none of the other state trackers are using it either. This helps simplify one of the biggest suprises when starting off with TGSI shaders. --- src/gallium/auxiliary/draw/draw_vs_aos.c | 30 ++--------- src/gallium/auxiliary/tgsi/tgsi_build.c | 27 +--------- src/gallium/auxiliary/tgsi/tgsi_build.h | 4 -- src/gallium/auxiliary/tgsi/tgsi_dump.c | 27 ---------- src/gallium/auxiliary/tgsi/tgsi_dump_c.c | 26 --------- src/gallium/auxiliary/tgsi/tgsi_exec.c | 34 ++++-------- src/gallium/auxiliary/tgsi/tgsi_info.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h | 1 - src/gallium/auxiliary/tgsi/tgsi_ppc.c | 26 +++------ src/gallium/auxiliary/tgsi/tgsi_scan.c | 5 -- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 34 +++--------- src/gallium/auxiliary/tgsi/tgsi_text.c | 28 ++-------- src/gallium/auxiliary/tgsi/tgsi_util.c | 74 ++------------------------ src/gallium/auxiliary/tgsi/tgsi_util.h | 12 +---- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 18 ++----- src/gallium/drivers/cell/spu/spu_exec.c | 27 +++------- src/gallium/drivers/cell/spu/spu_util.c | 48 +---------------- src/gallium/drivers/i915/i915_fpc_translate.c | 20 ++----- src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 26 +++------ src/gallium/drivers/nv30/nv30_fragprog.c | 24 ++------- src/gallium/drivers/nv40/nv40_fragprog.c | 18 ++----- src/gallium/drivers/nv40/nv40_vertprog.c | 18 ++----- src/gallium/drivers/nv50/nv50_program.c | 28 +++------- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 9 ++-- src/gallium/include/pipe/p_shader_tokens.h | 17 +----- src/mesa/state_tracker/st_mesa_to_tgsi.c | 2 - 26 files changed, 96 insertions(+), 489 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 645d7cccba..88bc790b62 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -537,19 +537,10 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, unsigned abs = 0; for (i = 0; i < 4; i++) { - unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( src, i ); + unsigned swizzle = tgsi_util_get_full_src_register_swizzle( src, i ); unsigned neg = tgsi_util_get_full_src_register_sign_mode( src, i ); - switch (swizzle) { - case TGSI_EXTSWIZZLE_ZERO: - case TGSI_EXTSWIZZLE_ONE: - AOS_ERROR(cp, "not supporting full swizzles yet in tgsi_aos_sse2"); - break; - - default: - swz |= (swizzle & 0x3) << (i * 2); - break; - } + swz |= (swizzle & 0x3) << (i * 2); switch (neg) { case TGSI_UTIL_SIGN_TOGGLE: @@ -632,23 +623,10 @@ static void x87_fld_src( struct aos_compilation *cp, src->SrcRegister.File, src->SrcRegister.Index); - unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( src, channel ); + unsigned swizzle = tgsi_util_get_full_src_register_swizzle( src, channel ); unsigned neg = tgsi_util_get_full_src_register_sign_mode( src, channel ); - switch (swizzle) { - case TGSI_EXTSWIZZLE_ZERO: - x87_fldz( cp->func ); - break; - - case TGSI_EXTSWIZZLE_ONE: - x87_fld1( cp->func ); - break; - - default: - x87_fld( cp->func, x86_make_disp(arg0, (swizzle & 3) * sizeof(float)) ); - break; - } - + x87_fld( cp->func, x86_make_disp(arg0, (swizzle & 3) * sizeof(float)) ); switch (neg) { case TGSI_UTIL_SIGN_TOGGLE: diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index e0cfc54420..98d36f43e4 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -692,12 +692,8 @@ tgsi_build_full_instruction( tgsi_default_src_register_ext_swz() ) ) { struct tgsi_src_register_ext_swz *src_register_ext_swz; - /* Use of the extended swizzle requires the simple swizzle to be identity. + /* Use of the extended negate requires the simple negate to be identity. */ - assert( reg->SrcRegister.SwizzleX == TGSI_SWIZZLE_X ); - assert( reg->SrcRegister.SwizzleY == TGSI_SWIZZLE_Y ); - assert( reg->SrcRegister.SwizzleZ == TGSI_SWIZZLE_Z ); - assert( reg->SrcRegister.SwizzleW == TGSI_SWIZZLE_W ); assert( reg->SrcRegister.Negate == FALSE ); if( maxsize <= size ) @@ -707,10 +703,6 @@ tgsi_build_full_instruction( size++; *src_register_ext_swz = tgsi_build_src_register_ext_swz( - reg->SrcRegisterExtSwz.ExtSwizzleX, - reg->SrcRegisterExtSwz.ExtSwizzleY, - reg->SrcRegisterExtSwz.ExtSwizzleZ, - reg->SrcRegisterExtSwz.ExtSwizzleW, reg->SrcRegisterExtSwz.NegateX, reg->SrcRegisterExtSwz.NegateY, reg->SrcRegisterExtSwz.NegateZ, @@ -1048,10 +1040,7 @@ tgsi_default_src_register_ext_swz( void ) struct tgsi_src_register_ext_swz src_register_ext_swz; src_register_ext_swz.Type = TGSI_SRC_REGISTER_EXT_TYPE_SWZ; - src_register_ext_swz.ExtSwizzleX = TGSI_EXTSWIZZLE_X; - src_register_ext_swz.ExtSwizzleY = TGSI_EXTSWIZZLE_Y; - src_register_ext_swz.ExtSwizzleZ = TGSI_EXTSWIZZLE_Z; - src_register_ext_swz.ExtSwizzleW = TGSI_EXTSWIZZLE_W; + src_register_ext_swz.Padding0 = 0; src_register_ext_swz.NegateX = 0; src_register_ext_swz.NegateY = 0; src_register_ext_swz.NegateZ = 0; @@ -1074,10 +1063,6 @@ tgsi_compare_src_register_ext_swz( struct tgsi_src_register_ext_swz tgsi_build_src_register_ext_swz( - unsigned ext_swizzle_x, - unsigned ext_swizzle_y, - unsigned ext_swizzle_z, - unsigned ext_swizzle_w, unsigned negate_x, unsigned negate_y, unsigned negate_z, @@ -1088,20 +1073,12 @@ tgsi_build_src_register_ext_swz( { struct tgsi_src_register_ext_swz src_register_ext_swz; - assert( ext_swizzle_x <= TGSI_EXTSWIZZLE_ONE ); - assert( ext_swizzle_y <= TGSI_EXTSWIZZLE_ONE ); - assert( ext_swizzle_z <= TGSI_EXTSWIZZLE_ONE ); - assert( ext_swizzle_w <= TGSI_EXTSWIZZLE_ONE ); assert( negate_x <= 1 ); assert( negate_y <= 1 ); assert( negate_z <= 1 ); assert( negate_w <= 1 ); src_register_ext_swz = tgsi_default_src_register_ext_swz(); - src_register_ext_swz.ExtSwizzleX = ext_swizzle_x; - src_register_ext_swz.ExtSwizzleY = ext_swizzle_y; - src_register_ext_swz.ExtSwizzleZ = ext_swizzle_z; - src_register_ext_swz.ExtSwizzleW = ext_swizzle_w; src_register_ext_swz.NegateX = negate_x; src_register_ext_swz.NegateY = negate_y; src_register_ext_swz.NegateZ = negate_z; diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h index 17d977b059..5e0062a96f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -241,10 +241,6 @@ tgsi_compare_src_register_ext_swz( struct tgsi_src_register_ext_swz tgsi_build_src_register_ext_swz( - unsigned ext_swizzle_x, - unsigned ext_swizzle_y, - unsigned ext_swizzle_z, - unsigned ext_swizzle_w, unsigned negate_x, unsigned negate_y, unsigned negate_z, diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 111d95b666..3a584a10a1 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -148,15 +148,6 @@ static const char *texture_names[] = "SHADOWRECT" }; -static const char *extswizzle_names[] = -{ - "x", - "y", - "z", - "w", - "0", - "1" -}; static const char *modulate_names[TGSI_MODULATE_COUNT] = { @@ -446,24 +437,6 @@ iter_instruction( ENM( src->SrcRegister.SwizzleZ, swizzle_names ); ENM( src->SrcRegister.SwizzleW, swizzle_names ); } - if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || - src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || - src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || - src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) { - CHR( '.' ); - if (src->SrcRegisterExtSwz.NegateX) - TXT("-"); - ENM( src->SrcRegisterExtSwz.ExtSwizzleX, extswizzle_names ); - if (src->SrcRegisterExtSwz.NegateY) - TXT("-"); - ENM( src->SrcRegisterExtSwz.ExtSwizzleY, extswizzle_names ); - if (src->SrcRegisterExtSwz.NegateZ) - TXT("-"); - ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, extswizzle_names ); - if (src->SrcRegisterExtSwz.NegateW) - TXT("-"); - ENM( src->SrcRegisterExtSwz.ExtSwizzleW, extswizzle_names ); - } if (src->SrcRegisterExtMod.Complement) CHR( ')' ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c index 4a9c02b141..4f59ed22b5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -167,16 +167,6 @@ static const char *TGSI_SRC_REGISTER_EXTS[] = "SRC_REGISTER_EXT_TYPE_MOD" }; -static const char *TGSI_EXTSWIZZLES[] = -{ - "EXTSWIZZLE_X", - "EXTSWIZZLE_Y", - "EXTSWIZZLE_Z", - "EXTSWIZZLE_W", - "EXTSWIZZLE_ZERO", - "EXTSWIZZLE_ONE" -}; - static const char *TGSI_WRITEMASKS[] = { "0", @@ -560,22 +550,6 @@ dump_instruction_verbose( EOL(); TXT( "\nType : " ); ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS ); - if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) { - TXT( "\nExtSwizzleX: " ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES ); - } - if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleY != src->SrcRegisterExtSwz.ExtSwizzleY ) { - TXT( "\nExtSwizzleY: " ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES ); - } - if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleZ != src->SrcRegisterExtSwz.ExtSwizzleZ ) { - TXT( "\nExtSwizzleZ: " ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES ); - } - if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleW != src->SrcRegisterExtSwz.ExtSwizzleW ) { - TXT( "\nExtSwizzleW: " ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES ); - } if( deflt || fs->SrcRegisterExtSwz.NegateX != src->SrcRegisterExtSwz.NegateX ) { TXT( "\nNegateX : " ); UID( src->SrcRegisterExtSwz.NegateX ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index d9661c75a0..14f0fc4e38 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -210,9 +210,8 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) uint channelsWritten = 0x0; FOR_EACH_ENABLED_CHANNEL(*inst, chan) { /* check if we're reading a channel that's been written */ - uint swizzle = tgsi_util_get_full_src_register_extswizzle(&inst->FullSrcRegisters[i], chan); - if (swizzle <= TGSI_SWIZZLE_W && - (channelsWritten & (1 << swizzle))) { + uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->FullSrcRegisters[i], chan); + if (channelsWritten & (1 << swizzle)) { return TRUE; } @@ -338,7 +337,7 @@ tgsi_exec_machine_bind_shader( /* XXX we only handle SOA dependencies properly for MOV/SWZ * at this time! */ - if (opcode != TGSI_OPCODE_MOV && opcode != TGSI_OPCODE_SWZ) { + if (opcode != TGSI_OPCODE_MOV) { debug_printf("Warning: SOA dependency in instruction" " is not handled:\n"); tgsi_dump_instruction(&parse.FullToken.FullInstruction, @@ -1130,10 +1129,10 @@ fetch_src_file_channel( union tgsi_exec_channel *chan ) { switch( swizzle ) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: switch( file ) { case TGSI_FILE_CONSTANT: assert(mach->Consts); @@ -1201,14 +1200,6 @@ fetch_src_file_channel( } break; - case TGSI_EXTSWIZZLE_ZERO: - *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; - break; - - case TGSI_EXTSWIZZLE_ONE: - *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; - break; - default: assert( 0 ); } @@ -1367,7 +1358,7 @@ fetch_source( */ } - swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); fetch_src_file_channel( mach, reg->SrcRegister.File, @@ -1689,10 +1680,8 @@ exec_kil(struct tgsi_exec_machine *mach, uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ union tgsi_exec_channel r[1]; - /* This mask stores component bits that were already tested. Note that - * we test if the value is less than zero, so 1.0 and 0.0 need not to be - * tested. */ - uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + /* This mask stores component bits that were already tested. */ + uniquemask = 0; for (chan_index = 0; chan_index < 4; chan_index++) { @@ -1700,7 +1689,7 @@ exec_kil(struct tgsi_exec_machine *mach, uint i; /* unswizzle channel */ - swizzle = tgsi_util_get_full_src_register_extswizzle ( + swizzle = tgsi_util_get_full_src_register_swizzle ( &inst->FullSrcRegisters[0], chan_index); @@ -2031,7 +2020,6 @@ exec_instruction( break; case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: if (inst->Flags & SOA_DEPENDENCY_FLAG) { /* Do all fetches into temp regs, then do all stores to avoid * intermediate/accidental clobbering. This could be done all the diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index fe8b0bdce3..be375cabb8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -149,7 +149,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 0, 1, 0, 0, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC }, { 0, 1, 0, 0, 0, 0, "KIL", TGSI_OPCODE_KIL }, { 0, 0, 0, 0, 0, 0, "END", TGSI_OPCODE_END }, - { 1, 1, 0, 0, 0, 0, "SWZ", TGSI_OPCODE_SWZ } + { 0, 0, 0, 0, 0, 0, "", 118 } /* removed */ }; const struct tgsi_opcode_info * diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index d321f013d0..b34263da48 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -146,7 +146,6 @@ OP01(IFC) OP01(BREAKC) OP01(KIL) OP00(END) -OP11(SWZ) #undef OP00 diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 4b1c7d4e01..617fd7f6be 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -283,14 +283,14 @@ emit_fetch(struct gen_context *gen, const struct tgsi_full_src_register *reg, const unsigned chan_index) { - uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index); + uint swizzle = tgsi_util_get_full_src_register_swizzle(reg, chan_index); int dst_vec = -1; switch (swizzle) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: switch (reg->SrcRegister.File) { case TGSI_FILE_INPUT: { @@ -349,16 +349,6 @@ emit_fetch(struct gen_context *gen, assert( 0 ); } break; - case TGSI_EXTSWIZZLE_ZERO: - ppc_vzero(gen->f, dst_vec); - break; - case TGSI_EXTSWIZZLE_ONE: - { - int one_vec = gen_one_vec(gen); - dst_vec = ppc_allocate_vec_register(gen->f); - ppc_vmove(gen->f, dst_vec, one_vec); - } - break; default: assert( 0 ); } @@ -418,8 +408,8 @@ equal_src_locs(const struct tgsi_full_src_register *a, uint chan_a, return FALSE; if (a->SrcRegister.Index != b->SrcRegister.Index) return FALSE; - swz_a = tgsi_util_get_full_src_register_extswizzle(a, chan_a); - swz_b = tgsi_util_get_full_src_register_extswizzle(b, chan_b); + swz_a = tgsi_util_get_full_src_register_swizzle(a, chan_a); + swz_b = tgsi_util_get_full_src_register_swizzle(b, chan_b); if (swz_a != swz_b) return FALSE; sign_a = tgsi_util_get_full_src_register_sign_mode(a, chan_a); @@ -635,7 +625,6 @@ emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) ppc_vlogefp(gen->f, v1, v0); /* v1 = log2(v0) */ break; case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: if (v0 != v1) ppc_vmove(gen->f, v1, v0); break; @@ -1119,7 +1108,6 @@ emit_instruction(struct gen_context *gen, switch (inst->Instruction.Opcode) { case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: case TGSI_OPCODE_ABS: case TGSI_OPCODE_FLR: case TGSI_OPCODE_FRC: diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 0db4481a3d..f9c16f1b6c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -228,11 +228,6 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens) src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W || - src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || - src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || - src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || - src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W || - dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW) { tgsi_parse_free(&parse); diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 5f6b83b236..a96fc94c7a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1260,13 +1260,13 @@ emit_fetch( const struct tgsi_full_src_register *reg, const unsigned chan_index ) { - unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); switch (swizzle) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: switch (reg->SrcRegister.File) { case TGSI_FILE_CONSTANT: emit_const( @@ -1308,22 +1308,6 @@ emit_fetch( } break; - case TGSI_EXTSWIZZLE_ZERO: - emit_tempf( - func, - xmm, - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C ); - break; - - case TGSI_EXTSWIZZLE_ONE: - emit_tempf( - func, - xmm, - TEMP_ONE_I, - TEMP_ONE_C ); - break; - default: assert( 0 ); } @@ -1582,13 +1566,13 @@ emit_kil( /* This mask stores component bits that were already tested. Note that * we test if the value is less than zero, so 1.0 and 0.0 need not to be * tested. */ - uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + uniquemask = 0; FOR_EACH_CHANNEL( chan_index ) { unsigned swizzle; /* unswizzle channel */ - swizzle = tgsi_util_get_full_src_register_extswizzle( + swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); @@ -1772,7 +1756,6 @@ emit_instruction( break; case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 4 + chan_index, 0, chan_index ); } @@ -2938,8 +2921,7 @@ tgsi_emit_sse2( * the result in the cases where the code is too opaque to * fix. */ - if (opcode != TGSI_OPCODE_MOV && - opcode != TGSI_OPCODE_SWZ) { + if (opcode != TGSI_OPCODE_MOV) { debug_printf("Warning: src/dst aliasing in instruction" " is not handled:\n"); tgsi_dump_instruction(&parse.FullToken.FullInstruction, 1); diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index d438450b1e..87d9cd7b3f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -538,13 +538,11 @@ static boolean parse_optional_swizzle( struct translate_ctx *ctx, uint swizzle[4], - boolean *parsed_swizzle, - boolean *parsed_extswizzle ) + boolean *parsed_swizzle ) { const char *cur = ctx->cur; *parsed_swizzle = FALSE; - *parsed_extswizzle = FALSE; eat_opt_white( &cur ); if (*cur == '.') { @@ -562,15 +560,8 @@ parse_optional_swizzle( else if (uprcase( *cur ) == 'W') swizzle[i] = TGSI_SWIZZLE_W; else { - if (*cur == '0') - swizzle[i] = TGSI_EXTSWIZZLE_ZERO; - else if (*cur == '1') - swizzle[i] = TGSI_EXTSWIZZLE_ONE; - else { - report_error( ctx, "Expected register swizzle component `x', `y', `z', `w', `0' or `1'" ); - return FALSE; - } - *parsed_extswizzle = TRUE; + report_error( ctx, "Expected register swizzle component `x', `y', `z', `w', `0' or `1'" ); + return FALSE; } cur++; } @@ -595,7 +586,6 @@ parse_src_operand( uint swizzle[4]; boolean parsed_ext_negate_paren = FALSE; boolean parsed_swizzle; - boolean parsed_extswizzle; if (*ctx->cur == '-') { cur = ctx->cur; @@ -690,16 +680,8 @@ parse_src_operand( /* Parse optional swizzle. */ - if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle, &parsed_extswizzle )) { - if (parsed_extswizzle) { - assert( parsed_swizzle ); - - src->SrcRegisterExtSwz.ExtSwizzleX = swizzle[0]; - src->SrcRegisterExtSwz.ExtSwizzleY = swizzle[1]; - src->SrcRegisterExtSwz.ExtSwizzleZ = swizzle[2]; - src->SrcRegisterExtSwz.ExtSwizzleW = swizzle[3]; - } - else if (parsed_swizzle) { + if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle )) { + if (parsed_swizzle) { src->SrcRegister.SwizzleX = swizzle[0]; src->SrcRegister.SwizzleY = swizzle[1]; src->SrcRegister.SwizzleZ = swizzle[2]; diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index 71f8a6ca40..6120a2da94 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -69,59 +69,15 @@ tgsi_util_get_src_register_swizzle( return 0; } -unsigned -tgsi_util_get_src_register_extswizzle( - const struct tgsi_src_register_ext_swz *reg, - unsigned component ) -{ - switch( component ) { - case 0: - return reg->ExtSwizzleX; - case 1: - return reg->ExtSwizzleY; - case 2: - return reg->ExtSwizzleZ; - case 3: - return reg->ExtSwizzleW; - default: - assert( 0 ); - } - return 0; -} unsigned -tgsi_util_get_full_src_register_extswizzle( +tgsi_util_get_full_src_register_swizzle( const struct tgsi_full_src_register *reg, unsigned component ) { - unsigned swizzle; - - /* - * First, calculate the extended swizzle for a given channel. This will give - * us either a channel index into the simple swizzle or a constant 1 or 0. - */ - swizzle = tgsi_util_get_src_register_extswizzle( - ®->SrcRegisterExtSwz, + return tgsi_util_get_src_register_swizzle( + ®->SrcRegister, component ); - - assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); - assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); - assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); - assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); - assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); - assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); - - /* - * Second, calculate the simple swizzle for the unswizzled channel index. - * Leave the constants intact, they are not affected by the simple swizzle. - */ - if( swizzle <= TGSI_SWIZZLE_W ) { - swizzle = tgsi_util_get_src_register_swizzle( - ®->SrcRegister, - swizzle ); - } - - return swizzle; } void @@ -148,30 +104,6 @@ tgsi_util_set_src_register_swizzle( } } -void -tgsi_util_set_src_register_extswizzle( - struct tgsi_src_register_ext_swz *reg, - unsigned swizzle, - unsigned component ) -{ - switch( component ) { - case 0: - reg->ExtSwizzleX = swizzle; - break; - case 1: - reg->ExtSwizzleY = swizzle; - break; - case 2: - reg->ExtSwizzleZ = swizzle; - break; - case 3: - reg->ExtSwizzleW = swizzle; - break; - default: - assert( 0 ); - } -} - unsigned tgsi_util_get_src_register_extnegate( const struct tgsi_src_register_ext_swz *reg, diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h index 21eb656327..bf3f20ca6c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.h +++ b/src/gallium/auxiliary/tgsi/tgsi_util.h @@ -45,13 +45,9 @@ tgsi_util_get_src_register_swizzle( const struct tgsi_src_register *reg, unsigned component ); -unsigned -tgsi_util_get_src_register_extswizzle( - const struct tgsi_src_register_ext_swz *reg, - unsigned component); unsigned -tgsi_util_get_full_src_register_extswizzle( +tgsi_util_get_full_src_register_swizzle( const struct tgsi_full_src_register *reg, unsigned component ); @@ -61,12 +57,6 @@ tgsi_util_set_src_register_swizzle( unsigned swizzle, unsigned component ); -void -tgsi_util_set_src_register_extswizzle( - struct tgsi_src_register_ext_swz *reg, - unsigned swizzle, - unsigned component ); - unsigned tgsi_util_get_src_register_extnegate( const struct tgsi_src_register_ext_swz *reg, diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index b6b2f885af..19e3ab0844 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -231,7 +231,7 @@ static boolean is_register_src(struct codegen *gen, int channel, const struct tgsi_full_src_register *src) { - int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel); + int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel); int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel); if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) { @@ -271,23 +271,14 @@ get_src_reg(struct codegen *gen, const struct tgsi_full_src_register *src) { int reg = -1; - int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel); + int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel); boolean reg_is_itemp = FALSE; uint sign_op; assert(swizzle >= TGSI_SWIZZLE_X); - assert(swizzle <= TGSI_EXTSWIZZLE_ONE); + assert(swizzle <= TGSI_SWIZZLE_W); - if (swizzle == TGSI_EXTSWIZZLE_ONE) { - /* Load const one float and early out */ - reg = get_const_one_reg(gen); - } - else if (swizzle == TGSI_EXTSWIZZLE_ZERO) { - /* Load const zero float and early out */ - reg = get_itemp(gen); - spe_xor(gen->f, reg, reg, reg); - } - else { + { int index = src->SrcRegister.Index; assert(swizzle < 4); @@ -1758,7 +1749,6 @@ emit_instruction(struct codegen *gen, case TGSI_OPCODE_ARL: return emit_ARL(gen, inst); case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: return emit_MOV(gen, inst); case TGSI_OPCODE_ADD: case TGSI_OPCODE_SUB: diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 725a72b326..4c32b2d06d 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -346,10 +346,10 @@ fetch_src_file_channel( union spu_exec_channel *chan ) { switch( swizzle ) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: switch( file ) { case TGSI_FILE_CONSTANT: { unsigned i; @@ -413,14 +413,6 @@ fetch_src_file_channel( } break; - case TGSI_EXTSWIZZLE_ZERO: - *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; - break; - - case TGSI_EXTSWIZZLE_ONE: - *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; - break; - default: ASSERT( 0 ); } @@ -500,7 +492,7 @@ fetch_source( } } - swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); fetch_src_file_channel( mach, reg->SrcRegister.File, @@ -610,10 +602,8 @@ exec_kil(struct spu_exec_machine *mach, uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ union spu_exec_channel r[1]; - /* This mask stores component bits that were already tested. Note that - * we test if the value is less than zero, so 1.0 and 0.0 need not to be - * tested. */ - uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + /* This mask stores component bits that were already tested. */ + uniquemask = 0; for (chan_index = 0; chan_index < 4; chan_index++) { @@ -621,7 +611,7 @@ exec_kil(struct spu_exec_machine *mach, uint i; /* unswizzle channel */ - swizzle = tgsi_util_get_full_src_register_extswizzle ( + swizzle = tgsi_util_get_full_src_register_swizzle ( &inst->FullSrcRegisters[0], chan_index); @@ -909,7 +899,6 @@ exec_instruction( break; case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); STORE( &r[0], 0, chan_index ); diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c index af25dd3718..25a7a71133 100644 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -26,59 +26,15 @@ tgsi_util_get_src_register_swizzle( return 0; } -unsigned -tgsi_util_get_src_register_extswizzle( - const struct tgsi_src_register_ext_swz *reg, - unsigned component ) -{ - switch( component ) { - case 0: - return reg->ExtSwizzleX; - case 1: - return reg->ExtSwizzleY; - case 2: - return reg->ExtSwizzleZ; - case 3: - return reg->ExtSwizzleW; - default: - ASSERT( 0 ); - } - return 0; -} unsigned tgsi_util_get_full_src_register_extswizzle( const struct tgsi_full_src_register *reg, unsigned component ) { - unsigned swizzle; - - /* - * First, calculate the extended swizzle for a given channel. This will give - * us either a channel index into the simple swizzle or a constant 1 or 0. - */ - swizzle = tgsi_util_get_src_register_extswizzle( - ®->SrcRegisterExtSwz, + return tgsi_util_get_src_register_swizzle( + reg->SrcRegister, component ); - - ASSERT (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); - ASSERT (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); - ASSERT (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); - ASSERT (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); - ASSERT (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); - ASSERT (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); - - /* - * Second, calculate the simple swizzle for the unswizzled channel index. - * Leave the constants intact, they are not affected by the simple swizzle. - */ - if( swizzle <= TGSI_SWIZZLE_W ) { - swizzle = tgsi_util_get_src_register_swizzle( - ®->SrcRegister, - component ); - } - - return swizzle; } unsigned diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 1fe5cda956..3074044441 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -214,20 +214,11 @@ src_vector(struct i915_fp_compile *p, return 0; } - if (source->SrcRegister.Extended) { - src = swizzle(src, - source->SrcRegisterExtSwz.ExtSwizzleX, - source->SrcRegisterExtSwz.ExtSwizzleY, - source->SrcRegisterExtSwz.ExtSwizzleZ, - source->SrcRegisterExtSwz.ExtSwizzleW); - } - else { - src = swizzle(src, - source->SrcRegister.SwizzleX, - source->SrcRegister.SwizzleY, - source->SrcRegister.SwizzleZ, - source->SrcRegister.SwizzleW); - } + src = swizzle(src, + source->SrcRegister.SwizzleX, + source->SrcRegister.SwizzleY, + source->SrcRegister.SwizzleZ, + source->SrcRegister.SwizzleW); /* There's both negate-all-components and per-component negation. @@ -681,7 +672,6 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: emit_simple_arith(p, inst, A0_MOV, 1); break; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index 926cc1cc9f..64027de6aa 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -158,14 +158,14 @@ emit_fetch( const unsigned chan_index ) { const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index]; - unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); LLVMValueRef res; switch (swizzle) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: switch (reg->SrcRegister.File) { case TGSI_FILE_CONSTANT: { @@ -198,14 +198,6 @@ emit_fetch( } break; - case TGSI_EXTSWIZZLE_ZERO: - res = bld->base.zero; - break; - - case TGSI_EXTSWIZZLE_ONE: - res = bld->base.one; - break; - default: assert( 0 ); return bld->base.undef; @@ -394,12 +386,7 @@ emit_kil( unsigned swizzle; /* Unswizzle channel */ - swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); - - /* Note that we test if the value is less than zero, so 1.0 and 0.0 need - * not to be tested. */ - if(swizzle == TGSI_EXTSWIZZLE_ZERO || swizzle == TGSI_EXTSWIZZLE_ONE) - continue; + swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); /* Check if the component has not been already tested. */ assert(swizzle < NUM_CHANNELS); @@ -488,7 +475,6 @@ emit_instruction( #endif case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); } diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 62c6c76cee..93cf869f58 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -326,21 +326,13 @@ src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc, uint c; for (c = 0; c < 4; c++) { - switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: + switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: mask |= (1 << c); break; - case TGSI_EXTSWIZZLE_ZERO: - zero_mask |= (1 << c); - tgsi.swz[c] = SWZ_X; - break; - case TGSI_EXTSWIZZLE_ONE: - one_mask |= (1 << c); - tgsi.swz[c] = SWZ_X; - break; default: assert(0); } @@ -357,12 +349,6 @@ src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc, if (mask) arith(fpc, 0, MOV, *src, mask, tgsi, none, none); - if (zero_mask) - arith(fpc, 0, SFL, *src, zero_mask, *src, none, none); - - if (one_mask) - arith(fpc, 0, STR, *src, one_mask, *src, none, none); - if (neg_mask) { struct nv30_sreg one = temp(fpc); arith(fpc, 0, STR, one, neg_mask, one, none, none); diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index e3550baa63..4a6b355936 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -329,21 +329,13 @@ src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc, uint c; for (c = 0; c < 4; c++) { - switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: + switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: mask |= (1 << c); break; - case TGSI_EXTSWIZZLE_ZERO: - zero_mask |= (1 << c); - tgsi.swz[c] = SWZ_X; - break; - case TGSI_EXTSWIZZLE_ONE: - one_mask |= (1 << c); - tgsi.swz[c] = SWZ_X; - break; default: assert(0); } diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 0382dbba8f..4898aaa809 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -370,21 +370,13 @@ src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc, uint c; for (c = 0; c < 4; c++) { - switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: + switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: mask |= tgsi_mask(1 << c); break; - case TGSI_EXTSWIZZLE_ZERO: - zero_mask |= tgsi_mask(1 << c); - tgsi.swz[c] = SWZ_X; - break; - case TGSI_EXTSWIZZLE_ONE: - one_mask |= tgsi_mask(1 << c); - tgsi.swz[c] = SWZ_X; - break; default: assert(0); } diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index c7145bb9be..5c691877e0 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1544,12 +1544,12 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, sgn = tgsi_util_get_full_src_register_sign_mode(src, chan); - c = tgsi_util_get_full_src_register_extswizzle(src, chan); + c = tgsi_util_get_full_src_register_swizzle(src, chan); switch (c) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: switch (src->SrcRegister.File) { case TGSI_FILE_INPUT: r = &pc->attr[src->SrcRegister.Index * 4 + c]; @@ -1586,13 +1586,6 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, break; } break; - case TGSI_EXTSWIZZLE_ZERO: - r = alloc_immd(pc, 0.0); - return r; - case TGSI_EXTSWIZZLE_ONE: - if (sgn == TGSI_UTIL_SIGN_TOGGLE || sgn == TGSI_UTIL_SIGN_SET) - return alloc_immd(pc, -1.0); - return alloc_immd(pc, 1.0); default: assert(0); break; @@ -2005,7 +1998,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, } break; case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; @@ -2189,10 +2181,7 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - k = tgsi_util_get_full_src_register_extswizzle(src, c); - - if (k > TGSI_EXTSWIZZLE_W) - continue; + k = tgsi_util_get_full_src_register_swizzle(src, c); reg[src->SrcRegister.Index * 4 + k].acc = pc->insn_nr; } @@ -2295,11 +2284,10 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, if (!(mask & (1 << chn))) /* src is not read */ continue; - c = tgsi_util_get_full_src_register_extswizzle(fs, chn); + c = tgsi_util_get_full_src_register_swizzle(fs, chn); s = tgsi_util_get_full_src_register_sign_mode(fs, chn); - if (c > TGSI_EXTSWIZZLE_W || - !(fd->DstRegister.WriteMask & (1 << c))) + if (!(fd->DstRegister.WriteMask & (1 << c))) continue; /* no danger if src is copied to TEMP first */ diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 3d2f6cafee..de599b068f 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -142,7 +142,6 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_IFC: return RC_OPCODE_IFC; */ /* case TGSI_OPCODE_BREAKC: return RC_OPCODE_BREAKC; */ case TGSI_OPCODE_KIL: return RC_OPCODE_KIL; - case TGSI_OPCODE_SWZ: return RC_OPCODE_SWZ; } fprintf(stderr, "Unknown opcode: %i\n", opcode); @@ -205,10 +204,10 @@ static void transform_srcreg( dst->File = translate_register_file(src->SrcRegister.File); dst->Index = translate_register_index(ttr, src->SrcRegister.File, src->SrcRegister.Index); dst->RelAddr = src->SrcRegister.Indirect; - dst->Swizzle = tgsi_util_get_full_src_register_extswizzle(src, 0); - dst->Swizzle |= tgsi_util_get_full_src_register_extswizzle(src, 1) << 3; - dst->Swizzle |= tgsi_util_get_full_src_register_extswizzle(src, 2) << 6; - dst->Swizzle |= tgsi_util_get_full_src_register_extswizzle(src, 3) << 9; + dst->Swizzle = tgsi_util_get_full_src_register_swizzle(src, 0); + dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 1) << 3; + dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 2) << 6; + dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 3) << 9; dst->Abs = src->SrcRegisterExtMod.Absolute; dst->Negate = src->SrcRegisterExtSwz.NegateX | diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 48e6583ada..b01df41b0e 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -275,7 +275,7 @@ union tgsi_immediate_data #define TGSI_OPCODE_BREAKC 115 #define TGSI_OPCODE_KIL 116 /* conditional kill */ #define TGSI_OPCODE_END 117 /* aka HALT */ -#define TGSI_OPCODE_SWZ 118 + /* gap */ #define TGSI_OPCODE_LAST 119 #define TGSI_SAT_NONE 0 /* do not saturate */ @@ -496,17 +496,7 @@ struct tgsi_src_register_ext * follows. */ -#define TGSI_EXTSWIZZLE_X TGSI_SWIZZLE_X -#define TGSI_EXTSWIZZLE_Y TGSI_SWIZZLE_Y -#define TGSI_EXTSWIZZLE_Z TGSI_SWIZZLE_Z -#define TGSI_EXTSWIZZLE_W TGSI_SWIZZLE_W -#define TGSI_EXTSWIZZLE_ZERO 4 -#define TGSI_EXTSWIZZLE_ONE 5 - /** - * ExtSwizzleX, ExtSwizzleY, ExtSwizzleZ and ExtSwizzleW swizzle the source - * register in an extended manner. - * * NegateX, NegateY, NegateZ and NegateW negate individual components of the * source register. * @@ -518,10 +508,7 @@ struct tgsi_src_register_ext struct tgsi_src_register_ext_swz { unsigned Type : 4; /* TGSI_SRC_REGISTER_EXT_TYPE_SWZ */ - unsigned ExtSwizzleX : 4; /* TGSI_EXTSWIZZLE_ */ - unsigned ExtSwizzleY : 4; /* TGSI_EXTSWIZZLE_ */ - unsigned ExtSwizzleZ : 4; /* TGSI_EXTSWIZZLE_ */ - unsigned ExtSwizzleW : 4; /* TGSI_EXTSWIZZLE_ */ + unsigned Padding0 : 16; /* unused */ unsigned NegateX : 1; /* BOOL */ unsigned NegateY : 1; /* BOOL */ unsigned NegateZ : 1; /* BOOL */ diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 1b9d35d353..3d6c215819 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -515,8 +515,6 @@ translate_opcode( unsigned op ) return TGSI_OPCODE_SSG; case OPCODE_SUB: return TGSI_OPCODE_SUB; - case OPCODE_SWZ: - return TGSI_OPCODE_SWZ; case OPCODE_TEX: return TGSI_OPCODE_TEX; case OPCODE_TXB: -- cgit v1.2.3 From 8a571b809accce1c36907ea616a893b920b752e5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 23 Oct 2009 14:38:30 +0100 Subject: cell: typo from ExtSwizzle commit --- src/gallium/drivers/cell/spu/spu_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c index 25a7a71133..a62c04e6af 100644 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -28,7 +28,7 @@ tgsi_util_get_src_register_swizzle( unsigned -tgsi_util_get_full_src_register_extswizzle( +tgsi_util_get_full_src_register_swizzle( const struct tgsi_full_src_register *reg, unsigned component ) { -- cgit v1.2.3 From da253319f9e5d37d9c55b975ef9328545a3ac9b4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 23 Oct 2009 14:50:02 +0100 Subject: gallium: remove extended negate also, and also the ExtSwz token Likewise, the extended negate functionality hasn't been used since mesa switched to using tgsi_ureg to build programs, and has been translating the SWZ opcode internally to a single MAD. --- src/gallium/auxiliary/tgsi/tgsi_build.c | 82 --------------------------- src/gallium/auxiliary/tgsi/tgsi_build.h | 18 ------ src/gallium/auxiliary/tgsi/tgsi_dump_c.c | 32 +---------- src/gallium/auxiliary/tgsi/tgsi_parse.c | 5 -- src/gallium/auxiliary/tgsi/tgsi_parse.h | 1 - src/gallium/auxiliary/tgsi/tgsi_ureg.c | 1 - src/gallium/auxiliary/tgsi/tgsi_util.c | 53 +---------------- src/gallium/auxiliary/tgsi/tgsi_util.h | 12 ---- src/gallium/drivers/cell/spu/spu_util.c | 46 --------------- src/gallium/drivers/i915/i915_fpc_translate.c | 13 +---- src/gallium/drivers/nv30/nv30_fragprog.c | 17 +----- src/gallium/drivers/nv40/nv40_fragprog.c | 23 +------- src/gallium/drivers/nv40/nv40_vertprog.c | 23 +------- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 7 +-- src/gallium/include/pipe/p_shader_tokens.h | 24 -------- 15 files changed, 11 insertions(+), 346 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 98d36f43e4..d45561362d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -687,32 +687,6 @@ tgsi_build_full_instruction( header ); prev_token = (struct tgsi_token *) src_register; - if( tgsi_compare_src_register_ext_swz( - reg->SrcRegisterExtSwz, - tgsi_default_src_register_ext_swz() ) ) { - struct tgsi_src_register_ext_swz *src_register_ext_swz; - - /* Use of the extended negate requires the simple negate to be identity. - */ - assert( reg->SrcRegister.Negate == FALSE ); - - if( maxsize <= size ) - return 0; - src_register_ext_swz = - (struct tgsi_src_register_ext_swz *) &tokens[size]; - size++; - - *src_register_ext_swz = tgsi_build_src_register_ext_swz( - reg->SrcRegisterExtSwz.NegateX, - reg->SrcRegisterExtSwz.NegateY, - reg->SrcRegisterExtSwz.NegateZ, - reg->SrcRegisterExtSwz.NegateW, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) src_register_ext_swz; - } - if( tgsi_compare_src_register_ext_mod( reg->SrcRegisterExtMod, tgsi_default_src_register_ext_mod() ) ) { @@ -1025,7 +999,6 @@ tgsi_default_full_src_register( void ) struct tgsi_full_src_register full_src_register; full_src_register.SrcRegister = tgsi_default_src_register(); - full_src_register.SrcRegisterExtSwz = tgsi_default_src_register_ext_swz(); full_src_register.SrcRegisterExtMod = tgsi_default_src_register_ext_mod(); full_src_register.SrcRegisterInd = tgsi_default_src_register(); full_src_register.SrcRegisterDim = tgsi_default_dimension(); @@ -1034,61 +1007,6 @@ tgsi_default_full_src_register( void ) return full_src_register; } -struct tgsi_src_register_ext_swz -tgsi_default_src_register_ext_swz( void ) -{ - struct tgsi_src_register_ext_swz src_register_ext_swz; - - src_register_ext_swz.Type = TGSI_SRC_REGISTER_EXT_TYPE_SWZ; - src_register_ext_swz.Padding0 = 0; - src_register_ext_swz.NegateX = 0; - src_register_ext_swz.NegateY = 0; - src_register_ext_swz.NegateZ = 0; - src_register_ext_swz.NegateW = 0; - src_register_ext_swz.Padding = 0; - src_register_ext_swz.Extended = 0; - - return src_register_ext_swz; -} - -unsigned -tgsi_compare_src_register_ext_swz( - struct tgsi_src_register_ext_swz a, - struct tgsi_src_register_ext_swz b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return compare32(&a, &b); -} - -struct tgsi_src_register_ext_swz -tgsi_build_src_register_ext_swz( - unsigned negate_x, - unsigned negate_y, - unsigned negate_z, - unsigned negate_w, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_src_register_ext_swz src_register_ext_swz; - - assert( negate_x <= 1 ); - assert( negate_y <= 1 ); - assert( negate_z <= 1 ); - assert( negate_w <= 1 ); - - src_register_ext_swz = tgsi_default_src_register_ext_swz(); - src_register_ext_swz.NegateX = negate_x; - src_register_ext_swz.NegateY = negate_y; - src_register_ext_swz.NegateZ = negate_z; - src_register_ext_swz.NegateW = negate_w; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return src_register_ext_swz; -} struct tgsi_src_register_ext_mod tgsi_default_src_register_ext_mod( void ) diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h index 5e0062a96f..9ae1705f6c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -231,24 +231,6 @@ tgsi_build_src_register( struct tgsi_full_src_register tgsi_default_full_src_register( void ); -struct tgsi_src_register_ext_swz -tgsi_default_src_register_ext_swz( void ); - -unsigned -tgsi_compare_src_register_ext_swz( - struct tgsi_src_register_ext_swz a, - struct tgsi_src_register_ext_swz b ); - -struct tgsi_src_register_ext_swz -tgsi_build_src_register_ext_swz( - unsigned negate_x, - unsigned negate_y, - unsigned negate_z, - unsigned negate_w, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - struct tgsi_src_register_ext_mod tgsi_default_src_register_ext_mod( void ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c index 4f59ed22b5..c7dbdb3bd2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -163,7 +163,7 @@ static const char *TGSI_TEXTURES[] = static const char *TGSI_SRC_REGISTER_EXTS[] = { - "SRC_REGISTER_EXT_TYPE_SWZ", + "", "SRC_REGISTER_EXT_TYPE_MOD" }; @@ -546,36 +546,6 @@ dump_instruction_verbose( } } - if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) { - EOL(); - TXT( "\nType : " ); - ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS ); - if( deflt || fs->SrcRegisterExtSwz.NegateX != src->SrcRegisterExtSwz.NegateX ) { - TXT( "\nNegateX : " ); - UID( src->SrcRegisterExtSwz.NegateX ); - } - if( deflt || fs->SrcRegisterExtSwz.NegateY != src->SrcRegisterExtSwz.NegateY ) { - TXT( "\nNegateY : " ); - UID( src->SrcRegisterExtSwz.NegateY ); - } - if( deflt || fs->SrcRegisterExtSwz.NegateZ != src->SrcRegisterExtSwz.NegateZ ) { - TXT( "\nNegateZ : " ); - UID( src->SrcRegisterExtSwz.NegateZ ); - } - if( deflt || fs->SrcRegisterExtSwz.NegateW != src->SrcRegisterExtSwz.NegateW ) { - TXT( "\nNegateW : " ); - UID( src->SrcRegisterExtSwz.NegateW ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( src->SrcRegisterExtSwz.Padding ); - if( deflt || fs->SrcRegisterExtSwz.Extended != src->SrcRegisterExtSwz.Extended ) { - TXT( "\nExtended : " ); - UID( src->SrcRegisterExtSwz.Extended ); - } - } - } - if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) { EOL(); TXT( "\nType : " ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index 4870f82b6b..f742c71936 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -264,11 +264,6 @@ tgsi_parse_token( next_token( ctx, &token ); switch( token.Type ) { - case TGSI_SRC_REGISTER_EXT_TYPE_SWZ: - copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtSwz, - &token); - break; - case TGSI_SRC_REGISTER_EXT_TYPE_MOD: copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtMod, &token); diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h index a26ee5ba86..602131398d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -56,7 +56,6 @@ struct tgsi_full_dst_register struct tgsi_full_src_register { struct tgsi_src_register SrcRegister; - struct tgsi_src_register_ext_swz SrcRegisterExtSwz; struct tgsi_src_register_ext_mod SrcRegisterExtMod; struct tgsi_src_register SrcRegisterInd; struct tgsi_dimension SrcRegisterDim; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 654426a903..8cb574ea43 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -51,7 +51,6 @@ union tgsi_any_token { struct tgsi_instruction_ext_texture insn_ext_texture; struct tgsi_instruction_ext_predicate insn_ext_predicate; struct tgsi_src_register src; - struct tgsi_src_register_ext_swz src_ext_swz; struct tgsi_src_register_ext_mod src_ext_mod; struct tgsi_dimension dim; struct tgsi_dst_register dst; diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index 6120a2da94..4dee1be9e8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -104,50 +104,6 @@ tgsi_util_set_src_register_swizzle( } } -unsigned -tgsi_util_get_src_register_extnegate( - const struct tgsi_src_register_ext_swz *reg, - unsigned component ) -{ - switch( component ) { - case 0: - return reg->NegateX; - case 1: - return reg->NegateY; - case 2: - return reg->NegateZ; - case 3: - return reg->NegateW; - default: - assert( 0 ); - } - return 0; -} - -void -tgsi_util_set_src_register_extnegate( - struct tgsi_src_register_ext_swz *reg, - unsigned negate, - unsigned component ) -{ - switch( component ) { - case 0: - reg->NegateX = negate; - break; - case 1: - reg->NegateY = negate; - break; - case 2: - reg->NegateZ = negate; - break; - case 3: - reg->NegateW = negate; - break; - default: - assert( 0 ); - } -} - unsigned tgsi_util_get_full_src_register_sign_mode( const struct tgsi_full_src_register *reg, @@ -171,9 +127,7 @@ tgsi_util_get_full_src_register_sign_mode( unsigned negate; negate = reg->SrcRegister.Negate; - if( tgsi_util_get_src_register_extnegate( ®->SrcRegisterExtSwz, component ) ) { - negate = !negate; - } + if( reg->SrcRegisterExtMod.Negate ) { negate = !negate; } @@ -194,11 +148,6 @@ tgsi_util_set_full_src_register_sign_mode( struct tgsi_full_src_register *reg, unsigned sign_mode ) { - reg->SrcRegisterExtSwz.NegateX = 0; - reg->SrcRegisterExtSwz.NegateY = 0; - reg->SrcRegisterExtSwz.NegateZ = 0; - reg->SrcRegisterExtSwz.NegateW = 0; - switch (sign_mode) { case TGSI_UTIL_SIGN_CLEAR: diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h index bf3f20ca6c..19ee2e7cf2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.h +++ b/src/gallium/auxiliary/tgsi/tgsi_util.h @@ -33,7 +33,6 @@ extern "C" { #endif struct tgsi_src_register; -struct tgsi_src_register_ext_swz; struct tgsi_full_src_register; void * @@ -57,17 +56,6 @@ tgsi_util_set_src_register_swizzle( unsigned swizzle, unsigned component ); -unsigned -tgsi_util_get_src_register_extnegate( - const struct tgsi_src_register_ext_swz *reg, - unsigned component ); - -void -tgsi_util_set_src_register_extnegate( - struct tgsi_src_register_ext_swz *reg, - unsigned negate, - unsigned component ); - #define TGSI_UTIL_SIGN_CLEAR 0 /* Force positive */ #define TGSI_UTIL_SIGN_SET 1 /* Force negative */ #define TGSI_UTIL_SIGN_TOGGLE 2 /* Negate */ diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c index a62c04e6af..c2c32b22d5 100644 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -37,49 +37,6 @@ tgsi_util_get_full_src_register_swizzle( component ); } -unsigned -tgsi_util_get_src_register_extnegate( - const struct tgsi_src_register_ext_swz *reg, - unsigned component ) -{ - switch( component ) { - case 0: - return reg->NegateX; - case 1: - return reg->NegateY; - case 2: - return reg->NegateZ; - case 3: - return reg->NegateW; - default: - ASSERT( 0 ); - } - return 0; -} - -void -tgsi_util_set_src_register_extnegate( - struct tgsi_src_register_ext_swz *reg, - unsigned negate, - unsigned component ) -{ - switch( component ) { - case 0: - reg->NegateX = negate; - break; - case 1: - reg->NegateY = negate; - break; - case 2: - reg->NegateZ = negate; - break; - case 3: - reg->NegateW = negate; - break; - default: - ASSERT( 0 ); - } -} unsigned tgsi_util_get_full_src_register_sign_mode( @@ -104,9 +61,6 @@ tgsi_util_get_full_src_register_sign_mode( unsigned negate; negate = reg->SrcRegister.Negate; - if( tgsi_util_get_src_register_extnegate( ®->SrcRegisterExtSwz, component ) ) { - negate = !negate; - } if( reg->SrcRegisterExtMod.Negate ) { negate = !negate; } diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 3074044441..379d47e79a 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -225,17 +225,8 @@ src_vector(struct i915_fp_compile *p, * Try to handle both here. */ { - int nx = source->SrcRegisterExtSwz.NegateX; - int ny = source->SrcRegisterExtSwz.NegateY; - int nz = source->SrcRegisterExtSwz.NegateZ; - int nw = source->SrcRegisterExtSwz.NegateW; - if (source->SrcRegister.Negate) { - nx = !nx; - ny = !ny; - nz = !nz; - nw = !nw; - } - src = negate(src, nx, ny, nz, nw); + int n = source->SrcRegister.Negate; + src = negate(src, n, n, n, n); } /* no abs() or post-abs negation */ diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 93cf869f58..cc0385426c 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -318,11 +318,7 @@ src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc, { const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); struct nv30_sreg tgsi = tgsi_src(fpc, fsrc); - uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; - uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, - fsrc->SrcRegisterExtSwz.NegateY, - fsrc->SrcRegisterExtSwz.NegateZ, - fsrc->SrcRegisterExtSwz.NegateW }; + uint mask = 0; uint c; for (c = 0; c < 4; c++) { @@ -336,12 +332,9 @@ src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc, default: assert(0); } - - if (!tgsi.negate && neg[c]) - neg_mask |= (1 << c); } - if (mask == MASK_ALL && !neg_mask) + if (mask == MASK_ALL) return TRUE; *src = temp(fpc); @@ -349,12 +342,6 @@ src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc, if (mask) arith(fpc, 0, MOV, *src, mask, tgsi, none, none); - if (neg_mask) { - struct nv30_sreg one = temp(fpc); - arith(fpc, 0, STR, one, neg_mask, one, none, none); - arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none); - } - return FALSE; } diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 4a6b355936..99277506fc 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -321,11 +321,7 @@ src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc, { const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); struct nv40_sreg tgsi = tgsi_src(fpc, fsrc); - uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; - uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, - fsrc->SrcRegisterExtSwz.NegateY, - fsrc->SrcRegisterExtSwz.NegateZ, - fsrc->SrcRegisterExtSwz.NegateW }; + uint mask = 0; uint c; for (c = 0; c < 4; c++) { @@ -339,12 +335,9 @@ src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc, default: assert(0); } - - if (!tgsi.negate && neg[c]) - neg_mask |= (1 << c); } - if (mask == MASK_ALL && !neg_mask) + if (mask == MASK_ALL) return TRUE; *src = temp(fpc); @@ -352,18 +345,6 @@ src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc, if (mask) arith(fpc, 0, MOV, *src, mask, tgsi, none, none); - if (zero_mask) - arith(fpc, 0, SFL, *src, zero_mask, *src, none, none); - - if (one_mask) - arith(fpc, 0, STR, *src, one_mask, *src, none, none); - - if (neg_mask) { - struct nv40_sreg one = temp(fpc); - arith(fpc, 0, STR, one, neg_mask, one, none, none); - arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none); - } - return FALSE; } diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 4898aaa809..31dae2457f 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -362,11 +362,7 @@ src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc, { const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); struct nv40_sreg tgsi = tgsi_src(vpc, fsrc); - uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; - uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, - fsrc->SrcRegisterExtSwz.NegateY, - fsrc->SrcRegisterExtSwz.NegateZ, - fsrc->SrcRegisterExtSwz.NegateW }; + uint mask = 0; uint c; for (c = 0; c < 4; c++) { @@ -380,12 +376,9 @@ src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc, default: assert(0); } - - if (!tgsi.negate && neg[c]) - neg_mask |= tgsi_mask(1 << c); } - if (mask == MASK_ALL && !neg_mask) + if (mask == MASK_ALL) return TRUE; *src = temp(vpc); @@ -393,18 +386,6 @@ src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc, if (mask) arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none); - if (zero_mask) - arith(vpc, 0, OP_SFL, *src, zero_mask, *src, none, none); - - if (one_mask) - arith(vpc, 0, OP_STR, *src, one_mask, *src, none, none); - - if (neg_mask) { - struct nv40_sreg one = temp(vpc); - arith(vpc, 0, OP_STR, one, neg_mask, one, none, none); - arith(vpc, 0, OP_MUL, *src, neg_mask, *src, neg(one), none); - } - return FALSE; } diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index de599b068f..589f1984ee 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -209,12 +209,7 @@ static void transform_srcreg( dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 2) << 6; dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 3) << 9; dst->Abs = src->SrcRegisterExtMod.Absolute; - dst->Negate = - src->SrcRegisterExtSwz.NegateX | - (src->SrcRegisterExtSwz.NegateY << 1) | - (src->SrcRegisterExtSwz.NegateZ << 2) | - (src->SrcRegisterExtSwz.NegateW << 3); - dst->Negate ^= src->SrcRegister.Negate ? RC_MASK_XYZW : 0; + dst->Negate = src->SrcRegister.Negate ? RC_MASK_XYZW : 0; } static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_ext_texture src) diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index b01df41b0e..de338c4877 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -475,7 +475,6 @@ struct tgsi_src_register * Then, if tgsi_src_register::Dimension is TRUE, tgsi_dimension follows. */ -#define TGSI_SRC_REGISTER_EXT_TYPE_SWZ 0 #define TGSI_SRC_REGISTER_EXT_TYPE_MOD 1 struct tgsi_src_register_ext @@ -486,9 +485,6 @@ struct tgsi_src_register_ext }; /** - * If tgsi_src_register_ext::Type is TGSI_SRC_REGISTER_EXT_TYPE_SWZ, - * it should be cast to tgsi_src_register_ext_swz. - * * If tgsi_src_register_ext::Type is TGSI_SRC_REGISTER_EXT_TYPE_MOD, * it should be cast to tgsi_src_register_ext_mod. * @@ -496,26 +492,6 @@ struct tgsi_src_register_ext * follows. */ -/** - * NegateX, NegateY, NegateZ and NegateW negate individual components of the - * source register. - * - * NOTE: To simplify matter, if this token is present, the corresponding Swizzle - * and Negate fields in tgsi_src_register should be set to X,Y,Z,W - * and FALSE, respectively. - */ - -struct tgsi_src_register_ext_swz -{ - unsigned Type : 4; /* TGSI_SRC_REGISTER_EXT_TYPE_SWZ */ - unsigned Padding0 : 16; /* unused */ - unsigned NegateX : 1; /* BOOL */ - unsigned NegateY : 1; /* BOOL */ - unsigned NegateZ : 1; /* BOOL */ - unsigned NegateW : 1; /* BOOL */ - unsigned Padding : 7; - unsigned Extended : 1; /* BOOL */ -}; /** * Extra src register modifiers -- cgit v1.2.3 From c84a05676497ff7263f3ea8203b868071c4f678f Mon Sep 17 00:00:00 2001 From: Patrice Mandin Date: Fri, 23 Oct 2009 18:40:13 +0200 Subject: nouveau: nv30: use r5g6b5 as z16 format --- src/gallium/drivers/nv30/nv30_fragtex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index f5f17d4071..3dd636f4ee 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -29,7 +29,7 @@ nv30_texture_formats[] = { _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X), _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X), _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y), -// _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X), + _(Z16_UNORM , R5G6B5 , S1, S1, S1, ONE, X, X, X, X), _(Z24S8_UNORM , A8R8G8B8, S1, S1, S1, ONE, X, X, X, X), _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W), _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W), -- cgit v1.2.3 From d9014a13e72b6682a959217d38050f3252628edb Mon Sep 17 00:00:00 2001 From: Patrice Mandin Date: Fri, 23 Oct 2009 18:42:21 +0200 Subject: nouveau: nv30: Relax some limits. We can render to z24s8 buffer even if color buffer is 16 bits. --- src/gallium/drivers/nv30/nv30_screen.c | 10 ++++++++-- src/gallium/drivers/nv30/nv30_state_fb.c | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index bb40e1803d..221ae1b5f8 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -102,13 +102,19 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen, struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front; if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { - return (format == front->format); + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + return TRUE; + default: + break; + } } else if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { switch (format) { case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_Z24X8_UNORM: - return (front->format == PIPE_FORMAT_A8R8G8B8_UNORM); + return TRUE; case PIPE_FORMAT_Z16_UNORM: return (front->format == PIPE_FORMAT_R5G6B5_UNORM); default: diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c index f90681b0f9..4d6a67e56d 100644 --- a/src/gallium/drivers/nv30/nv30_state_fb.c +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -92,7 +92,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) assert(0); } - if (colour_bits != zeta_bits) { + if (colour_bits > zeta_bits) { return FALSE; } -- cgit v1.2.3 From 255a90a7bd829904554889dd19a16d86fc7f9274 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 23 Oct 2009 20:05:31 +0200 Subject: nv50: add depth texture formats, and a few others, too --- src/gallium/drivers/nv50/nv50_screen.c | 14 +++++++++ src/gallium/drivers/nv50/nv50_state_validate.c | 39 +++++++++++------------- src/gallium/drivers/nv50/nv50_tex.c | 42 +++++++++++++++++++------- src/gallium/drivers/nv50/nv50_texture.h | 13 ++++++++ 4 files changed, 76 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 63dce0f4c2..c672ea471a 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -38,6 +38,11 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, case PIPE_FORMAT_X8R8G8B8_UNORM: case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R16G16_UNORM: return TRUE; default: break; @@ -57,6 +62,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_A8R8G8B8_SRGB: + case PIPE_FORMAT_X8R8G8B8_SRGB: case PIPE_FORMAT_A1R5G5B5_UNORM: case PIPE_FORMAT_A4R4G4B4_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: @@ -68,6 +75,13 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, case PIPE_FORMAT_DXT1_RGBA: case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R16G16_UNORM: return TRUE; default: break; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 012911f41b..956a700615 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -23,6 +23,12 @@ #include "nv50_context.h" #include "nouveau/nouveau_stateobj.h" +#define NV50_CBUF_FORMAT_CASE(n) \ + case PIPE_FORMAT_##n: so_data(so, NV50TCL_RT_FORMAT_##n); break + +#define NV50_ZETA_FORMAT_CASE(n) \ + case PIPE_FORMAT_##n: so_data(so, NV50TCL_ZETA_FORMAT_##n); break + static void nv50_state_validate_fb(struct nv50_context *nv50) { @@ -54,15 +60,14 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_reloc (so, bo, fb->cbufs[i]->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0); switch (fb->cbufs[i]->format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - so_data(so, NV50TCL_RT_FORMAT_A8R8G8B8_UNORM); - break; - case PIPE_FORMAT_X8R8G8B8_UNORM: - so_data(so, NV50TCL_RT_FORMAT_X8R8G8B8_UNORM); - break; - case PIPE_FORMAT_R5G6B5_UNORM: - so_data(so, NV50TCL_RT_FORMAT_R5G6B5_UNORM); - break; + NV50_CBUF_FORMAT_CASE(A8R8G8B8_UNORM); + NV50_CBUF_FORMAT_CASE(X8R8G8B8_UNORM); + NV50_CBUF_FORMAT_CASE(R5G6B5_UNORM); + NV50_CBUF_FORMAT_CASE(R16G16B16A16_SNORM); + NV50_CBUF_FORMAT_CASE(R16G16B16A16_UNORM); + NV50_CBUF_FORMAT_CASE(R32G32B32A32_FLOAT); + NV50_CBUF_FORMAT_CASE(R16G16_SNORM); + NV50_CBUF_FORMAT_CASE(R16G16_UNORM); default: NOUVEAU_ERR("AIIII unknown format %s\n", pf_name(fb->cbufs[i]->format)); @@ -96,18 +101,10 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_reloc (so, bo, fb->zsbuf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0); switch (fb->zsbuf->format) { - case PIPE_FORMAT_Z32_FLOAT: - so_data(so, NV50TCL_ZETA_FORMAT_Z32_FLOAT); - break; - case PIPE_FORMAT_Z24S8_UNORM: - so_data(so, NV50TCL_ZETA_FORMAT_Z24S8_UNORM); - break; - case PIPE_FORMAT_X8Z24_UNORM: - so_data(so, NV50TCL_ZETA_FORMAT_X8Z24_UNORM); - break; - case PIPE_FORMAT_S8Z24_UNORM: - so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM); - break; + NV50_ZETA_FORMAT_CASE(S8Z24_UNORM); + NV50_ZETA_FORMAT_CASE(X8Z24_UNORM); + NV50_ZETA_FORMAT_CASE(Z24S8_UNORM); + NV50_ZETA_FORMAT_CASE(Z32_FLOAT); default: NOUVEAU_ERR("AIIII unknown format %s\n", pf_name(fb->zsbuf->format)); diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index e12a6ad648..52ccdaa407 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -25,16 +25,18 @@ #include "nouveau/nouveau_stateobj.h" -#define _(pf, tt, r, g, b, a, tf) \ +#define _MIXED(pf, t0, t1, t2, t3, cr, cg, cb, ca, f) \ { \ PIPE_FORMAT_##pf, \ - NV50TIC_0_0_MAPR_##r | NV50TIC_0_0_TYPER_##tt | \ - NV50TIC_0_0_MAPG_##g | NV50TIC_0_0_TYPEG_##tt | \ - NV50TIC_0_0_MAPB_##b | NV50TIC_0_0_TYPEB_##tt | \ - NV50TIC_0_0_MAPA_##a | NV50TIC_0_0_TYPEA_##tt | \ - NV50TIC_0_0_FMT_##tf \ + NV50TIC_0_0_MAPR_##cr | NV50TIC_0_0_TYPER_##t0 | \ + NV50TIC_0_0_MAPG_##cg | NV50TIC_0_0_TYPEG_##t1 | \ + NV50TIC_0_0_MAPB_##cb | NV50TIC_0_0_TYPEB_##t2 | \ + NV50TIC_0_0_MAPA_##ca | NV50TIC_0_0_TYPEA_##t3 | \ + NV50TIC_0_0_FMT_##f \ } +#define _(pf, t, cr, cg, cb, ca, f) _MIXED(pf, t, t, t, t, cr, cg, cb, ca, f) + struct nv50_texture_format { enum pipe_format pf; uint32_t hw; @@ -46,7 +48,9 @@ struct nv50_texture_format { static const struct nv50_texture_format nv50_tex_format_list[] = { _(A8R8G8B8_UNORM, UNORM, C2, C1, C0, C3, 8_8_8_8), + _(A8R8G8B8_SRGB, UNORM, C2, C1, C0, C3, 8_8_8_8), _(X8R8G8B8_UNORM, UNORM, C2, C1, C0, ONE, 8_8_8_8), + _(X8R8G8B8_SRGB, UNORM, C2, C1, C0, ONE, 8_8_8_8), _(A1R5G5B5_UNORM, UNORM, C2, C1, C0, C3, 1_5_5_5), _(A4R4G4B4_UNORM, UNORM, C2, C1, C0, C3, 4_4_4_4), @@ -61,16 +65,30 @@ static const struct nv50_texture_format nv50_tex_format_list[] = _(DXT1_RGB, UNORM, C0, C1, C2, ONE, DXT1), _(DXT1_RGBA, UNORM, C0, C1, C2, C3, DXT1), _(DXT3_RGBA, UNORM, C0, C1, C2, C3, DXT3), - _(DXT5_RGBA, UNORM, C0, C1, C2, C3, DXT5) + _(DXT5_RGBA, UNORM, C0, C1, C2, C3, DXT5), + + _MIXED(Z24S8_UNORM, UINT, UNORM, UINT, UINT, C1, C1, C1, ONE, 24_8), + + _(R16G16B16A16_SNORM, UNORM, C0, C1, C2, C3, 16_16_16_16), + _(R16G16B16A16_UNORM, SNORM, C0, C1, C2, C3, 16_16_16_16), + _(R32G32B32A32_FLOAT, FLOAT, C0, C1, C2, C3, 32_32_32_32), + + _(R16G16_SNORM, SNORM, C0, C1, ZERO, ONE, 16_16), + _(R16G16_UNORM, UNORM, C0, C1, ZERO, ONE, 16_16), + + _MIXED(Z32_FLOAT, FLOAT, UINT, UINT, UINT, C0, C0, C0, ONE, 32_DEPTH) + }; #undef _ +#undef _MIXED static int nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so, struct nv50_miptree *mt, int unit) { unsigned i; + uint32_t mode; for (i = 0; i < NV50_TEX_FORMAT_LIST_SIZE; i++) if (nv50_tex_format_list[i].pf == mt->base.base.format) @@ -78,13 +96,15 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so, if (i == NV50_TEX_FORMAT_LIST_SIZE) return 1; + mode = (nv50->sampler[unit]->normalized ? 0xd0005000 : 0x5001d000) | + (mt->base.bo->tile_mode << 22); + if (pf_type(mt->base.base.format) == PIPE_FORMAT_TYPE_SRGB) + mode |= 0x0400; + so_data (so, nv50_tex_format_list[i].hw); so_reloc(so, mt->base.bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0); - if (nv50->sampler[unit]->normalized) - so_data (so, 0xd0005000 | mt->base.bo->tile_mode << 22); - else - so_data (so, 0x5001d000 | mt->base.bo->tile_mode << 22); + so_data (so, mode); so_data (so, 0x00300000); so_data (so, mt->base.base.width[0]); so_data (so, (mt->base.base.last_level << 28) | diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h index 13f74c11c6..d531e61132 100644 --- a/src/gallium/drivers/nv50/nv50_texture.h +++ b/src/gallium/drivers/nv50/nv50_texture.h @@ -38,18 +38,26 @@ #define NV50TIC_0_0_TYPEA_MASK 0x00038000 #define NV50TIC_0_0_TYPEA_UNORM 0x00010000 #define NV50TIC_0_0_TYPEA_SNORM 0x00008000 +#define NV50TIC_0_0_TYPEA_SINT 0x00018000 +#define NV50TIC_0_0_TYPEA_UINT 0x00020000 #define NV50TIC_0_0_TYPEA_FLOAT 0x00038000 #define NV50TIC_0_0_TYPEB_MASK 0x00007000 #define NV50TIC_0_0_TYPEB_UNORM 0x00002000 #define NV50TIC_0_0_TYPEB_SNORM 0x00001000 +#define NV50TIC_0_0_TYPEB_SINT 0x00003000 +#define NV50TIC_0_0_TYPEB_UINT 0x00004000 #define NV50TIC_0_0_TYPEB_FLOAT 0x00007000 #define NV50TIC_0_0_TYPEG_MASK 0x00000e00 #define NV50TIC_0_0_TYPEG_UNORM 0x00000400 #define NV50TIC_0_0_TYPEG_SNORM 0x00000200 +#define NV50TIC_0_0_TYPEG_SINT 0x00000600 +#define NV50TIC_0_0_TYPEG_UINT 0x00000800 #define NV50TIC_0_0_TYPEG_FLOAT 0x00000e00 #define NV50TIC_0_0_TYPER_MASK 0x000001c0 #define NV50TIC_0_0_TYPER_UNORM 0x00000080 #define NV50TIC_0_0_TYPER_SNORM 0x00000040 +#define NV50TIC_0_0_TYPER_SINT 0x000000c0 +#define NV50TIC_0_0_TYPER_UINT 0x00000100 #define NV50TIC_0_0_TYPER_FLOAT 0x000001c0 #define NV50TIC_0_0_FMT_MASK 0x0000003f #define NV50TIC_0_0_FMT_32_32_32_32 0x00000001 @@ -57,6 +65,7 @@ #define NV50TIC_0_0_FMT_32_32 0x00000004 #define NV50TIC_0_0_FMT_8_8_8_8 0x00000008 #define NV50TIC_0_0_FMT_2_10_10_10 0x00000009 +#define NV50TIC_0_0_FMT_16_16 0x0000000c #define NV50TIC_0_0_FMT_32 0x0000000f #define NV50TIC_0_0_FMT_4_4_4_4 0x00000012 /* #define NV50TIC_0_0_FMT_1_5_5_5 0x00000013 */ @@ -65,12 +74,16 @@ #define NV50TIC_0_0_FMT_8_8 0x00000018 #define NV50TIC_0_0_FMT_16 0x0000001b #define NV50TIC_0_0_FMT_8 0x0000001d +#define NV50TIC_0_0_FMT_5_9_9_9 0x00000020 #define NV50TIC_0_0_FMT_10_11_11 0x00000021 #define NV50TIC_0_0_FMT_DXT1 0x00000024 #define NV50TIC_0_0_FMT_DXT3 0x00000025 #define NV50TIC_0_0_FMT_DXT5 0x00000026 #define NV50TIC_0_0_FMT_RGTC1 0x00000027 #define NV50TIC_0_0_FMT_RGTC2 0x00000028 +#define NV50TIC_0_0_FMT_24_8 0x00000029 +#define NV50TIC_0_0_FMT_32_DEPTH 0x0000002f +#define NV50TIC_0_0_FMT_32_8 0x00000030 #define NV50TIC_0_1_OFFSET_LOW_MASK 0xffffffff #define NV50TIC_0_1_OFFSET_LOW_SHIFT 0 -- cgit v1.2.3 From c738c9ab67859f3d4412417333d0f023dd18dc19 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 23 Oct 2009 22:17:44 +0200 Subject: nv50: fix address reg code Contained some rather obvious thinking errors before, and didn't consider offsets from TGSI ADDRESS regs. --- src/gallium/drivers/nv50/nv50_program.c | 67 ++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 23 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 5c691877e0..ff6ff578f4 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -32,9 +32,11 @@ #include "nv50_context.h" #define NV50_SU_MAX_TEMP 64 -#define NV50_SU_MAX_ADDR 7 +#define NV50_SU_MAX_ADDR 4 //#define NV50_PROGRAM_DUMP +/* $a5 and $a6 always seem to be 0, and using $a7 gives you noise */ + /* ARL - gallium craps itself on progs/vp/arl.txt * * MSB - Like MAD, but MUL+SUB @@ -470,16 +472,28 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) e->inst[1] |= (val >> 6) << 2; } +static INLINE void +set_addr(struct nv50_program_exec *e, struct nv50_reg *a) +{ + assert(!(e->inst[0] & 0x0c000000)); + assert(!(e->inst[1] & 0x00000004)); + + e->inst[0] |= (a->hw & 3) << 26; + e->inst[1] |= (a->hw >> 2) << 2; +} + static void -emit_set_addr(struct nv50_pc *pc, struct nv50_reg *dst, unsigned val) +emit_add_addr_imm(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, uint16_t src1_val) { struct nv50_program_exec *e = exec(pc); - assert(val <= 0xffff); - e->inst[0] = 0xd0000000 | ((val & 0xffff) << 9); + e->inst[0] = 0xd0000000 | (src1_val << 9); e->inst[1] = 0x20000000; - e->inst[0] |= dst->hw << 2; set_long(pc, e); + e->inst[0] |= dst->hw << 2; + if (src0) /* otherwise will add to $a0, which is always 0 */ + set_addr(e, src0); emit(pc, e); } @@ -488,9 +502,10 @@ static struct nv50_reg * alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref) { int i; - struct nv50_reg *a = NULL; + struct nv50_reg *a_tgsi = NULL, *a = NULL; if (!ref) { + /* allocate for TGSI address reg */ for (i = 0; i < NV50_SU_MAX_ADDR; ++i) { if (pc->r_addr[i].index >= 0) continue; @@ -506,6 +521,13 @@ alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref) return NULL; } + /* Allocate and set an address reg so we can access 'ref'. + * + * If and r_addr has index < 0, it is not reserved for TGSI, + * and index will be the negative of the TGSI addr index the + * value in rhw is relative to, or -256 if rhw is an offset + * from 0. If rhw < 0, the reg has not been initialized. + */ for (i = NV50_SU_MAX_ADDR - 1; i >= 0; --i) { if (pc->r_addr[i].index >= 0) /* occupied for TGSI */ continue; @@ -516,17 +538,25 @@ alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref) if (!a && pc->r_addr[i].acc != pc->insn_cur) a = &pc->r_addr[i]; - if (ref->hw - pc->r_addr[i].rhw < 128) { - /* alloc'd & suitable */ + if (ref->hw - pc->r_addr[i].rhw >= 128) + continue; + + if ((ref->acc >= 0 && pc->r_addr[i].index == -256) || + (ref->acc < 0 && -pc->r_addr[i].index == ref->index)) { pc->r_addr[i].acc = pc->insn_cur; return &pc->r_addr[i]; } } assert(a); - emit_set_addr(pc, a, ref->hw * 4); - a->rhw = ref->hw % 128; + if (ref->acc < 0) + a_tgsi = pc->addr[ref->index]; + + emit_add_addr_imm(pc, a, a_tgsi, (ref->hw & ~0x7f) * 4); + + a->rhw = ref->hw & ~0x7f; a->acc = pc->insn_cur; + a->index = a_tgsi ? -ref->index : -256; return a; } @@ -563,23 +593,13 @@ emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *iv, emit(pc, e); } -static INLINE void -set_addr(struct nv50_program_exec *e, struct nv50_reg *a) -{ - assert(!(e->inst[0] & 0x0c000000)); - assert(!(e->inst[1] & 0x00000004)); - - e->inst[0] |= (a->hw & 3) << 26; - e->inst[1] |= (a->hw >> 2) << 2; -} - static void set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, struct nv50_program_exec *e) { set_long(pc, e); - e->param.index = src->hw; + e->param.index = src->hw & 127; e->param.shift = s; e->param.mask = m << (s % 32); @@ -1569,7 +1589,8 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, swz = tgsi_util_get_src_register_swizzle( &src->SrcRegisterInd, 0); ctor_reg(r, P_CONST, - src->SrcRegisterInd.Index * 4 + swz, c); + src->SrcRegisterInd.Index * 4 + swz, + src->SrcRegister.Index * 4 + c); r->acc = -1; break; case TGSI_FILE_IMMEDIATE: @@ -2743,7 +2764,7 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) return FALSE; } for (i = 0; i < NV50_SU_MAX_ADDR; ++i) - ctor_reg(&pc->r_addr[i], P_ADDR, -1, i + 1); + ctor_reg(&pc->r_addr[i], P_ADDR, -256, i + 1); return TRUE; } -- cgit v1.2.3 From ad67326f12c0d6298cffc0fc4e421ddc02b3cb07 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 23 Oct 2009 21:38:37 +0200 Subject: nv50: allow all 127 TEMP regs We should really learn to not waste so many though. --- src/gallium/drivers/nv50/nv50_program.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index ff6ff578f4..dd7634c58a 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -31,7 +31,7 @@ #include "nv50_context.h" -#define NV50_SU_MAX_TEMP 64 +#define NV50_SU_MAX_TEMP 127 #define NV50_SU_MAX_ADDR 4 //#define NV50_PROGRAM_DUMP @@ -452,6 +452,8 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e) } alloc_reg(pc, dst); + if (dst->hw > 63) + set_long(pc, e); e->inst[0] |= (dst->hw << 2); } @@ -642,6 +644,8 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) } alloc_reg(pc, src); + if (src->hw > 63) + set_long(pc, e); e->inst[0] |= (src->hw << 9); } @@ -701,6 +705,8 @@ set_src_0_restricted(struct nv50_pc *pc, struct nv50_reg *src, } alloc_reg(pc, src); + if (src->hw > 63) + set_long(pc, e); e->inst[0] |= (src->hw << 9); } @@ -719,6 +725,8 @@ set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) } alloc_reg(pc, src); + if (src->hw > 63) + set_long(pc, e); e->inst[0] |= (src->hw << 9); } @@ -745,6 +753,8 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) } alloc_reg(pc, src); + if (src->hw > 63) + set_long(pc, e); e->inst[0] |= ((src->hw & 127) << 16); } @@ -813,11 +823,12 @@ emit_add(struct nv50_pc *pc, struct nv50_reg *dst, { struct nv50_program_exec *e = exec(pc); - e->inst[0] |= 0xb0000000; + e->inst[0] = 0xb0000000; + alloc_reg(pc, src1); check_swap_src_0_1(pc, &src0, &src1); - if (!pc->allow32 || src0->neg || src1->neg) { + if (!pc->allow32 || (src0->neg | src1->neg) || src1->hw > 63) { set_long(pc, e); e->inst[1] |= (src0->neg << 26) | (src1->neg << 27); } @@ -873,6 +884,7 @@ static INLINE void emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) { + assert(src0 != src1); src1->neg ^= 1; emit_add(pc, dst, src0, src1); src1->neg ^= 1; @@ -904,6 +916,7 @@ static INLINE void emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) { + assert(src2 != src0 && src2 != src1); src2->neg ^= 1; emit_mad(pc, dst, src0, src1, src2); src2->neg ^= 1; -- cgit v1.2.3 From 99e728a13ea8518efc7e27242093b43470f102d6 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 23 Oct 2009 21:57:42 +0200 Subject: nv50: fix saturation outside of tx_insn case --- src/gallium/drivers/nv50/nv50_program.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index dd7634c58a..3f834b5736 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1821,7 +1821,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!(mask & (1 << c)) || dst[c]->type == P_TEMP) continue; - rdst[c] = dst[c]; + /* rdst[c] = dst[c]; */ /* done above */ dst[c] = temp_temp(pc); } } @@ -2150,8 +2150,10 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - /* in this case we saturate later */ - if (dst[c]->type == P_TEMP && dst[c]->index < 0) + /* In this case we saturate later, and dst[c] won't + * be another temp_temp (and thus lost), since rdst + * already is TEMP (see above). */ + if (rdst[c]->type == P_TEMP && rdst[c]->index < 0) continue; emit_sat(pc, rdst[c], dst[c]); } -- cgit v1.2.3 From 683722740c85fb6b8c0a930e8a4dce51e1709464 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 23 Oct 2009 22:00:06 +0200 Subject: nv50: do SIGN_SET as one instruction --- src/gallium/drivers/nv50/nv50_program.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 3f834b5736..9ccc4f5a16 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -986,7 +986,6 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) /* 0x80 == src is float */ #define CVT_F32_F32 0xc4 #define CVT_F32_S32 0x44 -#define CVT_F32_U32 0x64 #define CVT_S32_F32 0x8c #define CVT_S32_S32 0x0c #define CVT_NEG 0x20 @@ -1644,11 +1643,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, break; case TGSI_UTIL_SIGN_SET: temp = temp_temp(pc); - emit_abs(pc, temp, r); - if (neg) - temp->neg = 1; - else - emit_neg(pc, temp, temp); + emit_cvt(pc, temp, r, -1, CVTOP_ABS, CVT_F32_F32 | CVT_NEG); r = temp; break; default: -- cgit v1.2.3 From 94a63dccdd79268cf37587c93e3dec0d02dad457 Mon Sep 17 00:00:00 2001 From: Joakim Sindholt Date: Sat, 24 Oct 2009 02:38:28 +0200 Subject: r300g: fix scons build yet again --- src/gallium/drivers/r300/SConscript | 3 --- src/gallium/drivers/r300/r300_render.c | 3 ++- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index b4c8ba2015..97989040d2 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -9,8 +9,6 @@ env.Append(CPPPATH = ['#/src/mesa/drivers/dri/r300/compiler', '#/include', '#/sr r300 = env.ConvenienceLibrary( target = 'r300', source = [ - 'r3xx_fs.c', - 'r5xx_fs.c', 'r300_chipset.c', 'r300_clear.c', 'r300_context.c', @@ -25,7 +23,6 @@ r300 = env.ConvenienceLibrary( 'r300_state_derived.c', 'r300_state_invariant.c', 'r300_vs.c', - 'r300_surface.c', 'r300_texture.c', 'r300_tgsi_to_rc.c', ] + r300compiler) + r300compiler diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 6e2bcc62da..6f392402bd 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -76,7 +76,6 @@ boolean r300_draw_range_elements(struct pipe_context* pipe, unsigned count) { struct r300_context* r300 = r300_context(pipe); - CS_LOCALS(r300); uint32_t prim = r300_translate_primitive(mode); struct pipe_vertex_buffer* aos = r300->vertex_buffers; unsigned aos_count = r300->vertex_buffer_count; @@ -84,6 +83,8 @@ boolean r300_draw_range_elements(struct pipe_context* pipe, unsigned packet_size; unsigned i; bool invalid = FALSE; + + CS_LOCALS(r300); validate: for (i = 0; i < aos_count; i++) { -- cgit v1.2.3 From 118dfe16887d1ec4d3b96d49b76fffa0d2924132 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 25 Oct 2009 05:05:27 +0100 Subject: r300g: added support for 3D textures Mipmaps not tested. Also, I am not sure why piglit/texturing/tex3d needs to have color tolerance +-1 to pass. The classic Mesa driver doesn't need that. --- src/gallium/drivers/r300/r300_context.h | 3 ++ src/gallium/drivers/r300/r300_screen.c | 59 ++++++++++----------------------- src/gallium/drivers/r300/r300_texture.c | 39 ++++++++++++++++------ src/gallium/drivers/r300/r300_texture.h | 3 ++ 4 files changed, 51 insertions(+), 53 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 30b80fa9db..4d73567bbe 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -181,6 +181,9 @@ struct r300_texture { /* Offsets into the buffer. */ unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; + /* Size of one zslice or face based on the texture target */ + unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS]; + /** * If non-zero, override the natural texture layout with * a custom stride (in bytes). diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 1d9f91d0f7..f581f0ca09 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -119,32 +119,13 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - if (r300screen->caps->is_r500) { - /* 13 == 4096x4096 */ - return 13; - } else { - /* 12 == 2048x2048 */ - return 12; - } case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - /* So, technically, the limit is the same as above, but some math - * shows why this is silly. Assuming RGBA, 4cpp, we can see that - * 4096*4096*4096 = 64.0 GiB exactly, so it's not exactly - * practical. However, if at some point a game really wants this, - * then we can remove or raise this limit. */ - if (r300screen->caps->is_r500) { - /* 9 == 256x256x256 */ - return 9; - } else { - /* 8 == 128*128*128 */ - return 8; - } case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: if (r300screen->caps->is_r500) { - /* 13 == 4096x4096 */ + /* 13 == 4096 */ return 13; } else { - /* 12 == 2048x2048 */ + /* 12 == 2048 */ return 12; } case PIPE_CAP_TEXTURE_MIRROR_CLAMP: @@ -191,8 +172,8 @@ static float r300_get_paramf(struct pipe_screen* pscreen, int param) } } -static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage, - boolean is_r500) +static boolean check_tex_format(enum pipe_format format, uint32_t usage, + boolean is_r500) { uint32_t retval = 0; @@ -286,7 +267,6 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage, return (retval >= usage); } -/* XXX moar targets */ static boolean r300_is_format_supported(struct pipe_screen* pscreen, enum pipe_format format, enum pipe_texture_target target, @@ -294,15 +274,17 @@ static boolean r300_is_format_supported(struct pipe_screen* pscreen, unsigned geom_flags) { switch (target) { + case PIPE_TEXTURE_1D: /* handle 1D textures as 2D ones */ case PIPE_TEXTURE_2D: - return check_tex_2d_format(format, tex_usage, - r300_screen(pscreen)->caps->is_r500); - case PIPE_TEXTURE_1D: case PIPE_TEXTURE_3D: + return check_tex_format(format, tex_usage, + r300_screen(pscreen)->caps->is_r500); + case PIPE_TEXTURE_CUBE: debug_printf("r300: Implementation error: Unsupported format " "target: %d\n", target); break; + default: debug_printf("r300: Fatal: This is not a format target: %d\n", target); @@ -322,22 +304,9 @@ r300_get_tex_transfer(struct pipe_screen *screen, { struct r300_texture *tex = (struct r300_texture *)texture; struct r300_transfer *trans; - unsigned offset = 0; /* in bytes */ + unsigned offset; - /* XXX Add support for these things */ - if (texture->target == PIPE_TEXTURE_CUBE) { - debug_printf("PIPE_TEXTURE_CUBE is not yet supported.\n"); - /* offset = tex->image_offset[level][face]; */ - } - else if (texture->target == PIPE_TEXTURE_3D) { - debug_printf("PIPE_TEXTURE_3D is not yet supported.\n"); - /* offset = tex->image_offset[level][zslice]; */ - } - else { - offset = tex->offset[level]; - assert(face == 0); - assert(zslice == 0); - } + offset = r300_texture_get_offset(tex, level, zslice, face); /* in bytes */ trans = CALLOC_STRUCT(r300_transfer); if (trans) { @@ -352,6 +321,12 @@ r300_get_tex_transfer(struct pipe_screen *screen, trans->transfer.nblocksy = texture->nblocksy[level]; trans->transfer.stride = r300_texture_get_stride(tex, level); trans->transfer.usage = usage; + + /* XXX not sure whether it's required to set these two, + the driver doesn't use them */ + trans->transfer.zslice = zslice; + trans->transfer.face = face; + trans->offset = offset; } return &trans->transfer; diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 3c8ff24e17..37c7910d80 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -36,7 +36,7 @@ static void r300_setup_texture_state(struct r300_texture* tex) state->format0 = R300_TX_WIDTH((pt->width[0] - 1) & 0x7ff) | R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff) | R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) | - R300_TX_NUM_LEVELS(pt->last_level) | + R300_TX_NUM_LEVELS(pt->last_level & 0xf) | R300_TX_PITCH_EN; /* XXX */ @@ -48,7 +48,8 @@ static void r300_setup_texture_state(struct r300_texture* tex) state->format1 |= R300_TX_FORMAT_3D; } - state->format2 = (r300_texture_get_stride(tex, 0) / pt->block.size) - 1; + state->format2 = ((r300_texture_get_stride(tex, 0) / pt->block.size) - 1) + & 0x1fff; /* Don't worry about accidentally setting this bit on non-r500; * the kernel should catch it. */ @@ -63,6 +64,26 @@ static void r300_setup_texture_state(struct r300_texture* tex) pt->width[0], pt->height[0], pt->last_level); } +unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, + unsigned zslice, unsigned face) +{ + unsigned offset = tex->offset[level]; + + switch (tex->tex.target) { + case PIPE_TEXTURE_3D: + assert(face == 0); + return offset + zslice * tex->layer_size[level]; + + case PIPE_TEXTURE_CUBE: + assert(zslice == 0); + return offset + face * tex->layer_size[level]; + + default: + assert(zslice == 0 && face == 0); + return offset; + } +} + /** * Return the stride, in bytes, of the texture images of the given texture * at the given level. @@ -84,7 +105,7 @@ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) static void r300_setup_miptree(struct r300_texture* tex) { struct pipe_texture* base = &tex->tex; - int stride, size; + int stride, size, layer_size; int i; for (i = 0; i <= base->last_level; i++) { @@ -98,10 +119,12 @@ static void r300_setup_miptree(struct r300_texture* tex) base->nblocksy[i] = pf_get_nblocksy(&base->block, base->height[i]); stride = r300_texture_get_stride(tex, i); - size = stride * base->nblocksy[i] * base->depth[i]; + layer_size = stride * base->nblocksy[i]; + size = layer_size * base->depth[i]; tex->offset[i] = align(tex->size, 32); tex->size = tex->offset[i] + size; + tex->layer_size[i] = layer_size; debug_printf("r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes)\n", @@ -161,8 +184,7 @@ static struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, struct pipe_surface* surface = CALLOC_STRUCT(pipe_surface); unsigned offset; - /* XXX this is certainly dependent on tex target */ - offset = tex->offset[level]; + offset = r300_texture_get_offset(tex, level, zslice, face); if (surface) { pipe_reference_init(&surface->reference, 1); @@ -191,11 +213,6 @@ static struct pipe_texture* { struct r300_texture* tex; - if (base->target != PIPE_TEXTURE_2D || - base->depth[0] != 1) { - return NULL; - } - tex = CALLOC_STRUCT(r300_texture); if (!tex) { return NULL; diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 55d1a0ac5c..35e06a9acb 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -33,6 +33,9 @@ void r300_init_screen_texture_functions(struct pipe_screen* screen); unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level); +unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, + unsigned zslice, unsigned face); + /* Note the signature of R300_EASY_TX_FORMAT(A, R, G, B, FORMAT)... */ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) { -- cgit v1.2.3 From c2df759cd73e281c4698c717e0ab89757a7affd5 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 25 Oct 2009 09:57:53 +0100 Subject: r300g: fix redefining mipmaps and fetching from them --- src/gallium/drivers/r300/r300_texture.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 37c7910d80..762806822c 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -36,8 +36,9 @@ static void r300_setup_texture_state(struct r300_texture* tex) state->format0 = R300_TX_WIDTH((pt->width[0] - 1) & 0x7ff) | R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff) | R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) | - R300_TX_NUM_LEVELS(pt->last_level & 0xf) | - R300_TX_PITCH_EN; + R300_TX_NUM_LEVELS(pt->last_level & 0xf);/* | + R300_TX_PITCH_EN;*/ + /* XXX TX_PITCH_EN breaks rendering mipmap levels > 0, weard */ /* XXX */ state->format1 = r300_translate_texformat(pt->format); @@ -194,6 +195,10 @@ static struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, surface->height = texture->height[level]; surface->offset = offset; surface->usage = flags; + surface->zslice = zslice; + surface->texture = texture; + surface->face = face; + surface->level = level; } return surface; -- cgit v1.2.3 From b4f6907b8d8a966df56c06155049c52dadea105f Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 22 Oct 2009 20:42:16 +0100 Subject: llvmpipe: Move a few format/sampling functions into better space. --- src/gallium/drivers/llvmpipe/Makefile | 1 + src/gallium/drivers/llvmpipe/SConscript | 1 + src/gallium/drivers/llvmpipe/lp_bld_format.h | 16 --- src/gallium/drivers/llvmpipe/lp_bld_format_soa.c | 71 ------------- src/gallium/drivers/llvmpipe/lp_bld_sample.c | 124 +++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_bld_sample.h | 9 ++ src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 59 +++++------ 7 files changed, 158 insertions(+), 123 deletions(-) create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_sample.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index b96ee23a99..ea771392b1 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -22,6 +22,7 @@ C_SOURCES = \ lp_bld_intr.c \ lp_bld_logic.c \ lp_bld_pack.c \ + lp_bld_sample.c \ lp_bld_sample_soa.c \ lp_bld_swizzle.c \ lp_bld_struct.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 403e4daa43..72e445b881 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -34,6 +34,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_interp.c', 'lp_bld_intr.c', 'lp_bld_pack.c', + 'lp_bld_sample.c', 'lp_bld_sample_soa.c', 'lp_bld_struct.c', 'lp_bld_logic.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h index c087fc986e..1ea694509d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h @@ -91,14 +91,6 @@ lp_build_store_rgba_aos(LLVMBuilderRef builder, LLVMValueRef ptr, LLVMValueRef rgba); -LLVMValueRef -lp_build_gather(LLVMBuilderRef builder, - unsigned length, - unsigned src_width, - unsigned dst_width, - LLVMValueRef base_ptr, - LLVMValueRef offsets); - void lp_build_unpack_rgba_soa(LLVMBuilderRef builder, @@ -108,12 +100,4 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, LLVMValueRef *rgba); -void -lp_build_load_rgba_soa(LLVMBuilderRef builder, - const struct util_format_description *format_desc, - struct lp_type type, - LLVMValueRef base_ptr, - LLVMValueRef offsets, - LLVMValueRef *rgba); - #endif /* !LP_BLD_FORMAT_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c index 66bebdcdec..60ad4c0ee6 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c @@ -34,55 +34,6 @@ #include "lp_bld_format.h" -/** - * Gather elements from scatter positions in memory into a single vector. - * - * @param src_width src element width - * @param dst_width result element width (source will be expanded to fit) - * @param length length of the offsets, - * @param base_ptr base pointer, should be a i8 pointer type. - * @param offsets vector with offsets - */ -LLVMValueRef -lp_build_gather(LLVMBuilderRef builder, - unsigned length, - unsigned src_width, - unsigned dst_width, - LLVMValueRef base_ptr, - LLVMValueRef offsets) -{ - LLVMTypeRef src_type = LLVMIntType(src_width); - LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); - LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); - LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); - LLVMValueRef res; - unsigned i; - - res = LLVMGetUndef(dst_vec_type); - for(i = 0; i < length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef elem_offset; - LLVMValueRef elem_ptr; - LLVMValueRef elem; - - elem_offset = LLVMBuildExtractElement(builder, offsets, index, ""); - elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, ""); - elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, ""); - elem = LLVMBuildLoad(builder, elem_ptr, ""); - - assert(src_width <= dst_width); - if(src_width > dst_width) - elem = LLVMBuildTrunc(builder, elem, dst_elem_type, ""); - if(src_width < dst_width) - elem = LLVMBuildZExt(builder, elem, dst_elem_type, ""); - - res = LLVMBuildInsertElement(builder, res, elem, index, ""); - } - - return res; -} - - static LLVMValueRef lp_build_format_swizzle(struct lp_type type, const LLVMValueRef *inputs, @@ -185,25 +136,3 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, } } } - - -void -lp_build_load_rgba_soa(LLVMBuilderRef builder, - const struct util_format_description *format_desc, - struct lp_type type, - LLVMValueRef base_ptr, - LLVMValueRef offsets, - LLVMValueRef *rgba) -{ - LLVMValueRef packed; - - assert(format_desc->block.width == 1); - assert(format_desc->block.height == 1); - assert(format_desc->block.bits <= type.width); - - packed = lp_build_gather(builder, - type.length, format_desc->block.bits, type.width, - base_ptr, offsets); - - lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba); -} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c new file mode 100644 index 0000000000..b0543f22c9 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c @@ -0,0 +1,124 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Texture sampling -- common code. + * + * @author Jose Fonseca + */ + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_format.h" +#include "util/u_math.h" +#include "lp_bld_debug.h" +#include "lp_bld_format.h" +#include "lp_bld_sample.h" + + +void +lp_sampler_static_state(struct lp_sampler_static_state *state, + const struct pipe_texture *texture, + const struct pipe_sampler_state *sampler) +{ + memset(state, 0, sizeof *state); + + if(!texture) + return; + + if(!sampler) + return; + + state->format = texture->format; + state->target = texture->target; + state->pot_width = util_is_pot(texture->width[0]); + state->pot_height = util_is_pot(texture->height[0]); + state->pot_depth = util_is_pot(texture->depth[0]); + + state->wrap_s = sampler->wrap_s; + state->wrap_t = sampler->wrap_t; + state->wrap_r = sampler->wrap_r; + state->min_img_filter = sampler->min_img_filter; + state->min_mip_filter = sampler->min_mip_filter; + state->mag_img_filter = sampler->mag_img_filter; + if(sampler->compare_mode) { + state->compare_mode = sampler->compare_mode; + state->compare_func = sampler->compare_func; + } + state->normalized_coords = sampler->normalized_coords; + state->prefilter = sampler->prefilter; +} + + +/** + * Gather elements from scatter positions in memory into a single vector. + * + * @param src_width src element width + * @param dst_width result element width (source will be expanded to fit) + * @param length length of the offsets, + * @param base_ptr base pointer, should be a i8 pointer type. + * @param offsets vector with offsets + */ +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets) +{ + LLVMTypeRef src_type = LLVMIntType(src_width); + LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); + LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); + LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); + LLVMValueRef res; + unsigned i; + + res = LLVMGetUndef(dst_vec_type); + for(i = 0; i < length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef elem_offset; + LLVMValueRef elem_ptr; + LLVMValueRef elem; + + elem_offset = LLVMBuildExtractElement(builder, offsets, index, ""); + elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, ""); + elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, ""); + elem = LLVMBuildLoad(builder, elem_ptr, ""); + + assert(src_width <= dst_width); + if(src_width > dst_width) + elem = LLVMBuildTrunc(builder, elem, dst_elem_type, ""); + if(src_width < dst_width) + elem = LLVMBuildZExt(builder, elem, dst_elem_type, ""); + + res = LLVMBuildInsertElement(builder, res, elem, index, ""); + } + + return res; +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h index 403d0e4836..2b56179eb8 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h @@ -119,6 +119,15 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, const struct pipe_sampler_state *sampler); +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets); + + void lp_build_sample_soa(LLVMBuilderRef builder, const struct lp_sampler_static_state *static_state, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 1a47ca32d2..4af0454935 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -27,7 +27,7 @@ /** * @file - * Texture sampling. + * Texture sampling -- SoA. * * @author Jose Fonseca */ @@ -48,41 +48,6 @@ #include "lp_bld_sample.h" -void -lp_sampler_static_state(struct lp_sampler_static_state *state, - const struct pipe_texture *texture, - const struct pipe_sampler_state *sampler) -{ - memset(state, 0, sizeof *state); - - if(!texture) - return; - - if(!sampler) - return; - - state->format = texture->format; - state->target = texture->target; - state->pot_width = util_is_pot(texture->width[0]); - state->pot_height = util_is_pot(texture->height[0]); - state->pot_depth = util_is_pot(texture->depth[0]); - - state->wrap_s = sampler->wrap_s; - state->wrap_t = sampler->wrap_t; - state->wrap_r = sampler->wrap_r; - state->min_img_filter = sampler->min_img_filter; - state->min_mip_filter = sampler->min_mip_filter; - state->mag_img_filter = sampler->mag_img_filter; - if(sampler->compare_mode) { - state->compare_mode = sampler->compare_mode; - state->compare_func = sampler->compare_func; - } - state->normalized_coords = sampler->normalized_coords; - state->prefilter = sampler->prefilter; -} - - - /** * Keep all information for sampling code generation in a single place. */ @@ -110,6 +75,28 @@ struct lp_build_sample_context }; +static void +lp_build_load_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + struct lp_type type, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + LLVMValueRef *rgba) +{ + LLVMValueRef packed; + + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + assert(format_desc->block.bits <= type.width); + + packed = lp_build_gather(builder, + type.length, format_desc->block.bits, type.width, + base_ptr, offsets); + + lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba); +} + + static void lp_build_sample_texel(struct lp_build_sample_context *bld, LLVMValueRef x, -- cgit v1.2.3 From 232b5864647d4c8d6cebb0845c046f1612e6054d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 22 Oct 2009 20:58:35 +0100 Subject: llvmpipe: Eliminate lp_build_load_rgba_aos. --- src/gallium/drivers/llvmpipe/lp_bld_format.h | 16 +----------- src/gallium/drivers/llvmpipe/lp_bld_format_aos.c | 32 +----------------------- src/gallium/drivers/llvmpipe/lp_test_format.c | 19 +++++++++----- 3 files changed, 15 insertions(+), 52 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h index 1ea694509d..b30537b2e9 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h @@ -51,7 +51,7 @@ struct lp_type; */ LLVMValueRef lp_build_unpack_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, + const struct util_format_description *desc, LLVMValueRef packed); @@ -66,20 +66,6 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, LLVMValueRef rgba); -/** - * Load a pixel into its RGBA components. - * - * @param ptr value with the pointer to the packed pixel. Pointer type is - * irrelevant. - * - * @return RGBA in a 4 floats vector. - */ -LLVMValueRef -lp_build_load_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr); - - /** * Store a pixel. * diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c index b9b5d84bed..840e54e558 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c @@ -33,10 +33,9 @@ LLVMValueRef lp_build_unpack_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, + const struct util_format_description *desc, LLVMValueRef packed) { - const struct util_format_description *desc; LLVMTypeRef type; LLVMValueRef shifted, casted, scaled, masked; LLVMValueRef shifts[4]; @@ -49,8 +48,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, unsigned shift; unsigned i; - desc = util_format_description(format); - /* FIXME: Support more formats */ assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); assert(desc->block.width == 1); @@ -249,33 +246,6 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, } -LLVMValueRef -lp_build_load_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr) -{ - const struct util_format_description *desc; - LLVMTypeRef type; - LLVMValueRef packed; - - desc = util_format_description(format); - - /* FIXME: Support more formats */ - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); - assert(desc->block.width == 1); - assert(desc->block.height == 1); - assert(desc->block.bits <= 32); - - type = LLVMIntType(desc->block.bits); - - ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), ""); - - packed = LLVMBuildLoad(builder, ptr, ""); - - return lp_build_unpack_rgba_aos(builder, format, packed); -} - - void lp_build_store_rgba_aos(LLVMBuilderRef builder, enum pipe_format format, diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 5dc8297fe9..6e501195f8 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -89,34 +89,41 @@ struct pixel_test_case test_cases[] = }; -typedef void (*load_ptr_t)(const void *, float *); +typedef void (*load_ptr_t)(const uint32_t packed, float *); static LLVMValueRef add_load_rgba_test(LLVMModuleRef module, enum pipe_format format) { + const struct util_format_description *desc; LLVMTypeRef args[2]; LLVMValueRef func; - LLVMValueRef ptr; + LLVMValueRef packed; LLVMValueRef rgba_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef rgba; - args[0] = LLVMPointerType(LLVMInt8Type(), 0); + desc = util_format_description(format); + + args[0] = LLVMInt32Type(); args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); func = LLVMAddFunction(module, "load", LLVMFunctionType(LLVMVoidType(), args, 2, 0)); LLVMSetFunctionCallConv(func, LLVMCCallConv); - ptr = LLVMGetParam(func, 0); + packed = LLVMGetParam(func, 0); rgba_ptr = LLVMGetParam(func, 1); block = LLVMAppendBasicBlock(func, "entry"); builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - rgba = lp_build_load_rgba_aos(builder, format, ptr); + if(desc->block.bits < 32) + packed = LLVMBuildTrunc(builder, packed, LLVMIntType(desc->block.bits), ""); + + rgba = lp_build_unpack_rgba_aos(builder, desc, packed); + LLVMBuildStore(builder, rgba, rgba_ptr); LLVMBuildRetVoid(builder); @@ -224,7 +231,7 @@ test_format(const struct pixel_test_case *test) memset(unpacked, 0, sizeof unpacked); packed = 0; - load_ptr(&test->packed, unpacked); + load_ptr(test->packed, unpacked); store_ptr(&packed, unpacked); success = TRUE; -- cgit v1.2.3 From 17afb6dd6959a3df692a6a49e6370e81ebe00038 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 22 Oct 2009 21:06:03 +0100 Subject: llvmpipe: Eliminate lp_build_store_rgba_aos. --- src/gallium/drivers/llvmpipe/lp_bld_format.h | 14 +---------- src/gallium/drivers/llvmpipe/lp_bld_format_aos.c | 32 +----------------------- src/gallium/drivers/llvmpipe/lp_test_format.c | 19 ++++++++++---- 3 files changed, 16 insertions(+), 49 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h index b30537b2e9..42ee3c7d90 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h @@ -62,22 +62,10 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, */ LLVMValueRef lp_build_pack_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, + const struct util_format_description *desc, LLVMValueRef rgba); -/** - * Store a pixel. - * - * @param rgba 4 float vector with the unpacked components. - */ -void -lp_build_store_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba); - - void lp_build_unpack_rgba_soa(LLVMBuilderRef builder, const struct util_format_description *format_desc, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c index 840e54e558..0591d77860 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c @@ -150,10 +150,9 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, LLVMValueRef lp_build_pack_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, + const struct util_format_description *desc, LLVMValueRef rgba) { - const struct util_format_description *desc; LLVMTypeRef type; LLVMValueRef packed = NULL; LLVMValueRef swizzles[4]; @@ -164,8 +163,6 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, unsigned shift; unsigned i, j; - desc = util_format_description(format); - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); assert(desc->block.width == 1); assert(desc->block.height == 1); @@ -244,30 +241,3 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, return packed; } - - -void -lp_build_store_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba) -{ - const struct util_format_description *desc; - LLVMTypeRef type; - LLVMValueRef packed; - - desc = util_format_description(format); - - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); - assert(desc->block.width == 1); - assert(desc->block.height == 1); - - type = LLVMIntType(desc->block.bits); - - packed = lp_build_pack_rgba_aos(builder, format, rgba); - - ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), ""); - - LLVMBuildStore(builder, packed, ptr); -} - diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 6e501195f8..0fe47426f6 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -133,27 +133,31 @@ add_load_rgba_test(LLVMModuleRef module, } -typedef void (*store_ptr_t)(void *, const float *); +typedef void (*store_ptr_t)(uint32_t *, const float *); static LLVMValueRef add_store_rgba_test(LLVMModuleRef module, enum pipe_format format) { + const struct util_format_description *desc; LLVMTypeRef args[2]; LLVMValueRef func; - LLVMValueRef ptr; + LLVMValueRef packed_ptr; LLVMValueRef rgba_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef rgba; + LLVMValueRef packed; + + desc = util_format_description(format); - args[0] = LLVMPointerType(LLVMInt8Type(), 0); + args[0] = LLVMPointerType(LLVMInt32Type(), 0); args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); func = LLVMAddFunction(module, "store", LLVMFunctionType(LLVMVoidType(), args, 2, 0)); LLVMSetFunctionCallConv(func, LLVMCCallConv); - ptr = LLVMGetParam(func, 0); + packed_ptr = LLVMGetParam(func, 0); rgba_ptr = LLVMGetParam(func, 1); block = LLVMAppendBasicBlock(func, "entry"); @@ -162,7 +166,12 @@ add_store_rgba_test(LLVMModuleRef module, rgba = LLVMBuildLoad(builder, rgba_ptr, ""); - lp_build_store_rgba_aos(builder, format, ptr, rgba); + packed = lp_build_pack_rgba_aos(builder, desc, rgba); + + if(desc->block.bits < 32) + packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), ""); + + LLVMBuildStore(builder, packed, packed_ptr); LLVMBuildRetVoid(builder); -- cgit v1.2.3 From fedd054d534206a5ebd6fed204aa97cbb5053b3a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 22 Oct 2009 21:16:26 +0100 Subject: llvmpipe: Share testing infrastructure with lp_test_format. --- src/gallium/drivers/llvmpipe/SConscript | 2 +- src/gallium/drivers/llvmpipe/lp_test_format.c | 67 ++++++++++++++++++--------- 2 files changed, 46 insertions(+), 23 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 72e445b881..169e0abc2b 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -77,7 +77,7 @@ env.Prepend(LIBS = [llvmpipe] + auxiliaries) env.Program( target = 'lp_test_format', - source = ['lp_test_format.c'], + source = ['lp_test_format.c', 'lp_test_main.c'], ) env.Program( diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 0fe47426f6..b2403ad521 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -39,6 +39,7 @@ #include "util/u_format.h" #include "lp_bld_format.h" +#include "lp_test.h" struct pixel_test_case @@ -89,14 +90,37 @@ struct pixel_test_case test_cases[] = }; +void +write_tsv_header(FILE *fp) +{ + fprintf(fp, + "result\t" + "format\n"); + + fflush(fp); +} + + +static void +write_tsv_row(FILE *fp, + const struct util_format_description *desc, + boolean success) +{ + fprintf(fp, "%s\t", success ? "pass" : "fail"); + + fprintf(fp, "%s\n", desc->name); + + fflush(fp); +} + + typedef void (*load_ptr_t)(const uint32_t packed, float *); static LLVMValueRef add_load_rgba_test(LLVMModuleRef module, - enum pipe_format format) + const struct util_format_description *desc) { - const struct util_format_description *desc; LLVMTypeRef args[2]; LLVMValueRef func; LLVMValueRef packed; @@ -105,8 +129,6 @@ add_load_rgba_test(LLVMModuleRef module, LLVMBuilderRef builder; LLVMValueRef rgba; - desc = util_format_description(format); - args[0] = LLVMInt32Type(); args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); @@ -138,9 +160,8 @@ typedef void (*store_ptr_t)(uint32_t *, const float *); static LLVMValueRef add_store_rgba_test(LLVMModuleRef module, - enum pipe_format format) + const struct util_format_description *desc) { - const struct util_format_description *desc; LLVMTypeRef args[2]; LLVMValueRef func; LLVMValueRef packed_ptr; @@ -150,8 +171,6 @@ add_store_rgba_test(LLVMModuleRef module, LLVMValueRef rgba; LLVMValueRef packed; - desc = util_format_description(format); - args[0] = LLVMPointerType(LLVMInt32Type(), 0); args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); @@ -181,7 +200,7 @@ add_store_rgba_test(LLVMModuleRef module, static boolean -test_format(const struct pixel_test_case *test) +test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test) { LLVMModuleRef module = NULL; LLVMValueRef load = NULL; @@ -203,8 +222,8 @@ test_format(const struct pixel_test_case *test) module = LLVMModuleCreateWithName("test"); - load = add_load_rgba_test(module, test->format); - store = add_store_rgba_test(module, test->format); + load = add_load_rgba_test(module, desc); + store = add_store_rgba_test(module, desc); if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { LLVMDumpModule(module); @@ -266,25 +285,29 @@ test_format(const struct pixel_test_case *test) if(pass) LLVMDisposePassManager(pass); + if(fp) + write_tsv_row(fp, desc, success); + return success; } -int main(int argc, char **argv) +boolean +test_all(unsigned verbose, FILE *fp) { unsigned i; - int ret; + bool success = TRUE; -#ifdef LLVM_NATIVE_ARCH - LLVMLinkInJIT(); - LLVMInitializeNativeTarget(); -#endif + for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i) + if(!test_format(verbose, fp, &test_cases[i])) + success = FALSE; - util_cpu_detect(); + return success; +} - for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i) - if(!test_format(&test_cases[i])) - ret = 1; - return ret; +boolean +test_some(unsigned verbose, FILE *fp, unsigned long n) +{ + return test_all(verbose, fp); } -- cgit v1.2.3 From bc93e9181cf179a797679d30cd1a3a563e1756c0 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 22 Oct 2009 22:37:06 +0100 Subject: llvmpipe: Factor our pixel offset computation. --- src/gallium/drivers/llvmpipe/lp_bld_sample.c | 66 ++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_bld_sample.h | 11 ++++ src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 49 +++--------------- 3 files changed, 83 insertions(+), 43 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c index b0543f22c9..4d272bea87 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c @@ -37,6 +37,9 @@ #include "util/u_format.h" #include "util/u_math.h" #include "lp_bld_debug.h" +#include "lp_bld_const.h" +#include "lp_bld_arit.h" +#include "lp_bld_type.h" #include "lp_bld_format.h" #include "lp_bld_sample.h" @@ -122,3 +125,66 @@ lp_build_gather(LLVMBuilderRef builder, return res; } + + +/** + * Compute the offset of a pixel. + * + * x, y, y_stride are vectors + */ +LLVMValueRef +lp_build_sample_offset(struct lp_build_context *bld, + const struct util_format_description *format_desc, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr) +{ + LLVMValueRef x_stride; + LLVMValueRef offset; + + x_stride = lp_build_const_scalar(bld->type, format_desc->block.bits/8); + + if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + LLVMValueRef x_lo, x_hi; + LLVMValueRef y_lo, y_hi; + LLVMValueRef x_stride_lo, x_stride_hi; + LLVMValueRef y_stride_lo, y_stride_hi; + LLVMValueRef x_offset_lo, x_offset_hi; + LLVMValueRef y_offset_lo, y_offset_hi; + LLVMValueRef offset_lo, offset_hi; + + x_lo = LLVMBuildAnd(bld->builder, x, bld->one, ""); + y_lo = LLVMBuildAnd(bld->builder, y, bld->one, ""); + + x_hi = LLVMBuildLShr(bld->builder, x, bld->one, ""); + y_hi = LLVMBuildLShr(bld->builder, y, bld->one, ""); + + x_stride_lo = x_stride; + y_stride_lo = lp_build_const_scalar(bld->type, 2*format_desc->block.bits/8); + + x_stride_hi = lp_build_const_scalar(bld->type, 4*format_desc->block.bits/8); + y_stride_hi = LLVMBuildShl(bld->builder, y_stride, bld->one, ""); + + x_offset_lo = lp_build_mul(bld, x_lo, x_stride_lo); + y_offset_lo = lp_build_mul(bld, y_lo, y_stride_lo); + offset_lo = lp_build_add(bld, x_offset_lo, y_offset_lo); + + x_offset_hi = lp_build_mul(bld, x_hi, x_stride_hi); + y_offset_hi = lp_build_mul(bld, y_hi, y_stride_hi); + offset_hi = lp_build_add(bld, x_offset_hi, y_offset_hi); + + offset = lp_build_add(bld, offset_hi, offset_lo); + } + else { + LLVMValueRef x_offset; + LLVMValueRef y_offset; + + x_offset = lp_build_mul(bld, x, x_stride); + y_offset = lp_build_mul(bld, y, y_stride); + + offset = lp_build_add(bld, x_offset, y_offset); + } + + return offset; +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h index 2b56179eb8..8cb8210ca7 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h @@ -40,7 +40,9 @@ struct pipe_texture; struct pipe_sampler_state; +struct util_format_description; struct lp_type; +struct lp_build_context; /** @@ -128,6 +130,15 @@ lp_build_gather(LLVMBuilderRef builder, LLVMValueRef offsets); +LLVMValueRef +lp_build_sample_offset(struct lp_build_context *bld, + const struct util_format_description *format_desc, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr); + + void lp_build_sample_soa(LLVMBuilderRef builder, const struct lp_sampler_static_state *static_state, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 4af0454935..6aa7ad4b45 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -106,51 +106,14 @@ lp_build_sample_texel(struct lp_build_sample_context *bld, LLVMValueRef *texel) { struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - LLVMValueRef x_stride; LLVMValueRef offset; - x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8); - - if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - LLVMValueRef x_lo, x_hi; - LLVMValueRef y_lo, y_hi; - LLVMValueRef x_stride_lo, x_stride_hi; - LLVMValueRef y_stride_lo, y_stride_hi; - LLVMValueRef x_offset_lo, x_offset_hi; - LLVMValueRef y_offset_lo, y_offset_hi; - LLVMValueRef offset_lo, offset_hi; - - x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, ""); - y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, ""); - - x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, ""); - y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, ""); - - x_stride_lo = x_stride; - y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8); - - x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 4*bld->format_desc->block.bits/8); - y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, ""); - - x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo); - y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo); - offset_lo = lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo); - - x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi); - y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi); - offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi); - - offset = lp_build_add(int_coord_bld, offset_hi, offset_lo); - } - else { - LLVMValueRef x_offset; - LLVMValueRef y_offset; - - x_offset = lp_build_mul(int_coord_bld, x, x_stride); - y_offset = lp_build_mul(int_coord_bld, y, y_stride); - - offset = lp_build_add(int_coord_bld, x_offset, y_offset); - } + offset = lp_build_sample_offset(&bld->int_coord_bld, + bld->format_desc, + x, + y, + y_stride, + data_ptr); lp_build_load_rgba_soa(bld->builder, bld->format_desc, -- cgit v1.2.3 From a55b305c5b3be3fed8112d44878e712cf09303ce Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 22 Oct 2009 22:44:32 +0100 Subject: llvmpipe: Merge lp_build_load_rgba_soa into lp_build_sample_texel. --- src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 48 ++++++++---------------- 1 file changed, 16 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 6aa7ad4b45..a7d2118c9b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -75,28 +75,6 @@ struct lp_build_sample_context }; -static void -lp_build_load_rgba_soa(LLVMBuilderRef builder, - const struct util_format_description *format_desc, - struct lp_type type, - LLVMValueRef base_ptr, - LLVMValueRef offsets, - LLVMValueRef *rgba) -{ - LLVMValueRef packed; - - assert(format_desc->block.width == 1); - assert(format_desc->block.height == 1); - assert(format_desc->block.bits <= type.width); - - packed = lp_build_gather(builder, - type.length, format_desc->block.bits, type.width, - base_ptr, offsets); - - lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba); -} - - static void lp_build_sample_texel(struct lp_build_sample_context *bld, LLVMValueRef x, @@ -105,22 +83,28 @@ lp_build_sample_texel(struct lp_build_sample_context *bld, LLVMValueRef data_ptr, LLVMValueRef *texel) { - struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMValueRef offset; + LLVMValueRef packed; offset = lp_build_sample_offset(&bld->int_coord_bld, bld->format_desc, - x, - y, - y_stride, + x, y, y_stride, data_ptr); - lp_build_load_rgba_soa(bld->builder, - bld->format_desc, - bld->texel_type, - data_ptr, - offset, - texel); + assert(bld->format_desc->block.width == 1); + assert(bld->format_desc->block.height == 1); + assert(bld->format_desc->block.bits <= bld->texel_type.width); + + packed = lp_build_gather(bld->builder, + bld->texel_type.length, + bld->format_desc->block.bits, + bld->texel_type.width, + data_ptr, offset); + + lp_build_unpack_rgba_soa(bld->builder, + bld->format_desc, + bld->texel_type, + packed, texel); } -- cgit v1.2.3 From b544ab72994a7eda1e8c17fa217213ff3713dd99 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 25 Oct 2009 09:03:18 +0000 Subject: llvmpipe: Add inlines to quickly generate types matching the native SIMD register bitwidth. --- src/gallium/drivers/llvmpipe/lp_bld_type.h | 94 +++++++++++++++++++++++++++- src/gallium/drivers/llvmpipe/lp_test_blend.c | 20 +++--- 2 files changed, 102 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h index 46c298fa20..2fb233d335 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h @@ -42,14 +42,19 @@ #include +/** + * Native SIMD register width. + * + * 128 for all architectures we care about. + */ +#define LP_NATIVE_VECTOR_WIDTH 128 + /** * Several functions can only cope with vectors of length up to this value. * You may need to increase that value if you want to represent bigger vectors. */ #define LP_MAX_VECTOR_LENGTH 16 -#define LP_MAX_TYPE_WIDTH 64 - /** * The LLVM type system can't conveniently express all the things we care about @@ -134,6 +139,91 @@ struct lp_build_context }; +static INLINE struct lp_type +lp_type_float(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.floating = TRUE; + res_type.sign = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_int(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.sign = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_uint(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_unorm(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.norm = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_fixed(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.sign = TRUE; + res_type.fixed = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_ufixed(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.fixed = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + LLVMTypeRef lp_build_elem_type(struct lp_type type); diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index e3af81cffb..149fec1d54 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -530,11 +530,11 @@ test_one(unsigned verbose, success = TRUE; for(i = 0; i < n && success; ++i) { if(mode == AoS) { - uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; + uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; + uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; + uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; + uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; @@ -595,11 +595,11 @@ test_one(unsigned verbose, if(mode == SoA) { const unsigned stride = type.length*type.width/8; - uint8_t src[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t dst[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t con[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t res[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t ref[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; + uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; + uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; + uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; + uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; boolean mismatch; -- cgit v1.2.3 From 8d80fd3f554cab2db962a903ce4eaba7c8fed7ac Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 25 Oct 2009 09:03:50 +0000 Subject: llvmpipe: Allow different signs when unpacking. --- src/gallium/drivers/llvmpipe/lp_bld_pack.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.c b/src/gallium/drivers/llvmpipe/lp_bld_pack.c index fe82fda039..bc360ad77a 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_pack.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.c @@ -159,11 +159,10 @@ lp_build_unpack2(LLVMBuilderRef builder, assert(!src_type.floating); assert(!dst_type.floating); - assert(dst_type.sign == src_type.sign); assert(dst_type.width == src_type.width * 2); assert(dst_type.length * 2 == src_type.length); - if(src_type.sign) { + if(dst_type.sign && src_type.sign) { /* Replicate the sign bit in the most significant bits */ msb = LLVMBuildAShr(builder, src, lp_build_int_const_scalar(src_type, src_type.width - 1), ""); } -- cgit v1.2.3 From abff4214ef870f26d5c64adac1235b9e9438a51e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 25 Oct 2009 09:06:05 +0000 Subject: llvmpipe: Split the format swizzling step from the unpacking. --- src/gallium/drivers/llvmpipe/lp_bld_format.h | 7 ++++ src/gallium/drivers/llvmpipe/lp_bld_format_soa.c | 43 +++++++++++++++--------- 2 files changed, 34 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h index 42ee3c7d90..8b08c016c0 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h @@ -42,6 +42,13 @@ struct util_format_description; struct lp_type; +void +lp_build_format_swizzle_soa(const struct util_format_description *format_desc, + struct lp_type type, + const LLVMValueRef *unswizzled, + LLVMValueRef *swizzled); + + /** * Unpack a pixel into its RGBA components. * diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c index 60ad4c0ee6..64151d169d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c @@ -35,16 +35,16 @@ static LLVMValueRef -lp_build_format_swizzle(struct lp_type type, - const LLVMValueRef *inputs, - enum util_format_swizzle swizzle) +lp_build_format_swizzle_chan_soa(struct lp_type type, + const LLVMValueRef *unswizzled, + enum util_format_swizzle swizzle) { switch (swizzle) { case UTIL_FORMAT_SWIZZLE_X: case UTIL_FORMAT_SWIZZLE_Y: case UTIL_FORMAT_SWIZZLE_Z: case UTIL_FORMAT_SWIZZLE_W: - return inputs[swizzle]; + return unswizzled[swizzle]; case UTIL_FORMAT_SWIZZLE_0: return lp_build_zero(type); case UTIL_FORMAT_SWIZZLE_1: @@ -58,6 +58,28 @@ lp_build_format_swizzle(struct lp_type type, } +void +lp_build_format_swizzle_soa(const struct util_format_description *format_desc, + struct lp_type type, + const LLVMValueRef *unswizzled, + LLVMValueRef *swizzled) +{ + if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + enum util_format_swizzle swizzle = format_desc->swizzle[0]; + LLVMValueRef depth = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle); + swizzled[2] = swizzled[1] = swizzled[0] = depth; + swizzled[3] = lp_build_one(type); + } + else { + unsigned chan; + for (chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle = format_desc->swizzle[chan]; + swizzled[chan] = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle); + } + } +} + + void lp_build_unpack_rgba_soa(LLVMBuilderRef builder, const struct util_format_description *format_desc, @@ -123,16 +145,5 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, start = stop; } - if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - enum util_format_swizzle swizzle = format_desc->swizzle[0]; - LLVMValueRef depth = lp_build_format_swizzle(type, inputs, swizzle); - rgba[2] = rgba[1] = rgba[0] = depth; - rgba[3] = lp_build_one(type); - } - else { - for (chan = 0; chan < 4; ++chan) { - enum util_format_swizzle swizzle = format_desc->swizzle[chan]; - rgba[chan] = lp_build_format_swizzle(type, inputs, swizzle); - } - } + lp_build_format_swizzle_soa(format_desc, type, inputs, rgba); } -- cgit v1.2.3 From 47d241be9ff89b65b978dd4fe4ea7473e07fa2c4 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 25 Oct 2009 09:09:23 +0000 Subject: llvmpipe: New function to unpack rgba8 formats into 4 x u8n AoS. --- src/gallium/drivers/llvmpipe/lp_bld_format.h | 19 ++- src/gallium/drivers/llvmpipe/lp_bld_format_aos.c | 141 +++++++++++++++++++++++ 2 files changed, 148 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h index 8b08c016c0..fa560576be 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h @@ -49,24 +49,19 @@ lp_build_format_swizzle_soa(const struct util_format_description *format_desc, LLVMValueRef *swizzled); -/** - * Unpack a pixel into its RGBA components. - * - * @param packed integer. - * - * @return RGBA in a 4 floats vector. - */ LLVMValueRef lp_build_unpack_rgba_aos(LLVMBuilderRef builder, const struct util_format_description *desc, LLVMValueRef packed); -/** - * Pack a pixel. - * - * @param rgba 4 float vector with the unpacked components. - */ +LLVMValueRef +lp_build_unpack_rgba8_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + struct lp_type type, + LLVMValueRef packed); + + LLVMValueRef lp_build_pack_rgba_aos(LLVMBuilderRef builder, const struct util_format_description *desc, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c index 0591d77860..5836e0173f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c @@ -25,12 +25,34 @@ * **************************************************************************/ +/** + * @file + * AoS pixel format manipulation. + * + * @author Jose Fonseca + */ + +#include "util/u_cpu_detect.h" #include "util/u_format.h" +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_logic.h" +#include "lp_bld_swizzle.h" #include "lp_bld_format.h" +/** + * Unpack a single pixel into its RGBA components. + * + * @param packed integer. + * + * @return RGBA in a 4 floats vector. + * + * XXX: This is mostly for reference and testing -- operating a single pixel at + * a time is rarely if ever needed. + */ LLVMValueRef lp_build_unpack_rgba_aos(LLVMBuilderRef builder, const struct util_format_description *desc, @@ -148,6 +170,125 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, } +/** + * Take a vector with packed pixels and unpack into a rgba8 vector. + * + * Formats with bit depth smaller than 32bits are accepted, but they must be + * padded to 32bits. + */ +LLVMValueRef +lp_build_unpack_rgba8_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + struct lp_type type, + LLVMValueRef packed) +{ + struct lp_build_context bld; + bool rgba8; + LLVMValueRef res; + unsigned i; + + lp_build_context_init(&bld, builder, type); + + /* FIXME: Support more formats */ + assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(desc->block.width == 1); + assert(desc->block.height == 1); + assert(desc->block.bits <= 32); + + assert(!type.floating); + assert(!type.fixed); + assert(type.norm); + assert(type.width == 8); + assert(type.length % 4 == 0); + + rgba8 = TRUE; + for(i = 0; i < 4; ++i) { + assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED || + desc->channel[i].type == UTIL_FORMAT_TYPE_VOID); + if(desc->channel[0].size != 8) + rgba8 = FALSE; + } + + if(rgba8) { + /* + * The pixel is already in a rgba8 format variant. All it is necessary + * is to swizzle the channels. + */ + + unsigned char swizzles[4]; + boolean zeros[4]; /* bitwise AND mask */ + boolean ones[4]; /* bitwise OR mask */ + boolean swizzles_needed = FALSE; + boolean zeros_needed = FALSE; + boolean ones_needed = FALSE; + + for(i = 0; i < 4; ++i) { + enum util_format_swizzle swizzle = desc->swizzle[i]; + + /* Initialize with the no-op case */ + swizzles[i] = util_cpu_caps.little_endian ? 3 - i : i; + zeros[i] = TRUE; + ones[i] = FALSE; + + switch (swizzle) { + case UTIL_FORMAT_SWIZZLE_X: + case UTIL_FORMAT_SWIZZLE_Y: + case UTIL_FORMAT_SWIZZLE_Z: + case UTIL_FORMAT_SWIZZLE_W: + if(swizzle != swizzles[i]) { + swizzles[i] = swizzle; + swizzles_needed = TRUE; + } + break; + case UTIL_FORMAT_SWIZZLE_0: + zeros[i] = FALSE; + zeros_needed = TRUE; + break; + case UTIL_FORMAT_SWIZZLE_1: + ones[i] = TRUE; + ones_needed = TRUE; + break; + case UTIL_FORMAT_SWIZZLE_NONE: + assert(0); + break; + } + } + + res = packed; + + if(swizzles_needed) + res = lp_build_swizzle1_aos(&bld, res, swizzles); + + if(zeros_needed) { + /* Mask out zero channels */ + LLVMValueRef mask = lp_build_const_mask_aos(type, zeros); + res = LLVMBuildAnd(builder, res, mask, ""); + } + + if(ones_needed) { + /* Or one channels */ + LLVMValueRef mask = lp_build_const_mask_aos(type, ones); + res = LLVMBuildOr(builder, res, mask, ""); + } + } + else { + /* FIXME */ + assert(0); + res = lp_build_undef(type); + } + + return res; +} + + +/** + * Pack a single pixel. + * + * @param rgba 4 float vector with the unpacked components. + * + * XXX: This is mostly for reference and testing -- operating a single pixel at + * a time is rarely if ever needed. + */ LLVMValueRef lp_build_pack_rgba_aos(LLVMBuilderRef builder, const struct util_format_description *desc, -- cgit v1.2.3 From bfd7a9ca967e5521fb3847db8615127c3ee7b9b3 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 25 Oct 2009 09:09:59 +0000 Subject: llvmpipe: New module to help make assertions about formats. --- src/gallium/drivers/llvmpipe/Makefile | 1 + src/gallium/drivers/llvmpipe/SConscript | 1 + src/gallium/drivers/llvmpipe/lp_bld_format.h | 4 ++ src/gallium/drivers/llvmpipe/lp_bld_format_query.c | 72 ++++++++++++++++++++++ 4 files changed, 78 insertions(+) create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_format_query.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index ea771392b1..96c014e592 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -17,6 +17,7 @@ C_SOURCES = \ lp_bld_depth.c \ lp_bld_flow.c \ lp_bld_format_aos.c \ + lp_bld_format_query.c \ lp_bld_format_soa.c \ lp_bld_interp.c \ lp_bld_intr.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 169e0abc2b..52983039fd 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -30,6 +30,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_depth.c', 'lp_bld_flow.c', 'lp_bld_format_aos.c', + 'lp_bld_format_query.c', 'lp_bld_format_soa.c', 'lp_bld_interp.c', 'lp_bld_intr.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h index fa560576be..970bee379f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h @@ -42,6 +42,10 @@ struct util_format_description; struct lp_type; +boolean +lp_format_is_rgba8(const struct util_format_description *desc); + + void lp_build_format_swizzle_soa(const struct util_format_description *format_desc, struct lp_type type, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_query.c b/src/gallium/drivers/llvmpipe/lp_bld_format_query.c new file mode 100644 index 0000000000..f3832d07ff --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_query.c @@ -0,0 +1,72 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Utility functions to make assertions about formats. + * + * This module centralizes most of logic used when determining what algorithm + * is most suitable (i.e., most efficient yet correct) for a given format. + * + * It might be possible to move some of these functions to u_format module, + * but since tiny differences in the format my render it more/less + * appropriate to a given algorithm it is impossible to make any long term + * guarantee about the semantics of these functions. + * + * @author Jose Fonseca + */ + + +#include "util/u_format.h" + +#include "lp_bld_format.h" + + +/** + * Whether this format is a 4 rgba8 variant + */ +boolean +lp_format_is_rgba8(const struct util_format_description *desc) +{ + unsigned chan; + + if(desc->block.width != 1 || + desc->block.height != 1 || + desc->block.bits != 32) + return FALSE; + + for(chan = 0; chan < 4; ++chan) { + if(desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED && + desc->channel[chan].type != UTIL_FORMAT_TYPE_SIGNED && + desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) + return FALSE; + if(desc->channel[chan].size != 8) + return FALSE; + } + + return TRUE; +} -- cgit v1.2.3 From f3893ca9c8bfdba9323ef2fc179ac203e85eda70 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 25 Oct 2009 09:16:38 +0000 Subject: llvmpipe: Make lerping work for 8.8 fixed point values. --- src/gallium/drivers/llvmpipe/lp_bld_arit.c | 54 +++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index 83ca06acf8..93e797cb44 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -361,6 +361,8 @@ lp_build_mul(struct lp_build_context *bld, LLVMValueRef b) { const struct lp_type type = bld->type; + LLVMValueRef shift; + LLVMValueRef res; if(a == bld->zero) return bld->zero; @@ -394,10 +396,31 @@ lp_build_mul(struct lp_build_context *bld, assert(0); } - if(LLVMIsConstant(a) && LLVMIsConstant(b)) - return LLVMConstMul(a, b); + if(type.fixed) + shift = lp_build_int_const_scalar(type, type.width/2); + else + shift = NULL; + + if(LLVMIsConstant(a) && LLVMIsConstant(b)) { + res = LLVMConstMul(a, b); + if(shift) { + if(type.sign) + res = LLVMConstAShr(res, shift); + else + res = LLVMConstLShr(res, shift); + } + } + else { + res = LLVMBuildMul(bld->builder, a, b, ""); + if(shift) { + if(type.sign) + res = LLVMBuildAShr(bld->builder, res, shift, ""); + else + res = LLVMBuildLShr(bld->builder, res, shift, ""); + } + } - return LLVMBuildMul(bld->builder, a, b, ""); + return res; } @@ -432,13 +455,36 @@ lp_build_div(struct lp_build_context *bld, } +/** + * Linear interpolation. + * + * This also works for integer values with a few caveats. + * + * @sa http://www.stereopsis.com/doubleblend.html + */ LLVMValueRef lp_build_lerp(struct lp_build_context *bld, LLVMValueRef x, LLVMValueRef v0, LLVMValueRef v1) { - return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0))); + LLVMValueRef delta; + LLVMValueRef res; + + delta = lp_build_sub(bld, v1, v0); + + res = lp_build_mul(bld, x, delta); + + res = lp_build_add(bld, v0, res); + + if(bld->type.fixed) + /* XXX: This step is necessary for lerping 8bit colors stored on 16bits, + * but it will be wrong for other uses. Basically we need a more + * powerful lp_type, capable of further distinguishing the values + * interpretation from the value storage. */ + res = LLVMBuildAnd(bld->builder, res, lp_build_int_const_scalar(bld->type, (1 << bld->type.width/2) - 1), ""); + + return res; } -- cgit v1.2.3 From e1342f871b2ec1ed0293f564540d03aaa11b1720 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 25 Oct 2009 09:51:57 +0000 Subject: llvmpipe: Fast path for sampling rgba8 textures with linear filtering. Implement Keith's suggestion of doing most of the sampling with 16x8 and 8x16 AoS, and only doing the conversion to floating point SoA at the very last step. Improves gloss performance by 10%. --- src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 268 ++++++++++++++++++++++- 1 file changed, 256 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index a7d2118c9b..f7a030fb8c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -38,12 +38,15 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_format.h" +#include "util/u_cpu_detect.h" #include "lp_bld_debug.h" #include "lp_bld_type.h" #include "lp_bld_const.h" +#include "lp_bld_conv.h" #include "lp_bld_arit.h" #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" +#include "lp_bld_pack.h" #include "lp_bld_format.h" #include "lp_bld_sample.h" @@ -76,12 +79,12 @@ struct lp_build_sample_context static void -lp_build_sample_texel(struct lp_build_sample_context *bld, - LLVMValueRef x, - LLVMValueRef y, - LLVMValueRef y_stride, - LLVMValueRef data_ptr, - LLVMValueRef *texel) +lp_build_sample_texel_soa(struct lp_build_sample_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) { LLVMValueRef offset; LLVMValueRef packed; @@ -108,6 +111,32 @@ lp_build_sample_texel(struct lp_build_sample_context *bld, } +static LLVMValueRef +lp_build_sample_packed(struct lp_build_sample_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr) +{ + LLVMValueRef offset; + + offset = lp_build_sample_offset(&bld->int_coord_bld, + bld->format_desc, + x, y, y_stride, + data_ptr); + + assert(bld->format_desc->block.width == 1); + assert(bld->format_desc->block.height == 1); + assert(bld->format_desc->block.bits <= bld->texel_type.width); + + return lp_build_gather(bld->builder, + bld->texel_type.length, + bld->format_desc->block.bits, + bld->texel_type.width, + data_ptr, offset); +} + + static LLVMValueRef lp_build_sample_wrap(struct lp_build_sample_context *bld, LLVMValueRef coord, @@ -174,7 +203,7 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld, x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s); y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t); - lp_build_sample_texel(bld, x, y, stride, data_ptr, texel); + lp_build_sample_texel_soa(bld, x, y, stride, data_ptr, texel); } @@ -220,10 +249,10 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); - lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]); - lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]); - lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]); - lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]); + lp_build_sample_texel_soa(bld, x0, y0, stride, data_ptr, neighbors[0][0]); + lp_build_sample_texel_soa(bld, x1, y0, stride, data_ptr, neighbors[0][1]); + lp_build_sample_texel_soa(bld, x0, y1, stride, data_ptr, neighbors[1][0]); + lp_build_sample_texel_soa(bld, x1, y1, stride, data_ptr, neighbors[1][1]); /* TODO: Don't interpolate missing channels */ for(chan = 0; chan < 4; ++chan) { @@ -237,6 +266,218 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, } +static void +lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, + struct lp_type dst_type, + LLVMValueRef packed, + LLVMValueRef *rgba) +{ + LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff); + unsigned chan; + + /* Decode the input vector components */ + for (chan = 0; chan < 4; ++chan) { + unsigned start = chan*8; + unsigned stop = start + 8; + LLVMValueRef input; + + input = packed; + + if(start) + input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), ""); + + if(stop < 32) + input = LLVMBuildAnd(builder, input, mask, ""); + + input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input); + + rgba[chan] = input; + } +} + + +static void +lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + LLVMBuilderRef builder = bld->builder; + struct lp_build_context i32, h16, u8n; + LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; + LLVMValueRef f32_c256, i32_c8, i32_c128, i32_c255; + LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; + LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; + LLVMValueRef x0, x1; + LLVMValueRef y0, y1; + LLVMValueRef neighbors[2][2]; + LLVMValueRef neighbors_lo[2][2]; + LLVMValueRef neighbors_hi[2][2]; + LLVMValueRef packed, packed_lo, packed_hi; + LLVMValueRef unswizzled[4]; + + lp_build_context_init(&i32, builder, lp_type_int(32)); + lp_build_context_init(&h16, builder, lp_type_ufixed(16)); + lp_build_context_init(&u8n, builder, lp_type_unorm(8)); + + i32_vec_type = lp_build_vec_type(i32.type); + h16_vec_type = lp_build_vec_type(h16.type); + u8n_vec_type = lp_build_vec_type(u8n.type); + + f32_c256 = lp_build_const_scalar(bld->coord_type, 256.0); + s = lp_build_mul(&bld->coord_bld, s, f32_c256); + t = lp_build_mul(&bld->coord_bld, t, f32_c256); + + s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); + t = LLVMBuildFPToSI(builder, t, i32_vec_type, ""); + + i32_c128 = lp_build_int_const_scalar(i32.type, -128); + s = LLVMBuildAdd(builder, s, i32_c128, ""); + t = LLVMBuildAdd(builder, t, i32_c128, ""); + + i32_c8 = lp_build_int_const_scalar(i32.type, 8); + s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); + t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); + + i32_c255 = lp_build_int_const_scalar(i32.type, 255); + s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); + t_fpart = LLVMBuildAnd(builder, t, i32_c255, ""); + + x0 = s_ipart; + y0 = t_ipart; + + x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one); + y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one); + + x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + /* + * Transform 4 x i32 in + * + * s_fpart = {s0, s1, s2, s3} + * + * into 8 x i16 + * + * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3} + * + * into two 8 x i16 + * + * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1} + * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3} + * + * and likewise for t_fpart. There is no risk of loosing precision here + * since the fractional parts only use the lower 8bits. + */ + + s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, ""); + t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, ""); + + { + LLVMTypeRef elem_type = LLVMInt32Type(); + LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef shuffle_lo; + LLVMValueRef shuffle_hi; + unsigned i, j; + + for(j = 0; j < h16.type.length; j += 4) { + unsigned subindex = util_cpu_caps.little_endian ? 0 : 1; + LLVMValueRef index; + + index = LLVMConstInt(elem_type, j/2 + subindex, 0); + for(i = 0; i < 4; ++i) + shuffles_lo[j + i] = index; + + index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0); + for(i = 0; i < 4; ++i) + shuffles_hi[j + i] = index; + } + + shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length); + shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length); + + s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, ""); + t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, ""); + s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, ""); + t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, ""); + } + + /* + * Fetch the pixels as 4 x 32bit (rgba order might differ): + * + * rgba0 rgba1 rgba2 rgba3 + * + * bit cast them into 16 x u8 + * + * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3 + * + * unpack them into two 8 x i16: + * + * r0 g0 b0 a0 r1 g1 b1 a1 + * r2 g2 b2 a2 r3 g3 b3 a3 + * + * The higher 8 bits of the resulting elements will be zero. + */ + + neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_ptr); + neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_ptr); + neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_ptr); + neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_ptr); + + neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, ""); + neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, ""); + neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, ""); + neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, ""); + + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]); + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]); + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]); + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]); + + /* + * Linear interpolate with 8.8 fixed point. + */ + + packed_lo = lp_build_lerp_2d(&h16, + s_fpart_lo, t_fpart_lo, + neighbors_lo[0][0], + neighbors_lo[0][1], + neighbors_lo[1][0], + neighbors_lo[1][1]); + + packed_hi = lp_build_lerp_2d(&h16, + s_fpart_hi, t_fpart_hi, + neighbors_hi[0][0], + neighbors_hi[0][1], + neighbors_hi[1][0], + neighbors_hi[1][1]); + + packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi); + + /* + * Convert to SoA and swizzle. + */ + + packed = LLVMBuildBitCast(builder, packed, i32_vec_type, ""); + + lp_build_rgba8_to_f32_soa(bld->builder, + bld->texel_type, + packed, unswizzled); + + lp_build_format_swizzle_soa(bld->format_desc, + bld->texel_type, unswizzled, + texel); +} + + static void lp_build_sample_compare(struct lp_build_sample_context *bld, LLVMValueRef p, @@ -336,7 +577,10 @@ lp_build_sample_soa(LLVMBuilderRef builder, break; case PIPE_TEX_FILTER_LINEAR: case PIPE_TEX_FILTER_ANISO: - lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); + if(lp_format_is_rgba8(bld.format_desc)) + lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel); + else + lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); break; default: assert(0); -- cgit v1.2.3 From e4c5e01c109e51baaad23e90d08e8543b0fd6c07 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 25 Oct 2009 11:48:17 +0000 Subject: llvmpipe: Immediate multiplication. --- src/gallium/drivers/llvmpipe/lp_bld_arit.c | 54 ++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_bld_arit.h | 5 +++ src/gallium/drivers/llvmpipe/lp_bld_interp.c | 30 +------------ src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 7 ++- 4 files changed, 64 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index 93e797cb44..9c59677a74 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -47,6 +47,7 @@ #include "util/u_memory.h" #include "util/u_debug.h" +#include "util/u_math.h" #include "util/u_string.h" #include "util/u_cpu_detect.h" @@ -424,6 +425,59 @@ lp_build_mul(struct lp_build_context *bld, } +/** + * Small vector x scale multiplication optimization. + */ +LLVMValueRef +lp_build_mul_imm(struct lp_build_context *bld, + LLVMValueRef a, + int b) +{ + LLVMValueRef factor; + + if(b == 0) + return bld->zero; + + if(b == 1) + return a; + + if(b == -1) + return LLVMBuildNeg(bld->builder, a, ""); + + if(b == 2 && bld->type.floating) + return lp_build_add(bld, a, a); + + if(util_is_pot(b)) { + unsigned shift = ffs(b) - 1; + + if(bld->type.floating) { +#if 0 + /* + * Power of two multiplication by directly manipulating the mantissa. + * + * XXX: This might not be always faster, it will introduce a small error + * for multiplication by zero, and it will produce wrong results + * for Inf and NaN. + */ + unsigned mantissa = lp_mantissa(bld->type); + factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa); + a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), ""); + a = LLVMBuildAdd(bld->builder, a, factor, ""); + a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), ""); + return a; +#endif + } + else { + factor = lp_build_const_scalar(bld->type, shift); + return LLVMBuildShl(bld->builder, a, factor, ""); + } + } + + factor = lp_build_const_scalar(bld->type, (double)b); + return lp_build_mul(bld, a, factor); +} + + /** * Generate a / b */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h index 4e568c055e..62be4b9aee 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h @@ -66,6 +66,11 @@ lp_build_mul(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); +LLVMValueRef +lp_build_mul_imm(struct lp_build_context *bld, + LLVMValueRef a, + int b); + LLVMValueRef lp_build_div(struct lp_build_context *bld, LLVMValueRef a, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 338dbca6d1..818c0e943e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -108,32 +108,6 @@ coeffs_init(struct lp_build_interp_soa_context *bld, } -/** - * Small vector x scale multiplication optimization. - * - * TODO: Should be elsewhere. - */ -static LLVMValueRef -coeff_multiply(struct lp_build_interp_soa_context *bld, - LLVMValueRef coeff, - int step) -{ - LLVMValueRef factor; - - switch(step) { - case 0: - return bld->base.zero; - case 1: - return coeff; - case 2: - return lp_build_add(&bld->base, coeff, coeff); - default: - factor = lp_build_const_scalar(bld->base.type, (double)step); - return lp_build_mul(&bld->base, coeff, factor); - } -} - - /** * Multiply the dadx and dady with the xstep and ystep respectively. */ @@ -149,8 +123,8 @@ coeffs_update(struct lp_build_interp_soa_context *bld) if (mode != TGSI_INTERPOLATE_CONSTANT) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { if(mask & (1 << chan)) { - bld->dadx[attrib][chan] = coeff_multiply(bld, bld->dadx[attrib][chan], bld->xstep); - bld->dady[attrib][chan] = coeff_multiply(bld, bld->dady[attrib][chan], bld->ystep); + bld->dadx[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dadx[attrib][chan], bld->xstep); + bld->dady[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dady[attrib][chan], bld->ystep); } } } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index f7a030fb8c..42e4ee6986 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -309,7 +309,7 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, LLVMBuilderRef builder = bld->builder; struct lp_build_context i32, h16, u8n; LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; - LLVMValueRef f32_c256, i32_c8, i32_c128, i32_c255; + LLVMValueRef i32_c8, i32_c128, i32_c255; LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; LLVMValueRef x0, x1; @@ -328,9 +328,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, h16_vec_type = lp_build_vec_type(h16.type); u8n_vec_type = lp_build_vec_type(u8n.type); - f32_c256 = lp_build_const_scalar(bld->coord_type, 256.0); - s = lp_build_mul(&bld->coord_bld, s, f32_c256); - t = lp_build_mul(&bld->coord_bld, t, f32_c256); + s = lp_build_mul_imm(&bld->coord_bld, s, 256); + t = lp_build_mul_imm(&bld->coord_bld, t, 256); s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); t = LLVMBuildFPToSI(builder, t, i32_vec_type, ""); -- cgit v1.2.3 From 5fcb75758c50bd10e8bd730e55bcbf73614eeb60 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 25 Oct 2009 11:49:01 +0000 Subject: llvmpipe: Dump the sampler state of the shader key. --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 7728ba6076..530a2d448c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -400,6 +400,7 @@ generate_fragment(struct llvmpipe_context *lp, #ifdef DEBUG tgsi_dump(shader->base.tokens, 0); if(key->depth.enabled) { + debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format)); debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); debug_printf("depth.writemask = %u\n", key->depth.writemask); } @@ -419,6 +420,23 @@ generate_fragment(struct llvmpipe_context *lp, debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); } debug_printf("blend.colormask = 0x%x\n", key->blend.colormask); + for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) { + if(key->sampler[i].format) { + debug_printf("sampler[%u] = \n", i); + debug_printf(" .format = %s\n", pf_name(key->sampler[i].format)); + debug_printf(" .target = %u\n", key->sampler[i].target); + debug_printf(" .pot = %u%u%u\n", key->sampler[i].pot_width, key->sampler[i].pot_height, key->sampler[i].pot_depth); + debug_printf(" .wrap = %u %u %u\n", key->sampler[i].wrap_s, key->sampler[i].wrap_t, key->sampler[i].wrap_r); + debug_printf(" .min_img_filter = %u\n", key->sampler[i].min_img_filter); + debug_printf(" .min_mip_filter = %u\n", key->sampler[i].min_mip_filter); + debug_printf(" .mag_img_filter = %u\n", key->sampler[i].mag_img_filter); + if(key->sampler[i].compare_mode) + debug_printf(" .compare_mode = %s\n", debug_dump_blend_func(key->sampler[i].compare_func, TRUE)); + debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); + debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter); + } + } + #endif variant = CALLOC_STRUCT(lp_fragment_shader_variant); -- cgit v1.2.3 From 88e08d7c6de89279c737dbf5139492b39f96dc43 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 25 Oct 2009 12:27:14 +0000 Subject: llvmpipe: Human friendlier sampler state dump. --- src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 4 +++- src/gallium/drivers/llvmpipe/lp_state_fs.c | 27 +++++++++++++++++------- 2 files changed, 22 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 42e4ee6986..47b68b71e2 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -35,6 +35,7 @@ #include "pipe/p_defines.h" #include "pipe/p_state.h" #include "util/u_debug.h" +#include "util/u_debug_dump.h" #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_format.h" @@ -171,7 +172,8 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld, case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: /* FIXME */ - _debug_printf("warning: failed to translate texture wrap mode %u\n", wrap_mode); + _debug_printf("warning: failed to translate texture wrap mode %s\n", + debug_dump_tex_wrap(wrap_mode, TRUE)); coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); coord = lp_build_min(int_coord_bld, coord, length_minus_one); break; diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 530a2d448c..2e9aa9fffe 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -423,15 +423,26 @@ generate_fragment(struct llvmpipe_context *lp, for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) { if(key->sampler[i].format) { debug_printf("sampler[%u] = \n", i); - debug_printf(" .format = %s\n", pf_name(key->sampler[i].format)); - debug_printf(" .target = %u\n", key->sampler[i].target); - debug_printf(" .pot = %u%u%u\n", key->sampler[i].pot_width, key->sampler[i].pot_height, key->sampler[i].pot_depth); - debug_printf(" .wrap = %u %u %u\n", key->sampler[i].wrap_s, key->sampler[i].wrap_t, key->sampler[i].wrap_r); - debug_printf(" .min_img_filter = %u\n", key->sampler[i].min_img_filter); - debug_printf(" .min_mip_filter = %u\n", key->sampler[i].min_mip_filter); - debug_printf(" .mag_img_filter = %u\n", key->sampler[i].mag_img_filter); + debug_printf(" .format = %s\n", + pf_name(key->sampler[i].format)); + debug_printf(" .target = %s\n", + debug_dump_tex_target(key->sampler[i].target, TRUE)); + debug_printf(" .pot = %u %u %u\n", + key->sampler[i].pot_width, + key->sampler[i].pot_height, + key->sampler[i].pot_depth); + debug_printf(" .wrap = %s %s %s\n", + debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), + debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), + debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); + debug_printf(" .min_img_filter = %s\n", + debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); + debug_printf(" .min_mip_filter = %s\n", + debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); + debug_printf(" .mag_img_filter = %s\n", + debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); if(key->sampler[i].compare_mode) - debug_printf(" .compare_mode = %s\n", debug_dump_blend_func(key->sampler[i].compare_func, TRUE)); + debug_printf(" .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter); } -- cgit v1.2.3 From 827002f5ff990f8676385583275d6b8090abfb7a Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 26 Oct 2009 01:46:21 +0100 Subject: r300g: add cubemap support Also, set a pitch for rectangles only. --- src/gallium/drivers/r300/r300_screen.c | 6 +--- src/gallium/drivers/r300/r300_texture.c | 55 +++++++++++++++++++++------------ 2 files changed, 37 insertions(+), 24 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index f581f0ca09..6eaf35bd4b 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -277,14 +277,10 @@ static boolean r300_is_format_supported(struct pipe_screen* pscreen, case PIPE_TEXTURE_1D: /* handle 1D textures as 2D ones */ case PIPE_TEXTURE_2D: case PIPE_TEXTURE_3D: + case PIPE_TEXTURE_CUBE: return check_tex_format(format, tex_usage, r300_screen(pscreen)->caps->is_r500); - case PIPE_TEXTURE_CUBE: - debug_printf("r300: Implementation error: Unsupported format " - "target: %d\n", target); - break; - default: debug_printf("r300: Fatal: This is not a format target: %d\n", target); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 762806822c..2a33393a8c 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -27,18 +27,31 @@ #include "r300_context.h" #include "r300_texture.h" +#include "r300_screen.h" -static void r300_setup_texture_state(struct r300_texture* tex) +static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) { struct r300_texture_state* state = &tex->state; struct pipe_texture *pt = &tex->tex; + unsigned stride; state->format0 = R300_TX_WIDTH((pt->width[0] - 1) & 0x7ff) | - R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff) | - R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) | - R300_TX_NUM_LEVELS(pt->last_level & 0xf);/* | - R300_TX_PITCH_EN;*/ - /* XXX TX_PITCH_EN breaks rendering mipmap levels > 0, weard */ + R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff); + + if (!util_is_power_of_two(pt->width[0]) || + !util_is_power_of_two(pt->height[0])) { + + /* rectangles love this */ + state->format0 |= R300_TX_PITCH_EN; + + stride = r300_texture_get_stride(tex, 0) / pt->block.size; + state->format2 = (stride - 1) & 0x1fff; + } + else { + /* power of two textures (3D, mipmaps, and no pitch) */ + state->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) | + R300_TX_NUM_LEVELS(pt->last_level & 0xf); + } /* XXX */ state->format1 = r300_translate_texformat(pt->format); @@ -49,17 +62,17 @@ static void r300_setup_texture_state(struct r300_texture* tex) state->format1 |= R300_TX_FORMAT_3D; } - state->format2 = ((r300_texture_get_stride(tex, 0) / pt->block.size) - 1) - & 0x1fff; - - /* Don't worry about accidentally setting this bit on non-r500; - * the kernel should catch it. */ - if (pt->width[0] > 2048) { - state->format2 |= R500_TXWIDTH_BIT11; - } - if (pt->height[0] > 2048) { - state->format2 |= R500_TXHEIGHT_BIT11; + /* large textures on r500 */ + if (is_r500) + { + if (pt->width[0] > 2048) { + state->format2 |= R500_TXWIDTH_BIT11; + } + if (pt->height[0] > 2048) { + state->format2 |= R500_TXHEIGHT_BIT11; + } } + assert(is_r500 || (pt->width[0] <= 2048 && pt->height[0] <= 2048)); debug_printf("r300: Set texture state (%dx%d, %d levels)\n", pt->width[0], pt->height[0], pt->last_level); @@ -121,7 +134,11 @@ static void r300_setup_miptree(struct r300_texture* tex) stride = r300_texture_get_stride(tex, i); layer_size = stride * base->nblocksy[i]; - size = layer_size * base->depth[i]; + + if (base->target == PIPE_TEXTURE_CUBE) + size = layer_size * 6; + else + size = layer_size * base->depth[i]; tex->offset[i] = align(tex->size, 32); tex->size = tex->offset[i] + size; @@ -151,7 +168,7 @@ static struct pipe_texture* r300_setup_miptree(tex); - r300_setup_texture_state(tex); + r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); tex->buffer = screen->buffer_create(screen, 1024, PIPE_BUFFER_USAGE_PIXEL, @@ -229,7 +246,7 @@ static struct pipe_texture* tex->stride_override = *stride; - r300_setup_texture_state(tex); + r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); pipe_buffer_reference(&tex->buffer, buffer); -- cgit v1.2.3 From 5241b9568c1f97eb9aca8be5eb7a3ef659d9917f Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Mon, 26 Oct 2009 01:47:55 +0100 Subject: r300g: read occlusion query results from both Z pipes on RV530 --- src/gallium/drivers/r300/r300_query.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 007f11efae..ca00b043c5 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -113,7 +113,7 @@ static boolean r300_get_query_result(struct pipe_context* pipe, unsigned flags = PIPE_BUFFER_USAGE_CPU_READ; uint32_t* map; uint32_t temp = 0; - unsigned i; + unsigned i, num_results; if (q->flushed == FALSE) pipe->flush(pipe, 0, NULL); @@ -125,7 +125,13 @@ static boolean r300_get_query_result(struct pipe_context* pipe, if (!map) return FALSE; map += q->offset / 4; - for (i = 0; i < r300screen->caps->num_frag_pipes; i++) { + + if (r300screen->caps->family == CHIP_FAMILY_RV530) + num_results = r300screen->caps->num_z_pipes; + else + num_results = r300screen->caps->num_frag_pipes; + + for (i = 0; i < num_results; i++) { if (*map == ~0U) { /* Looks like our results aren't ready yet. */ if (wait) { -- cgit v1.2.3 From 7d967b9b7c08aea2a471c5bf6aced8bfafdae874 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 28 Oct 2009 00:30:45 +0100 Subject: nv50: activate more lanes in a warp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some cards have crippling defaults set and use only 4 of 32 lanes. This should activate 16 on these. Those that allow 32 by default should still do so. Found out by Marcin Kościelnicki. --- src/gallium/drivers/nv50/nv50_screen.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index c672ea471a..c8d0f1e4d8 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -309,6 +309,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_method(so, screen->tesla, 0x121c, 1); so_data (so, 1); + /* try to activate all/more lanes (threads) in a warp */ + so_method(so, screen->tesla, 0x1400, 1); + so_data (so, 0xf); + so_method(so, screen->tesla, 0x13bc, 1); so_data (so, 0x54); /* origin is top left (set to 1 for bottom left) */ -- cgit v1.2.3 From 095e66f695ce1d869a824d9e22f63b54c95ca0ac Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 27 Oct 2009 20:09:53 +0000 Subject: llvmpipe: Implement round() for MSVC. --- src/gallium/drivers/llvmpipe/lp_test_main.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index d4767ff52b..82fada5a35 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -40,6 +40,18 @@ #include "lp_test.h" +#ifdef PIPE_CC_MSVC +static INLINE double +round(double x) +{ + if (x >= 0.0) + return floor(x + 0.5); + else + return ceil(x - 0.5); +} +#endif + + void dump_type(FILE *fp, struct lp_type type) -- cgit v1.2.3 From 182ff3e47a2d18917cdf3344c2ce95bd0a460784 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 28 Oct 2009 11:05:32 +0000 Subject: llvmpipe: Make sure the JIT engine and X86 target are linked on MSVC build. Basically mimic the llvm 2.6 way of linking execution engines and targets. --- src/gallium/drivers/llvmpipe/Makefile | 3 ++ src/gallium/drivers/llvmpipe/SConscript | 1 + src/gallium/drivers/llvmpipe/lp_bld_misc.cpp | 62 ++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_bld_misc.h | 50 ++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_jit.c | 3 +- src/gallium/drivers/llvmpipe/lp_test_main.c | 3 +- 6 files changed, 118 insertions(+), 4 deletions(-) create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_misc.cpp create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_misc.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 96c014e592..cdf318844c 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -57,6 +57,9 @@ C_SOURCES = \ lp_tile_cache.c \ lp_tile_soa.c +CPP_SOURCES = \ + lp_bld_misc.cpp + include ../../Makefile.template lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_access.py ../../auxiliary/util/u_format.csv diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 52983039fd..f4410f8201 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -34,6 +34,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_format_soa.c', 'lp_bld_interp.c', 'lp_bld_intr.c', + 'lp_bld_misc.cpp', 'lp_bld_pack.c', 'lp_bld_sample.c', 'lp_bld_sample_soa.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp new file mode 100644 index 0000000000..c9acaf1f16 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp @@ -0,0 +1,62 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "llvm/Config/config.h" + +#include "pipe/p_config.h" + +#include "lp_bld_misc.h" + + +#ifndef LLVM_NATIVE_ARCH + +namespace llvm { + extern void LinkInJIT(); +} + + +void +LLVMLinkInJIT(void) +{ + llvm::LinkInJIT(); +} + + +extern "C" int X86TargetMachineModule; + + +void +LLVMInitializeNativeTarget(void) +{ +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + X86TargetMachineModule = 1; +#endif +} + + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.h b/src/gallium/drivers/llvmpipe/lp_bld_misc.h new file mode 100644 index 0000000000..51a84c5e25 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.h @@ -0,0 +1,50 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef LP_BLD_MISC_H +#define LP_BLD_MISC_H + + +#ifdef __cplusplus +extern "C" { +#endif + + +void +LLVMLinkInJIT(void); + +void +LLVMInitializeNativeTarget(void); + + +#ifdef __cplusplus +} +#endif + + +#endif /* !LP_BLD_MISC_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 1126bf90b9..13535dd638 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -39,6 +39,7 @@ #include "util/u_cpu_detect.h" #include "lp_screen.h" #include "lp_bld_intr.h" +#include "lp_bld_misc.h" #include "lp_jit.h" @@ -156,10 +157,8 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) util_cpu_caps.has_sse4_1 = 0; #endif -#ifdef LLVM_NATIVE_ARCH LLVMLinkInJIT(); LLVMInitializeNativeTarget(); -#endif screen->module = LLVMModuleCreateWithName("llvmpipe"); diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index 82fada5a35..314544aa9a 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -37,6 +37,7 @@ #include "util/u_cpu_detect.h" #include "lp_bld_const.h" +#include "lp_bld_misc.h" #include "lp_test.h" @@ -379,10 +380,8 @@ int main(int argc, char **argv) n = atoi(argv[i]); } -#ifdef LLVM_NATIVE_ARCH LLVMLinkInJIT(); LLVMInitializeNativeTarget(); -#endif util_cpu_detect(); -- cgit v1.2.3 From 0e44884aada4e4bd6384245d9ae065da5aca7f3d Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 28 Oct 2009 02:19:52 +0100 Subject: r300g: fix blending and do some optimizations Signed-off-by: Corbin Simpson --- src/gallium/drivers/r300/r300_emit.c | 1 + src/gallium/drivers/r300/r300_state.c | 101 ++++++++++++++++++---------------- 2 files changed, 54 insertions(+), 48 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 258c38fefd..096165a292 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -31,6 +31,7 @@ #include "r300_screen.h" #include "r300_state_derived.h" #include "r300_state_inlines.h" +#include "r300_texture.h" #include "r300_vs.h" void r300_emit_blend_state(struct r300_context* r300, diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index a3e1bc621a..5d28837ef7 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -49,46 +49,47 @@ static void* r300_create_blend_state(struct pipe_context* pipe, { struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state); + if (state->blend_enable) { - unsigned eqRGB = state->rgb_func; - unsigned srcRGB = state->rgb_src_factor; - unsigned dstRGB = state->rgb_dst_factor; - - unsigned eqA = state->alpha_func; - unsigned srcA = state->alpha_src_factor; - unsigned dstA = state->alpha_dst_factor; - - if (srcA != srcRGB || - dstA != dstRGB || - eqA != eqRGB) { - blend->alpha_blend_control = - r300_translate_blend_function(eqA) | - (r300_translate_blend_factor(srcA) << - R300_SRC_BLEND_SHIFT) | - (r300_translate_blend_factor(dstA) << - R300_DST_BLEND_SHIFT); - blend->blend_control |= R300_ALPHA_BLEND_ENABLE | - R300_SEPARATE_ALPHA_ENABLE; - } else { - blend->alpha_blend_control = R300_COMB_FCN_ADD_CLAMP | - (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | - (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT); - } - } - if (state->blend_enable) { - /* XXX for now, always do separate alpha... - * is it faster to do it with one reg? */ - blend->blend_control |= R300_READ_ENABLE | - r300_translate_blend_function(state->rgb_func) | - (r300_translate_blend_factor(state->rgb_src_factor) << - R300_SRC_BLEND_SHIFT) | - (r300_translate_blend_factor(state->rgb_dst_factor) << - R300_DST_BLEND_SHIFT); - } else { - blend->blend_control = - R300_COMB_FCN_ADD_CLAMP | - (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | - (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT); + unsigned eqRGB = state->rgb_func; + unsigned srcRGB = state->rgb_src_factor; + unsigned dstRGB = state->rgb_dst_factor; + + unsigned eqA = state->alpha_func; + unsigned srcA = state->alpha_src_factor; + unsigned dstA = state->alpha_dst_factor; + + /* despite the name, ALPHA_BLEND_ENABLE has nothing to do with alpha, + * this is just the crappy D3D naming */ + blend->blend_control = R300_ALPHA_BLEND_ENABLE | + r300_translate_blend_function(eqRGB) | + ( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) | + ( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT); + + /* optimization: some operations do not require the destination color */ + if (eqRGB == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MIN || + eqRGB == PIPE_BLEND_MAX || eqA == PIPE_BLEND_MAX || + dstRGB != PIPE_BLENDFACTOR_ZERO || + dstA != PIPE_BLENDFACTOR_ZERO || + srcRGB == PIPE_BLENDFACTOR_DST_COLOR || + srcRGB == PIPE_BLENDFACTOR_DST_ALPHA || + srcRGB == PIPE_BLENDFACTOR_INV_DST_COLOR || + srcRGB == PIPE_BLENDFACTOR_INV_DST_ALPHA || + srcA == PIPE_BLENDFACTOR_DST_ALPHA || + srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA) + blend->blend_control |= R300_READ_ENABLE; + + /* XXX implement the optimization with DISCARD_SRC_PIXELS*/ + /* XXX implement the optimization with SRC_ALPHA_?_NO_READ */ + + /* separate alpha */ + if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { + blend->blend_control |= R300_SEPARATE_ALPHA_ENABLE; + blend->alpha_blend_control = + r300_translate_blend_function(eqA) | + (r300_translate_blend_factor(srcA) << R300_SRC_BLEND_SHIFT) | + (r300_translate_blend_factor(dstA) << R300_DST_BLEND_SHIFT); + } } /* PIPE_LOGICOP_* don't need to be translated, fortunately. */ @@ -122,25 +123,29 @@ static void r300_delete_blend_state(struct pipe_context* pipe, FREE(state); } +/* Convert float to 10bit integer */ +static unsigned float_to_fixed10(float f) +{ + return CLAMP((unsigned)(f * 1023.9f), 0, 1023); +} + /* Set blend color. * Setup both R300 and R500 registers, figure out later which one to write. */ static void r300_set_blend_color(struct pipe_context* pipe, const struct pipe_blend_color* color) { struct r300_context* r300 = r300_context(pipe); - ubyte ur, ug, ub, ua; - - ur = float_to_ubyte(color->color[0]); - ug = float_to_ubyte(color->color[1]); - ub = float_to_ubyte(color->color[2]); - ua = float_to_ubyte(color->color[3]); util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM, &r300->blend_color_state->blend_color); - /* XXX this is wrong */ - r300->blend_color_state->blend_color_red_alpha = ur | (ua << 16); - r300->blend_color_state->blend_color_green_blue = ub | (ug << 16); + /* XXX if FP16 blending is enabled, we should use the FP16 format */ + r300->blend_color_state->blend_color_red_alpha = + float_to_fixed10(color->color[0]) | + (float_to_fixed10(color->color[3]) << 16); + r300->blend_color_state->blend_color_green_blue = + float_to_fixed10(color->color[2]) | + (float_to_fixed10(color->color[1]) << 16); r300->dirty_state |= R300_NEW_BLEND_COLOR; } -- cgit v1.2.3 From a1d726aae8fcacfa1eb1d76ce9c46adaafeaf4a4 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 28 Oct 2009 02:21:49 +0100 Subject: r300g: fix the WRAP_T mode when using 1D textures Signed-off-by: Corbin Simpson --- src/gallium/drivers/r300/r300_emit.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 096165a292..8bfa2932c9 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -532,10 +532,17 @@ void r300_emit_texture(struct r300_context* r300, struct r300_texture* tex, unsigned offset) { + uint32_t filter0 = sampler->filter0; CS_LOCALS(r300); + /* to emulate 1D textures through 2D ones correctly */ + if (tex->tex.height[0] == 1) { + filter0 &= ~R300_TX_WRAP_T_MASK; + filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); + } + BEGIN_CS(16); - OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), sampler->filter0 | + OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), filter0 | (offset << 28)); OUT_CS_REG(R300_TX_FILTER1_0 + (offset * 4), sampler->filter1); OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (offset * 4), sampler->border_color); -- cgit v1.2.3 From bcfde429139476c2d04baddaf671651cfc860145 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 28 Oct 2009 02:43:51 +0100 Subject: r300g: fix emitting the stencil-ref and alpha-ref values Signed-off-by: Corbin Simpson DSA really needs its head examined someday. ~ C. --- src/gallium/drivers/r300/r300_emit.c | 16 ++++++++++------ src/gallium/drivers/r300/r300_reg.h | 2 ++ src/gallium/drivers/r300/r300_state.c | 24 +++++++++++++++++++----- 3 files changed, 31 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 8bfa2932c9..2a8e4a9f41 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -102,19 +102,23 @@ void r300_emit_dsa_state(struct r300_context* r300, struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); - BEGIN_CS(r300screen->caps->is_r500 ? 8 : 8); + BEGIN_CS(r300screen->caps->is_r500 ? 10 : 8); OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); - /* XXX figure out the r300 counterpart for this */ - if (r300screen->caps->is_r500) { - /* OUT_CS_REG(R500_FG_ALPHA_VALUE, dsa->alpha_reference); */ - } + + /* not needed since we use the 8bit alpha ref */ + /*if (r300screen->caps->is_r500) { + OUT_CS_REG(R500_FG_ALPHA_VALUE, dsa->alpha_reference); + }*/ + OUT_CS_REG_SEQ(R300_ZB_CNTL, 3); OUT_CS(dsa->z_buffer_control); OUT_CS(dsa->z_stencil_control); OUT_CS(dsa->stencil_ref_mask); OUT_CS_REG(R300_ZB_ZTOP, r300->ztop_state.z_buffer_top); + + /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */ if (r300screen->caps->is_r500) { - /* OUT_CS_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf); */ + OUT_CS_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf); } END_CS; } diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index e920b2a5e7..babc3c709e 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -2416,6 +2416,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_Z_WRITE_ENABLE (1 << 2) # define R300_Z_SIGNED_COMPARE (1 << 3) # define R300_STENCIL_FRONT_BACK (1 << 4) +# define R500_STENCIL_ZSIGNED_MAGNITUDE (1 << 5) +# define R500_STENCIL_REFMASK_FRONT_BACK (1 << 6) #define R300_ZB_ZSTENCILCNTL 0x4f04 /* functions */ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 5d28837ef7..5db8c69dec 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -198,6 +198,8 @@ static void* r300_create_dsa_state(struct pipe_context* pipe, const struct pipe_depth_stencil_alpha_state* state) { + struct r300_capabilities *caps = + r300_screen(r300_context(pipe)->context.screen)->caps; struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state); /* Depth test setup. */ @@ -242,9 +244,16 @@ static void* (r300_translate_stencil_op(state->stencil[1].zfail_op) << R300_S_BACK_ZFAIL_OP_SHIFT); - dsa->stencil_ref_bf = (state->stencil[1].ref_value) | - (state->stencil[1].valuemask << R300_STENCILMASK_SHIFT) | - (state->stencil[1].writemask << R300_STENCILWRITEMASK_SHIFT); + /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */ + if (caps->is_r500) + { + dsa->z_buffer_control |= R500_STENCIL_REFMASK_FRONT_BACK; + dsa->stencil_ref_bf = (state->stencil[1].ref_value) | + (state->stencil[1].valuemask << + R300_STENCILMASK_SHIFT) | + (state->stencil[1].writemask << + R300_STENCILWRITEMASK_SHIFT); + } } } @@ -253,8 +262,13 @@ static void* dsa->alpha_function = r300_translate_alpha_function(state->alpha.func) | R300_FG_ALPHA_FUNC_ENABLE; - dsa->alpha_reference = CLAMP(state->alpha.ref_value * 1023.0f, - 0, 1023); + + /* XXX figure out why emitting 10bit alpha ref causes CS to dump */ + /* always use 8bit alpha ref */ + dsa->alpha_function |= float_to_ubyte(state->alpha.ref_value); + + if (caps->is_r500) + dsa->alpha_function |= R500_FG_ALPHA_FUNC_8BIT; } return (void*)dsa; -- cgit v1.2.3 From 81c51bb67f97c60e21a5e7cf87e154bb46ee481b Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 28 Oct 2009 10:02:23 -0700 Subject: r300g: Fix XXX. Nothing strange here. --- src/gallium/drivers/r300/r300_texture.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 2a33393a8c..3e90fea6c8 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -53,7 +53,6 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) R300_TX_NUM_LEVELS(pt->last_level & 0xf); } - /* XXX */ state->format1 = r300_translate_texformat(pt->format); if (pt->target == PIPE_TEXTURE_CUBE) { state->format1 |= R300_TX_FORMAT_CUBIC_MAP; -- cgit v1.2.3 From 6007e2e0085d9131b22dc8a98d7500a66a0e4c97 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 28 Oct 2009 11:47:24 -0700 Subject: r300g: Clear up a bit of the buffer reference stuff. Still need to actually get reference info from winsys somehow. Doing added buffers is easy, but knowing whether a flush has happened is a bit tricky. --- src/gallium/drivers/r300/r300_context.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index c34fbb1123..e45564b54e 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -76,26 +76,23 @@ static void r300_destroy_context(struct pipe_context* context) } static unsigned int -r300_is_texture_referenced( struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level) +r300_is_texture_referenced(struct pipe_context *pipe, + struct pipe_texture *texture, + unsigned face, unsigned level) { - /** - * FIXME: Optimize. - */ + struct pipe_buffer* buf; - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + r300_get_texture_buffer(texture, &buf, NULL); + + return pipe->is_buffer_referenced(pipe, buf); } static unsigned int -r300_is_buffer_referenced( struct pipe_context *pipe, - struct pipe_buffer *buf) +r300_is_buffer_referenced(struct pipe_context *pipe, + struct pipe_buffer *buf) { - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + /* XXX */ + return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; } static void r300_flush_cb(void *data) -- cgit v1.2.3 From e7c8a2763855c35af1d141b67551b364e6579051 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sun, 18 Oct 2009 18:06:51 +0200 Subject: r300g: add some texture formats --- src/gallium/drivers/r300/r300_screen.c | 2 ++ src/gallium/drivers/r300/r300_texture.h | 7 +++++++ 2 files changed, 9 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 6eaf35bd4b..6efa17cbaf 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -197,6 +197,8 @@ static boolean check_tex_format(enum pipe_format format, uint32_t usage, case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: case PIPE_FORMAT_YCBCR: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8L8_UNORM: retval = usage & PIPE_TEXTURE_USAGE_SAMPLER; break; diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 35e06a9acb..a18e0cbe1a 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -85,6 +85,13 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_Z24X8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, W24_FP); + + case PIPE_FORMAT_A8L8_UNORM: + return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8); + + case PIPE_FORMAT_L8_UNORM: + return R300_EASY_TX_FORMAT(X, X, X, ONE, X8); + default: debug_printf("r300: Implementation error: " "Got unsupported texture format %s in %s\n", -- cgit v1.2.3 From 23d8d15bedb7178bedde9b994be3925a160c193d Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 28 Oct 2009 11:58:13 -0700 Subject: r300g: Keep texture formats organized. --- src/gallium/drivers/r300/r300_texture.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index a18e0cbe1a..55ceb1a513 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -43,6 +43,8 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) /* X8 */ case PIPE_FORMAT_I8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X8); + case PIPE_FORMAT_L8_UNORM: + return R300_EASY_TX_FORMAT(X, X, X, ONE, X8); /* X16 */ case PIPE_FORMAT_R16_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X16); @@ -51,6 +53,9 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) R300_TX_FORMAT_SIGNED; case PIPE_FORMAT_Z16_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X16); + /* Y8X8 */ + case PIPE_FORMAT_A8L8_UNORM: + return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8); /* W8Z8Y8X8 */ case PIPE_FORMAT_A8R8G8B8_UNORM: return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); @@ -86,12 +91,6 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) case PIPE_FORMAT_Z24X8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, W24_FP); - case PIPE_FORMAT_A8L8_UNORM: - return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8); - - case PIPE_FORMAT_L8_UNORM: - return R300_EASY_TX_FORMAT(X, X, X, ONE, X8); - default: debug_printf("r300: Implementation error: " "Got unsupported texture format %s in %s\n", -- cgit v1.2.3 From f3d8d534e6f1d102d71338d58fbaa98c382f1858 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 28 Oct 2009 12:11:52 -0700 Subject: r300g: Use u_trim_pipe_prim to prevent lockups from incorrect vert counts. Adapted from osiris' version on his tree. --- src/gallium/drivers/r300/r300_render.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 6f392402bd..c36350d29e 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -26,6 +26,7 @@ #include "pipe/p_inlines.h" #include "util/u_memory.h" +#include "util/u_prim.h" #include "r300_cs.h" #include "r300_context.h" @@ -86,6 +87,10 @@ boolean r300_draw_range_elements(struct pipe_context* pipe, CS_LOCALS(r300); + if (!u_trim_pipe_prim(mode, &count)) { + return FALSE; + } + validate: for (i = 0; i < aos_count; i++) { if (!r300->winsys->add_buffer(r300->winsys, aos[i].buffer, @@ -191,6 +196,10 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); int i; + if (!u_trim_pipe_prim(mode, &count)) { + return FALSE; + } + for (i = 0; i < r300->vertex_buffer_count; i++) { void* buf = pipe_buffer_map(pipe->screen, r300->vertex_buffers[i].buffer, -- cgit v1.2.3 From ca9c413647bf9efb5ed770e3a655bc758075aec7 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 30 Oct 2009 08:03:10 +0000 Subject: softpipe: Respect gl_rasterization_rules in primitive setup. --- src/gallium/drivers/softpipe/sp_setup.c | 40 +++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 00fb52a64f..615581b95f 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -87,6 +87,8 @@ struct setup_context { float oneoverarea; int facing; + float pixel_offset; + struct quad_header quad[MAX_QUADS]; struct quad_header *quad_ptrs[MAX_QUADS]; unsigned count; @@ -379,6 +381,16 @@ static boolean setup_sort_vertices( struct setup_context *setup, ((det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW)); + /* Prepare pixel offset for rasterisation: + * - pixel center (0.5, 0.5) for GL, or + * - assume (0.0, 0.0) for other APIs. + */ + if (setup->softpipe->rasterizer->gl_rasterization_rules) { + setup->pixel_offset = 0.5f; + } else { + setup->pixel_offset = 0.0f; + } + return TRUE; } @@ -427,7 +439,7 @@ static void tri_linear_coeff( struct setup_context *setup, /* calculate a0 as the value which would be sampled for the * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). + * pixel centers, in other words (pixel_offset, pixel_offset). * * this is neat but unfortunately not a good way to do things for * triangles with very large values of dadx or dady as it will @@ -438,8 +450,8 @@ static void tri_linear_coeff( struct setup_context *setup, * instead - i'll switch to this later. */ coef->a0[i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); /* debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", @@ -487,8 +499,8 @@ static void tri_persp_coeff( struct setup_context *setup, coef->dadx[i] = dadx; coef->dady[i] = dady; coef->a0[i] = (mina - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); } @@ -575,12 +587,12 @@ static void setup_tri_coefficients( struct setup_context *setup ) static void setup_tri_edges( struct setup_context *setup ) { - float vmin_x = setup->vmin[0][0] + 0.5f; - float vmid_x = setup->vmid[0][0] + 0.5f; + float vmin_x = setup->vmin[0][0] + setup->pixel_offset; + float vmid_x = setup->vmid[0][0] + setup->pixel_offset; - float vmin_y = setup->vmin[0][1] - 0.5f; - float vmid_y = setup->vmid[0][1] - 0.5f; - float vmax_y = setup->vmax[0][1] - 0.5f; + float vmin_y = setup->vmin[0][1] - setup->pixel_offset; + float vmid_y = setup->vmid[0][1] - setup->pixel_offset; + float vmax_y = setup->vmax[0][1] - setup->pixel_offset; setup->emaj.sy = ceilf(vmin_y); setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy); @@ -780,8 +792,8 @@ line_linear_coeff(const struct setup_context *setup, coef->dadx[i] = dadx; coef->dady[i] = dady; coef->a0[i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); } @@ -803,8 +815,8 @@ line_persp_coeff(const struct setup_context *setup, coef->dadx[i] = dadx; coef->dady[i] = dady; coef->a0[i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); } -- cgit v1.2.3 From ace78d90ded52d8fe4b3b077abf9a4db381dce16 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 29 Oct 2009 23:48:59 +0100 Subject: r300g: fix crash in r300_is_texture_referenced Also, a subtle fix in emitting a texture state --- src/gallium/drivers/r300/r300_context.c | 2 +- src/gallium/drivers/r300/r300_emit.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index e45564b54e..02f201b49a 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -80,7 +80,7 @@ r300_is_texture_referenced(struct pipe_context *pipe, struct pipe_texture *texture, unsigned face, unsigned level) { - struct pipe_buffer* buf; + struct pipe_buffer* buf = 0; r300_get_texture_buffer(texture, &buf, NULL); diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 2a8e4a9f41..3b0b41e486 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -540,7 +540,7 @@ void r300_emit_texture(struct r300_context* r300, CS_LOCALS(r300); /* to emulate 1D textures through 2D ones correctly */ - if (tex->tex.height[0] == 1) { + if (tex->tex.target == PIPE_TEXTURE_1D) { filter0 &= ~R300_TX_WRAP_T_MASK; filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); } -- cgit v1.2.3 From 11180b44717943d767b64f0b658f31b6c2594aa4 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 30 Oct 2009 13:08:37 +0100 Subject: r300g: remove unnecessary assertions Also, correct typos in comments. --- src/gallium/drivers/r300/r300_reg.h | 4 ++-- src/gallium/drivers/r300/r300_state.c | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index babc3c709e..1e4d3f5d70 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -862,10 +862,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_POINTSIZE_X_MASK 0xffff0000 # define R300_POINTSIZE_MAX (R300_POINTSIZE_Y_MASK / 6) -/* Blue fill color */ +/* Red fill color */ #define R500_GA_FILL_R 0x4220 -/* Blue fill color */ +/* Green fill color */ #define R500_GA_FILL_G 0x4224 /* Blue fill color */ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 5db8c69dec..1e7fabf683 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -608,17 +608,14 @@ static void r300_set_viewport_state(struct pipe_context* pipe, r300->viewport_state->vte_control = R300_VTX_W0_FMT; if (state->scale[0] != 1.0f) { - assert(state->scale[0] != 0.0f); r300->viewport_state->xscale = state->scale[0]; r300->viewport_state->vte_control |= R300_VPORT_X_SCALE_ENA; } if (state->scale[1] != 1.0f) { - assert(state->scale[1] != 0.0f); r300->viewport_state->yscale = state->scale[1]; r300->viewport_state->vte_control |= R300_VPORT_Y_SCALE_ENA; } if (state->scale[2] != 1.0f) { - assert(state->scale[2] != 0.0f); r300->viewport_state->zscale = state->scale[2]; r300->viewport_state->vte_control |= R300_VPORT_Z_SCALE_ENA; } -- cgit v1.2.3 From 63c9450ae776ff4207422442dd8c3d9d13a05e7a Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 30 Oct 2009 18:19:25 +0100 Subject: r300g: add precalculating of pixel pitch, add a new NPOT flag --- src/gallium/drivers/r300/r300_context.h | 8 ++++++++ src/gallium/drivers/r300/r300_emit.c | 16 ++++++++------- src/gallium/drivers/r300/r300_texture.c | 35 +++++++++++++++++++++++---------- 3 files changed, 42 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 4d73567bbe..cee0734d21 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -181,6 +181,9 @@ struct r300_texture { /* Offsets into the buffer. */ unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; + /* A pitch for each mip-level */ + unsigned pitch[PIPE_MAX_TEXTURE_LEVELS]; + /* Size of one zslice or face based on the texture target */ unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS]; @@ -197,6 +200,11 @@ struct r300_texture { /* Total size of this texture, in bytes. */ unsigned size; + /* Whether this texture has non-power-of-two dimensions. + * It can be either a regular texture or a rectangle one. + */ + boolean is_npot; + /* Pipe buffer backing this texture. */ struct pipe_buffer* buffer; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 3b0b41e486..be38fbc619 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -285,21 +285,22 @@ void r300_emit_fb_state(struct r300_context* r300, struct pipe_framebuffer_state* fb) { struct r300_texture* tex; - unsigned pixpitch; + struct pipe_surface* surf; int i; CS_LOCALS(r300); BEGIN_CS((10 * fb->nr_cbufs) + (fb->zsbuf ? 10 : 0) + 4); for (i = 0; i < fb->nr_cbufs; i++) { - tex = (struct r300_texture*)fb->cbufs[i]->texture; + surf = fb->cbufs[i]; + tex = (struct r300_texture*)surf->texture; assert(tex && tex->buffer && "cbuf is marked, but NULL!"); - pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size; + /* XXX I still need to figure out how to set the mipmap level here */ OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); - OUT_CS_RELOC(tex->buffer, pixpitch | + OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] | r300_translate_colorformat(tex->tex.format), 0, RADEON_GEM_DOMAIN_VRAM, 0); @@ -308,9 +309,9 @@ void r300_emit_fb_state(struct r300_context* r300, } if (fb->zsbuf) { - tex = (struct r300_texture*)fb->zsbuf->texture; + surf = fb->zsbuf; + tex = (struct r300_texture*)surf->texture; assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); - pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size; OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); @@ -318,7 +319,8 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG(R300_ZB_FORMAT, r300_translate_zsformat(tex->tex.format)); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(tex->buffer, pixpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level], 0, + RADEON_GEM_DOMAIN_VRAM, 0); } OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 3e90fea6c8..7199918a84 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -33,21 +33,15 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) { struct r300_texture_state* state = &tex->state; struct pipe_texture *pt = &tex->tex; - unsigned stride; state->format0 = R300_TX_WIDTH((pt->width[0] - 1) & 0x7ff) | R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff); - if (!util_is_power_of_two(pt->width[0]) || - !util_is_power_of_two(pt->height[0])) { - + if (tex->is_npot) { /* rectangles love this */ state->format0 |= R300_TX_PITCH_EN; - - stride = r300_texture_get_stride(tex, 0) / pt->block.size; - state->format2 = (stride - 1) & 0x1fff; - } - else { + state->format2 = (tex->pitch[0] - 1) & 0x1fff; + } else { /* power of two textures (3D, mipmaps, and no pitch) */ state->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) | R300_TX_NUM_LEVELS(pt->last_level & 0xf); @@ -143,6 +137,12 @@ static void r300_setup_miptree(struct r300_texture* tex) tex->size = tex->offset[i] + size; tex->layer_size[i] = layer_size; + if (tex->is_npot) { + tex->pitch[i] = stride / base->block.size; + } else { + tex->pitch[i] = base->width[i]; + } + debug_printf("r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes)\n", i, base->width[i], base->height[i], base->depth[i], @@ -150,6 +150,12 @@ static void r300_setup_miptree(struct r300_texture* tex) } } +static void r300_setup_flags(struct r300_texture* tex) +{ + tex->is_npot = !util_is_power_of_two(tex->tex.width[0]) || + !util_is_power_of_two(tex->tex.height[0]); +} + /* Create a new texture. */ static struct pipe_texture* r300_texture_create(struct pipe_screen* screen, @@ -165,8 +171,8 @@ static struct pipe_texture* pipe_reference_init(&tex->tex.reference, 1); tex->tex.screen = screen; + r300_setup_flags(tex); r300_setup_miptree(tex); - r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); tex->buffer = screen->buffer_create(screen, 1024, @@ -234,6 +240,13 @@ static struct pipe_texture* { struct r300_texture* tex; + /* Support only 2D textures without mipmaps */ + if (base->target != PIPE_TEXTURE_2D || + base->depth[0] != 1 || + base->last_level != 0) { + return NULL; + } + tex = CALLOC_STRUCT(r300_texture); if (!tex) { return NULL; @@ -244,7 +257,9 @@ static struct pipe_texture* tex->tex.screen = screen; tex->stride_override = *stride; + tex->pitch[0] = *stride / base->block.size; + r300_setup_flags(tex); r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); pipe_buffer_reference(&tex->buffer, buffer); -- cgit v1.2.3 From a8f85dceb5e721437ba30ec540cd0bf8ee454325 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 31 Oct 2009 05:34:46 +0100 Subject: r300g: fix reading from the destination buffer in blending --- src/gallium/drivers/r300/r300_state.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 1e7fabf683..3ac627e959 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -75,7 +75,9 @@ static void* r300_create_blend_state(struct pipe_context* pipe, srcRGB == PIPE_BLENDFACTOR_DST_ALPHA || srcRGB == PIPE_BLENDFACTOR_INV_DST_COLOR || srcRGB == PIPE_BLENDFACTOR_INV_DST_ALPHA || + srcA == PIPE_BLENDFACTOR_DST_COLOR || srcA == PIPE_BLENDFACTOR_DST_ALPHA || + srcA == PIPE_BLENDFACTOR_INV_DST_COLOR || srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA) blend->blend_control |= R300_READ_ENABLE; -- cgit v1.2.3 From 3f60130b87a4a75f1b7cb6e0b854001bbe8f7ec8 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 31 Oct 2009 05:38:25 +0100 Subject: r300g: pretend NPOT support It's requires to get GL2.1, therefore, much more piglit tests can be used for testing. Figure out later how to emulate this. --- src/gallium/drivers/r300/r300_screen.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 6efa17cbaf..390b63007e 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -84,7 +84,9 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) /* XXX I'm told this goes up to 16 */ return 8; case PIPE_CAP_NPOT_TEXTURES: - return 0; + /* XXX enable now to get GL2.1 API, + * figure out later how to emulate this */ + return 1; case PIPE_CAP_TWO_SIDED_STENCIL: if (r300screen->caps->is_r500) { return 1; -- cgit v1.2.3 From c9928ac3ee5dc0d10127388f9312779a6c59da7c Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 31 Oct 2009 07:23:00 +0100 Subject: r300g: correct the pitch calculation for smaller mipmaps --- src/gallium/drivers/r300/r300_emit.c | 2 +- src/gallium/drivers/r300/r300_texture.c | 7 +------ 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index be38fbc619..22cf9cac2a 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -305,7 +305,7 @@ void r300_emit_fb_state(struct r300_context* r300, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), - r300_translate_out_fmt(fb->cbufs[i]->format)); + r300_translate_out_fmt(surf->format)); } if (fb->zsbuf) { diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 7199918a84..aea25cf71d 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -136,12 +136,7 @@ static void r300_setup_miptree(struct r300_texture* tex) tex->offset[i] = align(tex->size, 32); tex->size = tex->offset[i] + size; tex->layer_size[i] = layer_size; - - if (tex->is_npot) { - tex->pitch[i] = stride / base->block.size; - } else { - tex->pitch[i] = base->width[i]; - } + tex->pitch[i] = stride / base->block.size; debug_printf("r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes)\n", -- cgit v1.2.3 From 525f529d138168386224136dc45abb858677bac7 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 31 Oct 2009 11:25:48 +0100 Subject: nv50: make MRTs work We have to indicate to the hw whether the FP exports multiple colour results. Method 0x121c is used to specify the number of RTs. Also deactivate zeta explicitly if there's no zsbuf. --- src/gallium/drivers/nv50/nv50_program.c | 4 ++++ src/gallium/drivers/nv50/nv50_state_validate.c | 11 +++++++++++ 2 files changed, 15 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 9ccc4f5a16..c3edc02cb5 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -2644,6 +2644,10 @@ nv50_program_tx_prep(struct nv50_pc *pc) pc->result[2].rhw = rid; p->cfg.high_result = rid; + + /* separate/different colour results for MRTs ? */ + if (pc->result_nr - (p->info.writes_z ? 1 : 0) > 1) + p->cfg.regs[2] |= 1; } if (pc->immd_nr) { diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 956a700615..a13d64b7fa 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -37,6 +37,14 @@ nv50_state_validate_fb(struct nv50_context *nv50) struct pipe_framebuffer_state *fb = &nv50->framebuffer; unsigned i, w, h, gw = 0; + /* Set nr of active RTs. Don't know what 0xfac6880 does, but + * at least 0x880 was required to draw to more than 1 RT. + * In some special cases, 0xfac6880 is not used, we probably + * don't hit any of these though. + */ + so_method(so, tesla, 0x121c, 1); + so_data (so, 0x0fac6880 | fb->nr_cbufs); + for (i = 0; i < fb->nr_cbufs; i++) { struct pipe_texture *pt = fb->cbufs[i]->texture; struct nouveau_bo *bo = nv50_miptree(pt)->base.bo; @@ -121,6 +129,9 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_data (so, fb->zsbuf->width); so_data (so, fb->zsbuf->height); so_data (so, 0x00010001); + } else { + so_method(so, tesla, 0x1538, 1); + so_data (so, 0); } so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2); -- cgit v1.2.3 From 9831e1f76cd020e1cde2b13e03149415319a8135 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 31 Oct 2009 13:38:22 +0100 Subject: nv50: use SIFC also for shader upload Adds a more generic SIFC transfer function. --- src/gallium/drivers/nv50/nv50_context.h | 11 +++- src/gallium/drivers/nv50/nv50_program.c | 79 +++++++++-------------------- src/gallium/drivers/nv50/nv50_transfer.c | 86 ++++++++++++++++++++++++++++++++ 3 files changed, 120 insertions(+), 56 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 33667e8765..890defb90c 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -196,7 +196,8 @@ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers, extern void nv50_vertprog_validate(struct nv50_context *nv50); extern void nv50_fragprog_validate(struct nv50_context *nv50); extern void nv50_linkage_validate(struct nv50_context *nv50); -extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p); +extern void nv50_program_destroy(struct nv50_context *nv50, + struct nv50_program *p); /* nv50_state_validate.c */ extern boolean nv50_state_validate(struct nv50_context *nv50); @@ -210,4 +211,12 @@ extern void nv50_so_init_sifc(struct nv50_context *nv50, /* nv50_tex.c */ extern void nv50_tex_validate(struct nv50_context *); +/* nv50_transfer.c */ +extern void +nv50_upload_sifc(struct nv50_context *nv50, + struct nouveau_bo *bo, unsigned dst_offset, unsigned reloc, + unsigned dst_format, int dst_w, int dst_h, int dst_pitch, + void *src, unsigned src_format, int src_pitch, + int x, int y, int w, int h, int cpp); + #endif diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index c3edc02cb5..faf638949f 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -2980,11 +2980,8 @@ static void nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) { struct nouveau_channel *chan = nv50->screen->base.channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_program_exec *e; - struct nouveau_stateobj *so; - const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR; - unsigned start, count, *up, *ptr; + uint32_t *up, i; boolean upload = FALSE; if (!p->bo) { @@ -2999,32 +2996,37 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) if (!upload) return; - for (e = p->exec_head; e; e = e->next) { + up = MALLOC(p->exec_size * 4); + + for (i = 0, e = p->exec_head; e; e = e->next) { unsigned ei, ci, bs; - if (e->param.index < 0) - continue; + if (e->param.index >= 0 && e->param.mask) { + bs = (e->inst[1] >> 22) & 0x07; + assert(bs < 2); + ei = e->param.shift >> 5; + ci = e->param.index; + if (bs == 0) + ci += p->data[bs]->start; - if (e->param.mask == 0) { + e->inst[ei] &= ~e->param.mask; + e->inst[ei] |= (ci << e->param.shift); + } else + if (e->param.index >= 0) { + /* zero mask means param is a jump/branch offset */ assert(!(e->param.index & 1)); /* seem to be 8 byte steps */ ei = (e->param.index >> 1) + 0 /* START_ID */; e->inst[0] &= 0xf0000fff; e->inst[0] |= ei << 12; - continue; } - bs = (e->inst[1] >> 22) & 0x07; - assert(bs < 2); - ei = e->param.shift >> 5; - ci = e->param.index; - if (bs == 0) - ci += p->data[bs]->start; - - e->inst[ei] &= ~e->param.mask; - e->inst[ei] |= (ci << e->param.shift); + up[i++] = e->inst[0]; + if (is_long(e)) + up[i++] = e->inst[1]; } + assert(i == p->exec_size); if (p->data[0]) p->data_start[0] = p->data[0]->start; @@ -3037,45 +3039,12 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) NOUVEAU_ERR("0x%08x\n", e->inst[1]); } #endif - - up = ptr = MALLOC(p->exec_size * 4); - for (e = p->exec_head; e; e = e->next) { - *(ptr++) = e->inst[0]; - if (is_long(e)) - *(ptr++) = e->inst[1]; - } - - so = so_new(4,2); - so_method(so, nv50->screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); - so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_LOW, 0, 0); - so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4)); - - start = 0; count = p->exec_size; - while (count) { - struct nouveau_channel *chan = nv50->screen->base.channel; - unsigned nr; - - so_emit(chan, so); - - nr = MIN2(count, 2047); - nr = MIN2(chan->pushbuf->remaining, nr); - if (chan->pushbuf->remaining < (nr + 3)) { - FIRE_RING(chan); - continue; - } - - BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1); - OUT_RING (chan, (start << 8) | NV50_CB_PUPLOAD); - BEGIN_RING(chan, tesla, NV50TCL_CB_DATA(0) | 0x40000000, nr); - OUT_RINGp (chan, up + start, nr); - - start += nr; - count -= nr; - } + nv50_upload_sifc(nv50, p->bo, 0, NOUVEAU_BO_VRAM, + NV50_2D_DST_FORMAT_R8_UNORM, 65536, 1, 262144, + up, NV50_2D_SIFC_FORMAT_R8_UNORM, 0, + 0, 0, p->exec_size * 4, 1, 1); FREE(up); - so_ref(NULL, &so); } void diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index 9c289026bb..f1eb672336 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -237,3 +237,89 @@ nv50_transfer_init_screen_functions(struct pipe_screen *pscreen) pscreen->transfer_map = nv50_transfer_map; pscreen->transfer_unmap = nv50_transfer_unmap; } + +void +nv50_upload_sifc(struct nv50_context *nv50, + struct nouveau_bo *bo, unsigned dst_offset, unsigned reloc, + unsigned dst_format, int dst_w, int dst_h, int dst_pitch, + void *src, unsigned src_format, int src_pitch, + int x, int y, int w, int h, int cpp) +{ + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nouveau_grobj *eng2d = nv50->screen->eng2d; + struct nouveau_grobj *tesla = nv50->screen->tesla; + unsigned line_dwords = (w * cpp + 3) / 4; + + reloc |= NOUVEAU_BO_WR; + + WAIT_RING (chan, 32); + + if (bo->tile_flags) { + BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5); + OUT_RING (chan, dst_format); + OUT_RING (chan, 0); + OUT_RING (chan, bo->tile_mode << 4); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + } else { + BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 2); + OUT_RING (chan, dst_format); + OUT_RING (chan, 1); + BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 1); + OUT_RING (chan, dst_pitch); + } + + BEGIN_RING(chan, eng2d, NV50_2D_DST_WIDTH, 4); + OUT_RING (chan, dst_w); + OUT_RING (chan, dst_h); + OUT_RELOCh(chan, bo, dst_offset, reloc); + OUT_RELOCl(chan, bo, dst_offset, reloc); + + /* NV50_2D_OPERATION_SRCCOPY assumed already set */ + + BEGIN_RING(chan, eng2d, NV50_2D_SIFC_UNK0800, 2); + OUT_RING (chan, 0); + OUT_RING (chan, src_format); + BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10); + OUT_RING (chan, w); + OUT_RING (chan, h); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, x); + OUT_RING (chan, 0); + OUT_RING (chan, y); + + while (h--) { + const uint32_t *p = src; + unsigned count = line_dwords; + + while (count) { + unsigned nr = MIN2(count, 1792); + + if (chan->pushbuf->remaining <= nr) { + FIRE_RING (chan); + + BEGIN_RING(chan, eng2d, + NV50_2D_DST_ADDRESS_HIGH, 2); + OUT_RELOCh(chan, bo, dst_offset, reloc); + OUT_RELOCl(chan, bo, dst_offset, reloc); + } + assert(chan->pushbuf->remaining > nr); + + BEGIN_RING(chan, eng2d, + NV50_2D_SIFC_DATA | (2 << 29), nr); + OUT_RINGp (chan, p, nr); + + p += nr; + count -= nr; + } + + src += src_pitch; + } + + BEGIN_RING(chan, tesla, 0x1440, 1); + OUT_RING (chan, 0); +} -- cgit v1.2.3 From 91232b7004d7a9fbf4f99bb9ec4e5eea8e1c6eef Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sat, 24 Oct 2009 17:36:48 -0400 Subject: nouveau: Support X8R8G8B8 textures on nv30, nv40 and RTs on nv10-nv40. --- src/gallium/drivers/nv04/nv04_surface_2d.c | 1 + src/gallium/drivers/nv10/nv10_state_emit.c | 3 +++ src/gallium/drivers/nv20/nv20_state_emit.c | 3 +++ src/gallium/drivers/nv30/nv30_fragtex.c | 1 + src/gallium/drivers/nv30/nv30_state_fb.c | 3 +++ src/gallium/drivers/nv40/nv40_fragtex.c | 1 + src/gallium/drivers/nv40/nv40_state_fb.c | 3 +++ 7 files changed, 15 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c index 8c7eb367e2..8be134b83d 100644 --- a/src/gallium/drivers/nv04/nv04_surface_2d.c +++ b/src/gallium/drivers/nv04/nv04_surface_2d.c @@ -42,6 +42,7 @@ nv04_rect_format(enum pipe_format format) case PIPE_FORMAT_A8L8_UNORM: case PIPE_FORMAT_Z16_UNORM: return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5; + case PIPE_FORMAT_X8R8G8B8_UNORM: case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_Z24X8_UNORM: diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c index d8691ef9c6..2577ab73b5 100644 --- a/src/gallium/drivers/nv10/nv10_state_emit.c +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -129,6 +129,9 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) rt_format = NV10TCL_RT_FORMAT_TYPE_LINEAR; switch (colour_format) { + case PIPE_FORMAT_X8R8G8B8_UNORM: + rt_format |= NV10TCL_RT_FORMAT_COLOR_X8R8G8B8; + break; case PIPE_FORMAT_A8R8G8B8_UNORM: case 0: rt_format |= NV10TCL_RT_FORMAT_COLOR_A8R8G8B8; diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c index 4042f46d05..0122b1c2cd 100644 --- a/src/gallium/drivers/nv20/nv20_state_emit.c +++ b/src/gallium/drivers/nv20/nv20_state_emit.c @@ -135,6 +135,9 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20) rt_format = NV20TCL_RT_FORMAT_TYPE_LINEAR | 0x20; switch (colour_format) { + case PIPE_FORMAT_X8R8G8B8_UNORM: + rt_format |= NV20TCL_RT_FORMAT_COLOR_X8R8G8B8; + break; case PIPE_FORMAT_A8R8G8B8_UNORM: case 0: rt_format |= NV20TCL_RT_FORMAT_COLOR_A8R8G8B8; diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index 3dd636f4ee..dca760cae6 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -21,6 +21,7 @@ struct nv30_texture_format { static struct nv30_texture_format nv30_texture_formats[] = { + _(X8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W), _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W), _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W), _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W), diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c index 4d6a67e56d..6f6d1740d6 100644 --- a/src/gallium/drivers/nv30/nv30_state_fb.c +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -66,6 +66,9 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) } switch (colour_format) { + case PIPE_FORMAT_X8R8G8B8_UNORM: + rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8; + break; case PIPE_FORMAT_A8R8G8B8_UNORM: case 0: rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index f6cdf31dfe..e2ec57564d 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -23,6 +23,7 @@ struct nv40_texture_format { static struct nv40_texture_format nv40_texture_formats[] = { + _(X8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c index c2f739157a..1c7a7cd64f 100644 --- a/src/gallium/drivers/nv40/nv40_state_fb.c +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -57,6 +57,9 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) rt_format = NV40TCL_RT_FORMAT_TYPE_LINEAR; switch (colour_format) { + case PIPE_FORMAT_X8R8G8B8_UNORM: + rt_format |= NV40TCL_RT_FORMAT_COLOR_X8R8G8B8; + break; case PIPE_FORMAT_A8R8G8B8_UNORM: case 0: rt_format |= NV40TCL_RT_FORMAT_COLOR_A8R8G8B8; -- cgit v1.2.3 From 1cc16e1b831cef8e1573cc998cee3e55179bb830 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 31 Oct 2009 20:46:59 +0100 Subject: nv50: fix textures with block size != cpp First, using width * block size as pitch is evidently wrong if a block contains more than 1 texel. For tiled textures, since a block occupies a contiguous area of memory, y addressing in m2mf has to be done by block index, not the y coordinate itself. This should fix compressed textures. --- src/gallium/drivers/nv50/nv50_miptree.c | 35 ++++++++++------------ src/gallium/drivers/nv50/nv50_transfer.c | 50 ++++++++++++++++++++------------ 2 files changed, 48 insertions(+), 37 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 93479a0314..229a59cb74 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -26,6 +26,16 @@ #include "nv50_context.h" +static INLINE uint32_t +get_tile_mode(unsigned ny) +{ + if (ny > 32) return 4; + if (ny > 16) return 3; + if (ny > 8) return 2; + if (ny > 4) return 1; + return 0; +} + static struct pipe_texture * nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) { @@ -34,7 +44,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) struct pipe_texture *pt = &mt->base.base; unsigned width = tmp->width[0], height = tmp->height[0]; unsigned depth = tmp->depth[0]; - uint32_t tile_mode, tile_flags, tile_h; + uint32_t tile_flags; int ret, i, l; *pt = *tmp; @@ -57,13 +67,6 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) break; } - if (pt->height[0] > 32) tile_mode = 4; - else if (pt->height[0] > 16) tile_mode = 3; - else if (pt->height[0] > 8) tile_mode = 2; - else if (pt->height[0] > 4) tile_mode = 1; - else tile_mode = 0; - tile_h = 1 << (tile_mode + 2); - switch (pt->target) { case PIPE_TEXTURE_3D: mt->image_nr = pt->depth[0]; @@ -86,28 +89,22 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); lvl->image_offset = CALLOC(mt->image_nr, sizeof(int)); - lvl->pitch = align(pt->width[l] * pt->block.size, 64); - lvl->tile_mode = tile_mode; + lvl->pitch = align(pt->nblocksx[l] * pt->block.size, 64); + lvl->tile_mode = get_tile_mode(pt->nblocksy[l]); width = MAX2(1, width >> 1); height = MAX2(1, height >> 1); depth = MAX2(1, depth >> 1); - - if (tile_mode && height <= (tile_h >> 1)) { - tile_mode--; - tile_h >>= 1; - } } for (i = 0; i < mt->image_nr; i++) { for (l = 0; l <= pt->last_level; l++) { struct nv50_miptree_level *lvl = &mt->level[l]; int size; - tile_h = 1 << (lvl->tile_mode + 2); + unsigned tile_ny = 1 << (lvl->tile_mode + 2); - size = align(pt->width[l], 8) * pt->block.size; - size = align(size, 64); - size *= align(pt->height[l], tile_h); + size = align(pt->nblocksx[l] * pt->block.size, 64); + size *= align(pt->nblocksy[l], tile_ny); lvl->image_offset[i] = mt->total_size; diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index f1eb672336..9c008090b8 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -124,7 +124,7 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, struct nv50_miptree *mt = nv50_miptree(pt); struct nv50_miptree_level *lvl = &mt->level[level]; struct nv50_transfer *tx; - unsigned image = 0; + unsigned nx, ny, image = 0; int ret; if (pt->target == PIPE_TEXTURE_CUBE) @@ -142,9 +142,16 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->base.width = w; tx->base.height = h; tx->base.block = pt->block; - tx->base.nblocksx = pt->nblocksx[level]; - tx->base.nblocksy = pt->nblocksy[level]; - tx->base.stride = (w * pt->block.size); + if (!pt->nblocksx[level]) { + tx->base.nblocksx = pf_get_nblocksx(&pt->block, + pt->width[level]); + tx->base.nblocksy = pf_get_nblocksy(&pt->block, + pt->height[level]); + } else { + tx->base.nblocksx = pt->nblocksx[level]; + tx->base.nblocksy = pt->nblocksy[level]; + } + tx->base.stride = tx->base.nblocksx * pt->block.size; tx->base.usage = usage; tx->level_pitch = lvl->pitch; @@ -152,24 +159,28 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->level_height = mt->base.base.height[level]; tx->level_offset = lvl->image_offset[image]; tx->level_tiling = lvl->tile_mode; - tx->level_x = x; - tx->level_y = y; + tx->level_x = pf_get_nblocksx(&tx->base.block, x); + tx->level_y = pf_get_nblocksy(&tx->base.block, y); ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, - w * pt->block.size * h, &tx->bo); + tx->base.nblocksy * tx->base.stride, &tx->bo); if (ret) { FREE(tx); return NULL; } if (usage & PIPE_TRANSFER_READ) { + nx = pf_get_nblocksx(&tx->base.block, tx->base.width); + ny = pf_get_nblocksy(&tx->base.block, tx->base.height); + nv50_transfer_rect_m2mf(pscreen, mt->base.bo, tx->level_offset, tx->level_pitch, tx->level_tiling, x, y, - tx->level_width, tx->level_height, - tx->bo, 0, tx->base.stride, - tx->bo->tile_mode, 0, 0, - tx->base.width, tx->base.height, - tx->base.block.size, w, h, + tx->base.nblocksx, tx->base.nblocksy, + tx->bo, 0, + tx->base.stride, tx->bo->tile_mode, + 0, 0, + tx->base.nblocksx, tx->base.nblocksy, + tx->base.block.size, nx, ny, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART, NOUVEAU_BO_GART); } @@ -183,17 +194,20 @@ nv50_transfer_del(struct pipe_transfer *ptx) struct nv50_transfer *tx = (struct nv50_transfer *)ptx; struct nv50_miptree *mt = nv50_miptree(ptx->texture); + unsigned nx = pf_get_nblocksx(&tx->base.block, tx->base.width); + unsigned ny = pf_get_nblocksy(&tx->base.block, tx->base.height); + if (ptx->usage & PIPE_TRANSFER_WRITE) { struct pipe_screen *pscreen = ptx->texture->screen; - nv50_transfer_rect_m2mf(pscreen, tx->bo, 0, tx->base.stride, - tx->bo->tile_mode, 0, 0, - tx->base.width, tx->base.height, + nv50_transfer_rect_m2mf(pscreen, tx->bo, 0, + tx->base.stride, tx->bo->tile_mode, + 0, 0, + tx->base.nblocksx, tx->base.nblocksy, mt->base.bo, tx->level_offset, tx->level_pitch, tx->level_tiling, tx->level_x, tx->level_y, - tx->level_width, tx->level_height, - tx->base.block.size, tx->base.width, - tx->base.height, + tx->base.nblocksx, tx->base.nblocksy, + tx->base.block.size, nx, ny, NOUVEAU_BO_GART, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART); } -- cgit v1.2.3 From 99e308a0e0479971fe3a8a0aba586e19456e4b88 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 1 Nov 2009 14:27:35 +0100 Subject: nv50: implement TGSI_OPCODE_AND/OR/XOR Will use AND for gl_FrontFacing, the face input is either 0 or 0xffffffff. --- src/gallium/drivers/nv50/nv50_program.c | 47 +++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index faf638949f..5944a0b7ff 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -890,6 +890,43 @@ emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, src1->neg ^= 1; } +static void +emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1, unsigned op) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xd0000000; + set_long(pc, e); + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + + if (op != TGSI_OPCODE_AND && op != TGSI_OPCODE_OR && + op != TGSI_OPCODE_XOR) + assert(!"invalid bit op"); + + if (src1->type == P_IMMD && src0->type == P_TEMP && pc->allow32) { + set_immd(pc, src1, e); + if (op == TGSI_OPCODE_OR) + e->inst[0] |= 0x0100; + else + if (op == TGSI_OPCODE_XOR) + e->inst[0] |= 0x8000; + } else { + set_src_1(pc, src1, e); + e->inst[1] |= 0x04000000; /* 32 bit */ + if (op == TGSI_OPCODE_OR) + e->inst[1] |= 0x4000; + else + if (op == TGSI_OPCODE_XOR) + e->inst[1] |= 0x8000; + } + + emit(pc, e); +} + static void emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) @@ -1838,6 +1875,16 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_add(pc, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_AND: + case TGSI_OPCODE_XOR: + case TGSI_OPCODE_OR: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_bitop2(pc, dst[c], src[0][c], src[1][c], + inst->Instruction.Opcode); + } + break; case TGSI_OPCODE_ARL: assert(src[0][0]); temp = temp_temp(pc); -- cgit v1.2.3 From 496c9eaacfabc4df4e6fb5ba230e60dc660554c8 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 1 Nov 2009 14:04:54 +0100 Subject: nv50: make IF condition safe Don't assume that a SET that writes to IF's argument directly precedes the IF. --- src/gallium/drivers/nv50/nv50_program.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 5944a0b7ff..66190f070d 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -2026,7 +2026,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, case TGSI_OPCODE_IF: /* emitting a join_at may not be necessary */ assert(pc->if_lvl < MAX_IF_DEPTH); - set_pred_wr(pc, 1, 0, pc->if_cond); + /* set_pred_wr(pc, 1, 0, pc->if_cond); */ + emit_cvt(pc, NULL, src[0][0], 0, CVTOP_ABS | CVTOP_RN, + CVT_F32_F32); emit_branch(pc, 0, 2, &pc->br_join[pc->if_lvl]); pc->if_insn[pc->if_lvl++] = pc->p->exec_tail; terminate_mbb(pc); -- cgit v1.2.3 From 5de8f9744015d3645a12dac244ad47daf8481dd2 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 1 Nov 2009 14:15:30 +0100 Subject: nv50: handle TGSI_SEMANTIC_FACE --- src/gallium/drivers/nv50/nv50_program.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 66190f070d..27827c7ecf 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -2463,6 +2463,23 @@ load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg) emit_interp(pc, reg, iv, mode); } +/* The face input is always at v[255] (varying space), with a + * value of 0 for back-facing, and 0xffffffff for front-facing. + */ +static void +load_frontfacing(struct nv50_pc *pc, struct nv50_reg *a) +{ + struct nv50_reg *one = alloc_immd(pc, 1.0f); + + assert(a->rhw == -1); + alloc_reg(pc, a); /* do this before rhw is set */ + a->rhw = 255; + load_interpolant(pc, a); + emit_bitop2(pc, a, a, one, TGSI_OPCODE_AND); + + FREE(one); +} + static boolean nv50_program_tx_prep(struct nv50_pc *pc) { @@ -2607,6 +2624,8 @@ nv50_program_tx_prep(struct nv50_pc *pc) int rid, aid; unsigned n = 0, m = pc->attr_nr - flat_nr; + pc->allow32 = TRUE; + int base = (TGSI_SEMANTIC_POSITION == p->info.input_semantic_name[0]) ? 0 : 1; @@ -2635,6 +2654,12 @@ nv50_program_tx_prep(struct nv50_pc *pc) p->cfg.io[n].hw = rid = aid; i = p->cfg.io[n].id_fp; + if (p->info.input_semantic_name[n] == + TGSI_SEMANTIC_FACE) { + load_frontfacing(pc, &pc->attr[i * 4]); + continue; + } + for (c = 0; c < 4; ++c) { if (!pc->attr[i * 4 + c].acc) continue; -- cgit v1.2.3 From cab749a1d0046f59ca10f96d2e6343404e5f2616 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 1 Nov 2009 09:24:02 -0800 Subject: r300g: Adopt osiris' PSC data and swizzle. A fair amount more flexible and easier to maintain. --- src/gallium/drivers/r300/r300_state_inlines.h | 128 ++++++++++++++++++-------- 1 file changed, 88 insertions(+), 40 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index ec11a41253..176e59f281 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -402,58 +402,106 @@ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) return 0; } +/* Utility function to count the number of components in RGBAZS formats. + * XXX should go to util or p_format.h */ +static INLINE unsigned pf_component_count(enum pipe_format format) { + unsigned count = 0; + + if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) { + return count; + } + + if (pf_size_x(format)) { + count++; + } + if (pf_size_y(format)) { + count++; + } + if (pf_size_z(format)) { + count++; + } + if (pf_size_w(format)) { + count++; + } + + return count; +} + /* Translate pipe_formats into PSC vertex types. */ static INLINE uint16_t r300_translate_vertex_data_type(enum pipe_format format) { - switch (format) { - case PIPE_FORMAT_R32_FLOAT: - return R300_DATA_TYPE_FLOAT_1; - break; - case PIPE_FORMAT_R32G32_FLOAT: - return R300_DATA_TYPE_FLOAT_2; - break; - case PIPE_FORMAT_R32G32B32_FLOAT: - return R300_DATA_TYPE_FLOAT_3; - break; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return R300_DATA_TYPE_FLOAT_4; - break; - case PIPE_FORMAT_R8G8B8A8_UNORM: - return R300_DATA_TYPE_BYTE | - R300_NORMALIZE; + uint32_t result = 0; + unsigned components = pf_component_count(format); + + if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) { + debug_printf("r300: Bad format %s in %s\n", pf_name(format), + __FUNCTION__); + return 0; + } + + switch (pf_type(format)) { + /* Half-floats, floats, doubles */ + case PIPE_FORMAT_TYPE_FLOAT: + switch (pf_size_x(format)) { + case 4: + result = R300_DATA_TYPE_FLOAT_1 + (components - 1); + break; + default: + assert(0); + } + break; + /* Normalized unsigned ints */ + case PIPE_FORMAT_TYPE_UNORM: + /* Normalized signed ints */ + case PIPE_FORMAT_TYPE_SNORM: + /* Non-normalized unsigned ints */ + case PIPE_FORMAT_TYPE_USCALED: + /* Non-normalized signed ints */ + case PIPE_FORMAT_TYPE_SSCALED: + switch (pf_size_x(format)) { + case 1: + result = R300_DATA_TYPE_BYTE; + break; + case 2: + if (components > 2) { + result = R300_DATA_TYPE_SHORT_4; + } else { + result = R300_DATA_TYPE_SHORT_2; + } + break; + default: + assert(0); + } break; default: - debug_printf("r300: Implementation error: " - "Bad vertex data format %s!\n", pf_name(format)); assert(0); - break; } - return 0; + + if (pf_type(format) == PIPE_FORMAT_TYPE_SSCALED) { + result |= R300_SIGNED; + } else if (pf_type(format) == PIPE_FORMAT_TYPE_UNORM) { + result |= R300_NORMALIZE; + } else if (pf_type(format) == PIPE_FORMAT_TYPE_SNORM) { + result |= (R300_SIGNED | R300_NORMALIZE); + } + + return result; } static INLINE uint16_t r300_translate_vertex_data_swizzle(enum pipe_format format) { - switch (format) { - case PIPE_FORMAT_R32_FLOAT: - return R300_VAP_SWIZZLE_X001; - break; - case PIPE_FORMAT_R32G32_FLOAT: - return R300_VAP_SWIZZLE_XY01; - break; - case PIPE_FORMAT_R32G32B32_FLOAT: - return R300_VAP_SWIZZLE_XYZ1; - break; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R8G8B8A8_UNORM: - return R300_VAP_SWIZZLE_XYZW; - break; - default: - debug_printf("r300: Implementation error: " - "Bad vertex data format %s!\n", pf_name(format)); - assert(0); - break; + + if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) { + debug_printf("r300: Bad format %s in %s\n", pf_name(format), + __FUNCTION__); + return 0; } - return 0; + + return ((pf_swizzle_x(format) << R300_SWIZZLE_SELECT_X_SHIFT) | + (pf_swizzle_y(format) << R300_SWIZZLE_SELECT_Y_SHIFT) | + (pf_swizzle_z(format) << R300_SWIZZLE_SELECT_Z_SHIFT) | + (pf_swizzle_w(format) << R300_SWIZZLE_SELECT_W_SHIFT) | + (0xf << R300_WRITE_ENA_SHIFT)); } #endif /* R300_STATE_INLINES_H */ -- cgit v1.2.3 From 2db46af8758bf77a2748460f617d0ead5b08a454 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Wed, 21 Oct 2009 21:17:43 +0200 Subject: r300g: split constant buffer and shader emittion --- src/gallium/drivers/r300/r300_context.c | 2 - src/gallium/drivers/r300/r300_context.h | 16 ++-- src/gallium/drivers/r300/r300_emit.c | 157 ++++++++++++++++++++------------ src/gallium/drivers/r300/r300_emit.h | 21 +++-- src/gallium/drivers/r300/r300_state.c | 54 +++++------ 5 files changed, 152 insertions(+), 98 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 02f201b49a..f974147ea4 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -22,8 +22,6 @@ #include "draw/draw_context.h" -#include "pipe/p_inlines.h" - #include "tgsi/tgsi_scan.h" #include "util/u_hash_table.h" diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index cee0734d21..b1738452de 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -26,6 +26,7 @@ #include "draw/draw_vertex.h" #include "pipe/p_context.h" +#include "pipe/p_inlines.h" struct r300_fragment_shader; struct r300_vertex_shader; @@ -119,10 +120,10 @@ struct r300_ztop_state { #define R300_NEW_BLEND 0x00000001 #define R300_NEW_BLEND_COLOR 0x00000002 #define R300_NEW_CLIP 0x00000004 -#define R300_NEW_CONSTANTS 0x00000008 -#define R300_NEW_DSA 0x00000010 -#define R300_NEW_FRAMEBUFFERS 0x00000020 -#define R300_NEW_FRAGMENT_SHADER 0x00000040 +#define R300_NEW_DSA 0x00000008 +#define R300_NEW_FRAMEBUFFERS 0x00000010 +#define R300_NEW_FRAGMENT_SHADER 0x00000020 +#define R300_NEW_FRAGMENT_SHADER_CONSTANTS 0x00000040 #define R300_NEW_RASTERIZER 0x00000080 #define R300_NEW_RS_BLOCK 0x00000100 #define R300_NEW_SAMPLER 0x00000200 @@ -132,9 +133,10 @@ struct r300_ztop_state { #define R300_ANY_NEW_TEXTURES 0x03fc0000 #define R300_NEW_VERTEX_FORMAT 0x04000000 #define R300_NEW_VERTEX_SHADER 0x08000000 -#define R300_NEW_VIEWPORT 0x10000000 -#define R300_NEW_QUERY 0x20000000 -#define R300_NEW_KITCHEN_SINK 0x3fffffff +#define R300_NEW_VERTEX_SHADER_CONSTANTS 0x10000000 +#define R300_NEW_VIEWPORT 0x20000000 +#define R300_NEW_QUERY 0x40000000 +#define R300_NEW_KITCHEN_SINK 0x7fffffff /* The next several objects are not pure Radeon state; they inherit from * various Gallium classes. */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 22cf9cac2a..de27f0939b 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -178,18 +178,15 @@ static uint32_t pack_float24(float f) } void r300_emit_fragment_program_code(struct r300_context* r300, - struct rX00_fragment_program_code* generic_code, - struct r300_constant_buffer* externals) + struct rX00_fragment_program_code* generic_code) { struct r300_fragment_program_code * code = &generic_code->code.r300; - struct rc_constant_list * constants = &generic_code->constants; int i; CS_LOCALS(r300); BEGIN_CS(15 + code->alu.length * 4 + - (code->tex.length ? (1 + code->tex.length) : 0) + - (constants->Count ? (1 + constants->Count * 4) : 0)); + (code->tex.length ? (1 + code->tex.length) : 0)); OUT_CS_REG(R300_US_CONFIG, code->config); OUT_CS_REG(R300_US_PIXSIZE, code->pixsize); @@ -221,32 +218,41 @@ void r300_emit_fragment_program_code(struct r300_context* r300, OUT_CS(code->tex.inst[i]); } - if (constants->Count) { - OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, constants->Count * 4); - for(i = 0; i < constants->Count; ++i) { - const float * data = get_shader_constant(r300, &constants->Constants[i], externals); - OUT_CS(pack_float24(data[0])); - OUT_CS(pack_float24(data[1])); - OUT_CS(pack_float24(data[2])); - OUT_CS(pack_float24(data[3])); - } - } + END_CS; +} + +void r300_emit_fs_constant_buffer(struct r300_context* r300, + struct rc_constant_list* constants) +{ + int i; + CS_LOCALS(r300); + + if (constants->Count == 0) + return; + BEGIN_CS(constants->Count * 4 + 1); + OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, constants->Count * 4); + for(i = 0; i < constants->Count; ++i) { + const float * data = get_shader_constant(r300, + &constants->Constants[i], + &r300->shader_constants[PIPE_SHADER_FRAGMENT]); + OUT_CS(pack_float24(data[0])); + OUT_CS(pack_float24(data[1])); + OUT_CS(pack_float24(data[2])); + OUT_CS(pack_float24(data[3])); + } END_CS; } void r500_emit_fragment_program_code(struct r300_context* r300, - struct rX00_fragment_program_code* generic_code, - struct r300_constant_buffer* externals) + struct rX00_fragment_program_code* generic_code) { struct r500_fragment_program_code * code = &generic_code->code.r500; - struct rc_constant_list * constants = &generic_code->constants; int i; CS_LOCALS(r300); BEGIN_CS(13 + - ((code->inst_end + 1) * 6) + - (constants->Count ? (3 + (constants->Count * 4)) : 0)); + ((code->inst_end + 1) * 6)); OUT_CS_REG(R500_US_CONFIG, 0); OUT_CS_REG(R500_US_PIXSIZE, code->max_temp_idx); OUT_CS_REG(R500_US_CODE_RANGE, @@ -266,18 +272,30 @@ void r500_emit_fragment_program_code(struct r300_context* r300, OUT_CS(code->inst[i].inst5); } - if (constants->Count) { - OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST); - OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, constants->Count * 4); - for (i = 0; i < constants->Count; i++) { - const float * data = get_shader_constant(r300, &constants->Constants[i], externals); - OUT_CS_32F(data[0]); - OUT_CS_32F(data[1]); - OUT_CS_32F(data[2]); - OUT_CS_32F(data[3]); - } - } + END_CS; +} + +void r500_emit_fs_constant_buffer(struct r300_context* r300, + struct rc_constant_list* constants) +{ + int i; + CS_LOCALS(r300); + + if (constants->Count == 0) + return; + BEGIN_CS(constants->Count * 4 + 2); + OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST); + OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, constants->Count * 4); + for (i = 0; i < constants->Count; i++) { + const float * data = get_shader_constant(r300, + &constants->Constants[i], + &r300->shader_constants[PIPE_SHADER_FRAGMENT]); + OUT_CS_32F(data[0]); + OUT_CS_32F(data[1]); + OUT_CS_32F(data[2]); + OUT_CS_32F(data[3]); + } END_CS; } @@ -621,8 +639,7 @@ void r300_emit_vertex_format_state(struct r300_context* r300) } void r300_emit_vertex_program_code(struct r300_context* r300, - struct r300_vertex_program_code* code, - struct r300_constant_buffer* constants) + struct r300_vertex_program_code* code) { int i; struct r300_screen* r300screen = r300_screen(r300->context.screen); @@ -635,12 +652,7 @@ void r300_emit_vertex_program_code(struct r300_context* r300, return; } - if (code->constants.Count) { - BEGIN_CS(14 + code->length + (code->constants.Count * 4)); - } else { - BEGIN_CS(11 + code->length); - } - + BEGIN_CS(11 + code->length); /* R300_VAP_PVS_CODE_CNTL_0 * R300_VAP_PVS_CONST_CNTL * R300_VAP_PVS_CODE_CNTL_1 @@ -658,20 +670,6 @@ void r300_emit_vertex_program_code(struct r300_context* r300, for (i = 0; i < code->length; i++) OUT_CS(code->body.d[i]); - if (code->constants.Count) { - OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, - (r300screen->caps->is_r500 ? - R500_PVS_CONST_START : R300_PVS_CONST_START)); - OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->constants.Count * 4); - for (i = 0; i < code->constants.Count; i++) { - const float * data = get_shader_constant(r300, &code->constants.Constants[i], constants); - OUT_CS_32F(data[0]); - OUT_CS_32F(data[1]); - OUT_CS_32F(data[2]); - OUT_CS_32F(data[3]); - } - } - OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(10) | R300_PVS_NUM_CNTLRS(5) | R300_PVS_NUM_FPUS(r300screen->caps->num_vert_fpus) | @@ -683,7 +681,40 @@ void r300_emit_vertex_program_code(struct r300_context* r300, void r300_emit_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs) { - r300_emit_vertex_program_code(r300, &vs->code, &r300->shader_constants[PIPE_SHADER_VERTEX]); + r300_emit_vertex_program_code(r300, &vs->code); +} + +void r300_emit_vs_constant_buffer(struct r300_context* r300, + struct rc_constant_list* constants) +{ + int i; + struct r300_screen* r300screen = r300_screen(r300->context.screen); + CS_LOCALS(r300); + + if (!r300screen->caps->has_tcl) { + debug_printf("r300: Implementation error: emit_vertex_shader called," + " but has_tcl is FALSE!\n"); + return; + } + + if (constants->Count == 0) + return; + + BEGIN_CS(constants->Count * 4 + 3); + OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, + (r300screen->caps->is_r500 ? + R500_PVS_CONST_START : R300_PVS_CONST_START)); + OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, constants->Count * 4); + for (i = 0; i < constants->Count; i++) { + const float * data = get_shader_constant(r300, + &constants->Constants[i], + &r300->shader_constants[PIPE_SHADER_VERTEX]); + OUT_CS_32F(data[0]); + OUT_CS_32F(data[1]); + OUT_CS_32F(data[2]); + OUT_CS_32F(data[3]); + } + END_CS; } void r300_emit_viewport_state(struct r300_context* r300, @@ -822,13 +853,22 @@ validate: if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) { if (r300screen->caps->is_r500) { - r500_emit_fragment_program_code(r300, &r300->fs->code, &r300->shader_constants[PIPE_SHADER_FRAGMENT]); + r500_emit_fragment_program_code(r300, &r300->fs->code); } else { - r300_emit_fragment_program_code(r300, &r300->fs->code, &r300->shader_constants[PIPE_SHADER_FRAGMENT]); + r300_emit_fragment_program_code(r300, &r300->fs->code); } r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER; } + if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER_CONSTANTS) { + if (r300screen->caps->is_r500) { + r500_emit_fs_constant_buffer(r300, &r300->fs->code.constants); + } else { + r300_emit_fs_constant_buffer(r300, &r300->fs->code.constants); + } + r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER_CONSTANTS; + } + if (r300->dirty_state & R300_NEW_FRAMEBUFFERS) { r300_emit_fb_state(r300, &r300->framebuffer_state); r300->dirty_state &= ~R300_NEW_FRAMEBUFFERS; @@ -887,6 +927,11 @@ validate: r300->dirty_state &= ~R300_NEW_VERTEX_SHADER; } + if (r300->dirty_state & R300_NEW_VERTEX_SHADER_CONSTANTS) { + r300_emit_vs_constant_buffer(r300, &r300->vs->code.constants); + r300->dirty_state &= ~R300_NEW_VERTEX_SHADER_CONSTANTS; + } + /* XXX assert(r300->dirty_state == 0); */ diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 02ac5bebbd..6befca72ce 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -23,6 +23,9 @@ #ifndef R300_EMIT_H #define R300_EMIT_H +#include "r300_context.h" +#include "radeon_code.h" + struct rX00_fragment_program_code; struct r300_vertex_program_code; @@ -39,12 +42,16 @@ void r300_emit_dsa_state(struct r300_context* r300, struct r300_dsa_state* dsa); void r300_emit_fragment_program_code(struct r300_context* r300, - struct rX00_fragment_program_code* generic_code, - struct r300_constant_buffer* externals); + struct rX00_fragment_program_code* generic_code); + +void r300_emit_fs_constant_buffer(struct r300_context* r300, + struct rc_constant_list* constants); void r500_emit_fragment_program_code(struct r300_context* r300, - struct rX00_fragment_program_code* generic_code, - struct r300_constant_buffer* externals); + struct rX00_fragment_program_code* generic_code); + +void r500_emit_fs_constant_buffer(struct r300_context* r300, + struct rc_constant_list* constants); void r300_emit_fb_state(struct r300_context* r300, struct pipe_framebuffer_state* fb); @@ -72,8 +79,10 @@ void r300_emit_vertex_buffer(struct r300_context* r300); void r300_emit_vertex_format_state(struct r300_context* r300); void r300_emit_vertex_program_code(struct r300_context* r300, - struct r300_vertex_program_code* code, - struct r300_constant_buffer* constants); + struct r300_vertex_program_code* code); + +void r300_emit_vs_constant_buffer(struct r300_context* r300, + struct rc_constant_list* constants); void r300_emit_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 3ac627e959..4cf01389d2 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -166,31 +166,6 @@ static void r300_set_clip_state(struct pipe_context* pipe, } } -static void - r300_set_constant_buffer(struct pipe_context* pipe, - uint shader, uint index, - const struct pipe_constant_buffer* buffer) -{ - struct r300_context* r300 = r300_context(pipe); - - /* This entire chunk of code seems ever-so-slightly baked. - * It's as if I've got pipe_buffer* matryoshkas... */ - if (buffer && buffer->buffer && buffer->buffer->size) { - void* map = pipe->winsys->buffer_map(pipe->winsys, buffer->buffer, - PIPE_BUFFER_USAGE_CPU_READ); - memcpy(r300->shader_constants[shader].constants, - map, buffer->buffer->size); - pipe->winsys->buffer_unmap(pipe->winsys, buffer->buffer); - - r300->shader_constants[shader].count = - buffer->buffer->size / (sizeof(float) * 4); - } else { - r300->shader_constants[shader].count = 0; - } - - r300->dirty_state |= R300_NEW_CONSTANTS; -} - /* Create a new depth, stencil, and alpha state based on the CSO dsa state. * * This contains the depth buffer, stencil buffer, alpha test, and such. @@ -345,7 +320,7 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) r300->fs = fs; - r300->dirty_state |= R300_NEW_FRAGMENT_SHADER; + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS; } /* Delete fragment shader state. */ @@ -702,7 +677,7 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) draw_bind_vertex_shader(r300->draw, vs->draw); r300->vs = vs; - r300->dirty_state |= R300_NEW_VERTEX_SHADER; + r300->dirty_state |= R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS; } else { draw_bind_vertex_shader(r300->draw, (struct draw_vertex_shader*)shader); @@ -726,6 +701,31 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader) } } +static void r300_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct r300_context* r300 = r300_context(pipe); + void *mapped; + + if (buf == NULL || buf->buffer->size == 0 || + (mapped = pipe_buffer_map(pipe->screen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ)) == NULL) + { + r300->shader_constants[shader].count = 0; + return; + } + + assert((buf->buffer->size % 4 * sizeof(float)) == 0); + memcpy(r300->shader_constants[shader].constants, mapped, buf->buffer->size); + r300->shader_constants[shader].count = buf->buffer->size / (4 * sizeof(float)); + pipe_buffer_unmap(pipe->screen, buf->buffer); + + if (shader == PIPE_SHADER_VERTEX) + r300->dirty_state |= R300_NEW_VERTEX_SHADER_CONSTANTS; + else if (shader == PIPE_SHADER_FRAGMENT) + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; +} + void r300_init_state_functions(struct r300_context* r300) { r300->context.create_blend_state = r300_create_blend_state; -- cgit v1.2.3 From 3d73852121f13832f6bc87918798ff96589d0349 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sun, 1 Nov 2009 18:50:52 +0100 Subject: r300g: fix geometry corruptions PVS flush is needed before changing the vertex shader or vertex shader constants. --- src/gallium/drivers/r300/r300_emit.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index de27f0939b..5b03c1aa6c 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -652,7 +652,7 @@ void r300_emit_vertex_program_code(struct r300_context* r300, return; } - BEGIN_CS(11 + code->length); + BEGIN_CS(9 + code->length); /* R300_VAP_PVS_CODE_CNTL_0 * R300_VAP_PVS_CONST_CNTL * R300_VAP_PVS_CODE_CNTL_1 @@ -674,7 +674,6 @@ void r300_emit_vertex_program_code(struct r300_context* r300, R300_PVS_NUM_CNTLRS(5) | R300_PVS_NUM_FPUS(r300screen->caps->num_vert_fpus) | R300_PVS_VF_MAX_VTX_NUM(12)); - OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); END_CS; } @@ -749,6 +748,15 @@ void r300_flush_textures(struct r300_context* r300) END_CS; } +static void r300_flush_pvs(struct r300_context* r300) +{ + CS_LOCALS(r300); + + BEGIN_CS(2); + OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); + END_CS; +} + /* Emit all dirty state. */ void r300_emit_dirty_state(struct r300_context* r300) { @@ -922,6 +930,10 @@ validate: r300->dirty_state &= ~R300_NEW_VERTEX_FORMAT; } + if (r300->dirty_state & (R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS)) { + r300_flush_pvs(r300); + } + if (r300->dirty_state & R300_NEW_VERTEX_SHADER) { r300_emit_vertex_shader(r300, r300->vs); r300->dirty_state &= ~R300_NEW_VERTEX_SHADER; -- cgit v1.2.3 From 1f630fa0167ed799556a764178772c096a3ddeba Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 1 Nov 2009 11:54:52 -0800 Subject: r300g: Miscellania. Avoid draw segfaults, s/true/TRUE/, etc. Cleared out my git stash. --- src/gallium/drivers/r300/r300_context.h | 2 +- src/gallium/drivers/r300/r300_debug.c | 6 +++--- src/gallium/drivers/r300/r300_emit.c | 2 +- src/gallium/drivers/r300/r300_state.c | 28 +++++++++++++++++++++------- src/gallium/drivers/r300/r300_vs.c | 4 ++-- 5 files changed, 28 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index b1738452de..ae7015634c 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -339,7 +339,7 @@ void r300_init_surface_functions(struct r300_context* r300); static INLINE boolean DBG_ON(struct r300_context * ctx, unsigned flags) { - return (ctx->debug & flags) ? true : false; + return (ctx->debug & flags) ? TRUE : FALSE; } static INLINE void DBG(struct r300_context * ctx, unsigned flags, const char * fmt, ...) diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 421253ca72..2a6ed54ac9 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -49,7 +49,7 @@ static struct debug_option debug_options[] = { void r300_init_debug(struct r300_context * ctx) { const char * options = debug_get_option("RADEON_DEBUG", 0); - boolean printhint = false; + boolean printhint = FALSE; size_t length; struct debug_option * opt; @@ -71,14 +71,14 @@ void r300_init_debug(struct r300_context * ctx) if (!opt->name) { debug_printf("Unknown debug option: %s\n", options); - printhint = true; + printhint = TRUE; } options += length; } if (!ctx->debug) - printhint = true; + printhint = TRUE; } if (printhint || ctx->debug & DBG_HELP) { diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 5b03c1aa6c..79972dbb49 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -800,7 +800,7 @@ validate: for (i = 0; i < r300->texture_count; i++) { tex = r300->textures[i]; if (!tex) - continue; + continue; if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) { r300->context.flush(&r300->context, 0, NULL); diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 4cf01389d2..af063d4b20 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -281,7 +281,9 @@ static void { struct r300_context* r300 = r300_context(pipe); - draw_flush(r300->draw); + if (r300->draw) { + draw_flush(r300->draw); + } r300->framebuffer_state = *state; @@ -444,10 +446,13 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) struct r300_context* r300 = r300_context(pipe); struct r300_rs_state* rs = (struct r300_rs_state*)state; - draw_flush(r300->draw); - draw_set_rasterizer_state(r300->draw, &rs->rs); + if (r300->draw) { + draw_flush(r300->draw); + draw_set_rasterizer_state(r300->draw, &rs->rs); + } r300->rs_state = rs; + /* XXX Clean these up when we move to atom emits */ r300->dirty_state |= R300_NEW_RASTERIZER; r300->dirty_state |= R300_NEW_RS_BLOCK; r300->dirty_state |= R300_NEW_SCISSOR; @@ -623,8 +628,10 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, r300->vertex_buffer_count = count; - draw_flush(r300->draw); - draw_set_vertex_buffers(r300->draw, count, buffers); + if (r300->draw) { + draw_flush(r300->draw); + draw_set_vertex_buffers(r300->draw, count, buffers); + } } static void r300_set_vertex_elements(struct pipe_context* pipe, @@ -633,8 +640,15 @@ static void r300_set_vertex_elements(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - draw_flush(r300->draw); - draw_set_vertex_elements(r300->draw, count, elements); + memcpy(r300->vertex_elements, elements, + sizeof(struct pipe_vertex_element) * count); + + r300->vertex_element_count = count; + + if (r300->draw) { + draw_flush(r300->draw); + draw_set_vertex_elements(r300->draw, count, elements); + } } static void* r300_create_vs_state(struct pipe_context* pipe, diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index eca85879a7..74ef416dc1 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -37,7 +37,7 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) struct tgsi_shader_info* info = &vs->info; struct tgsi_parse_context parser; struct tgsi_full_declaration * decl; - boolean pointsize = false; + boolean pointsize = FALSE; int out_colors = 0; int colors = 0; int out_generic = 0; @@ -52,7 +52,7 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) for (i = 0; i < info->num_outputs; i++) { switch (info->output_semantic_name[i]) { case TGSI_SEMANTIC_PSIZE: - pointsize = true; + pointsize = TRUE; break; case TGSI_SEMANTIC_COLOR: out_colors++; -- cgit v1.2.3 From 87d7c1aa15a944d64e43b217e18553256f9fb681 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sun, 1 Nov 2009 18:25:59 -0500 Subject: nouveau: Assume all texture blankets are linear for now. --- src/gallium/drivers/nv30/nv30_miptree.c | 3 +++ src/gallium/drivers/nv40/nv40_miptree.c | 3 +++ src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c | 3 +-- 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index 17acca61ab..280696d450 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -147,6 +147,9 @@ nv30_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, mt->level[0].pitch = stride[0]; mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + /* Assume whoever created this buffer expects it to be linear for now */ + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + pipe_buffer_reference(&mt->buffer, pb); return &mt->base; } diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index 5a201ccf45..465dd3b069 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -141,6 +141,9 @@ nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, mt->level[0].pitch = stride[0]; mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + /* Assume whoever created this buffer expects it to be linear for now */ + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + pipe_buffer_reference(&mt->buffer, pb); return &mt->base; } diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c index f512c0e5f3..317dc44d22 100644 --- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c +++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c @@ -21,8 +21,7 @@ dri_surface_from_handle(struct drm_api *api, struct pipe_screen *pscreen, struct pipe_texture tmpl; memset(&tmpl, 0, sizeof(tmpl)); - tmpl.tex_usage = PIPE_TEXTURE_USAGE_PRIMARY | - NOUVEAU_TEXTURE_USAGE_LINEAR; + tmpl.tex_usage = PIPE_TEXTURE_USAGE_PRIMARY; tmpl.target = PIPE_TEXTURE_2D; tmpl.last_level = 0; tmpl.depth[0] = 1; -- cgit v1.2.3 From eb699d64ec7057032139baccedcb0694ca41d706 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 30 Oct 2009 08:27:17 +0000 Subject: softpipe: Sanitise shader semantic and interpolator handling. Handle the remaining semantic names and indices. Respect color interpolator when not flatshading. --- src/gallium/drivers/softpipe/sp_state_derived.c | 34 ++++++++----------------- 1 file changed, 10 insertions(+), 24 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 1faeca1c2a..3bc96b9538 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -66,8 +66,6 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) if (vinfo->num_attribs == 0) { /* compute vertex layout now */ const struct sp_fragment_shader *spfs = softpipe->fs; - const enum interp_mode colorInterp - = softpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; const uint num = draw_num_vs_outputs(softpipe->draw); uint i; @@ -108,33 +106,21 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) switch (spfs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: - src = draw_find_vs_output(softpipe->draw, - TGSI_SEMANTIC_POSITION, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); + interp = INTERP_POS; break; case TGSI_SEMANTIC_COLOR: - src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_COLOR, - spfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); + if (softpipe->rasterizer->flatshade) { + interp = INTERP_CONSTANT; + } break; - - case TGSI_SEMANTIC_FOG: - src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_FOG, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); - break; - - case TGSI_SEMANTIC_GENERIC: - case TGSI_SEMANTIC_FACE: - /* this includes texcoords and varying vars */ - src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_GENERIC, - spfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); - break; - - default: - assert(0); } + + /* this includes texcoords and varying vars */ + src = draw_find_vs_output(softpipe->draw, + spfs->info.input_semantic_name[i], + spfs->info.input_semantic_index[i]); + draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); } softpipe->psize_slot = draw_find_vs_output(softpipe->draw, -- cgit v1.2.3 From 677a055fa0cf7b6476c716be187513c41060d417 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 3 Nov 2009 13:10:58 +0000 Subject: llvmpipe: Respect gl_rasterization_rules in primitive setup. Based on Michal's identical commit for softpipe (ca9c413647bf9efb5ed770e3a655bc758075aec7). --- src/gallium/drivers/llvmpipe/lp_setup.c | 48 ++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index c43b3da450..11ebfa0236 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -90,6 +90,8 @@ struct setup_context { float oneoverarea; int facing; + float pixel_offset; + struct quad_header quad[MAX_QUADS]; struct quad_header *quad_ptrs[MAX_QUADS]; unsigned count; @@ -483,6 +485,16 @@ static boolean setup_sort_vertices( struct setup_context *setup, ((det > 0.0) ^ (setup->llvmpipe->rasterizer->front_winding == PIPE_WINDING_CW)); + /* Prepare pixel offset for rasterisation: + * - pixel center (0.5, 0.5) for GL, or + * - assume (0.0, 0.0) for other APIs. + */ + if (setup->llvmpipe->rasterizer->gl_rasterization_rules) { + setup->pixel_offset = 0.5f; + } else { + setup->pixel_offset = 0.0f; + } + return TRUE; } @@ -508,7 +520,7 @@ static void tri_pos_coeff( struct setup_context *setup, /* calculate a0 as the value which would be sampled for the * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). + * pixel centers, in other words (pixel_offset, pixel_offset). * * this is neat but unfortunately not a good way to do things for * triangles with very large values of dadx or dady as it will @@ -519,8 +531,8 @@ static void tri_pos_coeff( struct setup_context *setup, * instead - i'll switch to this later. */ setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); /* debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", @@ -609,8 +621,8 @@ static void tri_linear_coeff( struct setup_context *setup, * instead - i'll switch to this later. */ setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); /* debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", @@ -661,8 +673,8 @@ static void tri_persp_coeff( struct setup_context *setup, setup->coef.dadx[1 + attrib][i] = dadx; setup->coef.dady[1 + attrib][i] = dady; setup->coef.a0[1 + attrib][i] = (mina - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); } } @@ -746,12 +758,12 @@ static void setup_tri_coefficients( struct setup_context *setup ) static void setup_tri_edges( struct setup_context *setup ) { - float vmin_x = setup->vmin[0][0] + 0.5f; - float vmid_x = setup->vmid[0][0] + 0.5f; + float vmin_x = setup->vmin[0][0] + setup->pixel_offset; + float vmid_x = setup->vmid[0][0] + setup->pixel_offset; - float vmin_y = setup->vmin[0][1] - 0.5f; - float vmid_y = setup->vmid[0][1] - 0.5f; - float vmax_y = setup->vmax[0][1] - 0.5f; + float vmin_y = setup->vmin[0][1] - setup->pixel_offset; + float vmid_y = setup->vmid[0][1] - setup->pixel_offset; + float vmax_y = setup->vmax[0][1] - setup->pixel_offset; setup->emaj.sy = ceilf(vmin_y); setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy); @@ -950,8 +962,8 @@ linear_pos_coeff(struct setup_context *setup, setup->coef.dadx[0][i] = dadx; setup->coef.dady[0][i] = dady; setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); } @@ -972,8 +984,8 @@ line_linear_coeff(struct setup_context *setup, setup->coef.dadx[1 + attrib][i] = dadx; setup->coef.dady[1 + attrib][i] = dady; setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); } } @@ -998,8 +1010,8 @@ line_persp_coeff(struct setup_context *setup, setup->coef.dadx[1 + attrib][i] = dadx; setup->coef.dady[1 + attrib][i] = dady; setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - - (dadx * (setup->vmin[0][0] - 0.5f) + - dady * (setup->vmin[0][1] - 0.5f))); + (dadx * (setup->vmin[0][0] - setup->pixel_offset) + + dady * (setup->vmin[0][1] - setup->pixel_offset))); } } -- cgit v1.2.3 From 026cf84bbbd939f0ae573a9841bb49aaa1d9ae75 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 23 Aug 2009 11:22:41 +0100 Subject: llvmpipe: remove old prim_setup draw stage Everything now goes through the draw_vbuf handler, the same as regular drivers. Based on Keith's commit 4fe0fc3eba1f79beda890a5016359d549bab6ad4. --- src/gallium/drivers/llvmpipe/Makefile | 1 - src/gallium/drivers/llvmpipe/SConscript | 1 - src/gallium/drivers/llvmpipe/lp_context.c | 26 ++-- src/gallium/drivers/llvmpipe/lp_context.h | 5 +- src/gallium/drivers/llvmpipe/lp_prim_setup.c | 190 ------------------------ src/gallium/drivers/llvmpipe/lp_prim_setup.h | 85 ----------- src/gallium/drivers/llvmpipe/lp_prim_vbuf.c | 107 ++++--------- src/gallium/drivers/llvmpipe/lp_prim_vbuf.h | 4 +- src/gallium/drivers/llvmpipe/lp_setup.c | 1 - src/gallium/drivers/llvmpipe/lp_state_derived.c | 25 ++-- 10 files changed, 59 insertions(+), 386 deletions(-) delete mode 100644 src/gallium/drivers/llvmpipe/lp_prim_setup.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_prim_setup.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index cdf318844c..e038a5229e 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -35,7 +35,6 @@ C_SOURCES = \ lp_draw_arrays.c \ lp_flush.c \ lp_jit.c \ - lp_prim_setup.c \ lp_prim_vbuf.c \ lp_setup.c \ lp_query.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index f4410f8201..3bd2e70013 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -49,7 +49,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_draw_arrays.c', 'lp_flush.c', 'lp_jit.c', - 'lp_prim_setup.c', 'lp_prim_vbuf.c', 'lp_setup.c', 'lp_query.c', diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 202cb8ef43..57e71f3e98 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -31,13 +31,13 @@ */ #include "draw/draw_context.h" +#include "draw/draw_vbuf.h" #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "lp_clear.h" #include "lp_context.h" #include "lp_flush.h" -#include "lp_prim_setup.h" #include "lp_prim_vbuf.h" #include "lp_state.h" #include "lp_surface.h" @@ -264,21 +264,21 @@ llvmpipe_create( struct pipe_screen *screen ) (struct tgsi_sampler **) llvmpipe->tgsi.vert_samplers_list); - llvmpipe->setup = lp_draw_render_stage(llvmpipe); - if (!llvmpipe->setup) - goto fail; - if (debug_get_bool_option( "LP_NO_RAST", FALSE )) llvmpipe->no_rast = TRUE; - if (debug_get_bool_option( "LP_NO_VBUF", FALSE )) { - /* Deprecated path -- vbuf is the intended interface to the draw module: - */ - draw_set_rasterize_stage(llvmpipe->draw, llvmpipe->setup); - } - else { - lp_init_vbuf(llvmpipe); - } + llvmpipe->vbuf_backend = lp_create_vbuf_backend(llvmpipe); + if (!llvmpipe->vbuf_backend) + goto fail; + + llvmpipe->vbuf = draw_vbuf_stage(llvmpipe->draw, llvmpipe->vbuf_backend); + if (!llvmpipe->vbuf) + goto fail; + + draw_set_rasterize_stage(llvmpipe->draw, llvmpipe->vbuf); + draw_set_render(llvmpipe->draw, llvmpipe->vbuf_backend); + + /* plug in AA line/point stages */ draw_install_aaline_stage(llvmpipe->draw, &llvmpipe->pipe); diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 7df340554e..3ad95d0bfc 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -121,9 +121,10 @@ struct llvmpipe_context { /** The primitive drawing context */ struct draw_context *draw; - struct draw_stage *setup; + + /** Draw module backend */ + struct vbuf_render *vbuf_backend; struct draw_stage *vbuf; - struct llvmpipe_vbuf_render *vbuf_render; boolean dirty_render_cache; diff --git a/src/gallium/drivers/llvmpipe/lp_prim_setup.c b/src/gallium/drivers/llvmpipe/lp_prim_setup.c deleted file mode 100644 index b14f8fb99d..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_prim_setup.c +++ /dev/null @@ -1,190 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \brief A draw stage that drives our triangle setup routines from - * within the draw pipeline. One of two ways to drive setup, the - * other being in lp_prim_vbuf.c. - * - * \author Keith Whitwell - * \author Brian Paul - */ - - -#include "lp_context.h" -#include "lp_setup.h" -#include "lp_state.h" -#include "lp_prim_setup.h" -#include "draw/draw_pipe.h" -#include "draw/draw_vertex.h" -#include "util/u_memory.h" - -/** - * Triangle setup info (derived from draw_stage). - * Also used for line drawing (taking some liberties). - */ -struct setup_stage { - struct draw_stage stage; /**< This must be first (base class) */ - - struct setup_context *setup; -}; - - - -/** - * Basically a cast wrapper. - */ -static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) -{ - return (struct setup_stage *)stage; -} - - -typedef const float (*cptrf4)[4]; - -static void -do_tri(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - llvmpipe_setup_tri( setup->setup, - (cptrf4)prim->v[0]->data, - (cptrf4)prim->v[1]->data, - (cptrf4)prim->v[2]->data ); -} - -static void -do_line(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - llvmpipe_setup_line( setup->setup, - (cptrf4)prim->v[0]->data, - (cptrf4)prim->v[1]->data ); -} - -static void -do_point(struct draw_stage *stage, struct prim_header *prim) -{ - struct setup_stage *setup = setup_stage( stage ); - - llvmpipe_setup_point( setup->setup, - (cptrf4)prim->v[0]->data ); -} - - - - -static void setup_begin( struct draw_stage *stage ) -{ - struct setup_stage *setup = setup_stage(stage); - - llvmpipe_setup_prepare( setup->setup ); - - stage->point = do_point; - stage->line = do_line; - stage->tri = do_tri; -} - - -static void setup_first_point( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->point( stage, header ); -} - -static void setup_first_line( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->line( stage, header ); -} - - -static void setup_first_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - setup_begin(stage); - stage->tri( stage, header ); -} - - - -static void setup_flush( struct draw_stage *stage, - unsigned flags ) -{ - stage->point = setup_first_point; - stage->line = setup_first_line; - stage->tri = setup_first_tri; -} - - -static void reset_stipple_counter( struct draw_stage *stage ) -{ -} - - -static void render_destroy( struct draw_stage *stage ) -{ - struct setup_stage *ssetup = setup_stage(stage); - llvmpipe_setup_destroy_context(ssetup->setup); - FREE( stage ); -} - - -/** - * Create a new primitive setup/render stage. - */ -struct draw_stage *lp_draw_render_stage( struct llvmpipe_context *llvmpipe ) -{ - struct setup_stage *sstage = CALLOC_STRUCT(setup_stage); - - sstage->setup = llvmpipe_setup_create_context(llvmpipe); - sstage->stage.draw = llvmpipe->draw; - sstage->stage.point = setup_first_point; - sstage->stage.line = setup_first_line; - sstage->stage.tri = setup_first_tri; - sstage->stage.flush = setup_flush; - sstage->stage.reset_stipple_counter = reset_stipple_counter; - sstage->stage.destroy = render_destroy; - - return (struct draw_stage *)sstage; -} - -struct setup_context * -lp_draw_setup_context( struct draw_stage *stage ) -{ - struct setup_stage *ssetup = setup_stage(stage); - return ssetup->setup; -} - -void -lp_draw_flush( struct draw_stage *stage ) -{ - stage->flush( stage, 0 ); -} diff --git a/src/gallium/drivers/llvmpipe/lp_prim_setup.h b/src/gallium/drivers/llvmpipe/lp_prim_setup.h deleted file mode 100644 index da6cae6375..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_prim_setup.h +++ /dev/null @@ -1,85 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef LP_PRIM_SETUP_H -#define LP_PRIM_SETUP_H - - -/** - * vbuf is a special stage to gather the stream of triangles, lines, points - * together and reconstruct vertex buffers for hardware upload. - * - * First attempt, work in progress. - * - * TODO: - * - separate out vertex buffer building and primitive emit, ie >1 draw per vb. - * - tell vbuf stage how to build hw vertices directly - * - pass vbuf stage a buffer pointer for direct emit to agp/vram. - * - * - * - * Vertices are just an array of floats, with all the attributes - * packed. We currently assume a layout like: - * - * attr[0][0..3] - window position - * attr[1..n][0..3] - remaining attributes. - * - * Attributes are assumed to be 4 floats wide but are packed so that - * all the enabled attributes run contiguously. - */ - - -struct draw_stage; -struct llvmpipe_context; - - -typedef void (*vbuf_draw_func)( struct pipe_context *pipe, - unsigned prim, - const ushort *elements, - unsigned nr_elements, - const void *vertex_buffer, - unsigned nr_vertices ); - - -extern struct draw_stage * -lp_draw_render_stage( struct llvmpipe_context *llvmpipe ); - -extern struct setup_context * -lp_draw_setup_context( struct draw_stage * ); - -extern void -lp_draw_flush( struct draw_stage * ); - - -extern struct draw_stage * -lp_draw_vbuf_stage( struct draw_context *draw_context, - struct pipe_context *pipe, - vbuf_draw_func draw ); - - -#endif /* LP_PRIM_SETUP_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c index c394dcb61d..4abff4eccc 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c @@ -37,10 +37,9 @@ #include "lp_context.h" +#include "lp_setup.h" #include "lp_state.h" #include "lp_prim_vbuf.h" -#include "lp_prim_setup.h" -#include "lp_setup.h" #include "draw/draw_context.h" #include "draw/draw_vbuf.h" #include "util/u_memory.h" @@ -59,6 +58,8 @@ struct llvmpipe_vbuf_render { struct vbuf_render base; struct llvmpipe_context *llvmpipe; + struct setup_context *setup; + uint prim; uint vertex_size; uint nr_vertices; @@ -75,6 +76,11 @@ llvmpipe_vbuf_render(struct vbuf_render *vbr) } + + + + + static const struct vertex_info * lp_vbuf_get_vertex_info(struct vbuf_render *vbr) { @@ -105,36 +111,6 @@ lp_vbuf_allocate_vertices(struct vbuf_render *vbr, static void lp_vbuf_release_vertices(struct vbuf_render *vbr) { -#if 0 - { - struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - const struct vertex_info *info = - llvmpipe_get_vbuf_vertex_info(cvbr->llvmpipe); - const float *vtx = (const float *) cvbr->vertex_buffer; - uint i, j; - debug_printf("%s (vtx_size = %u, vtx_used = %u)\n", - __FUNCTION__, cvbr->vertex_size, cvbr->nr_vertices); - for (i = 0; i < cvbr->nr_vertices; i++) { - for (j = 0; j < info->num_attribs; j++) { - uint k; - switch (info->attrib[j].emit) { - case EMIT_4F: k = 4; break; - case EMIT_3F: k = 3; break; - case EMIT_2F: k = 2; break; - case EMIT_1F: k = 1; break; - default: assert(0); - } - debug_printf("Vert %u attr %u: ", i, j); - while (k-- > 0) { - debug_printf("%g ", vtx[0]); - vtx++; - } - debug_printf("\n"); - } - } - } -#endif - /* keep the old allocation for next time */ } @@ -160,11 +136,7 @@ static boolean lp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - - /* XXX: break this dependency - make setup_context live under - * llvmpipe, rename the old "setup" draw stage to something else. - */ - struct setup_context *setup_ctx = lp_draw_setup_context(cvbr->llvmpipe->setup); + struct setup_context *setup_ctx = cvbr->setup; llvmpipe_setup_prepare( setup_ctx ); @@ -193,14 +165,9 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) struct llvmpipe_context *llvmpipe = cvbr->llvmpipe; const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float); const void *vertex_buffer = cvbr->vertex_buffer; + struct setup_context *setup_ctx = cvbr->setup; unsigned i; - /* XXX: break this dependency - make setup_context live under - * llvmpipe, rename the old "setup" draw stage to something else. - */ - struct draw_stage *setup = llvmpipe->setup; - struct setup_context *setup_ctx = lp_draw_setup_context(setup); - switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { @@ -367,11 +334,6 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) default: assert(0); } - - /* XXX: why are we calling this??? If we had to call something, it - * would be a function in lp_setup.c: - */ - lp_draw_flush( setup ); } @@ -384,17 +346,12 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); struct llvmpipe_context *llvmpipe = cvbr->llvmpipe; + struct setup_context *setup_ctx = cvbr->setup; const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float); const void *vertex_buffer = (void *) get_vert(cvbr->vertex_buffer, start, stride); unsigned i; - /* XXX: break this dependency - make setup_context live under - * llvmpipe, rename the old "setup" draw stage to something else. - */ - struct draw_stage *setup = llvmpipe->setup; - struct setup_context *setup_ctx = lp_draw_setup_context(setup); - switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { @@ -568,40 +525,38 @@ static void lp_vbuf_destroy(struct vbuf_render *vbr) { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); - cvbr->llvmpipe->vbuf_render = NULL; + llvmpipe_setup_destroy_context(cvbr->setup); FREE(cvbr); } /** - * Initialize the post-transform vertex buffer information for the given - * context. + * Create the post-transform vertex handler for the given context. */ -void -lp_init_vbuf(struct llvmpipe_context *lp) +struct vbuf_render * +lp_create_vbuf_backend(struct llvmpipe_context *lp) { - assert(lp->draw); + struct llvmpipe_vbuf_render *cvbr = CALLOC_STRUCT(llvmpipe_vbuf_render); - lp->vbuf_render = CALLOC_STRUCT(llvmpipe_vbuf_render); + assert(lp->draw); - lp->vbuf_render->base.max_indices = LP_MAX_VBUF_INDEXES; - lp->vbuf_render->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; - lp->vbuf_render->base.get_vertex_info = lp_vbuf_get_vertex_info; - lp->vbuf_render->base.allocate_vertices = lp_vbuf_allocate_vertices; - lp->vbuf_render->base.map_vertices = lp_vbuf_map_vertices; - lp->vbuf_render->base.unmap_vertices = lp_vbuf_unmap_vertices; - lp->vbuf_render->base.set_primitive = lp_vbuf_set_primitive; - lp->vbuf_render->base.draw = lp_vbuf_draw; - lp->vbuf_render->base.draw_arrays = lp_vbuf_draw_arrays; - lp->vbuf_render->base.release_vertices = lp_vbuf_release_vertices; - lp->vbuf_render->base.destroy = lp_vbuf_destroy; + cvbr->base.max_indices = LP_MAX_VBUF_INDEXES; + cvbr->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; - lp->vbuf_render->llvmpipe = lp; + cvbr->base.get_vertex_info = lp_vbuf_get_vertex_info; + cvbr->base.allocate_vertices = lp_vbuf_allocate_vertices; + cvbr->base.map_vertices = lp_vbuf_map_vertices; + cvbr->base.unmap_vertices = lp_vbuf_unmap_vertices; + cvbr->base.set_primitive = lp_vbuf_set_primitive; + cvbr->base.draw = lp_vbuf_draw; + cvbr->base.draw_arrays = lp_vbuf_draw_arrays; + cvbr->base.release_vertices = lp_vbuf_release_vertices; + cvbr->base.destroy = lp_vbuf_destroy; - lp->vbuf = draw_vbuf_stage(lp->draw, &lp->vbuf_render->base); + cvbr->llvmpipe = lp; - draw_set_rasterize_stage(lp->draw, lp->vbuf); + cvbr->setup = llvmpipe_setup_create_context(cvbr->llvmpipe); - draw_set_render(lp->draw, &lp->vbuf_render->base); + return &cvbr->base; } diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h index 6c4e6063e6..0676e2f42a 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h @@ -31,8 +31,8 @@ struct llvmpipe_context; -extern void -lp_init_vbuf(struct llvmpipe_context *llvmpipe); +extern struct vbuf_render * +lp_create_vbuf_backend(struct llvmpipe_context *llvmpipe); #endif /* LP_VBUF_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 11ebfa0236..ffcbc9a379 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -33,7 +33,6 @@ */ #include "lp_context.h" -#include "lp_prim_setup.h" #include "lp_quad.h" #include "lp_setup.h" #include "lp_state.h" diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 30fb41ea65..31eaadda21 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -67,24 +67,19 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) const struct lp_fragment_shader *lpfs = llvmpipe->fs; const enum interp_mode colorInterp = llvmpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; + struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; + const uint num = draw_num_vs_outputs(llvmpipe->draw); uint i; - if (llvmpipe->vbuf) { - /* if using the post-transform vertex buffer, tell draw_vbuf to - * simply emit the whole post-xform vertex as-is: - */ - struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; - const uint num = draw_num_vs_outputs(llvmpipe->draw); - uint i; - - /* No longer any need to try and emit draw vertex_header info. - */ - vinfo_vbuf->num_attribs = 0; - for (i = 0; i < num; i++) { - draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); - } - draw_compute_vertex_size(vinfo_vbuf); + /* Tell draw_vbuf to simply emit the whole post-xform vertex + * as-is. No longer any need to try and emit draw vertex_header + * info. + */ + vinfo_vbuf->num_attribs = 0; + for (i = 0; i < num; i++) { + draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); } + draw_compute_vertex_size(vinfo_vbuf); /* * Loop over fragment shader inputs, searching for the matching output -- cgit v1.2.3 From ceb6728725a1eefe35a4d8371b2ff0abe212b5ad Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 30 Oct 2009 08:27:17 +0000 Subject: llvmpipe: Sanitise shader semantic and interpolator handling. Handle the remaining semantic names and indices. Respect color interpolator when not flatshading. Based on Michal's softpipe commit eb699d64ec7057032139baccedcb0694ca41d706. --- src/gallium/drivers/llvmpipe/lp_state_derived.c | 34 ++++++++----------------- 1 file changed, 10 insertions(+), 24 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 31eaadda21..c753b183c0 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -65,8 +65,6 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) if (vinfo->num_attribs == 0) { /* compute vertex layout now */ const struct lp_fragment_shader *lpfs = llvmpipe->fs; - const enum interp_mode colorInterp - = llvmpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; const uint num = draw_num_vs_outputs(llvmpipe->draw); uint i; @@ -107,33 +105,21 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) switch (lpfs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: - src = draw_find_vs_output(llvmpipe->draw, - TGSI_SEMANTIC_POSITION, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); + interp = INTERP_POS; break; case TGSI_SEMANTIC_COLOR: - src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_COLOR, - lpfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); + if (llvmpipe->rasterizer->flatshade) { + interp = INTERP_CONSTANT; + } break; - - case TGSI_SEMANTIC_FOG: - src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_FOG, 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); - break; - - case TGSI_SEMANTIC_GENERIC: - case TGSI_SEMANTIC_FACE: - /* this includes texcoords and varying vars */ - src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_GENERIC, - lpfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); - break; - - default: - assert(0); } + + /* this includes texcoords and varying vars */ + src = draw_find_vs_output(llvmpipe->draw, + lpfs->info.input_semantic_name[i], + lpfs->info.input_semantic_index[i]); + draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); } llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw, -- cgit v1.2.3 From 040e1d008f8f8258f1b0ee0fcdf4906e0979fb66 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Tue, 3 Nov 2009 23:19:56 +0100 Subject: nv50: add 3d texture tiling and mip-mapping Mip-mapped 3D textures are not arrays of 2D layers with a mip-map layout like 2D textures, therefore we cannot use image_nr == depth for them. Making use of "volume tiling" modes now, the allowed modes are 0xZY where Z <= 5 and y <= 5. --- src/gallium/drivers/nv50/nv50_context.h | 12 ++++++ src/gallium/drivers/nv50/nv50_miptree.c | 63 +++++++++++++++++++++----------- src/gallium/drivers/nv50/nv50_tex.c | 37 ++++++++++++++++--- src/gallium/drivers/nv50/nv50_transfer.c | 39 +++++++++++++++----- 4 files changed, 114 insertions(+), 37 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 890defb90c..4b0f062295 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -69,6 +69,18 @@ struct nv50_sampler_stateobj { unsigned tsc[8]; }; +static INLINE unsigned +get_tile_height(uint32_t tile_mode) +{ + return 1 << ((tile_mode & 0xf) + 2); +} + +static INLINE unsigned +get_tile_depth(uint32_t tile_mode) +{ + return 1 << (tile_mode >> 4); +} + struct nv50_miptree_level { int *image_offset; unsigned pitch; diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 229a59cb74..9c20c5cc28 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -26,14 +26,33 @@ #include "nv50_context.h" +/* The restrictions in tile mode selection probably aren't necessary. */ static INLINE uint32_t -get_tile_mode(unsigned ny) +get_tile_mode(unsigned ny, unsigned d) { - if (ny > 32) return 4; - if (ny > 16) return 3; - if (ny > 8) return 2; - if (ny > 4) return 1; - return 0; + uint32_t tile_mode = 0x00; + + if (ny > 32) tile_mode = 0x04; /* height 64 tiles */ + else + if (ny > 16) tile_mode = 0x03; /* height 32 tiles */ + else + if (ny > 8) tile_mode = 0x02; /* height 16 tiles */ + else + if (ny > 4) tile_mode = 0x01; /* height 8 tiles */ + + if (d == 1) + return tile_mode; + else + if (tile_mode > 0x02) + tile_mode = 0x02; + + if (d > 16 && tile_mode < 0x02) + return tile_mode | 0x50; /* depth 32 tiles */ + if (d > 8) return tile_mode | 0x40; /* depth 16 tiles */ + if (d > 4) return tile_mode | 0x30; /* depth 8 tiles */ + if (d > 2) return tile_mode | 0x20; /* depth 4 tiles */ + + return tile_mode | 0x10; } static struct pipe_texture * @@ -43,7 +62,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); struct pipe_texture *pt = &mt->base.base; unsigned width = tmp->width[0], height = tmp->height[0]; - unsigned depth = tmp->depth[0]; + unsigned depth = tmp->depth[0], image_alignment; uint32_t tile_flags; int ret, i, l; @@ -67,17 +86,8 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) break; } - switch (pt->target) { - case PIPE_TEXTURE_3D: - mt->image_nr = pt->depth[0]; - break; - case PIPE_TEXTURE_CUBE: - mt->image_nr = 6; - break; - default: - mt->image_nr = 1; - break; - } + /* XXX: texture arrays */ + mt->image_nr = (pt->target == PIPE_TEXTURE_CUBE) ? 6 : 1; for (l = 0; l <= pt->last_level; l++) { struct nv50_miptree_level *lvl = &mt->level[l]; @@ -90,26 +100,35 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) lvl->image_offset = CALLOC(mt->image_nr, sizeof(int)); lvl->pitch = align(pt->nblocksx[l] * pt->block.size, 64); - lvl->tile_mode = get_tile_mode(pt->nblocksy[l]); + lvl->tile_mode = get_tile_mode(pt->nblocksy[l], depth); width = MAX2(1, width >> 1); height = MAX2(1, height >> 1); depth = MAX2(1, depth >> 1); } + image_alignment = get_tile_height(mt->level[0].tile_mode) * 64; + image_alignment *= get_tile_depth(mt->level[0].tile_mode); + + /* NOTE the distinction between arrays of mip-mapped 2D textures and + * mip-mapped 3D textures. We can't use image_nr == depth for 3D mip. + */ for (i = 0; i < mt->image_nr; i++) { for (l = 0; l <= pt->last_level; l++) { struct nv50_miptree_level *lvl = &mt->level[l]; int size; - unsigned tile_ny = 1 << (lvl->tile_mode + 2); + unsigned tile_h = get_tile_height(lvl->tile_mode); + unsigned tile_d = get_tile_depth(lvl->tile_mode); - size = align(pt->nblocksx[l] * pt->block.size, 64); - size *= align(pt->nblocksy[l], tile_ny); + size = lvl->pitch; + size *= align(pt->nblocksy[l], tile_h); + size *= align(pt->depth[l], tile_d); lvl->image_offset[i] = mt->total_size; mt->total_size += size; } + mt->total_size = align(mt->total_size, image_alignment); } ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, mt->total_size, diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index 52ccdaa407..2813f54477 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -96,19 +96,44 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so, if (i == NV50_TEX_FORMAT_LIST_SIZE) return 1; - mode = (nv50->sampler[unit]->normalized ? 0xd0005000 : 0x5001d000) | - (mt->base.bo->tile_mode << 22); + if (nv50->sampler[unit]->normalized) + mode = 0x50001000 | (1 << 31); + else { + mode = 0x50001000 | (7 << 14); + assert(mt->base.base.target == PIPE_TEXTURE_2D); + } + + mode |= ((mt->base.bo->tile_mode & 0x0f) << 22) | + ((mt->base.bo->tile_mode & 0xf0) << 21); + if (pf_type(mt->base.base.format) == PIPE_FORMAT_TYPE_SRGB) mode |= 0x0400; + switch (mt->base.base.target) { + case PIPE_TEXTURE_1D: + break; + case PIPE_TEXTURE_2D: + mode |= (1 << 14); + break; + case PIPE_TEXTURE_3D: + mode |= (2 << 14); + break; + case PIPE_TEXTURE_CUBE: + mode |= (3 << 14); + break; + default: + assert(!"unsupported texture target"); + break; + } + so_data (so, nv50_tex_format_list[i].hw); so_reloc(so, mt->base.bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | - NOUVEAU_BO_RD, 0, 0); + NOUVEAU_BO_RD, 0, 0); so_data (so, mode); so_data (so, 0x00300000); - so_data (so, mt->base.base.width[0]); + so_data (so, mt->base.base.width[0] | (1 << 31)); so_data (so, (mt->base.base.last_level << 28) | - (mt->base.base.depth[0] << 16) | mt->base.base.height[0]); + (mt->base.base.depth[0] << 16) | mt->base.base.height[0]); so_data (so, 0x03000000); so_data (so, mt->base.base.last_level << 4); @@ -124,7 +149,7 @@ nv50_tex_validate(struct nv50_context *nv50) unsigned i, unit, push; push = MAX2(nv50->miptree_nr, nv50->state.miptree_nr) * 2 + 23 + 6; - so = so_new(nv50->miptree_nr * 9 + push, nv50->miptree_nr + 2); + so = so_new(nv50->miptree_nr * 9 + push, nv50->miptree_nr * 2 + 2); nv50_so_init_sifc(nv50, so, nv50->screen->tic, NOUVEAU_BO_VRAM, nv50->miptree_nr * 8 * 4); diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index 9c008090b8..ea61357aaa 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -12,6 +12,7 @@ struct nv50_transfer { int level_pitch; int level_width; int level_height; + int level_depth; int level_x; int level_y; }; @@ -20,10 +21,10 @@ static void nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct nouveau_bo *src_bo, unsigned src_offset, int src_pitch, unsigned src_tile_mode, - int sx, int sy, int sw, int sh, + int sx, int sy, int sw, int sh, int sd, struct nouveau_bo *dst_bo, unsigned dst_offset, int dst_pitch, unsigned dst_tile_mode, - int dx, int dy, int dw, int dh, + int dx, int dy, int dw, int dh, int dd, int cpp, int width, int height, unsigned src_reloc, unsigned dst_reloc) { @@ -51,7 +52,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, OUT_RING (chan, src_tile_mode << 4); OUT_RING (chan, sw * cpp); OUT_RING (chan, sh); - OUT_RING (chan, 1); + OUT_RING (chan, sd); OUT_RING (chan, 0); } @@ -70,7 +71,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, OUT_RING (chan, dst_tile_mode << 4); OUT_RING (chan, dw * cpp); OUT_RING (chan, dh); - OUT_RING (chan, 1); + OUT_RING (chan, dd); OUT_RING (chan, 0); } @@ -114,6 +115,20 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, } } +static INLINE unsigned +get_zslice_offset(unsigned tile_mode, unsigned z, unsigned pitch, unsigned ny) +{ + unsigned tile_h = get_tile_height(tile_mode); + unsigned tile_d = get_tile_depth(tile_mode); + + /* pitch_2d == to next slice within this volume-tile */ + /* pitch_3d == to next slice in next 2D array of blocks */ + unsigned pitch_2d = tile_h * 64; + unsigned pitch_3d = tile_d * align(ny, tile_h) * pitch; + + return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d; +} + static struct pipe_transfer * nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice, @@ -129,9 +144,6 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, if (pt->target == PIPE_TEXTURE_CUBE) image = face; - else - if (pt->target == PIPE_TEXTURE_3D) - image = zslice; tx = CALLOC_STRUCT(nv50_transfer); if (!tx) @@ -157,6 +169,7 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->level_pitch = lvl->pitch; tx->level_width = mt->base.base.width[level]; tx->level_height = mt->base.base.height[level]; + tx->level_depth = mt->base.base.depth[level]; tx->level_offset = lvl->image_offset[image]; tx->level_tiling = lvl->tile_mode; tx->level_x = pf_get_nblocksx(&tx->base.block, x); @@ -168,6 +181,11 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + if (pt->target == PIPE_TEXTURE_3D) + tx->level_offset += get_zslice_offset(lvl->tile_mode, zslice, + lvl->pitch, + tx->base.nblocksy); + if (usage & PIPE_TRANSFER_READ) { nx = pf_get_nblocksx(&tx->base.block, tx->base.width); ny = pf_get_nblocksy(&tx->base.block, tx->base.height); @@ -176,10 +194,11 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->level_pitch, tx->level_tiling, x, y, tx->base.nblocksx, tx->base.nblocksy, + tx->level_depth, tx->bo, 0, tx->base.stride, tx->bo->tile_mode, 0, 0, - tx->base.nblocksx, tx->base.nblocksy, + tx->base.nblocksx, tx->base.nblocksy, 1, tx->base.block.size, nx, ny, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART, NOUVEAU_BO_GART); @@ -199,14 +218,16 @@ nv50_transfer_del(struct pipe_transfer *ptx) if (ptx->usage & PIPE_TRANSFER_WRITE) { struct pipe_screen *pscreen = ptx->texture->screen; + nv50_transfer_rect_m2mf(pscreen, tx->bo, 0, tx->base.stride, tx->bo->tile_mode, 0, 0, - tx->base.nblocksx, tx->base.nblocksy, + tx->base.nblocksx, tx->base.nblocksy, 1, mt->base.bo, tx->level_offset, tx->level_pitch, tx->level_tiling, tx->level_x, tx->level_y, tx->base.nblocksx, tx->base.nblocksy, + tx->level_depth, tx->base.block.size, nx, ny, NOUVEAU_BO_GART, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART); -- cgit v1.2.3 From 317ccfe0dfbfda13f58a26f661324d883b25a316 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Tue, 3 Nov 2009 22:09:32 +0100 Subject: nv50: add abs-modifier for emit_minmax --- src/gallium/drivers/nv50/nv50_program.c | 48 +++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 27827c7ecf..64a0b571a5 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -88,12 +88,16 @@ struct nv50_reg { int index; int hw; - int neg; + int mod; int rhw; /* result hw for FP outputs, or interpolant index */ int acc; /* instruction where this reg is last read (first insn == 1) */ }; +#define NV50_MOD_NEG 1 +#define NV50_MOD_ABS 2 +#define NV50_MOD_SAT 4 + /* arbitrary limits */ #define MAX_IF_DEPTH 4 #define MAX_LOOP_DEPTH 4 @@ -152,7 +156,7 @@ ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) reg->type = type; reg->index = index; reg->hw = hw; - reg->neg = 0; + reg->mod = 0; reg->rhw = -1; reg->acc = 0; } @@ -460,8 +464,12 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e) static INLINE void set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) { + unsigned val; float f = pc->immd_buf[imm->hw]; - unsigned val = fui(imm->neg ? -f : f); + + if (imm->mod & NV50_MOD_ABS) + f = fabsf(f); + val = fui((imm->mod & NV50_MOD_NEG) ? -f : f); set_long(pc, e); /*XXX: can't be predicated - bits overlap.. catch cases where both @@ -801,12 +809,12 @@ emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, set_dst(pc, dst, e); set_src_0(pc, src0, e); if (src1->type == P_IMMD && !is_long(e)) { - if (src0->neg) + if (src0->mod & NV50_MOD_NEG) e->inst[0] |= 0x00008000; set_immd(pc, src1, e); } else { set_src_1(pc, src1, e); - if (src0->neg ^ src1->neg) { + if ((src0->mod ^ src1->mod) & NV50_MOD_NEG) { if (is_long(e)) e->inst[1] |= 0x08000000; else @@ -828,9 +836,10 @@ emit_add(struct nv50_pc *pc, struct nv50_reg *dst, alloc_reg(pc, src1); check_swap_src_0_1(pc, &src0, &src1); - if (!pc->allow32 || (src0->neg | src1->neg) || src1->hw > 63) { + if (!pc->allow32 || (src0->mod | src1->mod) || src1->hw > 63) { set_long(pc, e); - e->inst[1] |= (src0->neg << 26) | (src1->neg << 27); + e->inst[1] |= ((src0->mod & NV50_MOD_NEG) << 26) | + ((src1->mod & NV50_MOD_NEG) << 27); } set_dst(pc, dst, e); @@ -877,6 +886,11 @@ emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, set_src_0(pc, src0, e); set_src_1(pc, src1, e); + if (src0->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00100000; + if (src1->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00080000; + emit(pc, e); } @@ -885,9 +899,9 @@ emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) { assert(src0 != src1); - src1->neg ^= 1; + src1->mod ^= NV50_MOD_NEG; emit_add(pc, dst, src0, src1); - src1->neg ^= 1; + src1->mod ^= NV50_MOD_NEG; } static void @@ -941,9 +955,9 @@ emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, set_src_1(pc, src1, e); set_src_2(pc, src2, e); - if (src0->neg ^ src1->neg) + if ((src0->mod ^ src1->mod) & NV50_MOD_NEG) e->inst[1] |= 0x04000000; - if (src2->neg) + if (src2->mod & NV50_MOD_NEG) e->inst[1] |= 0x08000000; emit(pc, e); @@ -954,9 +968,9 @@ emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) { assert(src2 != src0 && src2 != src1); - src2->neg ^= 1; + src2->mod ^= NV50_MOD_NEG; emit_mad(pc, dst, src0, src1, src2); - src2->neg ^= 1; + src2->mod ^= NV50_MOD_NEG; } static void @@ -1230,7 +1244,7 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src) const int r_pred = 1; unsigned cvn = CVT_F32_F32; - if (src->neg) + if (src->mod & NV50_MOD_NEG) cvn |= CVT_NEG; /* write predicate reg */ emit_cvt(pc, NULL, src, r_pred, CVTOP_RN, cvn); @@ -1408,7 +1422,7 @@ emit_ddy(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) assert(src->type == P_TEMP); - if (!src->neg) /* ! double negation */ + if (!(src->mod & NV50_MOD_NEG)) /* ! double negation */ emit_neg(pc, src, src); e->inst[0] = 0xc0150000; @@ -1671,7 +1685,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, break; case TGSI_UTIL_SIGN_TOGGLE: if (neg) - r->neg = 1; + r->mod = NV50_MOD_NEG; else { temp = temp_temp(pc); emit_neg(pc, temp, r); @@ -2207,7 +2221,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!src[i][c]) continue; - src[i][c]->neg = 0; + src[i][c]->mod = 0; if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD) FREE(src[i][c]); else -- cgit v1.2.3 From 618e3b89f6ecdf422132ecea19315b326dd348ec Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Tue, 3 Nov 2009 23:30:18 +0100 Subject: nv50: fix shader emit_tex for cube textures --- src/gallium/drivers/nv50/nv50_program.c | 50 ++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 64a0b571a5..bf50982dd1 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1257,11 +1257,37 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src) emit(pc, e); } +static void +load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], + struct nv50_reg **src, boolean proj) +{ + int mod[3] = { src[0]->mod, src[1]->mod, src[2]->mod }; + + src[0]->mod |= NV50_MOD_ABS; + src[1]->mod |= NV50_MOD_ABS; + src[2]->mod |= NV50_MOD_ABS; + + emit_minmax(pc, 4, t[2], src[0], src[1]); + emit_minmax(pc, 4, t[2], src[2], t[2]); + + src[0]->mod = mod[0]; + src[1]->mod = mod[1]; + src[2]->mod = mod[2]; + + if (proj && 0 /* looks more correct without this */) + emit_mul(pc, t[2], t[2], src[3]); + emit_flop(pc, 0, t[2], t[2]); + + emit_mul(pc, t[0], src[0], t[2]); + emit_mul(pc, t[1], src[1], t[2]); + emit_mul(pc, t[2], src[2], t[2]); +} + static void emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, struct nv50_reg **src, unsigned unit, unsigned type, boolean proj) { - struct nv50_reg *temp, *t[4]; + struct nv50_reg *t[4]; struct nv50_program_exec *e; unsigned c, mode, dim; @@ -1290,6 +1316,9 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, /* some cards need t[0]'s hw index to be a multiple of 4 */ alloc_temp4(pc, t, 0); + if (type == TGSI_TEXTURE_CUBE) { + load_cube_tex_coords(pc, t, src, proj); + } else if (proj) { if (src[0]->type == P_TEMP && src[0]->rhw != -1) { mode = pc->interp_mode[src[0]->index]; @@ -1314,17 +1343,8 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, */ } } else { - if (type == TGSI_TEXTURE_CUBE) { - temp = temp_temp(pc); - emit_minmax(pc, 4, temp, src[0], src[1]); - emit_minmax(pc, 4, temp, temp, src[2]); - emit_flop(pc, 0, temp, temp); - for (c = 0; c < 3; c++) - emit_mul(pc, t[c], src[c], temp); - } else { - for (c = 0; c < dim; c++) - emit_mov(pc, t[c], src[c]); - } + for (c = 0; c < dim; c++) + emit_mov(pc, t[c], src[c]); } e = exec(pc); @@ -1337,14 +1357,16 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, if (dim == 2) e->inst[0] |= 0x00400000; else - if (dim == 3) + if (dim == 3) { e->inst[0] |= 0x00800000; + if (type == TGSI_TEXTURE_CUBE) + e->inst[0] |= 0x08000000; + } e->inst[0] |= (mask & 0x3) << 25; e->inst[1] |= (mask & 0xc) << 12; emit(pc, e); - #if 1 c = 0; if (mask & 1) emit_mov(pc, dst[0], t[c++]); -- cgit v1.2.3 From ad96c0d851f6c3696fa6ae0c1f6ad56e849bc739 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 3 Nov 2009 16:48:48 +0100 Subject: r300g: add color channel masking Signed-off-by: Corbin Simpson --- src/gallium/drivers/r300/r300_context.h | 1 + src/gallium/drivers/r300/r300_emit.c | 6 +++--- src/gallium/drivers/r300/r300_state.c | 14 ++++++++++++++ src/gallium/drivers/r300/r300_state_invariant.c | 3 +-- 4 files changed, 19 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index ae7015634c..8c65c04d01 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -34,6 +34,7 @@ struct r300_vertex_shader; struct r300_blend_state { uint32_t blend_control; /* R300_RB3D_CBLEND: 0x4e04 */ uint32_t alpha_blend_control; /* R300_RB3D_ABLEND: 0x4e08 */ + uint32_t color_channel_mask; /* R300_RB3D_COLOR_CHANNEL_MASK: 0x4e0c */ uint32_t rop; /* R300_RB3D_ROPCNTL: 0x4e18 */ uint32_t dither; /* R300_RB3D_DITHER_CTL: 0x4e50 */ }; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 79972dbb49..8fe9a68886 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -38,10 +38,11 @@ void r300_emit_blend_state(struct r300_context* r300, struct r300_blend_state* blend) { CS_LOCALS(r300); - BEGIN_CS(7); - OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 2); + BEGIN_CS(8); + OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3); OUT_CS(blend->blend_control); OUT_CS(blend->alpha_blend_control); + OUT_CS(blend->color_channel_mask); OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG(R300_RB3D_DITHER_CTL, blend->dither); END_CS; @@ -313,7 +314,6 @@ void r300_emit_fb_state(struct r300_context* r300, tex = (struct r300_texture*)surf->texture; assert(tex && tex->buffer && "cbuf is marked, but NULL!"); - /* XXX I still need to figure out how to set the mipmap level here */ OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index af063d4b20..242ec9f365 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -100,6 +100,20 @@ static void* r300_create_blend_state(struct pipe_context* pipe, (state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT; } + /* Color Channel Mask */ + if (state->colormask & PIPE_MASK_R) { + blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_RED_MASK0; + } + if (state->colormask & PIPE_MASK_G) { + blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0; + } + if (state->colormask & PIPE_MASK_B) { + blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0; + } + if (state->colormask & PIPE_MASK_A) { + blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0; + } + if (state->dither) { blend->dither = R300_RB3D_DITHER_CTL_DITHER_MODE_LUT | R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT; diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 4865f16058..7e4d5c7c72 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -84,7 +84,7 @@ void r300_emit_invariant_state(struct r300_context* r300) END_CS; /* XXX unsorted stuff from surface_fill */ - BEGIN_CS(64 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0)); + BEGIN_CS(62 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0)); /* Flush PVS. */ OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); @@ -125,7 +125,6 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C); OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525); OUT_CS_REG(R300_RB3D_CCTL, 0x00000000); - OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0x0000000F); OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000); if (caps->is_r500) { OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000); -- cgit v1.2.3 From 07190888bdc41f53bf8ea30c9e2ee4a61b42d802 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 3 Nov 2009 16:50:09 +0100 Subject: r300g: set the correct offset in a colorbuffer surface Suggested by Joakim Sindholt. Also, put flushing of colorbuffers _before_ the framebuffer state setup, suggested by docs. Signed-off-by: Corbin Simpson --- src/gallium/drivers/r300/r300_emit.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 8fe9a68886..fc823ad31f 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -309,13 +309,20 @@ void r300_emit_fb_state(struct r300_context* r300, CS_LOCALS(r300); BEGIN_CS((10 * fb->nr_cbufs) + (fb->zsbuf ? 10 : 0) + 4); + OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, + R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | + R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); + OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, + R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | + R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + for (i = 0; i < fb->nr_cbufs; i++) { surf = fb->cbufs[i]; tex = (struct r300_texture*)surf->texture; assert(tex && tex->buffer && "cbuf is marked, but NULL!"); OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); - OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_RELOC(tex->buffer, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] | @@ -332,7 +339,7 @@ void r300_emit_fb_state(struct r300_context* r300, assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_RELOC(tex->buffer, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_ZB_FORMAT, r300_translate_zsformat(tex->tex.format)); @@ -341,12 +348,6 @@ void r300_emit_fb_state(struct r300_context* r300, RADEON_GEM_DOMAIN_VRAM, 0); } - OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, - R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | - R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); - OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, - R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | - R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); END_CS; } -- cgit v1.2.3 From 4671005a4317fa37aea8786740470a40906fbfa7 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 3 Nov 2009 16:58:39 +0100 Subject: r300g: fix the size of CS when emitting the fragprog constant buffer Signed-off-by: Corbin Simpson --- src/gallium/drivers/r300/r300_emit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index fc823ad31f..6415c59c2d 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -285,7 +285,7 @@ void r500_emit_fs_constant_buffer(struct r300_context* r300, if (constants->Count == 0) return; - BEGIN_CS(constants->Count * 4 + 2); + BEGIN_CS(constants->Count * 4 + 3); OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST); OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, constants->Count * 4); for (i = 0; i < constants->Count; i++) { -- cgit v1.2.3 From c2e47191d72e16aaa1fae4f47bbed7639c2ff201 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 4 Nov 2009 10:56:44 +0100 Subject: r300g: add polygon mode Signed-off-by: Corbin Simpson --- src/gallium/drivers/r300/r300_context.h | 1 + src/gallium/drivers/r300/r300_emit.c | 3 ++- src/gallium/drivers/r300/r300_state.c | 27 +++++++++++++++++++ src/gallium/drivers/r300/r300_state_inlines.h | 36 +++++++++++++++++++++++++ src/gallium/drivers/r300/r300_state_invariant.c | 3 +-- 5 files changed, 67 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 8c65c04d01..850e5a41c9 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -78,6 +78,7 @@ struct r300_rs_state { uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */ uint32_t line_stipple_value; /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */ uint32_t color_control; /* R300_GA_COLOR_CONTROL: 0x4278 */ + uint32_t polygon_mode; /* R300_GA_POLY_MODE: 0x4288 */ }; struct r300_rs_block { diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 6415c59c2d..69ce5966e8 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -483,7 +483,7 @@ void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs) { CS_LOCALS(r300); - BEGIN_CS(20); + BEGIN_CS(22); OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status); OUT_CS_REG(R300_GA_POINT_SIZE, rs->point_size); OUT_CS_REG_SEQ(R300_GA_POINT_MINMAX, 2); @@ -499,6 +499,7 @@ void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs) OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, rs->line_stipple_config); OUT_CS_REG(R300_GA_LINE_STIPPLE_VALUE, rs->line_stipple_value); OUT_CS_REG(R300_GA_COLOR_CONTROL, rs->color_control); + OUT_CS_REG(R300_GA_POLY_MODE, rs->polygon_mode); END_CS; } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 242ec9f365..658a8cba13 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -398,25 +398,52 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->line_control = pack_float_16_6x(state->line_width) | R300_GA_LINE_CNTL_END_TYPE_COMP; + /* XXX I think there is something wrong with the polygon mode, + * XXX re-test when r300g is in a better shape */ + + /* Enable polygon mode */ + if (state->fill_cw != PIPE_POLYGON_MODE_FILL || + state->fill_ccw != PIPE_POLYGON_MODE_FILL) { + rs->polygon_mode = R300_GA_POLY_MODE_DUAL; + } + /* Radeons don't think in "CW/CCW", they think in "front/back". */ if (state->front_winding == PIPE_WINDING_CW) { rs->cull_mode = R300_FRONT_FACE_CW; + /* Polygon offset */ if (state->offset_cw) { rs->polygon_offset_enable |= R300_FRONT_ENABLE; } if (state->offset_ccw) { rs->polygon_offset_enable |= R300_BACK_ENABLE; } + + /* Polygon mode */ + if (rs->polygon_mode) { + rs->polygon_mode |= + r300_translate_polygon_mode_front(state->fill_cw); + rs->polygon_mode |= + r300_translate_polygon_mode_back(state->fill_ccw); + } } else { rs->cull_mode = R300_FRONT_FACE_CCW; + /* Polygon offset */ if (state->offset_ccw) { rs->polygon_offset_enable |= R300_FRONT_ENABLE; } if (state->offset_cw) { rs->polygon_offset_enable |= R300_BACK_ENABLE; } + + /* Polygon mode */ + if (rs->polygon_mode) { + rs->polygon_mode |= + r300_translate_polygon_mode_front(state->fill_ccw); + rs->polygon_mode |= + r300_translate_polygon_mode_back(state->fill_cw); + } } if (state->front_winding & state->cull_mode) { rs->cull_mode |= R300_CULL_FRONT; diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 176e59f281..52b9650fc1 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -504,4 +504,40 @@ r300_translate_vertex_data_swizzle(enum pipe_format format) { (0xf << R300_WRITE_ENA_SHIFT)); } +static INLINE uint32_t +r300_translate_polygon_mode_front(unsigned mode) { + switch (mode) + { + case PIPE_POLYGON_MODE_FILL: + return R300_GA_POLY_MODE_FRONT_PTYPE_TRI; + case PIPE_POLYGON_MODE_LINE: + return R300_GA_POLY_MODE_FRONT_PTYPE_LINE; + case PIPE_POLYGON_MODE_POINT: + return R300_GA_POLY_MODE_FRONT_PTYPE_POINT; + + default: + debug_printf("r300: Bad polygon mode %i in %s\n", mode, + __FUNCTION__); + return R300_GA_POLY_MODE_FRONT_PTYPE_TRI; + } +} + +static INLINE uint32_t +r300_translate_polygon_mode_back(unsigned mode) { + switch (mode) + { + case PIPE_POLYGON_MODE_FILL: + return R300_GA_POLY_MODE_BACK_PTYPE_TRI; + case PIPE_POLYGON_MODE_LINE: + return R300_GA_POLY_MODE_BACK_PTYPE_LINE; + case PIPE_POLYGON_MODE_POINT: + return R300_GA_POLY_MODE_BACK_PTYPE_POINT; + + default: + debug_printf("r300: Bad polygon mode %i in %s\n", mode, + __FUNCTION__); + return R300_GA_POLY_MODE_BACK_PTYPE_TRI; + } +} + #endif /* R300_STATE_INLINES_H */ diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 7e4d5c7c72..c07e6ae676 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -84,7 +84,7 @@ void r300_emit_invariant_state(struct r300_context* r300) END_CS; /* XXX unsorted stuff from surface_fill */ - BEGIN_CS(62 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0)); + BEGIN_CS(60 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0)); /* Flush PVS. */ OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); @@ -114,7 +114,6 @@ void r300_emit_invariant_state(struct r300_context* r300) /* XXX this big chunk should be refactored into rs_state */ OUT_CS_REG(R300_GA_SOLID_RG, 0x00000000); OUT_CS_REG(R300_GA_SOLID_BA, 0x00000000); - OUT_CS_REG(R300_GA_POLY_MODE, 0x00000000); OUT_CS_REG(R300_GA_ROUND_MODE, 0x00000001); OUT_CS_REG(R300_GA_OFFSET, 0x00000000); OUT_CS_REG(R300_GA_FOG_SCALE, 0x3DBF1412); -- cgit v1.2.3 From 67034b9efce43a7b83f79e44beb6d4e8f6dff22a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 5 Nov 2009 17:05:20 +0000 Subject: softpipe: Implement PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE for destination. It is a valid and tested combination on D3D9. --- src/gallium/drivers/softpipe/sp_quad_blend.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 0ad0b98654..fe6b6cec35 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -478,7 +478,15 @@ blend_quad(struct quad_stage *qs, VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ break; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - assert(0); /* illegal */ + { + const float *alpha = quadColor[3]; + float diff[4], temp[4]; + VEC4_SUB(diff, one, dest[3]); + VEC4_MIN(temp, alpha, diff); + VEC4_MUL(dest[0], quadColor[0], temp); /* R */ + VEC4_MUL(dest[1], quadColor[1], temp); /* G */ + VEC4_MUL(dest[2], quadColor[2], temp); /* B */ + } break; case PIPE_BLENDFACTOR_CONST_COLOR: { @@ -600,7 +608,7 @@ blend_quad(struct quad_stage *qs, VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ break; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - assert(0); /* illegal */ + /* dest = dest * 1 NO-OP, leave dest as-is */ break; case PIPE_BLENDFACTOR_CONST_COLOR: /* fall-through */ -- cgit v1.2.3 From 96e938f62c729fab74601627d54c9c4cf499ebdf Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 6 Nov 2009 15:08:05 +0000 Subject: llvmpipe: Fix build with llvm 2.6. Fixes bug 24949. --- src/gallium/drivers/llvmpipe/lp_bld_misc.cpp | 7 +++---- src/gallium/drivers/llvmpipe/lp_bld_misc.h | 8 +++++++- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp index c9acaf1f16..d3f78c06d9 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp +++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp @@ -26,8 +26,6 @@ **************************************************************************/ -#include "llvm/Config/config.h" - #include "pipe/p_config.h" #include "lp_bld_misc.h" @@ -50,12 +48,13 @@ LLVMLinkInJIT(void) extern "C" int X86TargetMachineModule; -void +int LLVMInitializeNativeTarget(void) { #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - X86TargetMachineModule = 1; + X86TargetMachineModule = 1; #endif + return 0; } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.h b/src/gallium/drivers/llvmpipe/lp_bld_misc.h index 51a84c5e25..0e787e0b9c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_misc.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.h @@ -30,17 +30,23 @@ #define LP_BLD_MISC_H +#include "llvm/Config/config.h" + #ifdef __cplusplus extern "C" { #endif +#ifndef LLVM_NATIVE_ARCH + void LLVMLinkInJIT(void); -void +int LLVMInitializeNativeTarget(void); +#endif /* !LLVM_NATIVE_ARCH */ + #ifdef __cplusplus } -- cgit v1.2.3 From 44cb5b5c663da4d218448cfd2386b431de35c8d2 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 7 Nov 2009 10:46:47 +0100 Subject: nv50: enable all 32 threads of a warp This should be the default setting. See also 7d967b9b7c08aea2a471c5bf6aced8bfafdae874. --- src/gallium/drivers/nv50/nv50_screen.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index c8d0f1e4d8..e1b2f11239 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -309,7 +309,9 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_method(so, screen->tesla, 0x121c, 1); so_data (so, 1); - /* try to activate all/more lanes (threads) in a warp */ + /* activate all 32 lanes (threads) in a warp */ + so_method(so, screen->tesla, 0x19a0, 1); + so_data (so, 0x2); so_method(so, screen->tesla, 0x1400, 1); so_data (so, 0xf); -- cgit v1.2.3 From 57d77c6a4474beecdd22b97a8f5af6e4d2833d97 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Sat, 7 Nov 2009 19:31:18 +0000 Subject: i915g: Fix comment in is buffer referenced --- src/gallium/drivers/i915/i915_context.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index e745f3342d..94c8aee30f 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -155,15 +155,11 @@ static unsigned int i915_is_buffer_referenced(struct pipe_context *pipe, struct pipe_buffer *buf) { - /** - * FIXME: Return the corrent result. We can't alays return referenced - * since it causes a double flush within the vbo module. + /* + * Since we never expose hardware buffers to the state tracker + * they can never be referenced, so this isn't a lie */ -#if 0 - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -#else return 0; -#endif } -- cgit v1.2.3 From 6acb26eadfcb3c21fd09d0b22804b49de9a82cf7 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sun, 25 Oct 2009 13:22:22 +0100 Subject: r300g: move vborender context function to seperate file r300g: Un-migrate r300_draw_render. It'll make maintaining the SW TCL path easier. --- src/gallium/drivers/r300/r300_render.c | 5 ++++- src/gallium/drivers/r300/r300_render.h | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index c36350d29e..634c803f2a 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -20,6 +20,9 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/* r300_render: Vertex and index buffer primitive emission. Contains both + * HW TCL fastpath rendering, and SW TCL Draw-assisted rendering. */ + #include "draw/draw_context.h" #include "draw/draw_vbuf.h" @@ -38,7 +41,7 @@ /* r300_render: Vertex and index buffer primitive emission. */ #define R300_MAX_VBO_SIZE (1024 * 1024) -static uint32_t r300_translate_primitive(unsigned prim) +uint32_t r300_translate_primitive(unsigned prim) { switch (prim) { case PIPE_PRIM_POINTS: diff --git a/src/gallium/drivers/r300/r300_render.h b/src/gallium/drivers/r300/r300_render.h index 3d8f47ba75..3f8ac1fb7a 100644 --- a/src/gallium/drivers/r300/r300_render.h +++ b/src/gallium/drivers/r300/r300_render.h @@ -23,6 +23,8 @@ #ifndef R300_RENDER_H #define R300_RENDER_H +uint32_t r300_translate_primitive(unsigned prim); + boolean r300_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, unsigned indexSize, -- cgit v1.2.3 From c7dfffc5d5078e3cf1c28c230177cbbb43b91131 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sun, 25 Oct 2009 12:08:02 +0100 Subject: r300g: enable CS dumping --- src/gallium/drivers/r300/r300_cs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 883f0a02dc..86ba91db52 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -34,8 +34,8 @@ #define MAX_CS_SIZE 64 * 1024 / 4 -#define VERY_VERBOSE_CS 0 -#define VERY_VERBOSE_REGISTERS 0 +#define VERY_VERBOSE_CS 1 +#define VERY_VERBOSE_REGISTERS 1 /* XXX stolen from radeon_drm.h */ #define RADEON_GEM_DOMAIN_CPU 0x1 -- cgit v1.2.3 From d8592d1724d8c8fd0b36eb21f4007b52f809e062 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sun, 1 Nov 2009 17:04:32 +0100 Subject: r300g: add missing flush --- src/gallium/drivers/r300/r300_state.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 658a8cba13..bed886fad0 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -577,6 +577,8 @@ static void r300_set_sampler_textures(struct pipe_context* pipe, if (count > 8) { return; } + + r300->context.flush(&r300->context, 0, NULL); for (i = 0; i < count; i++) { if (r300->textures[i] != (struct r300_texture*)texture[i]) { -- cgit v1.2.3 From 3445f476977ae403cef9ca15661fa0f96ff50eca Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sun, 25 Oct 2009 13:53:25 +0100 Subject: r300g: VBOs WIP --- src/gallium/drivers/r300/Makefile | 1 + src/gallium/drivers/r300/r300_context.c | 14 +- src/gallium/drivers/r300/r300_context.h | 27 +-- src/gallium/drivers/r300/r300_emit.c | 108 +++++++++++- src/gallium/drivers/r300/r300_emit.h | 12 ++ src/gallium/drivers/r300/r300_render.c | 135 +++++++-------- src/gallium/drivers/r300/r300_state.c | 13 +- src/gallium/drivers/r300/r300_state_derived.c | 14 +- src/gallium/drivers/r300/r300_vbo.c | 226 ++++++++++++++++++++++++++ src/gallium/drivers/r300/r300_vbo.h | 36 ++++ 10 files changed, 477 insertions(+), 109 deletions(-) create mode 100644 src/gallium/drivers/r300/r300_vbo.c create mode 100644 src/gallium/drivers/r300/r300_vbo.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index f73d80de88..d13bb7a36b 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -17,6 +17,7 @@ C_SOURCES = \ r300_state.c \ r300_state_derived.c \ r300_state_invariant.c \ + r300_vbo.c \ r300_vs.c \ r300_texture.c \ r300_tgsi_to_rc.c diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index f974147ea4..b520e5929e 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -104,6 +104,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, struct r300_winsys* r300_winsys) { struct r300_context* r300 = CALLOC_STRUCT(r300_context); + struct r300_screen* r300screen = r300_screen(screen); if (!r300) return NULL; @@ -119,9 +120,16 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.clear = r300_clear; - r300->context.draw_arrays = r300_draw_arrays; - r300->context.draw_elements = r300_draw_elements; - r300->context.draw_range_elements = r300_swtcl_draw_range_elements; + if (r300screen->caps->has_tcl) + { + r300->context.draw_arrays = r300_draw_arrays; + r300->context.draw_elements = r300_draw_elements; + r300->context.draw_range_elements = r300_draw_range_elements; + } + else + { + assert(0); + } r300->context.is_texture_referenced = r300_is_texture_referenced; r300->context.is_buffer_referenced = r300_is_buffer_referenced; diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 850e5a41c9..a6748852d8 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -216,18 +216,19 @@ struct r300_texture { struct r300_texture_state state; }; -struct r300_vertex_format { +struct r300_vertex_info { /* Parent class */ struct vertex_info vinfo; - /* R300_VAP_PROG_STREAK_CNTL_[0-7] */ - uint32_t vap_prog_stream_cntl[8]; - /* R300_VAP_PROG_STREAK_CNTL_EXT_[0-7] */ - uint32_t vap_prog_stream_cntl_ext[8]; /* Map of vertex attributes into PVS memory for HW TCL, * or GA memory for SW TCL. */ int vs_tab[16]; /* Map of rasterizer attributes from GB through RS to US. */ int fs_tab[16]; + + /* R300_VAP_PROG_STREAK_CNTL_[0-7] */ + uint32_t vap_prog_stream_cntl[8]; + /* R300_VAP_PROG_STREAK_CNTL_EXT_[0-7] */ + uint32_t vap_prog_stream_cntl_ext[8]; }; extern struct pipe_viewport_state r300_viewport_identity; @@ -256,7 +257,7 @@ struct r300_context { * depends on the combination of both currently loaded shaders. */ struct util_hash_table* shader_hash_table; /* Vertex formatting information. */ - struct r300_vertex_format* vertex_info; + struct r300_vertex_info* vertex_info; /* Various CSO state objects. */ /* Blend state. */ @@ -285,12 +286,6 @@ struct r300_context { /* Texture states. */ struct r300_texture* textures[8]; int texture_count; - /* Vertex buffers for Gallium. */ - struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; - int vertex_buffer_count; - /* Vertex elements for Gallium. */ - struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS]; - int vertex_element_count; /* Vertex shader. */ struct r300_vertex_shader* vs; /* Viewport state. */ @@ -298,6 +293,14 @@ struct r300_context { /* ZTOP state. */ struct r300_ztop_state ztop_state; + /* Vertex buffers for Gallium. */ + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + int vbuf_count; + /* Vertex elements for Gallium. */ + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + int aos_count; + unsigned hw_prim; + /* Bitmask of dirty state objects. */ uint32_t dirty_state; /* Flag indicating whether or not the HW is dirty. */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 69ce5966e8..92e6ec606c 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -582,7 +582,48 @@ void r300_emit_texture(struct r300_context* r300, END_CS; } -void r300_emit_vertex_buffer(struct r300_context* r300) +void r300_emit_aos(struct r300_context* r300, unsigned offset) +{ + struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; + struct pipe_vertex_element *velem = r300->vertex_element; + CS_LOCALS(r300); + int i; + unsigned packet_size = (r300->aos_count * 3 + 1) / 2; + BEGIN_CS(2 + packet_size + r300->aos_count * 2); + OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); + OUT_CS(r300->aos_count); + for (i = 0; i < r300->aos_count - 1; i += 2) { + int buf_num1 = velem[i].vertex_buffer_index; + int buf_num2 = velem[i+1].vertex_buffer_index; + assert(vbuf[buf_num1].stride % 4 == 0 && pf_get_size(velem[i].src_format) % 4 == 0); + assert(vbuf[buf_num2].stride % 4 == 0 && pf_get_size(velem[i+1].src_format) % 4 == 0); + OUT_CS((pf_get_size(velem[i].src_format) >> 2) | (vbuf[buf_num1].stride << 6) | + (pf_get_size(velem[i+1].src_format) << 14) | (vbuf[buf_num2].stride << 22)); + OUT_CS(vbuf[buf_num1].buffer_offset + velem[i].src_offset + + offset * vbuf[buf_num1].stride); + OUT_CS(vbuf[buf_num2].buffer_offset + velem[i+1].src_offset + + offset * vbuf[buf_num2].stride); + } + if (r300->aos_count & 1) { + int buf_num = velem[i].vertex_buffer_index; + assert(vbuf[buf_num].stride % 4 == 0 && pf_get_size(velem[i].src_format) % 4 == 0); + OUT_CS((pf_get_size(velem[i].src_format) >> 2) | (vbuf[buf_num].stride << 6)); + OUT_CS(vbuf[buf_num].buffer_offset + velem[i].src_offset + + offset * vbuf[buf_num].stride); + } + + for (i = 0; i < r300->aos_count; i++) { + cs_winsys->write_cs_reloc(cs_winsys, + vbuf[velem[i].vertex_buffer_index].buffer, + RADEON_GEM_DOMAIN_GTT, + 0, + 0); + cs_count -= 2; + } + END_CS; +} +#if 0 +void r300_emit_draw_packet(struct r300_context* r300) { CS_LOCALS(r300); @@ -605,6 +646,65 @@ void r300_emit_vertex_buffer(struct r300_context* r300) OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); END_CS; } +#endif +void r300_emit_draw_arrays(struct r300_context *r300, + unsigned count) +{ + CS_LOCALS(r300); + assert(count < 65536); + + BEGIN_CS(4); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | + r300->hw_prim); + END_CS; +} + +void r300_emit_draw_elements(struct r300_context *r300, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned start, + unsigned count) +{ + CS_LOCALS(r300); + assert(indexSize == 4 || indexSize == 2); + assert(count < 65536); + assert((start * indexSize) % 4 == 0); + + uint32_t size_dwords; + uint32_t skip_dwords = indexSize * start / sizeof(uint32_t); + assert(skip_dwords == 0); + + BEGIN_CS(10); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); + if (indexSize == 4) { + size_dwords = count + start; + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | + R300_VAP_VF_CNTL__INDEX_SIZE_32bit | r300->hw_prim); + } else { + size_dwords = (count + start + 1) / 2; + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + (count << 16) | r300->hw_prim); + } + + OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2); + OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) | + (0 << R300_INDX_BUFFER_SKIP_SHIFT)); + OUT_CS(skip_dwords); + OUT_CS(size_dwords); + cs_winsys->write_cs_reloc(cs_winsys, + indexBuffer, + RADEON_GEM_DOMAIN_GTT, + 0, + 0); + cs_count -= 2; + + END_CS; +} void r300_emit_vertex_format_state(struct r300_context* r300) { @@ -771,8 +871,6 @@ void r300_emit_dirty_state(struct r300_context* r300) return; } - r300_update_derived_state(r300); - /* Clean out BOs. */ r300->winsys->reset_bos(r300->winsys); @@ -823,7 +921,7 @@ validate: goto validate; } } else { - debug_printf("No VBO while emitting dirty state!\n"); + // debug_printf("No VBO while emitting dirty state!\n"); } if (!r300->winsys->validate(r300->winsys)) { r300->context.flush(&r300->context, 0, NULL); @@ -951,7 +1049,7 @@ validate: */ /* Finally, emit the VBO. */ - r300_emit_vertex_buffer(r300); + //r300_emit_vertex_buffer(r300); r300->dirty_hw++; } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 6befca72ce..b4fdfecde0 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -29,6 +29,8 @@ struct rX00_fragment_program_code; struct r300_vertex_program_code; +void r300_emit_aos(struct r300_context* r300, unsigned offset); + void r300_emit_blend_state(struct r300_context* r300, struct r300_blend_state* blend); @@ -38,6 +40,16 @@ void r300_emit_blend_color_state(struct r300_context* r300, void r300_emit_clip_state(struct r300_context* r300, struct pipe_clip_state* clip); +void r300_emit_draw_arrays(struct r300_context *r300, unsigned count); + +void r300_emit_draw_elements(struct r300_context *r300, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned start, + unsigned count); + void r300_emit_dsa_state(struct r300_context* r300, struct r300_dsa_state* dsa); diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 634c803f2a..86aaf841dd 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -31,6 +31,7 @@ #include "util/u_memory.h" #include "util/u_prim.h" +#include "r300_vbo.h" #include "r300_cs.h" #include "r300_context.h" #include "r300_emit.h" @@ -69,98 +70,64 @@ uint32_t r300_translate_primitive(unsigned prim) } } -/* This is the fast-path drawing & emission for HW TCL. */ -boolean r300_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) +static boolean setup_vertex_buffers(struct r300_context *r300) { - struct r300_context* r300 = r300_context(pipe); - uint32_t prim = r300_translate_primitive(mode); - struct pipe_vertex_buffer* aos = r300->vertex_buffers; - unsigned aos_count = r300->vertex_buffer_count; - short* indices; - unsigned packet_size; - unsigned i; - bool invalid = FALSE; - - CS_LOCALS(r300); - - if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; - } + unsigned vbuf_count = r300->aos_count; + struct pipe_vertex_buffer *vbuf= r300->vertex_buffer; + struct pipe_vertex_element *velem= r300->vertex_element; + bool invalid = false; validate: - for (i = 0; i < aos_count; i++) { - if (!r300->winsys->add_buffer(r300->winsys, aos[i].buffer, - RADEON_GEM_DOMAIN_GTT, 0)) { - pipe->flush(pipe, 0, NULL); + for (int i = 0; i < vbuf_count; i++) { + if (!r300->winsys->add_buffer(r300->winsys, vbuf[velem[i].vertex_buffer_index].buffer, + RADEON_GEM_DOMAIN_GTT, 0)) { + r300->context.flush(&r300->context, 0, NULL); goto validate; } } + if (!r300->winsys->validate(r300->winsys)) { - pipe->flush(pipe, 0, NULL); + r300->context.flush(&r300->context, 0, NULL); if (invalid) { /* Well, hell. */ debug_printf("r300: Stuck in validation loop, gonna quit now."); exit(1); } - invalid = TRUE; + invalid = true; goto validate; } - r300_emit_dirty_state(r300); + return invalid; +} - packet_size = (aos_count >> 1) * 3 + (aos_count & 1) * 2; - - BEGIN_CS(3 + packet_size + (aos_count * 2)); - OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); - OUT_CS(aos_count); - for (i = 0; i < aos_count - 1; i += 2) { - OUT_CS(aos[i].stride | - (aos[i].stride << 8) | - (aos[i + 1].stride << 16) | - (aos[i + 1].stride << 24)); - OUT_CS(aos[i].buffer_offset + start * 4 * aos[i].stride); - OUT_CS(aos[i + 1].buffer_offset + start * 4 * aos[i + 1].stride); - } - if (aos_count & 1) { - OUT_CS(aos[i].stride | (aos[i].stride << 8)); - OUT_CS(aos[i].buffer_offset + start * 4 * aos[i].stride); - } - for (i = 0; i < aos_count; i++) { - OUT_CS_RELOC(aos[i].buffer, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); - } - END_CS; +/* This is the fast-path drawing & emission for HW TCL. */ +boolean r300_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) +{ + struct r300_context* r300 = r300_context(pipe); - if (indexBuffer) { - indices = (short*)pipe_buffer_map(pipe->screen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); + r300_update_derived_state(r300); - /* Set the starting point. */ - indices += start; + setup_vertex_buffers(r300); - BEGIN_CS(2 + (count+1)/2); - OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count + 1)/2); - OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | prim); - for (i = 0; i < count - 1; i += 2) { - OUT_CS(indices[i + 1] << 16 | indices[i]); - } - if (count % 2) { - OUT_CS(indices[count - 1]); - } - END_CS; - } else { - BEGIN_CS(2); - OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); - OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | - prim); - END_CS; - } + setup_vertex_attributes(r300); + + setup_index_buffer(r300, indexBuffer, indexSize); + + r300->hw_prim = r300_translate_primitive(mode); + + r300_emit_dirty_state(r300); + + r300_emit_aos(r300, 0); + + r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, + start, count); return TRUE; } @@ -178,7 +145,23 @@ boolean r300_draw_elements(struct pipe_context* pipe, boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, unsigned start, unsigned count) { - return pipe->draw_elements(pipe, NULL, 0, mode, start, count); + struct r300_context* r300 = r300_context(pipe); + + r300_update_derived_state(r300); + + setup_vertex_buffers(r300); + + setup_vertex_attributes(r300); + + r300->hw_prim = r300_translate_primitive(mode); + + r300_emit_dirty_state(r300); + + r300_emit_aos(r300, start); + + r300_emit_draw_arrays(r300, count); + + return TRUE; } /**************************************************************************** @@ -196,7 +179,9 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, unsigned start, unsigned count) { + assert(0); struct r300_context* r300 = r300_context(pipe); +#if 0 int i; if (!u_trim_pipe_prim(mode, &count)) { @@ -236,7 +221,7 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, draw_set_mapped_element_buffer_range(r300->draw, 0, start, start + count - 1, NULL); } - +#endif return TRUE; } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index bed886fad0..e0b85ab768 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -666,10 +666,9 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - memcpy(r300->vertex_buffers, buffers, + memcpy(r300->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); - - r300->vertex_buffer_count = count; + r300->vbuf_count = count; if (r300->draw) { draw_flush(r300->draw); @@ -683,10 +682,10 @@ static void r300_set_vertex_elements(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - memcpy(r300->vertex_elements, elements, - sizeof(struct pipe_vertex_element) * count); - - r300->vertex_element_count = count; + memcpy(r300->vertex_element, + elements, + sizeof(struct pipe_vertex_element) * count); + r300->aos_count = count; if (r300->draw) { draw_flush(r300->draw); diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 7d000e9e2d..14d7bb094c 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -63,7 +63,7 @@ int r300_shader_key_compare(void* key1, void* key2) { /* Set up the vs_tab and routes. */ static void r300_vs_tab_routes(struct r300_context* r300, - struct r300_vertex_format* vformat) + struct r300_vertex_info* vformat) { struct r300_screen* r300screen = r300_screen(r300->context.screen); struct vertex_info* vinfo = &vformat->vinfo; @@ -219,7 +219,7 @@ static void r300_vs_tab_routes(struct r300_context* r300, /* Update the PSC tables. */ static void r300_vertex_psc(struct r300_context* r300, - struct r300_vertex_format* vformat) + struct r300_vertex_info* vformat) { struct r300_screen* r300screen = r300_screen(r300->context.screen); struct vertex_info* vinfo = &vformat->vinfo; @@ -282,7 +282,7 @@ static void r300_vertex_psc(struct r300_context* r300, /* Set up the mappings from GB to US, for RS block. */ static void r300_update_fs_tab(struct r300_context* r300, - struct r300_vertex_format* vformat) + struct r300_vertex_info* vformat) { struct tgsi_shader_info* info = &r300->fs->info; int i, cols = 0, texs = 0, cols_emitted = 0; @@ -455,13 +455,13 @@ static void r300_update_rs_block(struct r300_context* r300, /* Update the vertex format. */ static void r300_update_derived_shader_state(struct r300_context* r300) { - struct r300_shader_key* key; - struct r300_vertex_format* vformat; + struct r300_vertex_info* vformat; struct r300_rs_block* rs_block; - struct r300_shader_derived_value* value; int i; /* + struct r300_shader_key* key; + struct r300_shader_derived_value* value; key = CALLOC_STRUCT(r300_shader_key); key->vs = r300->vs; key->fs = r300->fs; @@ -486,7 +486,7 @@ static void r300_update_derived_shader_state(struct r300_context* r300) } */ /* XXX This will be refactored ASAP. */ - vformat = CALLOC_STRUCT(r300_vertex_format); + vformat = CALLOC_STRUCT(r300_vertex_info); rs_block = CALLOC_STRUCT(r300_rs_block); for (i = 0; i < 16; i++) { diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c new file mode 100644 index 0000000000..e032641286 --- /dev/null +++ b/src/gallium/drivers/r300/r300_vbo.c @@ -0,0 +1,226 @@ +/* + * Copyright 2009 Maciej Cencora + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "r300_vbo.h" + +#include "pipe/p_format.h" + +#include "r300_cs.h" +#include "r300_context.h" +#include "r300_reg.h" +#include "r300_winsys.h" + +static void translate_vertex_format(enum pipe_format format, + unsigned nr_comps, + unsigned component_size, + unsigned dst_loc, + uint32_t *hw_fmt1, + uint32_t *hw_fmt2) +{ + uint32_t fmt1 = 0; + + switch (pf_type(format)) + { + case PIPE_FORMAT_TYPE_FLOAT: + assert(component_size == 4); + fmt1 = R300_DATA_TYPE_FLOAT_1 + nr_comps - 1; + break; + case PIPE_FORMAT_TYPE_UNORM: + case PIPE_FORMAT_TYPE_SNORM: + case PIPE_FORMAT_TYPE_USCALED: + case PIPE_FORMAT_TYPE_SSCALED: + if (component_size == 1) + { + assert(nr_comps == 4); + fmt1 = R300_DATA_TYPE_BYTE; + } + else if (component_size == 2) + { + if (nr_comps == 2) + fmt1 = R300_DATA_TYPE_SHORT_2; + else if (nr_comps == 4) + fmt1 = R300_DATA_TYPE_SHORT_4; + else + assert(0); + } + else + { + assert(0); + } + + if (pf_type(format) == PIPE_FORMAT_TYPE_SNORM) + { + fmt1 |= R300_SIGNED; + } + else if (pf_type(format) == PIPE_FORMAT_TYPE_SSCALED) + { + fmt1 |= R300_SIGNED; + fmt1 |= R300_NORMALIZE; + } + else if (pf_type(format) == PIPE_FORMAT_TYPE_USCALED) + { + fmt1 |= R300_NORMALIZE; + } + break; + default: + assert(0); + break; + } + + *hw_fmt1 = fmt1 | (dst_loc << R300_DST_VEC_LOC_SHIFT); + *hw_fmt2 = (pf_swizzle_x(format) << R300_SWIZZLE_SELECT_X_SHIFT) | + (pf_swizzle_y(format) << R300_SWIZZLE_SELECT_Y_SHIFT) | + (pf_swizzle_z(format) << R300_SWIZZLE_SELECT_Z_SHIFT) | + (pf_swizzle_w(format) << R300_SWIZZLE_SELECT_W_SHIFT) | + (0xf << R300_WRITE_ENA_SHIFT); +} + +static INLINE void setup_vertex_attribute(struct r300_vertex_info *vinfo, + struct pipe_vertex_element *vert_elem, + unsigned attr_num) +{ + uint32_t hw_fmt1, hw_fmt2; + translate_vertex_format(vert_elem->src_format, + vert_elem->nr_components, + pf_size_x(vert_elem->src_format), + attr_num, + &hw_fmt1, + &hw_fmt2); + + if (attr_num % 2 == 0) + { + vinfo->vap_prog_stream_cntl[attr_num >> 1] = hw_fmt1; + vinfo->vap_prog_stream_cntl_ext[attr_num >> 1] = hw_fmt2; + } + else + { + vinfo->vap_prog_stream_cntl[attr_num >> 1] |= hw_fmt1 << 16; + vinfo->vap_prog_stream_cntl_ext[attr_num >> 1] |= hw_fmt2 << 16; + } +} + +static void finish_vertex_attribs_setup(struct r300_vertex_info *vinfo, + unsigned attribs_num) +{ + uint32_t last_vec_bit = (attribs_num % 2 == 0) ? (R300_LAST_VEC << 16) : R300_LAST_VEC; + + assert(attribs_num > 0 && attribs_num <= 16); + vinfo->vap_prog_stream_cntl[(attribs_num - 1) >> 1] |= last_vec_bit; +} + +void setup_vertex_attributes(struct r300_context *r300) +{ + for (int i=0; iaos_count; i++) + { + struct pipe_vertex_element *vert_elem = &r300->vertex_element[i]; + + setup_vertex_attribute(r300->vertex_info, vert_elem, i); + } + + finish_vertex_attribs_setup(r300->vertex_info, r300->aos_count); +} + +static void setup_vertex_array(struct r300_context *r300, struct pipe_vertex_element *element) +{ +} + +static void finish_vertex_arrays_setup(struct r300_context *r300) +{ +} + +static bool format_is_supported(enum pipe_format format, int nr_components) +{ + if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) + return false; + + if ((pf_size_x(format) != pf_size_y(format)) || + (pf_size_x(format) != pf_size_z(format)) || + (pf_size_x(format) != pf_size_w(format))) + return false; + + /* Following should be supported as long as stride is 4 bytes aligned */ + if (pf_size_x(format) != 1 && nr_components != 4) + return false; + + if (pf_size_x(format) != 2 && !(nr_components == 2 || nr_components == 4)) + return false; + + if (pf_size_x(format) == 3 || pf_size_x(format) > 4) + return false; + + return true; +} + +static INLINE int get_buffer_offset(struct r300_context *r300, + unsigned int buf_nr, + unsigned int elem_offset) +{ + return r300->vertex_buffer[buf_nr].buffer_offset + elem_offset; +} + +/** + */ +static void setup_vertex_buffers(struct r300_context *r300) +{ + for (int i=0; iaos_count; i++) + { + struct pipe_vertex_element *vert_elem = &r300->vertex_element[i]; + if (!format_is_supported(vert_elem->src_format, vert_elem->nr_components)) + { + assert(0); + /* use translate module to convert the data */ + /* + struct pipe_buffer *buf; + const unsigned int max_index = r300->vertex_buffers[vert_elem->vertex_buffer_index].max_index; + buf = pipe_buffer_create(r300->context.screen, 4, usage, vert_elem->nr_components * max_index * sizeof(float)); + */ + } + + if (get_buffer_offset(r300, vert_elem->vertex_buffer_index, vert_elem->src_offset) % 4 != 0) + { + /* need to align buffer */ + assert(0); + } + setup_vertex_array(r300, vert_elem); + } + + finish_vertex_arrays_setup(r300); +} + +void setup_index_buffer(struct r300_context *r300, + struct pipe_buffer* indexBuffer, + unsigned indexSize) +{ + assert(indexSize = 2); + + if (!r300->winsys->add_buffer(r300->winsys, indexBuffer, RADEON_GEM_DOMAIN_GTT, 0)) + { + assert(0); + } + + if (!r300->winsys->validate(r300->winsys)) + { + assert(0); + } +} + diff --git a/src/gallium/drivers/r300/r300_vbo.h b/src/gallium/drivers/r300/r300_vbo.h new file mode 100644 index 0000000000..7afa75899c --- /dev/null +++ b/src/gallium/drivers/r300/r300_vbo.h @@ -0,0 +1,36 @@ +/* + * Copyright 2009 Maciej Cencora + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef R300_VBO_H +#define R300_VBO_H + +struct r300_context; +struct pipe_buffer; + +void setup_vertex_attributes(struct r300_context *r300); + +void setup_index_buffer(struct r300_context *r300, + struct pipe_buffer* indexBuffer, + unsigned indexSize); + +#endif -- cgit v1.2.3 From 1ef0341ea7ee08284ebafe4f347643e1190d5777 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sun, 25 Oct 2009 13:51:45 +0100 Subject: r300g: don't hang GPU on misbehaving apps --- src/gallium/drivers/r300/r300_render.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 86aaf841dd..cbda30227d 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -112,6 +112,9 @@ boolean r300_draw_range_elements(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); + if (!u_trim_pipe_prim(mode, &count)) + return false; + r300_update_derived_state(r300); setup_vertex_buffers(r300); @@ -147,6 +150,9 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, { struct r300_context* r300 = r300_context(pipe); + if (!u_trim_pipe_prim(mode, &count)) + return false; + r300_update_derived_state(r300); setup_vertex_buffers(r300); -- cgit v1.2.3 From 24c6fdbd32a84314c81897d0d1567121ed1c6118 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 6 Nov 2009 20:21:38 -0800 Subject: r300g: Use common state funcs for translating vert formats. --- src/gallium/drivers/r300/r300_vbo.c | 78 +++---------------------------------- 1 file changed, 6 insertions(+), 72 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c index e032641286..37b5c9224f 100644 --- a/src/gallium/drivers/r300/r300_vbo.c +++ b/src/gallium/drivers/r300/r300_vbo.c @@ -27,85 +27,19 @@ #include "r300_cs.h" #include "r300_context.h" +#include "r300_state_inlines.h" #include "r300_reg.h" #include "r300_winsys.h" -static void translate_vertex_format(enum pipe_format format, - unsigned nr_comps, - unsigned component_size, - unsigned dst_loc, - uint32_t *hw_fmt1, - uint32_t *hw_fmt2) -{ - uint32_t fmt1 = 0; - - switch (pf_type(format)) - { - case PIPE_FORMAT_TYPE_FLOAT: - assert(component_size == 4); - fmt1 = R300_DATA_TYPE_FLOAT_1 + nr_comps - 1; - break; - case PIPE_FORMAT_TYPE_UNORM: - case PIPE_FORMAT_TYPE_SNORM: - case PIPE_FORMAT_TYPE_USCALED: - case PIPE_FORMAT_TYPE_SSCALED: - if (component_size == 1) - { - assert(nr_comps == 4); - fmt1 = R300_DATA_TYPE_BYTE; - } - else if (component_size == 2) - { - if (nr_comps == 2) - fmt1 = R300_DATA_TYPE_SHORT_2; - else if (nr_comps == 4) - fmt1 = R300_DATA_TYPE_SHORT_4; - else - assert(0); - } - else - { - assert(0); - } - - if (pf_type(format) == PIPE_FORMAT_TYPE_SNORM) - { - fmt1 |= R300_SIGNED; - } - else if (pf_type(format) == PIPE_FORMAT_TYPE_SSCALED) - { - fmt1 |= R300_SIGNED; - fmt1 |= R300_NORMALIZE; - } - else if (pf_type(format) == PIPE_FORMAT_TYPE_USCALED) - { - fmt1 |= R300_NORMALIZE; - } - break; - default: - assert(0); - break; - } - - *hw_fmt1 = fmt1 | (dst_loc << R300_DST_VEC_LOC_SHIFT); - *hw_fmt2 = (pf_swizzle_x(format) << R300_SWIZZLE_SELECT_X_SHIFT) | - (pf_swizzle_y(format) << R300_SWIZZLE_SELECT_Y_SHIFT) | - (pf_swizzle_z(format) << R300_SWIZZLE_SELECT_Z_SHIFT) | - (pf_swizzle_w(format) << R300_SWIZZLE_SELECT_W_SHIFT) | - (0xf << R300_WRITE_ENA_SHIFT); -} - static INLINE void setup_vertex_attribute(struct r300_vertex_info *vinfo, struct pipe_vertex_element *vert_elem, unsigned attr_num) { - uint32_t hw_fmt1, hw_fmt2; - translate_vertex_format(vert_elem->src_format, - vert_elem->nr_components, - pf_size_x(vert_elem->src_format), - attr_num, - &hw_fmt1, - &hw_fmt2); + uint16_t hw_fmt1, hw_fmt2; + + hw_fmt1 = r300_translate_vertex_data_type(vert_elem->src_format) | + (attr_num << R300_DST_VEC_LOC_SHIFT); + hw_fmt2 = r300_translate_vertex_data_swizzle(vert_elem->src_format); if (attr_num % 2 == 0) { -- cgit v1.2.3 From 96b729f926fafeca6479eed0933bc4275fb7843b Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 7 Nov 2009 09:47:01 -0800 Subject: r300g: Don't pass hw_prim around in the context. And some other fixes. --- src/gallium/drivers/r300/r300_context.h | 1 - src/gallium/drivers/r300/r300_emit.c | 58 ------------------------- src/gallium/drivers/r300/r300_emit.h | 10 ----- src/gallium/drivers/r300/r300_render.c | 76 +++++++++++++++++++++++++++++---- src/gallium/drivers/r300/r300_vbo.c | 10 +++-- 5 files changed, 74 insertions(+), 81 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index a6748852d8..8d14c53f49 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -299,7 +299,6 @@ struct r300_context { /* Vertex elements for Gallium. */ struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; int aos_count; - unsigned hw_prim; /* Bitmask of dirty state objects. */ uint32_t dirty_state; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 92e6ec606c..ec1d521800 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -647,64 +647,6 @@ void r300_emit_draw_packet(struct r300_context* r300) END_CS; } #endif -void r300_emit_draw_arrays(struct r300_context *r300, - unsigned count) -{ - CS_LOCALS(r300); - assert(count < 65536); - - BEGIN_CS(4); - OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count); - OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); - OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | - r300->hw_prim); - END_CS; -} - -void r300_emit_draw_elements(struct r300_context *r300, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned start, - unsigned count) -{ - CS_LOCALS(r300); - assert(indexSize == 4 || indexSize == 2); - assert(count < 65536); - assert((start * indexSize) % 4 == 0); - - uint32_t size_dwords; - uint32_t skip_dwords = indexSize * start / sizeof(uint32_t); - assert(skip_dwords == 0); - - BEGIN_CS(10); - OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex); - OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); - if (indexSize == 4) { - size_dwords = count + start; - OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | - R300_VAP_VF_CNTL__INDEX_SIZE_32bit | r300->hw_prim); - } else { - size_dwords = (count + start + 1) / 2; - OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | - (count << 16) | r300->hw_prim); - } - - OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2); - OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) | - (0 << R300_INDX_BUFFER_SKIP_SHIFT)); - OUT_CS(skip_dwords); - OUT_CS(size_dwords); - cs_winsys->write_cs_reloc(cs_winsys, - indexBuffer, - RADEON_GEM_DOMAIN_GTT, - 0, - 0); - cs_count -= 2; - - END_CS; -} void r300_emit_vertex_format_state(struct r300_context* r300) { diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index b4fdfecde0..7c83c5166d 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -40,16 +40,6 @@ void r300_emit_blend_color_state(struct r300_context* r300, void r300_emit_clip_state(struct r300_context* r300, struct pipe_clip_state* clip); -void r300_emit_draw_arrays(struct r300_context *r300, unsigned count); - -void r300_emit_draw_elements(struct r300_context *r300, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned start, - unsigned count); - void r300_emit_dsa_state(struct r300_context* r300, struct r300_dsa_state* dsa); diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index cbda30227d..6f7c645334 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -31,13 +31,13 @@ #include "util/u_memory.h" #include "util/u_prim.h" -#include "r300_vbo.h" #include "r300_cs.h" #include "r300_context.h" #include "r300_emit.h" #include "r300_reg.h" #include "r300_render.h" #include "r300_state_derived.h" +#include "r300_vbo.h" /* r300_render: Vertex and index buffer primitive emission. */ #define R300_MAX_VBO_SIZE (1024 * 1024) @@ -70,6 +70,70 @@ uint32_t r300_translate_primitive(unsigned prim) } } +static void r300_emit_draw_arrays(struct r300_context *r300, + unsigned mode, + unsigned count) +{ + CS_LOCALS(r300); + assert(count < 65536); + + BEGIN_CS(4); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | + r300_translate_primitive(mode)); + END_CS; +} + +static void r300_emit_draw_elements(struct r300_context *r300, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) +{ + CS_LOCALS(r300); + assert(indexSize == 4 || indexSize == 2); + assert(count < 65536); + assert((start * indexSize) % 4 == 0); + + uint32_t size_dwords; + uint32_t skip_dwords = indexSize * start / sizeof(uint32_t); + assert(skip_dwords == 0); + + BEGIN_CS(10); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); + if (indexSize == 4) { + size_dwords = count + start; + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | + R300_VAP_VF_CNTL__INDEX_SIZE_32bit | + r300_translate_primitive(mode)); + } else { + size_dwords = (count + start + 1) / 2; + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | + r300_translate_primitive(mode)); + } + + OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2); + OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) | + (0 << R300_INDX_BUFFER_SKIP_SHIFT)); + OUT_CS(skip_dwords); + OUT_CS(size_dwords); + /* XXX hax */ + cs_winsys->write_cs_reloc(cs_winsys, + indexBuffer, + RADEON_GEM_DOMAIN_GTT, + 0, + 0); + cs_count -= 2; + + END_CS; +} + + static boolean setup_vertex_buffers(struct r300_context *r300) { unsigned vbuf_count = r300->aos_count; @@ -123,14 +187,12 @@ boolean r300_draw_range_elements(struct pipe_context* pipe, setup_index_buffer(r300, indexBuffer, indexSize); - r300->hw_prim = r300_translate_primitive(mode); - r300_emit_dirty_state(r300); r300_emit_aos(r300, 0); r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, - start, count); + mode, start, count); return TRUE; } @@ -159,13 +221,11 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, setup_vertex_attributes(r300); - r300->hw_prim = r300_translate_primitive(mode); - r300_emit_dirty_state(r300); r300_emit_aos(r300, start); - r300_emit_draw_arrays(r300, count); + r300_emit_draw_arrays(r300, mode, count); return TRUE; } @@ -186,8 +246,8 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, unsigned count) { assert(0); - struct r300_context* r300 = r300_context(pipe); #if 0 + struct r300_context* r300 = r300_context(pipe); int i; if (!u_trim_pipe_prim(mode, &count)) { diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c index 37b5c9224f..ab6f5c5942 100644 --- a/src/gallium/drivers/r300/r300_vbo.c +++ b/src/gallium/drivers/r300/r300_vbo.c @@ -56,7 +56,8 @@ static INLINE void setup_vertex_attribute(struct r300_vertex_info *vinfo, static void finish_vertex_attribs_setup(struct r300_vertex_info *vinfo, unsigned attribs_num) { - uint32_t last_vec_bit = (attribs_num % 2 == 0) ? (R300_LAST_VEC << 16) : R300_LAST_VEC; + uint32_t last_vec_bit = (attribs_num % 2 == 0) ? + (R300_LAST_VEC << 16) : R300_LAST_VEC; assert(attribs_num > 0 && attribs_num <= 16); vinfo->vap_prog_stream_cntl[(attribs_num - 1) >> 1] |= last_vec_bit; @@ -64,10 +65,11 @@ static void finish_vertex_attribs_setup(struct r300_vertex_info *vinfo, void setup_vertex_attributes(struct r300_context *r300) { - for (int i=0; iaos_count; i++) - { - struct pipe_vertex_element *vert_elem = &r300->vertex_element[i]; + struct pipe_vertex_element *vert_elem; + int i; + for (i = 0; i < r300->aos_count; i++) { + vert_elem = &r300->vertex_element[i]; setup_vertex_attribute(r300->vertex_info, vert_elem, i); } -- cgit v1.2.3 From 7518d9b1b7369f6e5ca1fdaf6a34e39a4acace9a Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 7 Nov 2009 10:05:31 -0800 Subject: r300g: Clean up r300_setup_vertex_buffers. --- src/gallium/drivers/r300/r300_render.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 6f7c645334..e28af7600a 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -134,16 +134,16 @@ static void r300_emit_draw_elements(struct r300_context *r300, } -static boolean setup_vertex_buffers(struct r300_context *r300) +static boolean r300_setup_vertex_buffers(struct r300_context *r300) { unsigned vbuf_count = r300->aos_count; - struct pipe_vertex_buffer *vbuf= r300->vertex_buffer; - struct pipe_vertex_element *velem= r300->vertex_element; - bool invalid = false; + struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; + struct pipe_vertex_element *velem = r300->vertex_element; validate: for (int i = 0; i < vbuf_count; i++) { - if (!r300->winsys->add_buffer(r300->winsys, vbuf[velem[i].vertex_buffer_index].buffer, + if (!r300->winsys->add_buffer(r300->winsys, + vbuf[velem[i].vertex_buffer_index].buffer, RADEON_GEM_DOMAIN_GTT, 0)) { r300->context.flush(&r300->context, 0, NULL); goto validate; @@ -152,16 +152,10 @@ validate: if (!r300->winsys->validate(r300->winsys)) { r300->context.flush(&r300->context, 0, NULL); - if (invalid) { - /* Well, hell. */ - debug_printf("r300: Stuck in validation loop, gonna quit now."); - exit(1); - } - invalid = true; - goto validate; + return r300->winsys->validate(r300->winsys); } - return invalid; + return TRUE; } /* This is the fast-path drawing & emission for HW TCL. */ @@ -181,7 +175,9 @@ boolean r300_draw_range_elements(struct pipe_context* pipe, r300_update_derived_state(r300); - setup_vertex_buffers(r300); + if (!r300_setup_vertex_buffers(r300)) { + return FALSE; + } setup_vertex_attributes(r300); @@ -217,7 +213,9 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, r300_update_derived_state(r300); - setup_vertex_buffers(r300); + if (!r300_setup_vertex_buffers(r300)) { + return FALSE; + } setup_vertex_attributes(r300); -- cgit v1.2.3 From 7da3cc4241b8550ccc1ec5ba3c93334094f5fb11 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 7 Nov 2009 10:14:07 -0800 Subject: r300g: Clean up indexbuf render, switch to RELOC macro. --- src/gallium/drivers/r300/r300_render.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index e28af7600a..b4351d541d 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -94,41 +94,43 @@ static void r300_emit_draw_elements(struct r300_context *r300, unsigned start, unsigned count) { + uint32_t count_dwords; + uint32_t offset_dwords = indexSize * start / sizeof(uint32_t); CS_LOCALS(r300); + + /* XXX most of these are stupid */ assert(indexSize == 4 || indexSize == 2); assert(count < 65536); assert((start * indexSize) % 4 == 0); - - uint32_t size_dwords; - uint32_t skip_dwords = indexSize * start / sizeof(uint32_t); - assert(skip_dwords == 0); + assert(offset_dwords == 0); BEGIN_CS(10); OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); if (indexSize == 4) { - size_dwords = count + start; + count_dwords = count + start; OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | R300_VAP_VF_CNTL__INDEX_SIZE_32bit | r300_translate_primitive(mode)); } else { - size_dwords = (count + start + 1) / 2; + count_dwords = (count + start + 1) / 2; OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300_translate_primitive(mode)); } + /* INDX_BUFFER is a truly special packet3. + * Unlike most other packet3, where the offset is after the count, + * the order is reversed, so the relocation ends up carrying the + * size of the indexbuf instead of the offset. + * + * XXX Fix offset + */ OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2); OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) | (0 << R300_INDX_BUFFER_SKIP_SHIFT)); - OUT_CS(skip_dwords); - OUT_CS(size_dwords); - /* XXX hax */ - cs_winsys->write_cs_reloc(cs_winsys, - indexBuffer, - RADEON_GEM_DOMAIN_GTT, - 0, - 0); - cs_count -= 2; + OUT_CS(offset_dwords); + OUT_CS_RELOC(indexBuffer, count_dwords, + RADEON_GEM_DOMAIN_GTT, 0, 0); END_CS; } -- cgit v1.2.3 From b6c3954138ef70ea7d2cbd3ba9519f404ef616d7 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 7 Nov 2009 10:26:57 -0800 Subject: r300g: s/false/FALSE/ Also s/true/TRUE/ --- src/gallium/drivers/r300/r300_render.c | 4 ++-- src/gallium/drivers/r300/r300_vbo.c | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index b4351d541d..1532de367f 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -173,7 +173,7 @@ boolean r300_draw_range_elements(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); if (!u_trim_pipe_prim(mode, &count)) - return false; + return FALSE; r300_update_derived_state(r300); @@ -211,7 +211,7 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, struct r300_context* r300 = r300_context(pipe); if (!u_trim_pipe_prim(mode, &count)) - return false; + return FALSE; r300_update_derived_state(r300); diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c index ab6f5c5942..be74a49eb8 100644 --- a/src/gallium/drivers/r300/r300_vbo.c +++ b/src/gallium/drivers/r300/r300_vbo.c @@ -84,27 +84,27 @@ static void finish_vertex_arrays_setup(struct r300_context *r300) { } -static bool format_is_supported(enum pipe_format format, int nr_components) +static boolean format_is_supported(enum pipe_format format, int nr_components) { if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) - return false; + return FALSE; if ((pf_size_x(format) != pf_size_y(format)) || (pf_size_x(format) != pf_size_z(format)) || (pf_size_x(format) != pf_size_w(format))) - return false; + return FALSE; /* Following should be supported as long as stride is 4 bytes aligned */ if (pf_size_x(format) != 1 && nr_components != 4) - return false; + return FALSE; if (pf_size_x(format) != 2 && !(nr_components == 2 || nr_components == 4)) - return false; + return FALSE; if (pf_size_x(format) == 3 || pf_size_x(format) > 4) - return false; + return FALSE; - return true; + return TRUE; } static INLINE int get_buffer_offset(struct r300_context *r300, -- cgit v1.2.3 From 746c01b3b2f77d8d8ba14fc517d04dbaf080d77d Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 7 Nov 2009 10:34:00 -0800 Subject: r300g: Moar vbo cleanup. --- src/gallium/drivers/r300/r300_vbo.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c index be74a49eb8..cec79ec97e 100644 --- a/src/gallium/drivers/r300/r300_vbo.c +++ b/src/gallium/drivers/r300/r300_vbo.c @@ -118,13 +118,16 @@ static INLINE int get_buffer_offset(struct r300_context *r300, */ static void setup_vertex_buffers(struct r300_context *r300) { - for (int i=0; iaos_count; i++) + struct pipe_vertex_element *vert_elem; + int i; + + for (i = 0; i < r300->aos_count; i++) { - struct pipe_vertex_element *vert_elem = &r300->vertex_element[i]; - if (!format_is_supported(vert_elem->src_format, vert_elem->nr_components)) - { + vert_elem = &r300->vertex_element[i]; + if (!format_is_supported(vert_elem->src_format, + vert_elem->nr_components)) { + /* XXX use translate module to convert the data */ assert(0); - /* use translate module to convert the data */ /* struct pipe_buffer *buf; const unsigned int max_index = r300->vertex_buffers[vert_elem->vertex_buffer_index].max_index; @@ -132,9 +135,10 @@ static void setup_vertex_buffers(struct r300_context *r300) */ } - if (get_buffer_offset(r300, vert_elem->vertex_buffer_index, vert_elem->src_offset) % 4 != 0) - { - /* need to align buffer */ + if (get_buffer_offset(r300, + vert_elem->vertex_buffer_index, + vert_elem->src_offset) % 4) { + /* XXX need to align buffer */ assert(0); } setup_vertex_array(r300, vert_elem); -- cgit v1.2.3 From ef513776b5bdd11968d2ca03862e9d1ac48e099f Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 7 Nov 2009 10:39:42 -0800 Subject: r300g: Don't assert on oversized VBOs, just return FALSE. --- src/gallium/drivers/r300/r300_render.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 1532de367f..89bf749b5f 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -75,7 +75,6 @@ static void r300_emit_draw_arrays(struct r300_context *r300, unsigned count) { CS_LOCALS(r300); - assert(count < 65536); BEGIN_CS(4); OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count); @@ -100,7 +99,6 @@ static void r300_emit_draw_elements(struct r300_context *r300, /* XXX most of these are stupid */ assert(indexSize == 4 || indexSize == 2); - assert(count < 65536); assert((start * indexSize) % 4 == 0); assert(offset_dwords == 0); @@ -172,8 +170,13 @@ boolean r300_draw_range_elements(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - if (!u_trim_pipe_prim(mode, &count)) + if (!u_trim_pipe_prim(mode, &count)) { return FALSE; + } + + if (count > 65535) { + return FALSE; + } r300_update_derived_state(r300); @@ -210,8 +213,13 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, { struct r300_context* r300 = r300_context(pipe); - if (!u_trim_pipe_prim(mode, &count)) + if (!u_trim_pipe_prim(mode, &count)) { return FALSE; + } + + if (count > 65535) { + return FALSE; + } r300_update_derived_state(r300); -- cgit v1.2.3 From cd5b2a93d5c9c60dbe72ebc963dcddf0db0b665c Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 7 Nov 2009 10:52:06 -0800 Subject: r300g: Comments. --- src/gallium/drivers/r300/r300_render.c | 3 ++- src/gallium/drivers/r300/r300_vbo.c | 14 +++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 89bf749b5f..0df9a94610 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -243,7 +243,8 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, * keep these functions separated so that they are easier to locate. ~C. * ***************************************************************************/ -/* Draw-based drawing for SW TCL chipsets. */ +/* Draw-based drawing for SW TCL chipsets. + * XXX currently broken as fucking hell. */ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, unsigned indexSize, diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c index cec79ec97e..d8b356a061 100644 --- a/src/gallium/drivers/r300/r300_vbo.c +++ b/src/gallium/drivers/r300/r300_vbo.c @@ -21,6 +21,9 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/* r300_vbo: Various helpers for emitting vertex buffers. Needs cleanup, + * refactoring, etc. */ + #include "r300_vbo.h" #include "pipe/p_format.h" @@ -76,6 +79,7 @@ void setup_vertex_attributes(struct r300_context *r300) finish_vertex_attribs_setup(r300->vertex_info, r300->aos_count); } +/* XXX WTF are these doing? */ static void setup_vertex_array(struct r300_context *r300, struct pipe_vertex_element *element) { } @@ -84,6 +88,7 @@ static void finish_vertex_arrays_setup(struct r300_context *r300) { } +/* XXX move/integrate this with the checks in r300_state_inlines */ static boolean format_is_supported(enum pipe_format format, int nr_components) { if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) @@ -151,16 +156,15 @@ void setup_index_buffer(struct r300_context *r300, struct pipe_buffer* indexBuffer, unsigned indexSize) { + /* XXX I call BS; why is this different from the assert in r300_render? */ assert(indexSize = 2); - if (!r300->winsys->add_buffer(r300->winsys, indexBuffer, RADEON_GEM_DOMAIN_GTT, 0)) - { + if (!r300->winsys->add_buffer(r300->winsys, indexBuffer, + RADEON_GEM_DOMAIN_GTT, 0)) { assert(0); } - if (!r300->winsys->validate(r300->winsys)) - { + if (!r300->winsys->validate(r300->winsys)) { assert(0); } } - -- cgit v1.2.3 From 0fe5f0c09abce9d540d51942eab08b2248243943 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 7 Nov 2009 11:49:39 -0800 Subject: r300g: Be more verbose in what's killing us WRT vert formats. --- src/gallium/drivers/r300/r300_state_inlines.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 52b9650fc1..e53db3d0b5 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -434,8 +434,8 @@ r300_translate_vertex_data_type(enum pipe_format format) { unsigned components = pf_component_count(format); if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) { - debug_printf("r300: Bad format %s in %s\n", pf_name(format), - __FUNCTION__); + debug_printf("r300: Bad format %s in %s:%d\n", pf_name(format), + __FUNCTION__, __LINE__); return 0; } @@ -447,6 +447,8 @@ r300_translate_vertex_data_type(enum pipe_format format) { result = R300_DATA_TYPE_FLOAT_1 + (components - 1); break; default: + debug_printf("r300: Bad format %s in %s:%d\n", + pf_name(format), __FUNCTION__, __LINE__); assert(0); } break; @@ -470,10 +472,16 @@ r300_translate_vertex_data_type(enum pipe_format format) { } break; default: + debug_printf("r300: Bad format %s in %s:%d\n", + pf_name(format), __FUNCTION__, __LINE__); + debug_printf("r300: pf_size_x(format) == %d\n", + pf_size_x(format)); assert(0); } break; default: + debug_printf("r300: Bad format %s in %s:%d\n", + pf_name(format), __FUNCTION__, __LINE__); assert(0); } @@ -492,8 +500,8 @@ static INLINE uint16_t r300_translate_vertex_data_swizzle(enum pipe_format format) { if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) { - debug_printf("r300: Bad format %s in %s\n", pf_name(format), - __FUNCTION__); + debug_printf("r300: Bad format %s in %s:%d\n", + pf_name(format), __FUNCTION__, __LINE__); return 0; } -- cgit v1.2.3 From c4fa0e4caa0aeb5cce9bd871f9156da25a9ec404 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 7 Nov 2009 13:07:52 -0800 Subject: r300g: Remove faulty assert. --- src/gallium/drivers/r300/r300_vbo.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c index d8b356a061..7e88bf3b7c 100644 --- a/src/gallium/drivers/r300/r300_vbo.c +++ b/src/gallium/drivers/r300/r300_vbo.c @@ -152,13 +152,11 @@ static void setup_vertex_buffers(struct r300_context *r300) finish_vertex_arrays_setup(r300); } +/* XXX these shouldn't be asserts since we can work around bad indexbufs */ void setup_index_buffer(struct r300_context *r300, struct pipe_buffer* indexBuffer, unsigned indexSize) { - /* XXX I call BS; why is this different from the assert in r300_render? */ - assert(indexSize = 2); - if (!r300->winsys->add_buffer(r300->winsys, indexBuffer, RADEON_GEM_DOMAIN_GTT, 0)) { assert(0); -- cgit v1.2.3 From fa6916cfef6a75eacdbf927a02f64a5a37c3b0d9 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 7 Nov 2009 13:12:15 -0800 Subject: r300g: Remove do-nothing functions. --- src/gallium/drivers/r300/r300_vbo.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c index 7e88bf3b7c..d3f2ce799a 100644 --- a/src/gallium/drivers/r300/r300_vbo.c +++ b/src/gallium/drivers/r300/r300_vbo.c @@ -79,15 +79,6 @@ void setup_vertex_attributes(struct r300_context *r300) finish_vertex_attribs_setup(r300->vertex_info, r300->aos_count); } -/* XXX WTF are these doing? */ -static void setup_vertex_array(struct r300_context *r300, struct pipe_vertex_element *element) -{ -} - -static void finish_vertex_arrays_setup(struct r300_context *r300) -{ -} - /* XXX move/integrate this with the checks in r300_state_inlines */ static boolean format_is_supported(enum pipe_format format, int nr_components) { @@ -146,10 +137,7 @@ static void setup_vertex_buffers(struct r300_context *r300) /* XXX need to align buffer */ assert(0); } - setup_vertex_array(r300, vert_elem); } - - finish_vertex_arrays_setup(r300); } /* XXX these shouldn't be asserts since we can work around bad indexbufs */ -- cgit v1.2.3 From 9f49db6f843885620a52a06721d5972afb29f21a Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 7 Nov 2009 13:37:07 -0800 Subject: r300g: Minor code cleanup to avoid confusion. --- src/gallium/drivers/r300/r300_render.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 0df9a94610..fa057324f8 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -136,12 +136,11 @@ static void r300_emit_draw_elements(struct r300_context *r300, static boolean r300_setup_vertex_buffers(struct r300_context *r300) { - unsigned vbuf_count = r300->aos_count; struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; struct pipe_vertex_element *velem = r300->vertex_element; validate: - for (int i = 0; i < vbuf_count; i++) { + for (int i = 0; i < r300->aos_count; i++) { if (!r300->winsys->add_buffer(r300->winsys, vbuf[velem[i].vertex_buffer_index].buffer, RADEON_GEM_DOMAIN_GTT, 0)) { -- cgit v1.2.3 From 547e939afb980c2fcc3edbbb07dba0f44be785c1 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 7 Nov 2009 14:14:19 -0800 Subject: r300g: DCE. This must never have been called before; it's completely wrong. --- src/gallium/drivers/r300/r300_state_inlines.h | 2 +- src/gallium/drivers/r300/r300_vbo.c | 27 +-------------------------- 2 files changed, 2 insertions(+), 27 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index e53db3d0b5..b0f3386c62 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -436,7 +436,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) { debug_printf("r300: Bad format %s in %s:%d\n", pf_name(format), __FUNCTION__, __LINE__); - return 0; + assert(0); } switch (pf_type(format)) { diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c index d3f2ce799a..1d45fd590c 100644 --- a/src/gallium/drivers/r300/r300_vbo.c +++ b/src/gallium/drivers/r300/r300_vbo.c @@ -79,30 +79,6 @@ void setup_vertex_attributes(struct r300_context *r300) finish_vertex_attribs_setup(r300->vertex_info, r300->aos_count); } -/* XXX move/integrate this with the checks in r300_state_inlines */ -static boolean format_is_supported(enum pipe_format format, int nr_components) -{ - if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) - return FALSE; - - if ((pf_size_x(format) != pf_size_y(format)) || - (pf_size_x(format) != pf_size_z(format)) || - (pf_size_x(format) != pf_size_w(format))) - return FALSE; - - /* Following should be supported as long as stride is 4 bytes aligned */ - if (pf_size_x(format) != 1 && nr_components != 4) - return FALSE; - - if (pf_size_x(format) != 2 && !(nr_components == 2 || nr_components == 4)) - return FALSE; - - if (pf_size_x(format) == 3 || pf_size_x(format) > 4) - return FALSE; - - return TRUE; -} - static INLINE int get_buffer_offset(struct r300_context *r300, unsigned int buf_nr, unsigned int elem_offset) @@ -110,8 +86,7 @@ static INLINE int get_buffer_offset(struct r300_context *r300, return r300->vertex_buffer[buf_nr].buffer_offset + elem_offset; } -/** - */ +/* XXX not called at all */ static void setup_vertex_buffers(struct r300_context *r300) { struct pipe_vertex_element *vert_elem; -- cgit v1.2.3 From a12fc1a9c4d544b015b40ff0266b8c8726d16f75 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 7 Nov 2009 14:32:31 -0800 Subject: r300g: Organize inlined state. --- src/gallium/drivers/r300/r300_state_inlines.h | 72 +++++++++++++-------------- 1 file changed, 36 insertions(+), 36 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index b0f3386c62..e6c1cb54da 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -191,6 +191,42 @@ static INLINE uint32_t r300_translate_alpha_function(int alpha_func) return 0; } +static INLINE uint32_t +r300_translate_polygon_mode_front(unsigned mode) { + switch (mode) + { + case PIPE_POLYGON_MODE_FILL: + return R300_GA_POLY_MODE_FRONT_PTYPE_TRI; + case PIPE_POLYGON_MODE_LINE: + return R300_GA_POLY_MODE_FRONT_PTYPE_LINE; + case PIPE_POLYGON_MODE_POINT: + return R300_GA_POLY_MODE_FRONT_PTYPE_POINT; + + default: + debug_printf("r300: Bad polygon mode %i in %s\n", mode, + __FUNCTION__); + return R300_GA_POLY_MODE_FRONT_PTYPE_TRI; + } +} + +static INLINE uint32_t +r300_translate_polygon_mode_back(unsigned mode) { + switch (mode) + { + case PIPE_POLYGON_MODE_FILL: + return R300_GA_POLY_MODE_BACK_PTYPE_TRI; + case PIPE_POLYGON_MODE_LINE: + return R300_GA_POLY_MODE_BACK_PTYPE_LINE; + case PIPE_POLYGON_MODE_POINT: + return R300_GA_POLY_MODE_BACK_PTYPE_POINT; + + default: + debug_printf("r300: Bad polygon mode %i in %s\n", mode, + __FUNCTION__); + return R300_GA_POLY_MODE_BACK_PTYPE_TRI; + } +} + /* Texture sampler state. */ static INLINE uint32_t r300_translate_wrap(int wrap) @@ -512,40 +548,4 @@ r300_translate_vertex_data_swizzle(enum pipe_format format) { (0xf << R300_WRITE_ENA_SHIFT)); } -static INLINE uint32_t -r300_translate_polygon_mode_front(unsigned mode) { - switch (mode) - { - case PIPE_POLYGON_MODE_FILL: - return R300_GA_POLY_MODE_FRONT_PTYPE_TRI; - case PIPE_POLYGON_MODE_LINE: - return R300_GA_POLY_MODE_FRONT_PTYPE_LINE; - case PIPE_POLYGON_MODE_POINT: - return R300_GA_POLY_MODE_FRONT_PTYPE_POINT; - - default: - debug_printf("r300: Bad polygon mode %i in %s\n", mode, - __FUNCTION__); - return R300_GA_POLY_MODE_FRONT_PTYPE_TRI; - } -} - -static INLINE uint32_t -r300_translate_polygon_mode_back(unsigned mode) { - switch (mode) - { - case PIPE_POLYGON_MODE_FILL: - return R300_GA_POLY_MODE_BACK_PTYPE_TRI; - case PIPE_POLYGON_MODE_LINE: - return R300_GA_POLY_MODE_BACK_PTYPE_LINE; - case PIPE_POLYGON_MODE_POINT: - return R300_GA_POLY_MODE_BACK_PTYPE_POINT; - - default: - debug_printf("r300: Bad polygon mode %i in %s\n", mode, - __FUNCTION__); - return R300_GA_POLY_MODE_BACK_PTYPE_TRI; - } -} - #endif /* R300_STATE_INLINES_H */ -- cgit v1.2.3 From ee28a69188d5054f996d0f5fc12820b024ef96a6 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 8 Nov 2009 09:35:07 -0800 Subject: r300g: Fix build error on old compilers. This dead code was still getting compiled, causing a bad ref in the lib. --- src/gallium/drivers/r300/r300_vbo.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c index 1d45fd590c..5ad6b9c215 100644 --- a/src/gallium/drivers/r300/r300_vbo.c +++ b/src/gallium/drivers/r300/r300_vbo.c @@ -85,7 +85,7 @@ static INLINE int get_buffer_offset(struct r300_context *r300, { return r300->vertex_buffer[buf_nr].buffer_offset + elem_offset; } - +#if 0 /* XXX not called at all */ static void setup_vertex_buffers(struct r300_context *r300) { @@ -95,9 +95,9 @@ static void setup_vertex_buffers(struct r300_context *r300) for (i = 0; i < r300->aos_count; i++) { vert_elem = &r300->vertex_element[i]; + /* XXX use translate module to convert the data */ if (!format_is_supported(vert_elem->src_format, vert_elem->nr_components)) { - /* XXX use translate module to convert the data */ assert(0); /* struct pipe_buffer *buf; @@ -114,7 +114,7 @@ static void setup_vertex_buffers(struct r300_context *r300) } } } - +#endif /* XXX these shouldn't be asserts since we can work around bad indexbufs */ void setup_index_buffer(struct r300_context *r300, struct pipe_buffer* indexBuffer, -- cgit v1.2.3 From 0525cb1273a51343fba0a94d01d115e4256d1db2 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 8 Nov 2009 09:56:02 -0800 Subject: r300g: Fix is_buffer_referenced. --- src/gallium/drivers/r300/r300_context.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index b520e5929e..43d7ff3ed3 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -89,8 +89,11 @@ static unsigned int r300_is_buffer_referenced(struct pipe_context *pipe, struct pipe_buffer *buf) { - /* XXX */ - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + /* This only checks to see whether actual hardware buffers are + * referenced. Since we use managed BOs and transfers, it's actually not + * possible for pipe_buffers to ever reference the actual hardware, so + * buffers are never referenced. */ + return 0; } static void r300_flush_cb(void *data) -- cgit v1.2.3 From b6f93e2607f1bbc5b2f478f0a57d7786dd7d73a5 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 8 Nov 2009 11:32:32 -0800 Subject: r300g: Enable PSC/RS dump with new debugging flags. --- src/gallium/drivers/r300/r300_emit.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index ec1d521800..b3d9db676a 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -510,6 +510,8 @@ void r300_emit_rs_block_state(struct r300_context* r300, struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); + DBG(r300, DBG_DRAW, "r300: RS emit:\n"); + BEGIN_CS(21); if (r300screen->caps->is_r500) { OUT_CS_REG_SEQ(R500_RS_IP_0, 8); @@ -518,7 +520,7 @@ void r300_emit_rs_block_state(struct r300_context* r300, } for (i = 0; i < 8; i++) { OUT_CS(rs->ip[i]); - /* debug_printf("ip %d: 0x%08x\n", i, rs->ip[i]); */ + DBG(r300, DBG_DRAW, " : ip %d: 0x%08x\n", i, rs->ip[i]); } OUT_CS_REG_SEQ(R300_RS_COUNT, 2); @@ -532,11 +534,11 @@ void r300_emit_rs_block_state(struct r300_context* r300, } for (i = 0; i < 8; i++) { OUT_CS(rs->inst[i]); - /* debug_printf("inst %d: 0x%08x\n", i, rs->inst[i]); */ + DBG(r300, DBG_DRAW, " : inst %d: 0x%08x\n", i, rs->inst[i]); } - /* debug_printf("count: 0x%08x inst_count: 0x%08x\n", rs->count, - * rs->inst_count); */ + DBG(r300, DBG_DRAW, " : count: 0x%08x inst_count: 0x%08x\n", + rs->count, rs->inst_count); END_CS; } @@ -653,6 +655,8 @@ void r300_emit_vertex_format_state(struct r300_context* r300) int i; CS_LOCALS(r300); + DBG(r300, DBG_DRAW, "r300: VAP/PSC emit:\n"); + BEGIN_CS(26); OUT_CS_REG(R300_VAP_VTX_SIZE, r300->vertex_info->vinfo.size); @@ -662,22 +666,22 @@ void r300_emit_vertex_format_state(struct r300_context* r300) OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); OUT_CS(r300->vertex_info->vinfo.hwfmt[2]); OUT_CS(r300->vertex_info->vinfo.hwfmt[3]); - /* for (i = 0; i < 4; i++) { - * debug_printf("hwfmt%d: 0x%08x\n", i, - * r300->vertex_info->vinfo.hwfmt[i]); - * } */ + for (i = 0; i < 4; i++) { + DBG(r300, DBG_DRAW, " : hwfmt%d: 0x%08x\n", i, + r300->vertex_info->vinfo.hwfmt[i]); + } OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, 8); for (i = 0; i < 8; i++) { OUT_CS(r300->vertex_info->vap_prog_stream_cntl[i]); - /* debug_printf("prog_stream_cntl%d: 0x%08x\n", i, - * r300->vertex_info->vap_prog_stream_cntl[i]); */ + DBG(r300, DBG_DRAW, " : prog_stream_cntl%d: 0x%08x\n", i, + r300->vertex_info->vap_prog_stream_cntl[i]); } OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, 8); for (i = 0; i < 8; i++) { OUT_CS(r300->vertex_info->vap_prog_stream_cntl_ext[i]); - /* debug_printf("prog_stream_cntl_ext%d: 0x%08x\n", i, - * r300->vertex_info->vap_prog_stream_cntl_ext[i]); */ + DBG(r300, DBG_DRAW, " : prog_stream_cntl_ext%d: 0x%08x\n", i, + r300->vertex_info->vap_prog_stream_cntl_ext[i]); } END_CS; } -- cgit v1.2.3 From 11d9edf4c9c75d5a41fb0a1757441ad315330bea Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 8 Nov 2009 11:45:57 -0800 Subject: r300g: Unify context names for counts. From the SW TCL fixups. --- src/gallium/drivers/r300/r300_context.c | 2 +- src/gallium/drivers/r300/r300_context.h | 4 ++-- src/gallium/drivers/r300/r300_emit.c | 16 ++++++++++------ src/gallium/drivers/r300/r300_render.c | 2 +- src/gallium/drivers/r300/r300_state.c | 4 ++-- src/gallium/drivers/r300/r300_vbo.c | 5 +++-- 6 files changed, 19 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 43d7ff3ed3..ae23329b83 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -158,6 +158,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, /* Open up the OQ BO. */ r300->oqbo = screen->buffer_create(screen, 4096, PIPE_BUFFER_USAGE_VERTEX, 4096); + make_empty_list(&r300->query_list); r300_init_flush_functions(r300); @@ -172,6 +173,5 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->winsys->set_flush_cb(r300->winsys, r300_flush_cb, r300); r300->dirty_state = R300_NEW_KITCHEN_SINK; r300->dirty_hw++; - make_empty_list(&r300->query_list); return &r300->context; } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 8d14c53f49..f954ba7f9a 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -295,10 +295,10 @@ struct r300_context { /* Vertex buffers for Gallium. */ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - int vbuf_count; + int vertex_buffer_count; /* Vertex elements for Gallium. */ struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; - int aos_count; + int vertex_element_count; /* Bitmask of dirty state objects. */ uint32_t dirty_state; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index b3d9db676a..eeb97a2d37 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -584,17 +584,20 @@ void r300_emit_texture(struct r300_context* r300, END_CS; } +/* XXX I can't read this and that's not good */ void r300_emit_aos(struct r300_context* r300, unsigned offset) { struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; struct pipe_vertex_element *velem = r300->vertex_element; CS_LOCALS(r300); int i; - unsigned packet_size = (r300->aos_count * 3 + 1) / 2; - BEGIN_CS(2 + packet_size + r300->aos_count * 2); + unsigned aos_count = r300->vertex_element_count; + + unsigned packet_size = (aos_count * 3 + 1) / 2; + BEGIN_CS(2 + packet_size + aos_count * 2); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); - OUT_CS(r300->aos_count); - for (i = 0; i < r300->aos_count - 1; i += 2) { + OUT_CS(aos_count); + for (i = 0; i < aos_count - 1; i += 2) { int buf_num1 = velem[i].vertex_buffer_index; int buf_num2 = velem[i+1].vertex_buffer_index; assert(vbuf[buf_num1].stride % 4 == 0 && pf_get_size(velem[i].src_format) % 4 == 0); @@ -606,7 +609,7 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) OUT_CS(vbuf[buf_num2].buffer_offset + velem[i+1].src_offset + offset * vbuf[buf_num2].stride); } - if (r300->aos_count & 1) { + if (aos_count & 1) { int buf_num = velem[i].vertex_buffer_index; assert(vbuf[buf_num].stride % 4 == 0 && pf_get_size(velem[i].src_format) % 4 == 0); OUT_CS((pf_get_size(velem[i].src_format) >> 2) | (vbuf[buf_num].stride << 6)); @@ -614,7 +617,8 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) offset * vbuf[buf_num].stride); } - for (i = 0; i < r300->aos_count; i++) { + /* XXX bare CS reloc */ + for (i = 0; i < aos_count; i++) { cs_winsys->write_cs_reloc(cs_winsys, vbuf[velem[i].vertex_buffer_index].buffer, RADEON_GEM_DOMAIN_GTT, diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index fa057324f8..1ff3e64b44 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -140,7 +140,7 @@ static boolean r300_setup_vertex_buffers(struct r300_context *r300) struct pipe_vertex_element *velem = r300->vertex_element; validate: - for (int i = 0; i < r300->aos_count; i++) { + for (int i = 0; i < r300->vertex_element_count; i++) { if (!r300->winsys->add_buffer(r300->winsys, vbuf[velem[i].vertex_buffer_index].buffer, RADEON_GEM_DOMAIN_GTT, 0)) { diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index e0b85ab768..d1eced61db 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -668,7 +668,7 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, memcpy(r300->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); - r300->vbuf_count = count; + r300->vertex_buffer_count = count; if (r300->draw) { draw_flush(r300->draw); @@ -685,7 +685,7 @@ static void r300_set_vertex_elements(struct pipe_context* pipe, memcpy(r300->vertex_element, elements, sizeof(struct pipe_vertex_element) * count); - r300->aos_count = count; + r300->vertex_element_count = count; if (r300->draw) { draw_flush(r300->draw); diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c index 5ad6b9c215..a6a159667a 100644 --- a/src/gallium/drivers/r300/r300_vbo.c +++ b/src/gallium/drivers/r300/r300_vbo.c @@ -71,12 +71,13 @@ void setup_vertex_attributes(struct r300_context *r300) struct pipe_vertex_element *vert_elem; int i; - for (i = 0; i < r300->aos_count; i++) { + for (i = 0; i < r300->vertex_element_count; i++) { vert_elem = &r300->vertex_element[i]; setup_vertex_attribute(r300->vertex_info, vert_elem, i); } - finish_vertex_attribs_setup(r300->vertex_info, r300->aos_count); + finish_vertex_attribs_setup(r300->vertex_info, + r300->vertex_element_count); } static INLINE int get_buffer_offset(struct r300_context *r300, -- cgit v1.2.3 From fe898638086370ed86a9ce76b21fa8ebb88c4b08 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 8 Nov 2009 14:07:01 -0800 Subject: r300g: Protect against possibly missing Draw pointer. Part of the SW TCL revival. --- src/gallium/drivers/r300/r300_state_derived.c | 47 +++++++++++++++++---------- 1 file changed, 29 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 14d7bb094c..7166694edf 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -170,20 +170,30 @@ static void r300_vs_tab_routes(struct r300_context* r300, } tab[0] = 0; } - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_POSITION, 0)); + + /* Position. */ + if (r300->draw) { + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, + draw_find_vs_output(r300->draw, TGSI_SEMANTIC_POSITION, 0)); + } vinfo->hwfmt[1] |= R300_INPUT_CNTL_POS; vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + /* Point size. */ if (psize) { - draw_emit_vertex_attr(vinfo, EMIT_1F_PSIZE, INTERP_POS, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_PSIZE, 0)); + if (r300->draw) { + draw_emit_vertex_attr(vinfo, EMIT_1F_PSIZE, INTERP_POS, + draw_find_vs_output(r300->draw, TGSI_SEMANTIC_PSIZE, 0)); + } vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; } + /* Colors. */ for (i = 0; i < cols; i++) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_COLOR, i)); + if (r300->draw) { + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, + draw_find_vs_output(r300->draw, TGSI_SEMANTIC_COLOR, i)); + } vinfo->hwfmt[1] |= R300_INPUT_CNTL_COLOR; vinfo->hwfmt[2] |= (R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i); } @@ -192,28 +202,27 @@ static void r300_vs_tab_routes(struct r300_context* r300, * This gets around a double-increment problem. */ i = 0; + /* Fog. This is a special-cased texcoord. */ if (fog) { i++; - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_FOG, 0)); + if (r300->draw) { + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, + draw_find_vs_output(r300->draw, TGSI_SEMANTIC_FOG, 0)); + } vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i); vinfo->hwfmt[3] |= (4 << (3 * i)); } + /* Texcoords. */ for (; i < texs; i++) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i)); + if (r300->draw) { + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, + draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i)); + } vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i); vinfo->hwfmt[3] |= (4 << (3 * i)); } - /* Handle the case where the vertex shader will be generating some of - * the attribs based on its inputs. */ - if (r300screen->caps->has_tcl && - info->num_inputs < info->num_outputs) { - vinfo->num_attribs = info->num_inputs; - } - draw_compute_vertex_size(vinfo); } @@ -455,6 +464,7 @@ static void r300_update_rs_block(struct r300_context* r300, /* Update the vertex format. */ static void r300_update_derived_shader_state(struct r300_context* r300) { + struct r300_screen* r300screen = r300_screen(r300->context.screen); struct r300_vertex_info* vformat; struct r300_rs_block* rs_block; int i; @@ -543,7 +553,8 @@ static void r300_update_ztop(struct r300_context* r300) void r300_update_derived_state(struct r300_context* r300) { - if (r300->dirty_state & + /* XXX */ + if (TRUE || r300->dirty_state & (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER)) { r300_update_derived_shader_state(r300); } -- cgit v1.2.3 From c9167d868cfb2ba821f01e0217e3880c5df4c97b Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 8 Nov 2009 14:51:52 -0800 Subject: r300g: Fix up SW TCL rendering functions. They don't work, but at least they're clean now. --- src/gallium/drivers/r300/r300_render.c | 68 ++++++++++++++++++++++++---------- src/gallium/drivers/r300/r300_render.h | 5 +++ 2 files changed, 53 insertions(+), 20 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 1ff3e64b44..62e1456ed3 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -242,8 +242,44 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, * keep these functions separated so that they are easier to locate. ~C. * ***************************************************************************/ -/* Draw-based drawing for SW TCL chipsets. - * XXX currently broken as fucking hell. */ +/* SW TCL arrays, using Draw. */ +boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, + unsigned mode, + unsigned start, + unsigned count) +{ + struct r300_context* r300 = r300_context(pipe); + int i; + + if (!u_trim_pipe_prim(mode, &count)) { + return FALSE; + } + + for (i = 0; i < r300->vertex_buffer_count; i++) { + void* buf = pipe_buffer_map(pipe->screen, + r300->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(r300->draw, i, buf); + } + + draw_set_mapped_element_buffer(r300->draw, 0, NULL); + + draw_set_mapped_constant_buffer(r300->draw, + r300->shader_constants[PIPE_SHADER_VERTEX].constants, + r300->shader_constants[PIPE_SHADER_VERTEX].count * + (sizeof(float) * 4)); + + draw_arrays(r300->draw, mode, start, count); + + for (i = 0; i < r300->vertex_buffer_count; i++) { + pipe_buffer_unmap(pipe->screen, r300->vertex_buffer[i].buffer); + draw_set_mapped_vertex_buffer(r300->draw, i, NULL); + } + + return TRUE; +} + +/* SW TCL elements, using Draw. */ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, unsigned indexSize, @@ -253,8 +289,6 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, unsigned start, unsigned count) { - assert(0); -#if 0 struct r300_context* r300 = r300_context(pipe); int i; @@ -264,19 +298,15 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, for (i = 0; i < r300->vertex_buffer_count; i++) { void* buf = pipe_buffer_map(pipe->screen, - r300->vertex_buffers[i].buffer, + r300->vertex_buffer[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(r300->draw, i, buf); } - if (indexBuffer) { - void* indices = pipe_buffer_map(pipe->screen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer_range(r300->draw, indexSize, - minIndex, maxIndex, indices); - } else { - draw_set_mapped_element_buffer(r300->draw, 0, NULL); - } + void* indices = pipe_buffer_map(pipe->screen, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer_range(r300->draw, indexSize, + minIndex, maxIndex, indices); draw_set_mapped_constant_buffer(r300->draw, r300->shader_constants[PIPE_SHADER_VERTEX].constants, @@ -286,16 +316,14 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, draw_arrays(r300->draw, mode, start, count); for (i = 0; i < r300->vertex_buffer_count; i++) { - pipe_buffer_unmap(pipe->screen, r300->vertex_buffers[i].buffer); + pipe_buffer_unmap(pipe->screen, r300->vertex_buffer[i].buffer); draw_set_mapped_vertex_buffer(r300->draw, i, NULL); } - if (indexBuffer) { - pipe_buffer_unmap(pipe->screen, indexBuffer); - draw_set_mapped_element_buffer_range(r300->draw, 0, start, - start + count - 1, NULL); - } -#endif + pipe_buffer_unmap(pipe->screen, indexBuffer); + draw_set_mapped_element_buffer_range(r300->draw, 0, start, + start + count - 1, NULL); + return TRUE; } diff --git a/src/gallium/drivers/r300/r300_render.h b/src/gallium/drivers/r300/r300_render.h index 3f8ac1fb7a..da83069083 100644 --- a/src/gallium/drivers/r300/r300_render.h +++ b/src/gallium/drivers/r300/r300_render.h @@ -42,6 +42,11 @@ boolean r300_draw_elements(struct pipe_context* pipe, boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, unsigned start, unsigned count); +boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, + unsigned mode, + unsigned start, + unsigned count); + boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, unsigned indexSize, -- cgit v1.2.3 From 7204b92101ecf4e2fbc78cf91f387996396deec8 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 9 Nov 2009 14:29:00 +0100 Subject: nv50: clarify data for method 0x121c --- src/gallium/drivers/nv50/nv50_state_validate.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index a13d64b7fa..799d2758fe 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -37,13 +37,14 @@ nv50_state_validate_fb(struct nv50_context *nv50) struct pipe_framebuffer_state *fb = &nv50->framebuffer; unsigned i, w, h, gw = 0; - /* Set nr of active RTs. Don't know what 0xfac6880 does, but - * at least 0x880 was required to draw to more than 1 RT. - * In some special cases, 0xfac6880 is not used, we probably - * don't hit any of these though. + /* Set nr of active RTs and select RT for each colour output. + * FP result 0 always goes to RT[0], bits 4 - 6 are ignored. + * Ambiguous assignment results in no rendering (no DATA_ERROR). */ so_method(so, tesla, 0x121c, 1); - so_data (so, 0x0fac6880 | fb->nr_cbufs); + so_data (so, fb->nr_cbufs | + (0 << 4) | (1 << 7) | (2 << 10) | (3 << 13) | + (4 << 16) | (5 << 19) | (6 << 22) | (7 << 25)); for (i = 0; i < fb->nr_cbufs; i++) { struct pipe_texture *pt = fb->cbufs[i]->texture; -- cgit v1.2.3 From bc9d51bb0eab90c47e7b07756e9eba9575f80ffc Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 9 Nov 2009 06:59:03 -0800 Subject: llvmpipe: Ensure stack variables in unit tests are properly aligned. --- src/gallium/drivers/llvmpipe/lp_test_blend.c | 21 +++++++++++---------- src/gallium/drivers/llvmpipe/lp_test_conv.c | 5 +++-- src/gallium/drivers/llvmpipe/lp_test_format.c | 1 + 3 files changed, 15 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 149fec1d54..29fff91981 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -462,6 +462,7 @@ compute_blend_ref(const struct pipe_blend_state *blend, } +ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, @@ -530,11 +531,11 @@ test_one(unsigned verbose, success = TRUE; for(i = 0; i < n && success; ++i) { if(mode == AoS) { - uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; - uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; - uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; - uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; - uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; @@ -595,11 +596,11 @@ test_one(unsigned verbose, if(mode == SoA) { const unsigned stride = type.length*type.width/8; - uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; - uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; - uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; - uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; - uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; + ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; boolean mismatch; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index ac2a6d05e3..968c7a2d4a 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -142,6 +142,7 @@ add_conv_test(LLVMModuleRef module, } +ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, @@ -229,8 +230,8 @@ test_one(unsigned verbose, for(i = 0; i < n && success; ++i) { unsigned src_stride = src_type.length*src_type.width/8; unsigned dst_stride = dst_type.length*dst_type.width/8; - uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; - uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + ALIGN16_ATTRIB uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + ALIGN16_ATTRIB uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; int64_t start_counter = 0; diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index b2403ad521..23ea9ebbe7 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -199,6 +199,7 @@ add_store_rgba_test(LLVMModuleRef module, } +ALIGN_STACK static boolean test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test) { -- cgit v1.2.3 From eef5a0b3a3e03abd1c69649763efc79575df650f Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 10 Nov 2009 05:22:15 -0800 Subject: llvmpipe: Fix derived blend color state. --- src/gallium/drivers/llvmpipe/lp_state_blend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c index 3f03bd0057..b2e75d3b14 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c @@ -76,7 +76,7 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe, for (i = 0; i < 4; ++i) { uint8_t c = float_to_ubyte(blend_color->color[i]); for (j = 0; j < 16; ++j) - llvmpipe->jit_context.blend_color[i*4 + j] = c; + llvmpipe->jit_context.blend_color[i*16 + j] = c; } } -- cgit v1.2.3 From 520b6abdecdaba856e5ca04938e18eb83b33dfaa Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Fri, 6 Nov 2009 12:00:14 -0800 Subject: i915g: Fix memory leak when pci id is unknown. --- src/gallium/drivers/i915/i915_screen.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index c66558c320..d4ee8f5339 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -271,6 +271,7 @@ i915_create_screen(struct intel_winsys *iws, uint pci_id) default: debug_printf("%s: unknown pci id 0x%x, cannot create screen\n", __FUNCTION__, pci_id); + FREE(is); return NULL; } -- cgit v1.2.3 From cbee31a1f84a4d28d126356aaca317e2cdd003dc Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 11 Nov 2009 03:05:16 -0800 Subject: r300, r300g: Add missing registers. --- src/gallium/drivers/r300/r300_reg.h | 8 +++++--- src/mesa/drivers/dri/r300/r300_reg.h | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 1e4d3f5d70..8ca785cb58 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -1884,6 +1884,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_RGB_ADDR0(x) ((x) << 0) # define R300_RGB_ADDR1(x) ((x) << 6) # define R300_RGB_ADDR2(x) ((x) << 12) +# define R300_RGB_TARGET(x) ((x) << 29) #define R300_US_ALU_ALPHA_ADDR_0 0x47C0 # define R300_ALU_SRC0A_SHIFT 0 @@ -1901,9 +1902,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ALU_DSTA_REG (1 << 23) # define R300_ALU_DSTA_OUTPUT (1 << 24) # define R300_ALU_DSTA_DEPTH (1 << 27) -# define R300_ALPHA_ADDR0(x) ((x) << 0) -# define R300_ALPHA_ADDR1(x) ((x) << 6) -# define R300_ALPHA_ADDR2(x) ((x) << 12) +# define R300_ALPHA_ADDR0(x) ((x) << 0) +# define R300_ALPHA_ADDR1(x) ((x) << 6) +# define R300_ALPHA_ADDR2(x) ((x) << 12) +# define R300_ALPHA_TARGET(x) ((x) << 25) #define R300_US_ALU_RGB_INST_0 0x48C0 # define R300_ALU_ARGC_SRC0C_XYZ 0 diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 623da60333..ea684e7df1 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1789,6 +1789,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ALU_DSTC_OUTPUT_X (1 << 26) # define R300_ALU_DSTC_OUTPUT_Y (1 << 27) # define R300_ALU_DSTC_OUTPUT_Z (1 << 28) +# define R300_RGB_TARGET(x) ((x) << 29) #define R300_US_ALU_ALPHA_ADDR_0 0x47C0 # define R300_ALU_SRC0A_SHIFT 0 @@ -1806,6 +1807,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ALU_DSTA_REG (1 << 23) # define R300_ALU_DSTA_OUTPUT (1 << 24) # define R300_ALU_DSTA_DEPTH (1 << 27) +# define R300_ALPHA_TARGET(x) ((x) << 25) #define R300_US_ALU_RGB_INST_0 0x48C0 # define R300_ALU_ARGC_SRC0C_XYZ 0 -- cgit v1.2.3 From ed9c4933af6fb58269f1efc7c826cb6a5fd81d38 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 5 Nov 2009 19:07:19 +0100 Subject: nv10: Fix build for the last nouveau_class.h changes. Signed-off-by: Francisco Jerez Signed-off-by: Pekka Paalanen --- src/gallium/drivers/nv10/nv10_context.c | 2 +- src/gallium/drivers/nv10/nv10_prim_vbuf.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c index 933176fc32..65a22b175e 100644 --- a/src/gallium/drivers/nv10/nv10_context.c +++ b/src/gallium/drivers/nv10/nv10_context.c @@ -243,7 +243,7 @@ static void nv10_init_hwctx(struct nv10_context *nv10) OUT_RING (0.0); OUT_RINGf (16777216.0); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_SCALE_X, 4); + BEGIN_RING(celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4); OUT_RINGf (-2048.0); OUT_RINGf (-2048.0); OUT_RINGf (16777215.0 * 0.5); diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c index 1806d5f8cc..7ba9777a22 100644 --- a/src/gallium/drivers/nv10/nv10_prim_vbuf.c +++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c @@ -69,9 +69,9 @@ void nv10_vtxbuf_bind( struct nv10_context* nv10 ) { int i; for(i = 0; i < 8; i++) { - BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_OFFSET(i), 1); + BEGIN_RING(celsius, NV10TCL_VTXBUF_ADDRESS(i), 1); OUT_RING(0/*nv10->vtxbuf*/); - BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT(i) ,1); + BEGIN_RING(celsius, NV10TCL_VTXFMT(i), 1); OUT_RING(0/*XXX*/); } } -- cgit v1.2.3 From abefd7dcdf28c90454b59faaf9401fa6e6c6f526 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sun, 15 Nov 2009 14:49:02 +0100 Subject: nv20: Fix build for the last nouveau_class.h changes. Signed-off-by: Francisco Jerez Signed-off-by: Pekka Paalanen --- src/gallium/drivers/nv20/nv20_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c index 9a48739661..276db8b57b 100644 --- a/src/gallium/drivers/nv20/nv20_context.c +++ b/src/gallium/drivers/nv20/nv20_context.c @@ -360,13 +360,13 @@ static void nv20_init_hwctx(struct nv20_context *nv20) OUT_RINGf (0.0); OUT_RINGf (16777216.0); /* [0, 1] scaled approx to [0, 2^24] */ - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE0_X, 4); + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); OUT_RINGf (0.0); /* x-offset, w/2 + 1.031250 */ OUT_RINGf (0.0); /* y-offset, h/2 + 0.030762 */ OUT_RINGf (0.0); OUT_RINGf (16777215.0); - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE1_X, 4); + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE_X, 4); OUT_RINGf (0.0); /* no effect?, w/2 */ OUT_RINGf (0.0); /* no effect?, h/2 */ OUT_RINGf (16777215.0 * 0.5); -- cgit v1.2.3 From 3192633d4abe262d413e41feb871fe8deed409d8 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Mon, 16 Nov 2009 19:56:18 +0100 Subject: svga: Add svga driver --- src/gallium/drivers/svga/Makefile | 63 + src/gallium/drivers/svga/SConscript | 75 + src/gallium/drivers/svga/include/README | 3 + src/gallium/drivers/svga/include/svga3d_caps.h | 139 + src/gallium/drivers/svga/include/svga3d_reg.h | 1793 +++++++++++++ .../drivers/svga/include/svga3d_shaderdefs.h | 519 ++++ src/gallium/drivers/svga/include/svga_reg.h | 1346 ++++++++++ src/gallium/drivers/svga/include/svga_types.h | 46 + src/gallium/drivers/svga/svga_cmd.c | 1427 ++++++++++ src/gallium/drivers/svga/svga_cmd.h | 235 ++ src/gallium/drivers/svga/svga_context.c | 269 ++ src/gallium/drivers/svga/svga_context.h | 443 ++++ src/gallium/drivers/svga/svga_debug.h | 74 + src/gallium/drivers/svga/svga_draw.c | 370 +++ src/gallium/drivers/svga/svga_draw.h | 83 + src/gallium/drivers/svga/svga_draw_arrays.c | 297 +++ src/gallium/drivers/svga/svga_draw_elements.c | 255 ++ src/gallium/drivers/svga/svga_draw_private.h | 158 ++ src/gallium/drivers/svga/svga_hw_reg.h | 42 + src/gallium/drivers/svga/svga_pipe_blend.c | 246 ++ src/gallium/drivers/svga/svga_pipe_blit.c | 84 + src/gallium/drivers/svga/svga_pipe_clear.c | 119 + src/gallium/drivers/svga/svga_pipe_constants.c | 74 + src/gallium/drivers/svga/svga_pipe_depthstencil.c | 153 ++ src/gallium/drivers/svga/svga_pipe_draw.c | 261 ++ src/gallium/drivers/svga/svga_pipe_flush.c | 68 + src/gallium/drivers/svga/svga_pipe_fs.c | 124 + src/gallium/drivers/svga/svga_pipe_misc.c | 187 ++ src/gallium/drivers/svga/svga_pipe_query.c | 267 ++ src/gallium/drivers/svga/svga_pipe_rasterizer.c | 250 ++ src/gallium/drivers/svga/svga_pipe_sampler.c | 243 ++ src/gallium/drivers/svga/svga_pipe_vertex.c | 115 + src/gallium/drivers/svga/svga_pipe_vs.c | 189 ++ src/gallium/drivers/svga/svga_screen.c | 435 ++++ src/gallium/drivers/svga/svga_screen.h | 95 + src/gallium/drivers/svga/svga_screen_buffer.c | 820 ++++++ src/gallium/drivers/svga/svga_screen_buffer.h | 190 ++ src/gallium/drivers/svga/svga_screen_cache.c | 307 +++ src/gallium/drivers/svga/svga_screen_cache.h | 135 + src/gallium/drivers/svga/svga_screen_texture.c | 1065 ++++++++ src/gallium/drivers/svga/svga_screen_texture.h | 177 ++ src/gallium/drivers/svga/svga_state.c | 278 ++ src/gallium/drivers/svga/svga_state.h | 95 + src/gallium/drivers/svga/svga_state_constants.c | 239 ++ src/gallium/drivers/svga/svga_state_framebuffer.c | 455 ++++ src/gallium/drivers/svga/svga_state_fs.c | 282 ++ src/gallium/drivers/svga/svga_state_need_swtnl.c | 200 ++ src/gallium/drivers/svga/svga_state_rss.c | 268 ++ src/gallium/drivers/svga/svga_state_tss.c | 279 ++ src/gallium/drivers/svga/svga_state_vdecl.c | 182 ++ src/gallium/drivers/svga/svga_state_vs.c | 239 ++ src/gallium/drivers/svga/svga_swtnl.h | 52 + src/gallium/drivers/svga/svga_swtnl_backend.c | 349 +++ src/gallium/drivers/svga/svga_swtnl_draw.c | 170 ++ src/gallium/drivers/svga/svga_swtnl_private.h | 93 + src/gallium/drivers/svga/svga_swtnl_state.c | 242 ++ src/gallium/drivers/svga/svga_tgsi.c | 266 ++ src/gallium/drivers/svga/svga_tgsi.h | 139 + src/gallium/drivers/svga/svga_tgsi_decl_sm20.c | 280 ++ src/gallium/drivers/svga/svga_tgsi_decl_sm30.c | 385 +++ src/gallium/drivers/svga/svga_tgsi_emit.h | 345 +++ src/gallium/drivers/svga/svga_tgsi_insn.c | 2716 ++++++++++++++++++++ src/gallium/drivers/svga/svga_winsys.h | 299 +++ src/gallium/drivers/svga/svgadump/st_shader.h | 214 ++ src/gallium/drivers/svga/svgadump/st_shader_dump.c | 649 +++++ src/gallium/drivers/svga/svgadump/st_shader_dump.h | 42 + src/gallium/drivers/svga/svgadump/st_shader_op.c | 168 ++ src/gallium/drivers/svga/svgadump/st_shader_op.h | 46 + src/gallium/drivers/svga/svgadump/svga_dump.c | 1736 +++++++++++++ src/gallium/drivers/svga/svgadump/svga_dump.h | 34 + src/gallium/drivers/svga/svgadump/svga_dump.py | 329 +++ src/gallium/winsys/drm/vmware/Makefile | 12 + src/gallium/winsys/drm/vmware/SConscript | 11 + src/gallium/winsys/drm/vmware/core/Makefile | 47 + src/gallium/winsys/drm/vmware/core/SConscript | 39 + src/gallium/winsys/drm/vmware/core/vmw_buffer.c | 274 ++ src/gallium/winsys/drm/vmware/core/vmw_buffer.h | 65 + src/gallium/winsys/drm/vmware/core/vmw_context.c | 297 +++ src/gallium/winsys/drm/vmware/core/vmw_context.h | 59 + src/gallium/winsys/drm/vmware/core/vmw_fence.c | 108 + src/gallium/winsys/drm/vmware/core/vmw_fence.h | 59 + src/gallium/winsys/drm/vmware/core/vmw_screen.c | 74 + src/gallium/winsys/drm/vmware/core/vmw_screen.h | 134 + .../winsys/drm/vmware/core/vmw_screen_dri.c | 371 +++ .../winsys/drm/vmware/core/vmw_screen_ioctl.c | 503 ++++ .../winsys/drm/vmware/core/vmw_screen_pools.c | 79 + .../winsys/drm/vmware/core/vmw_screen_svga.c | 295 +++ src/gallium/winsys/drm/vmware/core/vmw_surface.c | 59 + src/gallium/winsys/drm/vmware/core/vmw_surface.h | 79 + src/gallium/winsys/drm/vmware/dri/Makefile | 18 + src/gallium/winsys/drm/vmware/dri/SConscript | 63 + src/gallium/winsys/drm/vmware/egl/Makefile | 18 + src/gallium/winsys/drm/vmware/xorg/Makefile | 54 + src/gallium/winsys/drm/vmware/xorg/SConscript | 55 + src/gallium/winsys/drm/vmware/xorg/vmw_xorg.c | 150 ++ 95 files changed, 27235 insertions(+) create mode 100644 src/gallium/drivers/svga/Makefile create mode 100644 src/gallium/drivers/svga/SConscript create mode 100644 src/gallium/drivers/svga/include/README create mode 100644 src/gallium/drivers/svga/include/svga3d_caps.h create mode 100644 src/gallium/drivers/svga/include/svga3d_reg.h create mode 100644 src/gallium/drivers/svga/include/svga3d_shaderdefs.h create mode 100644 src/gallium/drivers/svga/include/svga_reg.h create mode 100644 src/gallium/drivers/svga/include/svga_types.h create mode 100644 src/gallium/drivers/svga/svga_cmd.c create mode 100644 src/gallium/drivers/svga/svga_cmd.h create mode 100644 src/gallium/drivers/svga/svga_context.c create mode 100644 src/gallium/drivers/svga/svga_context.h create mode 100644 src/gallium/drivers/svga/svga_debug.h create mode 100644 src/gallium/drivers/svga/svga_draw.c create mode 100644 src/gallium/drivers/svga/svga_draw.h create mode 100644 src/gallium/drivers/svga/svga_draw_arrays.c create mode 100644 src/gallium/drivers/svga/svga_draw_elements.c create mode 100644 src/gallium/drivers/svga/svga_draw_private.h create mode 100644 src/gallium/drivers/svga/svga_hw_reg.h create mode 100644 src/gallium/drivers/svga/svga_pipe_blend.c create mode 100644 src/gallium/drivers/svga/svga_pipe_blit.c create mode 100644 src/gallium/drivers/svga/svga_pipe_clear.c create mode 100644 src/gallium/drivers/svga/svga_pipe_constants.c create mode 100644 src/gallium/drivers/svga/svga_pipe_depthstencil.c create mode 100644 src/gallium/drivers/svga/svga_pipe_draw.c create mode 100644 src/gallium/drivers/svga/svga_pipe_flush.c create mode 100644 src/gallium/drivers/svga/svga_pipe_fs.c create mode 100644 src/gallium/drivers/svga/svga_pipe_misc.c create mode 100644 src/gallium/drivers/svga/svga_pipe_query.c create mode 100644 src/gallium/drivers/svga/svga_pipe_rasterizer.c create mode 100644 src/gallium/drivers/svga/svga_pipe_sampler.c create mode 100644 src/gallium/drivers/svga/svga_pipe_vertex.c create mode 100644 src/gallium/drivers/svga/svga_pipe_vs.c create mode 100644 src/gallium/drivers/svga/svga_screen.c create mode 100644 src/gallium/drivers/svga/svga_screen.h create mode 100644 src/gallium/drivers/svga/svga_screen_buffer.c create mode 100644 src/gallium/drivers/svga/svga_screen_buffer.h create mode 100644 src/gallium/drivers/svga/svga_screen_cache.c create mode 100644 src/gallium/drivers/svga/svga_screen_cache.h create mode 100644 src/gallium/drivers/svga/svga_screen_texture.c create mode 100644 src/gallium/drivers/svga/svga_screen_texture.h create mode 100644 src/gallium/drivers/svga/svga_state.c create mode 100644 src/gallium/drivers/svga/svga_state.h create mode 100644 src/gallium/drivers/svga/svga_state_constants.c create mode 100644 src/gallium/drivers/svga/svga_state_framebuffer.c create mode 100644 src/gallium/drivers/svga/svga_state_fs.c create mode 100644 src/gallium/drivers/svga/svga_state_need_swtnl.c create mode 100644 src/gallium/drivers/svga/svga_state_rss.c create mode 100644 src/gallium/drivers/svga/svga_state_tss.c create mode 100644 src/gallium/drivers/svga/svga_state_vdecl.c create mode 100644 src/gallium/drivers/svga/svga_state_vs.c create mode 100644 src/gallium/drivers/svga/svga_swtnl.h create mode 100644 src/gallium/drivers/svga/svga_swtnl_backend.c create mode 100644 src/gallium/drivers/svga/svga_swtnl_draw.c create mode 100644 src/gallium/drivers/svga/svga_swtnl_private.h create mode 100644 src/gallium/drivers/svga/svga_swtnl_state.c create mode 100644 src/gallium/drivers/svga/svga_tgsi.c create mode 100644 src/gallium/drivers/svga/svga_tgsi.h create mode 100644 src/gallium/drivers/svga/svga_tgsi_decl_sm20.c create mode 100644 src/gallium/drivers/svga/svga_tgsi_decl_sm30.c create mode 100644 src/gallium/drivers/svga/svga_tgsi_emit.h create mode 100644 src/gallium/drivers/svga/svga_tgsi_insn.c create mode 100644 src/gallium/drivers/svga/svga_winsys.h create mode 100644 src/gallium/drivers/svga/svgadump/st_shader.h create mode 100644 src/gallium/drivers/svga/svgadump/st_shader_dump.c create mode 100644 src/gallium/drivers/svga/svgadump/st_shader_dump.h create mode 100644 src/gallium/drivers/svga/svgadump/st_shader_op.c create mode 100644 src/gallium/drivers/svga/svgadump/st_shader_op.h create mode 100644 src/gallium/drivers/svga/svgadump/svga_dump.c create mode 100644 src/gallium/drivers/svga/svgadump/svga_dump.h create mode 100755 src/gallium/drivers/svga/svgadump/svga_dump.py create mode 100644 src/gallium/winsys/drm/vmware/Makefile create mode 100644 src/gallium/winsys/drm/vmware/SConscript create mode 100644 src/gallium/winsys/drm/vmware/core/Makefile create mode 100644 src/gallium/winsys/drm/vmware/core/SConscript create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_buffer.c create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_buffer.h create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_context.c create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_context.h create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_fence.c create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_fence.h create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_screen.c create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_screen.h create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_screen_ioctl.c create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_screen_svga.c create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_surface.c create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_surface.h create mode 100644 src/gallium/winsys/drm/vmware/dri/Makefile create mode 100644 src/gallium/winsys/drm/vmware/dri/SConscript create mode 100644 src/gallium/winsys/drm/vmware/egl/Makefile create mode 100644 src/gallium/winsys/drm/vmware/xorg/Makefile create mode 100644 src/gallium/winsys/drm/vmware/xorg/SConscript create mode 100644 src/gallium/winsys/drm/vmware/xorg/vmw_xorg.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/Makefile b/src/gallium/drivers/svga/Makefile new file mode 100644 index 0000000000..05ab4ab9b3 --- /dev/null +++ b/src/gallium/drivers/svga/Makefile @@ -0,0 +1,63 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = svga + +C_SOURCES = \ + svgadump/st_shader_dump.c \ + svgadump/st_shader_op.c \ + svgadump/svga_dump.c \ + svga_cmd.c \ + svga_context.c \ + svga_draw.c \ + svga_draw_arrays.c \ + svga_draw_elements.c \ + svga_pipe_blend.c \ + svga_pipe_blit.c \ + svga_pipe_clear.c \ + svga_pipe_constants.c \ + svga_pipe_depthstencil.c \ + svga_pipe_draw.c \ + svga_pipe_flush.c \ + svga_pipe_fs.c \ + svga_pipe_misc.c \ + svga_pipe_query.c \ + svga_pipe_rasterizer.c \ + svga_pipe_sampler.c \ + svga_pipe_vertex.c \ + svga_pipe_vs.c \ + svga_screen.c \ + svga_screen_buffer.c \ + svga_screen_texture.c \ + svga_screen_cache.c \ + svga_state.c \ + svga_state_need_swtnl.c \ + svga_state_constants.c \ + svga_state_framebuffer.c \ + svga_state_rss.c \ + svga_state_tss.c \ + svga_state_vdecl.c \ + svga_state_fs.c \ + svga_state_vs.c \ + svga_swtnl_backend.c \ + svga_swtnl_draw.c \ + svga_swtnl_state.c \ + svga_tgsi.c \ + svga_tgsi_decl_sm20.c \ + svga_tgsi_decl_sm30.c \ + svga_tgsi_insn.c + +LIBRARY_INCLUDES = \ + -I$(TOP)/src/gallium/drivers/svga/include + +LIBRARY_DEFINES = \ + -DHAVE_STDINT_H -DHAVE_SYS_TYPES_H + +CC = gcc -fvisibility=hidden -msse -msse2 + +# Set the gnu99 standard to enable anonymous structs in vmware headers. +# +CFLAGS = -Wall -Werror -Wmissing-prototypes -std=gnu99 -ffast-math \ + $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) $(ASM_FLAGS) + +include ../../Makefile.template diff --git a/src/gallium/drivers/svga/SConscript b/src/gallium/drivers/svga/SConscript new file mode 100644 index 0000000000..0fa745c9b8 --- /dev/null +++ b/src/gallium/drivers/svga/SConscript @@ -0,0 +1,75 @@ +Import('*') + +env = env.Clone() + +if env['platform'] in ['linux']: + env.Append(CCFLAGS = ['-fvisibility=hidden']) + +if env['gcc']: + env.Append(CPPDEFINES = [ + 'HAVE_STDINT_H', + 'HAVE_SYS_TYPES_H', + ]) + if env['platform'] not in ['windows']: + # The Windows headers cause many gcc warnings + env.Append(CCFLAGS = ['-Werror']) + +env.Prepend(CPPPATH = [ + 'include', +]) + +env.Append(CPPDEFINES = [ +]) + +sources = [ + 'svga_cmd.c', + 'svga_context.c', + 'svga_draw.c', + 'svga_draw_arrays.c', + 'svga_draw_elements.c', + 'svga_pipe_blend.c', + 'svga_pipe_blit.c', + 'svga_pipe_clear.c', + 'svga_pipe_constants.c', + 'svga_pipe_depthstencil.c', + 'svga_pipe_draw.c', + 'svga_pipe_flush.c', + 'svga_pipe_fs.c', + 'svga_pipe_misc.c', + 'svga_pipe_query.c', + 'svga_pipe_rasterizer.c', + 'svga_pipe_sampler.c', + 'svga_pipe_vertex.c', + 'svga_pipe_vs.c', + 'svga_screen.c', + 'svga_screen_buffer.c', + 'svga_screen_cache.c', + 'svga_screen_texture.c', + 'svga_state.c', + 'svga_state_constants.c', + 'svga_state_framebuffer.c', + 'svga_state_need_swtnl.c', + 'svga_state_rss.c', + 'svga_state_tss.c', + 'svga_state_vdecl.c', + 'svga_state_fs.c', + 'svga_state_vs.c', + 'svga_swtnl_backend.c', + 'svga_swtnl_draw.c', + 'svga_swtnl_state.c', + 'svga_tgsi.c', + 'svga_tgsi_decl_sm20.c', + 'svga_tgsi_decl_sm30.c', + 'svga_tgsi_insn.c', + + 'svgadump/svga_dump.c', + 'svgadump/st_shader_dump.c', + 'svgadump/st_shader_op.c', +] + +svga = env.ConvenienceLibrary( + target = 'svga', + source = sources, +) + +Export('svga') diff --git a/src/gallium/drivers/svga/include/README b/src/gallium/drivers/svga/include/README new file mode 100644 index 0000000000..a0b8916104 --- /dev/null +++ b/src/gallium/drivers/svga/include/README @@ -0,0 +1,3 @@ +This directory contains the headers from the VMware SVGA Device Developer Kit: + + https://vmware-svga.svn.sourceforge.net/svnroot/vmware-svga/trunk/lib/vmware/ diff --git a/src/gallium/drivers/svga/include/svga3d_caps.h b/src/gallium/drivers/svga/include/svga3d_caps.h new file mode 100644 index 0000000000..714ce9f45f --- /dev/null +++ b/src/gallium/drivers/svga/include/svga3d_caps.h @@ -0,0 +1,139 @@ +/********************************************************** + * Copyright 2007-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/* + * svga3d_caps.h -- + * + * Definitions for SVGA3D hardware capabilities. Capabilities + * are used to query for optional rendering features during + * driver initialization. The capability data is stored as very + * basic key/value dictionary within the "FIFO register" memory + * area at the beginning of BAR2. + * + * Note that these definitions are only for 3D capabilities. + * The SVGA device also has "device capabilities" and "FIFO + * capabilities", which are non-3D-specific and are stored as + * bitfields rather than key/value pairs. + */ + +#ifndef _SVGA3D_CAPS_H_ +#define _SVGA3D_CAPS_H_ + +#define SVGA_FIFO_3D_CAPS_SIZE (SVGA_FIFO_3D_CAPS_LAST - \ + SVGA_FIFO_3D_CAPS + 1) + + +/* + * SVGA3dCapsRecordType + * + * Record types that can be found in the caps block. + * Related record types are grouped together numerically so that + * SVGA3dCaps_FindRecord() can be applied on a range of record + * types. + */ + +typedef enum { + SVGA3DCAPS_RECORD_UNKNOWN = 0, + SVGA3DCAPS_RECORD_DEVCAPS_MIN = 0x100, + SVGA3DCAPS_RECORD_DEVCAPS = 0x100, + SVGA3DCAPS_RECORD_DEVCAPS_MAX = 0x1ff, +} SVGA3dCapsRecordType; + + +/* + * SVGA3dCapsRecordHeader + * + * Header field leading each caps block record. Contains the offset (in + * register words, NOT bytes) to the next caps block record (or the end + * of caps block records which will be a zero word) and the record type + * as defined above. + */ + +typedef +struct SVGA3dCapsRecordHeader { + uint32 length; + SVGA3dCapsRecordType type; +} +SVGA3dCapsRecordHeader; + + +/* + * SVGA3dCapsRecord + * + * Caps block record; "data" is a placeholder for the actual data structure + * contained within the record; for example a record containing a FOOBAR + * structure would be of size "sizeof(SVGA3dCapsRecordHeader) + + * sizeof(FOOBAR)". + */ + +typedef +struct SVGA3dCapsRecord { + SVGA3dCapsRecordHeader header; + uint32 data[1]; +} +SVGA3dCapsRecord; + + +typedef uint32 SVGA3dCapPair[2]; + + +/* + *---------------------------------------------------------------------- + * + * SVGA3dCaps_FindRecord + * + * Finds the record with the highest-valued type within the given range + * in the caps block. + * + * Result: pointer to found record, or NULL if not found. + * + *---------------------------------------------------------------------- + */ + +static INLINE SVGA3dCapsRecord * +SVGA3dCaps_FindRecord(const uint32 *capsBlock, + SVGA3dCapsRecordType recordTypeMin, + SVGA3dCapsRecordType recordTypeMax) +{ + SVGA3dCapsRecord *record, *found = NULL; + uint32 offset; + + /* + * Search linearly through the caps block records for the specified type. + */ + for (offset = 0; capsBlock[offset] != 0; offset += capsBlock[offset]) { + record = (SVGA3dCapsRecord *) (capsBlock + offset); + if ((record->header.type >= recordTypeMin) && + (record->header.type <= recordTypeMax) && + (!found || (record->header.type > found->header.type))) { + found = record; + } + } + + return found; +} + + +#endif // _SVGA3D_CAPS_H_ diff --git a/src/gallium/drivers/svga/include/svga3d_reg.h b/src/gallium/drivers/svga/include/svga3d_reg.h new file mode 100644 index 0000000000..77cb453310 --- /dev/null +++ b/src/gallium/drivers/svga/include/svga3d_reg.h @@ -0,0 +1,1793 @@ +/********************************************************** + * Copyright 1998-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/* + * svga3d_reg.h -- + * + * SVGA 3D hardware definitions + */ + +#ifndef _SVGA3D_REG_H_ +#define _SVGA3D_REG_H_ + +#include "svga_reg.h" + + +/* + * 3D Hardware Version + * + * The hardware version is stored in the SVGA_FIFO_3D_HWVERSION fifo + * register. Is set by the host and read by the guest. This lets + * us make new guest drivers which are backwards-compatible with old + * SVGA hardware revisions. It does not let us support old guest + * drivers. Good enough for now. + * + */ + +#define SVGA3D_MAKE_HWVERSION(major, minor) (((major) << 16) | ((minor) & 0xFF)) +#define SVGA3D_MAJOR_HWVERSION(version) ((version) >> 16) +#define SVGA3D_MINOR_HWVERSION(version) ((version) & 0xFF) + +typedef enum { + SVGA3D_HWVERSION_WS5_RC1 = SVGA3D_MAKE_HWVERSION(0, 1), + SVGA3D_HWVERSION_WS5_RC2 = SVGA3D_MAKE_HWVERSION(0, 2), + SVGA3D_HWVERSION_WS51_RC1 = SVGA3D_MAKE_HWVERSION(0, 3), + SVGA3D_HWVERSION_WS6_B1 = SVGA3D_MAKE_HWVERSION(1, 1), + SVGA3D_HWVERSION_FUSION_11 = SVGA3D_MAKE_HWVERSION(1, 4), + SVGA3D_HWVERSION_WS65_B1 = SVGA3D_MAKE_HWVERSION(2, 0), + SVGA3D_HWVERSION_CURRENT = SVGA3D_HWVERSION_WS65_B1, +} SVGA3dHardwareVersion; + +/* + * Generic Types + */ + +typedef uint32 SVGA3dBool; /* 32-bit Bool definition */ +#define SVGA3D_NUM_CLIPPLANES 6 +#define SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS 8 + + +/* + * Surface formats. + * + * If you modify this list, be sure to keep GLUtil.c in sync. It + * includes the internal format definition of each surface in + * GLUtil_ConvertSurfaceFormat, and it contains a table of + * human-readable names in GLUtil_GetFormatName. + */ + +typedef enum SVGA3dSurfaceFormat { + SVGA3D_FORMAT_INVALID = 0, + + SVGA3D_X8R8G8B8 = 1, + SVGA3D_A8R8G8B8 = 2, + + SVGA3D_R5G6B5 = 3, + SVGA3D_X1R5G5B5 = 4, + SVGA3D_A1R5G5B5 = 5, + SVGA3D_A4R4G4B4 = 6, + + SVGA3D_Z_D32 = 7, + SVGA3D_Z_D16 = 8, + SVGA3D_Z_D24S8 = 9, + SVGA3D_Z_D15S1 = 10, + + SVGA3D_LUMINANCE8 = 11, + SVGA3D_LUMINANCE4_ALPHA4 = 12, + SVGA3D_LUMINANCE16 = 13, + SVGA3D_LUMINANCE8_ALPHA8 = 14, + + SVGA3D_DXT1 = 15, + SVGA3D_DXT2 = 16, + SVGA3D_DXT3 = 17, + SVGA3D_DXT4 = 18, + SVGA3D_DXT5 = 19, + + SVGA3D_BUMPU8V8 = 20, + SVGA3D_BUMPL6V5U5 = 21, + SVGA3D_BUMPX8L8V8U8 = 22, + SVGA3D_BUMPL8V8U8 = 23, + + SVGA3D_ARGB_S10E5 = 24, /* 16-bit floating-point ARGB */ + SVGA3D_ARGB_S23E8 = 25, /* 32-bit floating-point ARGB */ + + SVGA3D_A2R10G10B10 = 26, + + /* signed formats */ + SVGA3D_V8U8 = 27, + SVGA3D_Q8W8V8U8 = 28, + SVGA3D_CxV8U8 = 29, + + /* mixed formats */ + SVGA3D_X8L8V8U8 = 30, + SVGA3D_A2W10V10U10 = 31, + + SVGA3D_ALPHA8 = 32, + + /* Single- and dual-component floating point formats */ + SVGA3D_R_S10E5 = 33, + SVGA3D_R_S23E8 = 34, + SVGA3D_RG_S10E5 = 35, + SVGA3D_RG_S23E8 = 36, + + /* + * Any surface can be used as a buffer object, but SVGA3D_BUFFER is + * the most efficient format to use when creating new surfaces + * expressly for index or vertex data. + */ + SVGA3D_BUFFER = 37, + + SVGA3D_Z_D24X8 = 38, + + SVGA3D_V16U16 = 39, + + SVGA3D_G16R16 = 40, + SVGA3D_A16B16G16R16 = 41, + + /* Packed Video formats */ + SVGA3D_UYVY = 42, + SVGA3D_YUY2 = 43, + + SVGA3D_FORMAT_MAX +} SVGA3dSurfaceFormat; + +typedef uint32 SVGA3dColor; /* a, r, g, b */ + +/* + * These match the D3DFORMAT_OP definitions used by Direct3D. We need + * them so that we can query the host for what the supported surface + * operations are (when we're using the D3D backend, in particular), + * and so we can send those operations to the guest. + */ +typedef enum { + SVGA3DFORMAT_OP_TEXTURE = 0x00000001, + SVGA3DFORMAT_OP_VOLUMETEXTURE = 0x00000002, + SVGA3DFORMAT_OP_CUBETEXTURE = 0x00000004, + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET = 0x00000008, + SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET = 0x00000010, + SVGA3DFORMAT_OP_ZSTENCIL = 0x00000040, + SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH = 0x00000080, + +/* + * This format can be used as a render target if the current display mode + * is the same depth if the alpha channel is ignored. e.g. if the device + * can render to A8R8G8B8 when the display mode is X8R8G8B8, then the + * format op list entry for A8R8G8B8 should have this cap. + */ + SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET = 0x00000100, + +/* + * This format contains DirectDraw support (including Flip). This flag + * should not to be set on alpha formats. + */ + SVGA3DFORMAT_OP_DISPLAYMODE = 0x00000400, + +/* + * The rasterizer can support some level of Direct3D support in this format + * and implies that the driver can create a Context in this mode (for some + * render target format). When this flag is set, the SVGA3DFORMAT_OP_DISPLAYMODE + * flag must also be set. + */ + SVGA3DFORMAT_OP_3DACCELERATION = 0x00000800, + +/* + * This is set for a private format when the driver has put the bpp in + * the structure. + */ + SVGA3DFORMAT_OP_PIXELSIZE = 0x00001000, + +/* + * Indicates that this format can be converted to any RGB format for which + * SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB is specified + */ + SVGA3DFORMAT_OP_CONVERT_TO_ARGB = 0x00002000, + +/* + * Indicates that this format can be used to create offscreen plain surfaces. + */ + SVGA3DFORMAT_OP_OFFSCREENPLAIN = 0x00004000, + +/* + * Indicated that this format can be read as an SRGB texture (meaning that the + * sampler will linearize the looked up data) + */ + SVGA3DFORMAT_OP_SRGBREAD = 0x00008000, + +/* + * Indicates that this format can be used in the bumpmap instructions + */ + SVGA3DFORMAT_OP_BUMPMAP = 0x00010000, + +/* + * Indicates that this format can be sampled by the displacement map sampler + */ + SVGA3DFORMAT_OP_DMAP = 0x00020000, + +/* + * Indicates that this format cannot be used with texture filtering + */ + SVGA3DFORMAT_OP_NOFILTER = 0x00040000, + +/* + * Indicates that format conversions are supported to this RGB format if + * SVGA3DFORMAT_OP_CONVERT_TO_ARGB is specified in the source format. + */ + SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB = 0x00080000, + +/* + * Indicated that this format can be written as an SRGB target (meaning that the + * pixel pipe will DE-linearize data on output to format) + */ + SVGA3DFORMAT_OP_SRGBWRITE = 0x00100000, + +/* + * Indicates that this format cannot be used with alpha blending + */ + SVGA3DFORMAT_OP_NOALPHABLEND = 0x00200000, + +/* + * Indicates that the device can auto-generated sublevels for resources + * of this format + */ + SVGA3DFORMAT_OP_AUTOGENMIPMAP = 0x00400000, + +/* + * Indicates that this format can be used by vertex texture sampler + */ + SVGA3DFORMAT_OP_VERTEXTEXTURE = 0x00800000, + +/* + * Indicates that this format supports neither texture coordinate wrap + * modes, nor mipmapping + */ + SVGA3DFORMAT_OP_NOTEXCOORDWRAPNORMIP = 0x01000000 +} SVGA3dFormatOp; + +/* + * This structure is a conversion of SVGA3DFORMAT_OP_*. + * Entries must be located at the same position. + */ +typedef union { + uint32 value; + struct { + uint32 texture : 1; + uint32 volumeTexture : 1; + uint32 cubeTexture : 1; + uint32 offscreenRenderTarget : 1; + uint32 sameFormatRenderTarget : 1; + uint32 unknown1 : 1; + uint32 zStencil : 1; + uint32 zStencilArbitraryDepth : 1; + uint32 sameFormatUpToAlpha : 1; + uint32 unknown2 : 1; + uint32 displayMode : 1; + uint32 acceleration3d : 1; + uint32 pixelSize : 1; + uint32 convertToARGB : 1; + uint32 offscreenPlain : 1; + uint32 sRGBRead : 1; + uint32 bumpMap : 1; + uint32 dmap : 1; + uint32 noFilter : 1; + uint32 memberOfGroupARGB : 1; + uint32 sRGBWrite : 1; + uint32 noAlphaBlend : 1; + uint32 autoGenMipMap : 1; + uint32 vertexTexture : 1; + uint32 noTexCoordWrapNorMip : 1; + }; +} SVGA3dSurfaceFormatCaps; + +/* + * SVGA_3D_CMD_SETRENDERSTATE Types. All value types + * must fit in a uint32. + */ + +typedef enum { + SVGA3D_RS_INVALID = 0, + SVGA3D_RS_ZENABLE = 1, /* SVGA3dBool */ + SVGA3D_RS_ZWRITEENABLE = 2, /* SVGA3dBool */ + SVGA3D_RS_ALPHATESTENABLE = 3, /* SVGA3dBool */ + SVGA3D_RS_DITHERENABLE = 4, /* SVGA3dBool */ + SVGA3D_RS_BLENDENABLE = 5, /* SVGA3dBool */ + SVGA3D_RS_FOGENABLE = 6, /* SVGA3dBool */ + SVGA3D_RS_SPECULARENABLE = 7, /* SVGA3dBool */ + SVGA3D_RS_STENCILENABLE = 8, /* SVGA3dBool */ + SVGA3D_RS_LIGHTINGENABLE = 9, /* SVGA3dBool */ + SVGA3D_RS_NORMALIZENORMALS = 10, /* SVGA3dBool */ + SVGA3D_RS_POINTSPRITEENABLE = 11, /* SVGA3dBool */ + SVGA3D_RS_POINTSCALEENABLE = 12, /* SVGA3dBool */ + SVGA3D_RS_STENCILREF = 13, /* uint32 */ + SVGA3D_RS_STENCILMASK = 14, /* uint32 */ + SVGA3D_RS_STENCILWRITEMASK = 15, /* uint32 */ + SVGA3D_RS_FOGSTART = 16, /* float */ + SVGA3D_RS_FOGEND = 17, /* float */ + SVGA3D_RS_FOGDENSITY = 18, /* float */ + SVGA3D_RS_POINTSIZE = 19, /* float */ + SVGA3D_RS_POINTSIZEMIN = 20, /* float */ + SVGA3D_RS_POINTSIZEMAX = 21, /* float */ + SVGA3D_RS_POINTSCALE_A = 22, /* float */ + SVGA3D_RS_POINTSCALE_B = 23, /* float */ + SVGA3D_RS_POINTSCALE_C = 24, /* float */ + SVGA3D_RS_FOGCOLOR = 25, /* SVGA3dColor */ + SVGA3D_RS_AMBIENT = 26, /* SVGA3dColor */ + SVGA3D_RS_CLIPPLANEENABLE = 27, /* SVGA3dClipPlanes */ + SVGA3D_RS_FOGMODE = 28, /* SVGA3dFogMode */ + SVGA3D_RS_FILLMODE = 29, /* SVGA3dFillMode */ + SVGA3D_RS_SHADEMODE = 30, /* SVGA3dShadeMode */ + SVGA3D_RS_LINEPATTERN = 31, /* SVGA3dLinePattern */ + SVGA3D_RS_SRCBLEND = 32, /* SVGA3dBlendOp */ + SVGA3D_RS_DSTBLEND = 33, /* SVGA3dBlendOp */ + SVGA3D_RS_BLENDEQUATION = 34, /* SVGA3dBlendEquation */ + SVGA3D_RS_CULLMODE = 35, /* SVGA3dFace */ + SVGA3D_RS_ZFUNC = 36, /* SVGA3dCmpFunc */ + SVGA3D_RS_ALPHAFUNC = 37, /* SVGA3dCmpFunc */ + SVGA3D_RS_STENCILFUNC = 38, /* SVGA3dCmpFunc */ + SVGA3D_RS_STENCILFAIL = 39, /* SVGA3dStencilOp */ + SVGA3D_RS_STENCILZFAIL = 40, /* SVGA3dStencilOp */ + SVGA3D_RS_STENCILPASS = 41, /* SVGA3dStencilOp */ + SVGA3D_RS_ALPHAREF = 42, /* float (0.0 .. 1.0) */ + SVGA3D_RS_FRONTWINDING = 43, /* SVGA3dFrontWinding */ + SVGA3D_RS_COORDINATETYPE = 44, /* SVGA3dCoordinateType */ + SVGA3D_RS_ZBIAS = 45, /* float */ + SVGA3D_RS_RANGEFOGENABLE = 46, /* SVGA3dBool */ + SVGA3D_RS_COLORWRITEENABLE = 47, /* SVGA3dColorMask */ + SVGA3D_RS_VERTEXMATERIALENABLE = 48, /* SVGA3dBool */ + SVGA3D_RS_DIFFUSEMATERIALSOURCE = 49, /* SVGA3dVertexMaterial */ + SVGA3D_RS_SPECULARMATERIALSOURCE = 50, /* SVGA3dVertexMaterial */ + SVGA3D_RS_AMBIENTMATERIALSOURCE = 51, /* SVGA3dVertexMaterial */ + SVGA3D_RS_EMISSIVEMATERIALSOURCE = 52, /* SVGA3dVertexMaterial */ + SVGA3D_RS_TEXTUREFACTOR = 53, /* SVGA3dColor */ + SVGA3D_RS_LOCALVIEWER = 54, /* SVGA3dBool */ + SVGA3D_RS_SCISSORTESTENABLE = 55, /* SVGA3dBool */ + SVGA3D_RS_BLENDCOLOR = 56, /* SVGA3dColor */ + SVGA3D_RS_STENCILENABLE2SIDED = 57, /* SVGA3dBool */ + SVGA3D_RS_CCWSTENCILFUNC = 58, /* SVGA3dCmpFunc */ + SVGA3D_RS_CCWSTENCILFAIL = 59, /* SVGA3dStencilOp */ + SVGA3D_RS_CCWSTENCILZFAIL = 60, /* SVGA3dStencilOp */ + SVGA3D_RS_CCWSTENCILPASS = 61, /* SVGA3dStencilOp */ + SVGA3D_RS_VERTEXBLEND = 62, /* SVGA3dVertexBlendFlags */ + SVGA3D_RS_SLOPESCALEDEPTHBIAS = 63, /* float */ + SVGA3D_RS_DEPTHBIAS = 64, /* float */ + + + /* + * Output Gamma Level + * + * Output gamma effects the gamma curve of colors that are output from the + * rendering pipeline. A value of 1.0 specifies a linear color space. If the + * value is <= 0.0, gamma correction is ignored and linear color space is + * used. + */ + + SVGA3D_RS_OUTPUTGAMMA = 65, /* float */ + SVGA3D_RS_ZVISIBLE = 66, /* SVGA3dBool */ + SVGA3D_RS_LASTPIXEL = 67, /* SVGA3dBool */ + SVGA3D_RS_CLIPPING = 68, /* SVGA3dBool */ + SVGA3D_RS_WRAP0 = 69, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP1 = 70, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP2 = 71, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP3 = 72, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP4 = 73, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP5 = 74, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP6 = 75, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP7 = 76, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP8 = 77, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP9 = 78, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP10 = 79, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP11 = 80, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP12 = 81, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP13 = 82, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP14 = 83, /* SVGA3dWrapFlags */ + SVGA3D_RS_WRAP15 = 84, /* SVGA3dWrapFlags */ + SVGA3D_RS_MULTISAMPLEANTIALIAS = 85, /* SVGA3dBool */ + SVGA3D_RS_MULTISAMPLEMASK = 86, /* uint32 */ + SVGA3D_RS_INDEXEDVERTEXBLENDENABLE = 87, /* SVGA3dBool */ + SVGA3D_RS_TWEENFACTOR = 88, /* float */ + SVGA3D_RS_ANTIALIASEDLINEENABLE = 89, /* SVGA3dBool */ + SVGA3D_RS_COLORWRITEENABLE1 = 90, /* SVGA3dColorMask */ + SVGA3D_RS_COLORWRITEENABLE2 = 91, /* SVGA3dColorMask */ + SVGA3D_RS_COLORWRITEENABLE3 = 92, /* SVGA3dColorMask */ + SVGA3D_RS_SEPARATEALPHABLENDENABLE = 93, /* SVGA3dBool */ + SVGA3D_RS_SRCBLENDALPHA = 94, /* SVGA3dBlendOp */ + SVGA3D_RS_DSTBLENDALPHA = 95, /* SVGA3dBlendOp */ + SVGA3D_RS_BLENDEQUATIONALPHA = 96, /* SVGA3dBlendEquation */ + SVGA3D_RS_MAX +} SVGA3dRenderStateName; + +typedef enum { + SVGA3D_VERTEXMATERIAL_NONE = 0, /* Use the value in the current material */ + SVGA3D_VERTEXMATERIAL_DIFFUSE = 1, /* Use the value in the diffuse component */ + SVGA3D_VERTEXMATERIAL_SPECULAR = 2, /* Use the value in the specular component */ +} SVGA3dVertexMaterial; + +typedef enum { + SVGA3D_FILLMODE_INVALID = 0, + SVGA3D_FILLMODE_POINT = 1, + SVGA3D_FILLMODE_LINE = 2, + SVGA3D_FILLMODE_FILL = 3, + SVGA3D_FILLMODE_MAX +} SVGA3dFillModeType; + + +typedef +union { + struct { + uint16 mode; /* SVGA3dFillModeType */ + uint16 face; /* SVGA3dFace */ + }; + uint32 uintValue; +} SVGA3dFillMode; + +typedef enum { + SVGA3D_SHADEMODE_INVALID = 0, + SVGA3D_SHADEMODE_FLAT = 1, + SVGA3D_SHADEMODE_SMOOTH = 2, + SVGA3D_SHADEMODE_PHONG = 3, /* Not supported */ + SVGA3D_SHADEMODE_MAX +} SVGA3dShadeMode; + +typedef +union { + struct { + uint16 repeat; + uint16 pattern; + }; + uint32 uintValue; +} SVGA3dLinePattern; + +typedef enum { + SVGA3D_BLENDOP_INVALID = 0, + SVGA3D_BLENDOP_ZERO = 1, + SVGA3D_BLENDOP_ONE = 2, + SVGA3D_BLENDOP_SRCCOLOR = 3, + SVGA3D_BLENDOP_INVSRCCOLOR = 4, + SVGA3D_BLENDOP_SRCALPHA = 5, + SVGA3D_BLENDOP_INVSRCALPHA = 6, + SVGA3D_BLENDOP_DESTALPHA = 7, + SVGA3D_BLENDOP_INVDESTALPHA = 8, + SVGA3D_BLENDOP_DESTCOLOR = 9, + SVGA3D_BLENDOP_INVDESTCOLOR = 10, + SVGA3D_BLENDOP_SRCALPHASAT = 11, + SVGA3D_BLENDOP_BLENDFACTOR = 12, + SVGA3D_BLENDOP_INVBLENDFACTOR = 13, + SVGA3D_BLENDOP_MAX +} SVGA3dBlendOp; + +typedef enum { + SVGA3D_BLENDEQ_INVALID = 0, + SVGA3D_BLENDEQ_ADD = 1, + SVGA3D_BLENDEQ_SUBTRACT = 2, + SVGA3D_BLENDEQ_REVSUBTRACT = 3, + SVGA3D_BLENDEQ_MINIMUM = 4, + SVGA3D_BLENDEQ_MAXIMUM = 5, + SVGA3D_BLENDEQ_MAX +} SVGA3dBlendEquation; + +typedef enum { + SVGA3D_FRONTWINDING_INVALID = 0, + SVGA3D_FRONTWINDING_CW = 1, + SVGA3D_FRONTWINDING_CCW = 2, + SVGA3D_FRONTWINDING_MAX +} SVGA3dFrontWinding; + +typedef enum { + SVGA3D_FACE_INVALID = 0, + SVGA3D_FACE_NONE = 1, + SVGA3D_FACE_FRONT = 2, + SVGA3D_FACE_BACK = 3, + SVGA3D_FACE_FRONT_BACK = 4, + SVGA3D_FACE_MAX +} SVGA3dFace; + +/* + * The order and the values should not be changed + */ + +typedef enum { + SVGA3D_CMP_INVALID = 0, + SVGA3D_CMP_NEVER = 1, + SVGA3D_CMP_LESS = 2, + SVGA3D_CMP_EQUAL = 3, + SVGA3D_CMP_LESSEQUAL = 4, + SVGA3D_CMP_GREATER = 5, + SVGA3D_CMP_NOTEQUAL = 6, + SVGA3D_CMP_GREATEREQUAL = 7, + SVGA3D_CMP_ALWAYS = 8, + SVGA3D_CMP_MAX +} SVGA3dCmpFunc; + +/* + * SVGA3D_FOGFUNC_* specifies the fog equation, or PER_VERTEX which allows + * the fog factor to be specified in the alpha component of the specular + * (a.k.a. secondary) vertex color. + */ +typedef enum { + SVGA3D_FOGFUNC_INVALID = 0, + SVGA3D_FOGFUNC_EXP = 1, + SVGA3D_FOGFUNC_EXP2 = 2, + SVGA3D_FOGFUNC_LINEAR = 3, + SVGA3D_FOGFUNC_PER_VERTEX = 4 +} SVGA3dFogFunction; + +/* + * SVGA3D_FOGTYPE_* specifies if fog factors are computed on a per-vertex + * or per-pixel basis. + */ +typedef enum { + SVGA3D_FOGTYPE_INVALID = 0, + SVGA3D_FOGTYPE_VERTEX = 1, + SVGA3D_FOGTYPE_PIXEL = 2, + SVGA3D_FOGTYPE_MAX = 3 +} SVGA3dFogType; + +/* + * SVGA3D_FOGBASE_* selects depth or range-based fog. Depth-based fog is + * computed using the eye Z value of each pixel (or vertex), whereas range- + * based fog is computed using the actual distance (range) to the eye. + */ +typedef enum { + SVGA3D_FOGBASE_INVALID = 0, + SVGA3D_FOGBASE_DEPTHBASED = 1, + SVGA3D_FOGBASE_RANGEBASED = 2, + SVGA3D_FOGBASE_MAX = 3 +} SVGA3dFogBase; + +typedef enum { + SVGA3D_STENCILOP_INVALID = 0, + SVGA3D_STENCILOP_KEEP = 1, + SVGA3D_STENCILOP_ZERO = 2, + SVGA3D_STENCILOP_REPLACE = 3, + SVGA3D_STENCILOP_INCRSAT = 4, + SVGA3D_STENCILOP_DECRSAT = 5, + SVGA3D_STENCILOP_INVERT = 6, + SVGA3D_STENCILOP_INCR = 7, + SVGA3D_STENCILOP_DECR = 8, + SVGA3D_STENCILOP_MAX +} SVGA3dStencilOp; + +typedef enum { + SVGA3D_CLIPPLANE_0 = (1 << 0), + SVGA3D_CLIPPLANE_1 = (1 << 1), + SVGA3D_CLIPPLANE_2 = (1 << 2), + SVGA3D_CLIPPLANE_3 = (1 << 3), + SVGA3D_CLIPPLANE_4 = (1 << 4), + SVGA3D_CLIPPLANE_5 = (1 << 5), +} SVGA3dClipPlanes; + +typedef enum { + SVGA3D_CLEAR_COLOR = 0x1, + SVGA3D_CLEAR_DEPTH = 0x2, + SVGA3D_CLEAR_STENCIL = 0x4 +} SVGA3dClearFlag; + +typedef enum { + SVGA3D_RT_DEPTH = 0, + SVGA3D_RT_STENCIL = 1, + SVGA3D_RT_COLOR0 = 2, + SVGA3D_RT_COLOR1 = 3, + SVGA3D_RT_COLOR2 = 4, + SVGA3D_RT_COLOR3 = 5, + SVGA3D_RT_COLOR4 = 6, + SVGA3D_RT_COLOR5 = 7, + SVGA3D_RT_COLOR6 = 8, + SVGA3D_RT_COLOR7 = 9, + SVGA3D_RT_MAX, + SVGA3D_RT_INVALID = ((uint32)-1), +} SVGA3dRenderTargetType; + +#define SVGA3D_MAX_RT_COLOR (SVGA3D_RT_COLOR7 - SVGA3D_RT_COLOR0 + 1) + +typedef +union { + struct { + uint32 red : 1; + uint32 green : 1; + uint32 blue : 1; + uint32 alpha : 1; + }; + uint32 uintValue; +} SVGA3dColorMask; + +typedef enum { + SVGA3D_VBLEND_DISABLE = 0, + SVGA3D_VBLEND_1WEIGHT = 1, + SVGA3D_VBLEND_2WEIGHT = 2, + SVGA3D_VBLEND_3WEIGHT = 3, +} SVGA3dVertexBlendFlags; + +typedef enum { + SVGA3D_WRAPCOORD_0 = 1 << 0, + SVGA3D_WRAPCOORD_1 = 1 << 1, + SVGA3D_WRAPCOORD_2 = 1 << 2, + SVGA3D_WRAPCOORD_3 = 1 << 3, + SVGA3D_WRAPCOORD_ALL = 0xF, +} SVGA3dWrapFlags; + +/* + * SVGA_3D_CMD_TEXTURESTATE Types. All value types + * must fit in a uint32. + */ + +typedef enum { + SVGA3D_TS_INVALID = 0, + SVGA3D_TS_BIND_TEXTURE = 1, /* SVGA3dSurfaceId */ + SVGA3D_TS_COLOROP = 2, /* SVGA3dTextureCombiner */ + SVGA3D_TS_COLORARG1 = 3, /* SVGA3dTextureArgData */ + SVGA3D_TS_COLORARG2 = 4, /* SVGA3dTextureArgData */ + SVGA3D_TS_ALPHAOP = 5, /* SVGA3dTextureCombiner */ + SVGA3D_TS_ALPHAARG1 = 6, /* SVGA3dTextureArgData */ + SVGA3D_TS_ALPHAARG2 = 7, /* SVGA3dTextureArgData */ + SVGA3D_TS_ADDRESSU = 8, /* SVGA3dTextureAddress */ + SVGA3D_TS_ADDRESSV = 9, /* SVGA3dTextureAddress */ + SVGA3D_TS_MIPFILTER = 10, /* SVGA3dTextureFilter */ + SVGA3D_TS_MAGFILTER = 11, /* SVGA3dTextureFilter */ + SVGA3D_TS_MINFILTER = 12, /* SVGA3dTextureFilter */ + SVGA3D_TS_BORDERCOLOR = 13, /* SVGA3dColor */ + SVGA3D_TS_TEXCOORDINDEX = 14, /* uint32 */ + SVGA3D_TS_TEXTURETRANSFORMFLAGS = 15, /* SVGA3dTexTransformFlags */ + SVGA3D_TS_TEXCOORDGEN = 16, /* SVGA3dTextureCoordGen */ + SVGA3D_TS_BUMPENVMAT00 = 17, /* float */ + SVGA3D_TS_BUMPENVMAT01 = 18, /* float */ + SVGA3D_TS_BUMPENVMAT10 = 19, /* float */ + SVGA3D_TS_BUMPENVMAT11 = 20, /* float */ + SVGA3D_TS_TEXTURE_MIPMAP_LEVEL = 21, /* uint32 */ + SVGA3D_TS_TEXTURE_LOD_BIAS = 22, /* float */ + SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL = 23, /* uint32 */ + SVGA3D_TS_ADDRESSW = 24, /* SVGA3dTextureAddress */ + + + /* + * Sampler Gamma Level + * + * Sampler gamma effects the color of samples taken from the sampler. A + * value of 1.0 will produce linear samples. If the value is <= 0.0 the + * gamma value is ignored and a linear space is used. + */ + + SVGA3D_TS_GAMMA = 25, /* float */ + SVGA3D_TS_BUMPENVLSCALE = 26, /* float */ + SVGA3D_TS_BUMPENVLOFFSET = 27, /* float */ + SVGA3D_TS_COLORARG0 = 28, /* SVGA3dTextureArgData */ + SVGA3D_TS_ALPHAARG0 = 29, /* SVGA3dTextureArgData */ + SVGA3D_TS_MAX +} SVGA3dTextureStateName; + +typedef enum { + SVGA3D_TC_INVALID = 0, + SVGA3D_TC_DISABLE = 1, + SVGA3D_TC_SELECTARG1 = 2, + SVGA3D_TC_SELECTARG2 = 3, + SVGA3D_TC_MODULATE = 4, + SVGA3D_TC_ADD = 5, + SVGA3D_TC_ADDSIGNED = 6, + SVGA3D_TC_SUBTRACT = 7, + SVGA3D_TC_BLENDTEXTUREALPHA = 8, + SVGA3D_TC_BLENDDIFFUSEALPHA = 9, + SVGA3D_TC_BLENDCURRENTALPHA = 10, + SVGA3D_TC_BLENDFACTORALPHA = 11, + SVGA3D_TC_MODULATE2X = 12, + SVGA3D_TC_MODULATE4X = 13, + SVGA3D_TC_DSDT = 14, + SVGA3D_TC_DOTPRODUCT3 = 15, + SVGA3D_TC_BLENDTEXTUREALPHAPM = 16, + SVGA3D_TC_ADDSIGNED2X = 17, + SVGA3D_TC_ADDSMOOTH = 18, + SVGA3D_TC_PREMODULATE = 19, + SVGA3D_TC_MODULATEALPHA_ADDCOLOR = 20, + SVGA3D_TC_MODULATECOLOR_ADDALPHA = 21, + SVGA3D_TC_MODULATEINVALPHA_ADDCOLOR = 22, + SVGA3D_TC_MODULATEINVCOLOR_ADDALPHA = 23, + SVGA3D_TC_BUMPENVMAPLUMINANCE = 24, + SVGA3D_TC_MULTIPLYADD = 25, + SVGA3D_TC_LERP = 26, + SVGA3D_TC_MAX +} SVGA3dTextureCombiner; + +#define SVGA3D_TC_CAP_BIT(svga3d_tc_op) (svga3d_tc_op ? (1 << (svga3d_tc_op - 1)) : 0) + +typedef enum { + SVGA3D_TEX_ADDRESS_INVALID = 0, + SVGA3D_TEX_ADDRESS_WRAP = 1, + SVGA3D_TEX_ADDRESS_MIRROR = 2, + SVGA3D_TEX_ADDRESS_CLAMP = 3, + SVGA3D_TEX_ADDRESS_BORDER = 4, + SVGA3D_TEX_ADDRESS_MIRRORONCE = 5, + SVGA3D_TEX_ADDRESS_EDGE = 6, + SVGA3D_TEX_ADDRESS_MAX +} SVGA3dTextureAddress; + +/* + * SVGA3D_TEX_FILTER_NONE as the minification filter means mipmapping is + * disabled, and the rasterizer should use the magnification filter instead. + */ +typedef enum { + SVGA3D_TEX_FILTER_NONE = 0, + SVGA3D_TEX_FILTER_NEAREST = 1, + SVGA3D_TEX_FILTER_LINEAR = 2, + SVGA3D_TEX_FILTER_ANISOTROPIC = 3, + SVGA3D_TEX_FILTER_FLATCUBIC = 4, // Deprecated, not implemented + SVGA3D_TEX_FILTER_GAUSSIANCUBIC = 5, // Deprecated, not implemented + SVGA3D_TEX_FILTER_PYRAMIDALQUAD = 6, // Not currently implemented + SVGA3D_TEX_FILTER_GAUSSIANQUAD = 7, // Not currently implemented + SVGA3D_TEX_FILTER_MAX +} SVGA3dTextureFilter; + +typedef enum { + SVGA3D_TEX_TRANSFORM_OFF = 0, + SVGA3D_TEX_TRANSFORM_S = (1 << 0), + SVGA3D_TEX_TRANSFORM_T = (1 << 1), + SVGA3D_TEX_TRANSFORM_R = (1 << 2), + SVGA3D_TEX_TRANSFORM_Q = (1 << 3), + SVGA3D_TEX_PROJECTED = (1 << 15), +} SVGA3dTexTransformFlags; + +typedef enum { + SVGA3D_TEXCOORD_GEN_OFF = 0, + SVGA3D_TEXCOORD_GEN_EYE_POSITION = 1, + SVGA3D_TEXCOORD_GEN_EYE_NORMAL = 2, + SVGA3D_TEXCOORD_GEN_REFLECTIONVECTOR = 3, + SVGA3D_TEXCOORD_GEN_SPHERE = 4, + SVGA3D_TEXCOORD_GEN_MAX +} SVGA3dTextureCoordGen; + +/* + * Texture argument constants for texture combiner + */ +typedef enum { + SVGA3D_TA_INVALID = 0, + SVGA3D_TA_CONSTANT = 1, + SVGA3D_TA_PREVIOUS = 2, + SVGA3D_TA_DIFFUSE = 3, + SVGA3D_TA_TEXTURE = 4, + SVGA3D_TA_SPECULAR = 5, + SVGA3D_TA_MAX +} SVGA3dTextureArgData; + +#define SVGA3D_TM_MASK_LEN 4 + +/* Modifiers for texture argument constants defined above. */ +typedef enum { + SVGA3D_TM_NONE = 0, + SVGA3D_TM_ALPHA = (1 << SVGA3D_TM_MASK_LEN), + SVGA3D_TM_ONE_MINUS = (2 << SVGA3D_TM_MASK_LEN), +} SVGA3dTextureArgModifier; + +#define SVGA3D_INVALID_ID ((uint32)-1) +#define SVGA3D_MAX_CLIP_PLANES 6 + +/* + * This is the limit to the number of fixed-function texture + * transforms and texture coordinates we can support. It does *not* + * correspond to the number of texture image units (samplers) we + * support! + */ +#define SVGA3D_MAX_TEXTURE_COORDS 8 + +/* + * Vertex declarations + * + * Notes: + * + * SVGA3D_DECLUSAGE_POSITIONT is for pre-transformed vertices. If you + * draw with any POSITIONT vertex arrays, the programmable vertex + * pipeline will be implicitly disabled. Drawing will take place as if + * no vertex shader was bound. + */ + +typedef enum { + SVGA3D_DECLUSAGE_POSITION = 0, + SVGA3D_DECLUSAGE_BLENDWEIGHT, // 1 + SVGA3D_DECLUSAGE_BLENDINDICES, // 2 + SVGA3D_DECLUSAGE_NORMAL, // 3 + SVGA3D_DECLUSAGE_PSIZE, // 4 + SVGA3D_DECLUSAGE_TEXCOORD, // 5 + SVGA3D_DECLUSAGE_TANGENT, // 6 + SVGA3D_DECLUSAGE_BINORMAL, // 7 + SVGA3D_DECLUSAGE_TESSFACTOR, // 8 + SVGA3D_DECLUSAGE_POSITIONT, // 9 + SVGA3D_DECLUSAGE_COLOR, // 10 + SVGA3D_DECLUSAGE_FOG, // 11 + SVGA3D_DECLUSAGE_DEPTH, // 12 + SVGA3D_DECLUSAGE_SAMPLE, // 13 + SVGA3D_DECLUSAGE_MAX +} SVGA3dDeclUsage; + +typedef enum { + SVGA3D_DECLMETHOD_DEFAULT = 0, + SVGA3D_DECLMETHOD_PARTIALU, + SVGA3D_DECLMETHOD_PARTIALV, + SVGA3D_DECLMETHOD_CROSSUV, // Normal + SVGA3D_DECLMETHOD_UV, + SVGA3D_DECLMETHOD_LOOKUP, // Lookup a displacement map + SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, // Lookup a pre-sampled displacement map +} SVGA3dDeclMethod; + +typedef enum { + SVGA3D_DECLTYPE_FLOAT1 = 0, + SVGA3D_DECLTYPE_FLOAT2 = 1, + SVGA3D_DECLTYPE_FLOAT3 = 2, + SVGA3D_DECLTYPE_FLOAT4 = 3, + SVGA3D_DECLTYPE_D3DCOLOR = 4, + SVGA3D_DECLTYPE_UBYTE4 = 5, + SVGA3D_DECLTYPE_SHORT2 = 6, + SVGA3D_DECLTYPE_SHORT4 = 7, + SVGA3D_DECLTYPE_UBYTE4N = 8, + SVGA3D_DECLTYPE_SHORT2N = 9, + SVGA3D_DECLTYPE_SHORT4N = 10, + SVGA3D_DECLTYPE_USHORT2N = 11, + SVGA3D_DECLTYPE_USHORT4N = 12, + SVGA3D_DECLTYPE_UDEC3 = 13, + SVGA3D_DECLTYPE_DEC3N = 14, + SVGA3D_DECLTYPE_FLOAT16_2 = 15, + SVGA3D_DECLTYPE_FLOAT16_4 = 16, + SVGA3D_DECLTYPE_MAX, +} SVGA3dDeclType; + +/* + * This structure is used for the divisor for geometry instancing; + * it's a direct translation of the Direct3D equivalent. + */ +typedef union { + struct { + /* + * For index data, this number represents the number of instances to draw. + * For instance data, this number represents the number of + * instances/vertex in this stream + */ + uint32 count : 30; + + /* + * This is 1 if this is supposed to be the data that is repeated for + * every instance. + */ + uint32 indexedData : 1; + + /* + * This is 1 if this is supposed to be the per-instance data. + */ + uint32 instanceData : 1; + }; + + uint32 value; +} SVGA3dVertexDivisor; + +typedef enum { + SVGA3D_PRIMITIVE_INVALID = 0, + SVGA3D_PRIMITIVE_TRIANGLELIST = 1, + SVGA3D_PRIMITIVE_POINTLIST = 2, + SVGA3D_PRIMITIVE_LINELIST = 3, + SVGA3D_PRIMITIVE_LINESTRIP = 4, + SVGA3D_PRIMITIVE_TRIANGLESTRIP = 5, + SVGA3D_PRIMITIVE_TRIANGLEFAN = 6, + SVGA3D_PRIMITIVE_MAX +} SVGA3dPrimitiveType; + +typedef enum { + SVGA3D_COORDINATE_INVALID = 0, + SVGA3D_COORDINATE_LEFTHANDED = 1, + SVGA3D_COORDINATE_RIGHTHANDED = 2, + SVGA3D_COORDINATE_MAX +} SVGA3dCoordinateType; + +typedef enum { + SVGA3D_TRANSFORM_INVALID = 0, + SVGA3D_TRANSFORM_WORLD = 1, + SVGA3D_TRANSFORM_VIEW = 2, + SVGA3D_TRANSFORM_PROJECTION = 3, + SVGA3D_TRANSFORM_TEXTURE0 = 4, + SVGA3D_TRANSFORM_TEXTURE1 = 5, + SVGA3D_TRANSFORM_TEXTURE2 = 6, + SVGA3D_TRANSFORM_TEXTURE3 = 7, + SVGA3D_TRANSFORM_TEXTURE4 = 8, + SVGA3D_TRANSFORM_TEXTURE5 = 9, + SVGA3D_TRANSFORM_TEXTURE6 = 10, + SVGA3D_TRANSFORM_TEXTURE7 = 11, + SVGA3D_TRANSFORM_WORLD1 = 12, + SVGA3D_TRANSFORM_WORLD2 = 13, + SVGA3D_TRANSFORM_WORLD3 = 14, + SVGA3D_TRANSFORM_MAX +} SVGA3dTransformType; + +typedef enum { + SVGA3D_LIGHTTYPE_INVALID = 0, + SVGA3D_LIGHTTYPE_POINT = 1, + SVGA3D_LIGHTTYPE_SPOT1 = 2, /* 1-cone, in degrees */ + SVGA3D_LIGHTTYPE_SPOT2 = 3, /* 2-cone, in radians */ + SVGA3D_LIGHTTYPE_DIRECTIONAL = 4, + SVGA3D_LIGHTTYPE_MAX +} SVGA3dLightType; + +typedef enum { + SVGA3D_CUBEFACE_POSX = 0, + SVGA3D_CUBEFACE_NEGX = 1, + SVGA3D_CUBEFACE_POSY = 2, + SVGA3D_CUBEFACE_NEGY = 3, + SVGA3D_CUBEFACE_POSZ = 4, + SVGA3D_CUBEFACE_NEGZ = 5, +} SVGA3dCubeFace; + +typedef enum { + SVGA3D_SHADERTYPE_COMPILED_DX8 = 0, + SVGA3D_SHADERTYPE_VS = 1, + SVGA3D_SHADERTYPE_PS = 2, + SVGA3D_SHADERTYPE_MAX +} SVGA3dShaderType; + +typedef enum { + SVGA3D_CONST_TYPE_FLOAT = 0, + SVGA3D_CONST_TYPE_INT = 1, + SVGA3D_CONST_TYPE_BOOL = 2, +} SVGA3dShaderConstType; + +#define SVGA3D_MAX_SURFACE_FACES 6 + +typedef enum { + SVGA3D_STRETCH_BLT_POINT = 0, + SVGA3D_STRETCH_BLT_LINEAR = 1, + SVGA3D_STRETCH_BLT_MAX +} SVGA3dStretchBltMode; + +typedef enum { + SVGA3D_QUERYTYPE_OCCLUSION = 0, + SVGA3D_QUERYTYPE_MAX +} SVGA3dQueryType; + +typedef enum { + SVGA3D_QUERYSTATE_PENDING = 0, /* Waiting on the host (set by guest) */ + SVGA3D_QUERYSTATE_SUCCEEDED = 1, /* Completed successfully (set by host) */ + SVGA3D_QUERYSTATE_FAILED = 2, /* Completed unsuccessfully (set by host) */ + SVGA3D_QUERYSTATE_NEW = 3, /* Never submitted (For guest use only) */ +} SVGA3dQueryState; + +typedef enum { + SVGA3D_WRITE_HOST_VRAM = 1, + SVGA3D_READ_HOST_VRAM = 2, +} SVGA3dTransferType; + +/* + * The maximum number vertex arrays we're guaranteed to support in + * SVGA_3D_CMD_DRAWPRIMITIVES. + */ +#define SVGA3D_MAX_VERTEX_ARRAYS 32 + +/* + * Identifiers for commands in the command FIFO. + * + * IDs between 1000 and 1039 (inclusive) were used by obsolete versions of + * the SVGA3D protocol and remain reserved; they should not be used in the + * future. + * + * IDs between 1040 and 1999 (inclusive) are available for use by the + * current SVGA3D protocol. + * + * FIFO clients other than SVGA3D should stay below 1000, or at 2000 + * and up. + */ + +#define SVGA_3D_CMD_LEGACY_BASE 1000 +#define SVGA_3D_CMD_BASE 1040 + +#define SVGA_3D_CMD_SURFACE_DEFINE SVGA_3D_CMD_BASE + 0 +#define SVGA_3D_CMD_SURFACE_DESTROY SVGA_3D_CMD_BASE + 1 +#define SVGA_3D_CMD_SURFACE_COPY SVGA_3D_CMD_BASE + 2 +#define SVGA_3D_CMD_SURFACE_STRETCHBLT SVGA_3D_CMD_BASE + 3 +#define SVGA_3D_CMD_SURFACE_DMA SVGA_3D_CMD_BASE + 4 +#define SVGA_3D_CMD_CONTEXT_DEFINE SVGA_3D_CMD_BASE + 5 +#define SVGA_3D_CMD_CONTEXT_DESTROY SVGA_3D_CMD_BASE + 6 +#define SVGA_3D_CMD_SETTRANSFORM SVGA_3D_CMD_BASE + 7 +#define SVGA_3D_CMD_SETZRANGE SVGA_3D_CMD_BASE + 8 +#define SVGA_3D_CMD_SETRENDERSTATE SVGA_3D_CMD_BASE + 9 +#define SVGA_3D_CMD_SETRENDERTARGET SVGA_3D_CMD_BASE + 10 +#define SVGA_3D_CMD_SETTEXTURESTATE SVGA_3D_CMD_BASE + 11 +#define SVGA_3D_CMD_SETMATERIAL SVGA_3D_CMD_BASE + 12 +#define SVGA_3D_CMD_SETLIGHTDATA SVGA_3D_CMD_BASE + 13 +#define SVGA_3D_CMD_SETLIGHTENABLED SVGA_3D_CMD_BASE + 14 +#define SVGA_3D_CMD_SETVIEWPORT SVGA_3D_CMD_BASE + 15 +#define SVGA_3D_CMD_SETCLIPPLANE SVGA_3D_CMD_BASE + 16 +#define SVGA_3D_CMD_CLEAR SVGA_3D_CMD_BASE + 17 +#define SVGA_3D_CMD_PRESENT SVGA_3D_CMD_BASE + 18 // Deprecated +#define SVGA_3D_CMD_SHADER_DEFINE SVGA_3D_CMD_BASE + 19 +#define SVGA_3D_CMD_SHADER_DESTROY SVGA_3D_CMD_BASE + 20 +#define SVGA_3D_CMD_SET_SHADER SVGA_3D_CMD_BASE + 21 +#define SVGA_3D_CMD_SET_SHADER_CONST SVGA_3D_CMD_BASE + 22 +#define SVGA_3D_CMD_DRAW_PRIMITIVES SVGA_3D_CMD_BASE + 23 +#define SVGA_3D_CMD_SETSCISSORRECT SVGA_3D_CMD_BASE + 24 +#define SVGA_3D_CMD_BEGIN_QUERY SVGA_3D_CMD_BASE + 25 +#define SVGA_3D_CMD_END_QUERY SVGA_3D_CMD_BASE + 26 +#define SVGA_3D_CMD_WAIT_FOR_QUERY SVGA_3D_CMD_BASE + 27 +#define SVGA_3D_CMD_PRESENT_READBACK SVGA_3D_CMD_BASE + 28 // Deprecated +#define SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN SVGA_3D_CMD_BASE + 29 +#define SVGA_3D_CMD_MAX SVGA_3D_CMD_BASE + 30 + +#define SVGA_3D_CMD_FUTURE_MAX 2000 + +/* + * Common substructures used in multiple FIFO commands: + */ + +typedef struct { + union { + struct { + uint16 function; // SVGA3dFogFunction + uint8 type; // SVGA3dFogType + uint8 base; // SVGA3dFogBase + }; + uint32 uintValue; + }; +} SVGA3dFogMode; + +/* + * Uniquely identify one image (a 1D/2D/3D array) from a surface. This + * is a surface ID as well as face/mipmap indices. + */ + +typedef +struct SVGA3dSurfaceImageId { + uint32 sid; + uint32 face; + uint32 mipmap; +} SVGA3dSurfaceImageId; + +typedef +struct SVGA3dGuestImage { + SVGAGuestPtr ptr; + + /* + * A note on interpretation of pitch: This value of pitch is the + * number of bytes between vertically adjacent image + * blocks. Normally this is the number of bytes between the first + * pixel of two adjacent scanlines. With compressed textures, + * however, this may represent the number of bytes between + * compression blocks rather than between rows of pixels. + * + * XXX: Compressed textures currently must be tightly packed in guest memory. + * + * If the image is 1-dimensional, pitch is ignored. + * + * If 'pitch' is zero, the SVGA3D device calculates a pitch value + * assuming each row of blocks is tightly packed. + */ + uint32 pitch; +} SVGA3dGuestImage; + + +/* + * FIFO command format definitions: + */ + +/* + * The data size header following cmdNum for every 3d command + */ +typedef +struct { + uint32 id; + uint32 size; +} SVGA3dCmdHeader; + +/* + * A surface is a hierarchy of host VRAM surfaces: 1D, 2D, or 3D, with + * optional mipmaps and cube faces. + */ + +typedef +struct { + uint32 width; + uint32 height; + uint32 depth; +} SVGA3dSize; + +typedef enum { + SVGA3D_SURFACE_CUBEMAP = (1 << 0), + SVGA3D_SURFACE_HINT_STATIC = (1 << 1), + SVGA3D_SURFACE_HINT_DYNAMIC = (1 << 2), + SVGA3D_SURFACE_HINT_INDEXBUFFER = (1 << 3), + SVGA3D_SURFACE_HINT_VERTEXBUFFER = (1 << 4), + SVGA3D_SURFACE_HINT_TEXTURE = (1 << 5), + SVGA3D_SURFACE_HINT_RENDERTARGET = (1 << 6), + SVGA3D_SURFACE_HINT_DEPTHSTENCIL = (1 << 7), + SVGA3D_SURFACE_HINT_WRITEONLY = (1 << 8), +} SVGA3dSurfaceFlags; + +typedef +struct { + uint32 numMipLevels; +} SVGA3dSurfaceFace; + +typedef +struct { + uint32 sid; + SVGA3dSurfaceFlags surfaceFlags; + SVGA3dSurfaceFormat format; + SVGA3dSurfaceFace face[SVGA3D_MAX_SURFACE_FACES]; + /* + * Followed by an SVGA3dSize structure for each mip level in each face. + * + * A note on surface sizes: Sizes are always specified in pixels, + * even if the true surface size is not a multiple of the minimum + * block size of the surface's format. For example, a 3x3x1 DXT1 + * compressed texture would actually be stored as a 4x4x1 image in + * memory. + */ +} SVGA3dCmdDefineSurface; /* SVGA_3D_CMD_SURFACE_DEFINE */ + +typedef +struct { + uint32 sid; +} SVGA3dCmdDestroySurface; /* SVGA_3D_CMD_SURFACE_DESTROY */ + +typedef +struct { + uint32 cid; +} SVGA3dCmdDefineContext; /* SVGA_3D_CMD_CONTEXT_DEFINE */ + +typedef +struct { + uint32 cid; +} SVGA3dCmdDestroyContext; /* SVGA_3D_CMD_CONTEXT_DESTROY */ + +typedef +struct { + uint32 cid; + SVGA3dClearFlag clearFlag; + uint32 color; + float depth; + uint32 stencil; + /* Followed by variable number of SVGA3dRect structures */ +} SVGA3dCmdClear; /* SVGA_3D_CMD_CLEAR */ + +typedef +struct SVGA3dCopyRect { + uint32 x; + uint32 y; + uint32 w; + uint32 h; + uint32 srcx; + uint32 srcy; +} SVGA3dCopyRect; + +typedef +struct SVGA3dCopyBox { + uint32 x; + uint32 y; + uint32 z; + uint32 w; + uint32 h; + uint32 d; + uint32 srcx; + uint32 srcy; + uint32 srcz; +} SVGA3dCopyBox; + +typedef +struct { + uint32 x; + uint32 y; + uint32 w; + uint32 h; +} SVGA3dRect; + +typedef +struct { + uint32 x; + uint32 y; + uint32 z; + uint32 w; + uint32 h; + uint32 d; +} SVGA3dBox; + +typedef +struct { + uint32 x; + uint32 y; + uint32 z; +} SVGA3dPoint; + +typedef +struct { + SVGA3dLightType type; + SVGA3dBool inWorldSpace; + float diffuse[4]; + float specular[4]; + float ambient[4]; + float position[4]; + float direction[4]; + float range; + float falloff; + float attenuation0; + float attenuation1; + float attenuation2; + float theta; + float phi; +} SVGA3dLightData; + +typedef +struct { + uint32 sid; + /* Followed by variable number of SVGA3dCopyRect structures */ +} SVGA3dCmdPresent; /* SVGA_3D_CMD_PRESENT */ + +typedef +struct { + SVGA3dRenderStateName state; + union { + uint32 uintValue; + float floatValue; + }; +} SVGA3dRenderState; + +typedef +struct { + uint32 cid; + /* Followed by variable number of SVGA3dRenderState structures */ +} SVGA3dCmdSetRenderState; /* SVGA_3D_CMD_SETRENDERSTATE */ + +typedef +struct { + uint32 cid; + SVGA3dRenderTargetType type; + SVGA3dSurfaceImageId target; +} SVGA3dCmdSetRenderTarget; /* SVGA_3D_CMD_SETRENDERTARGET */ + +typedef +struct { + SVGA3dSurfaceImageId src; + SVGA3dSurfaceImageId dest; + /* Followed by variable number of SVGA3dCopyBox structures */ +} SVGA3dCmdSurfaceCopy; /* SVGA_3D_CMD_SURFACE_COPY */ + +typedef +struct { + SVGA3dSurfaceImageId src; + SVGA3dSurfaceImageId dest; + SVGA3dBox boxSrc; + SVGA3dBox boxDest; + SVGA3dStretchBltMode mode; +} SVGA3dCmdSurfaceStretchBlt; /* SVGA_3D_CMD_SURFACE_STRETCHBLT */ + +typedef +struct { + /* + * If the discard flag is present in a surface DMA operation, the host may + * discard the contents of the current mipmap level and face of the target + * surface before applying the surface DMA contents. + */ + uint32 discard : 1; + + /* + * If the unsynchronized flag is present, the host may perform this upload + * without syncing to pending reads on this surface. + */ + uint32 unsynchronized : 1; + + /* + * Guests *MUST* set the reserved bits to 0 before submitting the command + * suffix as future flags may occupy these bits. + */ + uint32 reserved : 30; +} SVGA3dSurfaceDMAFlags; + +typedef +struct { + SVGA3dGuestImage guest; + SVGA3dSurfaceImageId host; + SVGA3dTransferType transfer; + /* + * Followed by variable number of SVGA3dCopyBox structures. For consistency + * in all clipping logic and coordinate translation, we define the + * "source" in each copyBox as the guest image and the + * "destination" as the host image, regardless of transfer + * direction. + * + * For efficiency, the SVGA3D device is free to copy more data than + * specified. For example, it may round copy boxes outwards such + * that they lie on particular alignment boundaries. + */ +} SVGA3dCmdSurfaceDMA; /* SVGA_3D_CMD_SURFACE_DMA */ + +/* + * SVGA3dCmdSurfaceDMASuffix -- + * + * This is a command suffix that will appear after a SurfaceDMA command in + * the FIFO. It contains some extra information that hosts may use to + * optimize performance or protect the guest. This suffix exists to preserve + * backwards compatibility while also allowing for new functionality to be + * implemented. + */ + +typedef +struct { + uint32 suffixSize; + + /* + * The maximum offset is used to determine the maximum offset from the + * guestPtr base address that will be accessed or written to during this + * surfaceDMA. If the suffix is supported, the host will respect this + * boundary while performing surface DMAs. + * + * Defaults to MAX_UINT32 + */ + uint32 maximumOffset; + + /* + * A set of flags that describes optimizations that the host may perform + * while performing this surface DMA operation. The guest should never rely + * on behaviour that is different when these flags are set for correctness. + * + * Defaults to 0 + */ + SVGA3dSurfaceDMAFlags flags; +} SVGA3dCmdSurfaceDMASuffix; + +/* + * SVGA_3D_CMD_DRAW_PRIMITIVES -- + * + * This command is the SVGA3D device's generic drawing entry point. + * It can draw multiple ranges of primitives, optionally using an + * index buffer, using an arbitrary collection of vertex buffers. + * + * Each SVGA3dVertexDecl defines a distinct vertex array to bind + * during this draw call. The declarations specify which surface + * the vertex data lives in, what that vertex data is used for, + * and how to interpret it. + * + * Each SVGA3dPrimitiveRange defines a collection of primitives + * to render using the same vertex arrays. An index buffer is + * optional. + */ + +typedef +struct { + /* + * A range hint is an optional specification for the range of indices + * in an SVGA3dArray that will be used. If 'last' is zero, it is assumed + * that the entire array will be used. + * + * These are only hints. The SVGA3D device may use them for + * performance optimization if possible, but it's also allowed to + * ignore these values. + */ + uint32 first; + uint32 last; +} SVGA3dArrayRangeHint; + +typedef +struct { + /* + * Define the origin and shape of a vertex or index array. Both + * 'offset' and 'stride' are in bytes. The provided surface will be + * reinterpreted as a flat array of bytes in the same format used + * by surface DMA operations. To avoid unnecessary conversions, the + * surface should be created with the SVGA3D_BUFFER format. + * + * Index 0 in the array starts 'offset' bytes into the surface. + * Index 1 begins at byte 'offset + stride', etc. Array indices may + * not be negative. + */ + uint32 surfaceId; + uint32 offset; + uint32 stride; +} SVGA3dArray; + +typedef +struct { + /* + * Describe a vertex array's data type, and define how it is to be + * used by the fixed function pipeline or the vertex shader. It + * isn't useful to have two VertexDecls with the same + * VertexArrayIdentity in one draw call. + */ + SVGA3dDeclType type; + SVGA3dDeclMethod method; + SVGA3dDeclUsage usage; + uint32 usageIndex; +} SVGA3dVertexArrayIdentity; + +typedef +struct { + SVGA3dVertexArrayIdentity identity; + SVGA3dArray array; + SVGA3dArrayRangeHint rangeHint; +} SVGA3dVertexDecl; + +typedef +struct { + /* + * Define a group of primitives to render, from sequential indices. + * + * The value of 'primitiveType' and 'primitiveCount' imply the + * total number of vertices that will be rendered. + */ + SVGA3dPrimitiveType primType; + uint32 primitiveCount; + + /* + * Optional index buffer. If indexArray.surfaceId is + * SVGA3D_INVALID_ID, we render without an index buffer. Rendering + * without an index buffer is identical to rendering with an index + * buffer containing the sequence [0, 1, 2, 3, ...]. + * + * If an index buffer is in use, indexWidth specifies the width in + * bytes of each index value. It must be less than or equal to + * indexArray.stride. + * + * (Currently, the SVGA3D device requires index buffers to be tightly + * packed. In other words, indexWidth == indexArray.stride) + */ + SVGA3dArray indexArray; + uint32 indexWidth; + + /* + * Optional index bias. This number is added to all indices from + * indexArray before they are used as vertex array indices. This + * can be used in multiple ways: + * + * - When not using an indexArray, this bias can be used to + * specify where in the vertex arrays to begin rendering. + * + * - A positive number here is equivalent to increasing the + * offset in each vertex array. + * + * - A negative number can be used to render using a small + * vertex array and an index buffer that contains large + * values. This may be used by some applications that + * crop a vertex buffer without modifying their index + * buffer. + * + * Note that rendering with a negative bias value may be slower and + * use more memory than rendering with a positive or zero bias. + */ + int32 indexBias; +} SVGA3dPrimitiveRange; + +typedef +struct { + uint32 cid; + uint32 numVertexDecls; + uint32 numRanges; + + /* + * There are two variable size arrays after the + * SVGA3dCmdDrawPrimitives structure. In order, + * they are: + * + * 1. SVGA3dVertexDecl, quantity 'numVertexDecls' + * 2. SVGA3dPrimitiveRange, quantity 'numRanges' + * 3. Optionally, SVGA3dVertexDivisor, quantity 'numVertexDecls' (contains + * the frequency divisor for this the corresponding vertex decl) + */ +} SVGA3dCmdDrawPrimitives; /* SVGA_3D_CMD_DRAWPRIMITIVES */ + +typedef +struct { + uint32 stage; + SVGA3dTextureStateName name; + union { + uint32 value; + float floatValue; + }; +} SVGA3dTextureState; + +typedef +struct { + uint32 cid; + /* Followed by variable number of SVGA3dTextureState structures */ +} SVGA3dCmdSetTextureState; /* SVGA_3D_CMD_SETTEXTURESTATE */ + +typedef +struct { + uint32 cid; + SVGA3dTransformType type; + float matrix[16]; +} SVGA3dCmdSetTransform; /* SVGA_3D_CMD_SETTRANSFORM */ + +typedef +struct { + float min; + float max; +} SVGA3dZRange; + +typedef +struct { + uint32 cid; + SVGA3dZRange zRange; +} SVGA3dCmdSetZRange; /* SVGA_3D_CMD_SETZRANGE */ + +typedef +struct { + float diffuse[4]; + float ambient[4]; + float specular[4]; + float emissive[4]; + float shininess; +} SVGA3dMaterial; + +typedef +struct { + uint32 cid; + SVGA3dFace face; + SVGA3dMaterial material; +} SVGA3dCmdSetMaterial; /* SVGA_3D_CMD_SETMATERIAL */ + +typedef +struct { + uint32 cid; + uint32 index; + SVGA3dLightData data; +} SVGA3dCmdSetLightData; /* SVGA_3D_CMD_SETLIGHTDATA */ + +typedef +struct { + uint32 cid; + uint32 index; + uint32 enabled; +} SVGA3dCmdSetLightEnabled; /* SVGA_3D_CMD_SETLIGHTENABLED */ + +typedef +struct { + uint32 cid; + SVGA3dRect rect; +} SVGA3dCmdSetViewport; /* SVGA_3D_CMD_SETVIEWPORT */ + +typedef +struct { + uint32 cid; + SVGA3dRect rect; +} SVGA3dCmdSetScissorRect; /* SVGA_3D_CMD_SETSCISSORRECT */ + +typedef +struct { + uint32 cid; + uint32 index; + float plane[4]; +} SVGA3dCmdSetClipPlane; /* SVGA_3D_CMD_SETCLIPPLANE */ + +typedef +struct { + uint32 cid; + uint32 shid; + SVGA3dShaderType type; + /* Followed by variable number of DWORDs for shader bycode */ +} SVGA3dCmdDefineShader; /* SVGA_3D_CMD_SHADER_DEFINE */ + +typedef +struct { + uint32 cid; + uint32 shid; + SVGA3dShaderType type; +} SVGA3dCmdDestroyShader; /* SVGA_3D_CMD_SHADER_DESTROY */ + +typedef +struct { + uint32 cid; + uint32 reg; /* register number */ + SVGA3dShaderType type; + SVGA3dShaderConstType ctype; + uint32 values[4]; +} SVGA3dCmdSetShaderConst; /* SVGA_3D_CMD_SET_SHADER_CONST */ + +typedef +struct { + uint32 cid; + SVGA3dShaderType type; + uint32 shid; +} SVGA3dCmdSetShader; /* SVGA_3D_CMD_SET_SHADER */ + +typedef +struct { + uint32 cid; + SVGA3dQueryType type; +} SVGA3dCmdBeginQuery; /* SVGA_3D_CMD_BEGIN_QUERY */ + +typedef +struct { + uint32 cid; + SVGA3dQueryType type; + SVGAGuestPtr guestResult; /* Points to an SVGA3dQueryResult structure */ +} SVGA3dCmdEndQuery; /* SVGA_3D_CMD_END_QUERY */ + +typedef +struct { + uint32 cid; /* Same parameters passed to END_QUERY */ + SVGA3dQueryType type; + SVGAGuestPtr guestResult; +} SVGA3dCmdWaitForQuery; /* SVGA_3D_CMD_WAIT_FOR_QUERY */ + +typedef +struct { + uint32 totalSize; /* Set by guest before query is ended. */ + SVGA3dQueryState state; /* Set by host or guest. See SVGA3dQueryState. */ + union { /* Set by host on exit from PENDING state */ + uint32 result32; + }; +} SVGA3dQueryResult; + +/* + * SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN -- + * + * This is a blit from an SVGA3D surface to a Screen Object. Just + * like GMR-to-screen blits, this blit may be directed at a + * specific screen or to the virtual coordinate space. + * + * The blit copies from a rectangular region of an SVGA3D surface + * image to a rectangular region of a screen or screens. + * + * This command takes an optional variable-length list of clipping + * rectangles after the body of the command. If no rectangles are + * specified, there is no clipping region. The entire destRect is + * drawn to. If one or more rectangles are included, they describe + * a clipping region. The clip rectangle coordinates are measured + * relative to the top-left corner of destRect. + * + * This clipping region serves multiple purposes: + * + * - It can be used to perform an irregularly shaped blit more + * efficiently than by issuing many separate blit commands. + * + * - It is equivalent to allowing blits with non-integer + * source coordinates. You could blit just one half-pixel + * of a source, for example, by specifying a larger + * destination rectangle than you need, then removing + * part of it using a clip rectangle. + * + * Availability: + * SVGA_FIFO_CAP_SCREEN_OBJECT + * + * Limitations: + * + * - Currently, no backend supports blits from a mipmap or face + * other than the first one. + */ + +typedef +struct { + SVGA3dSurfaceImageId srcImage; + SVGASignedRect srcRect; + uint32 destScreenId; /* Screen ID or SVGA_ID_INVALID for virt. coords */ + SVGASignedRect destRect; /* Supports scaling if src/rest different size */ + /* Clipping: zero or more SVGASignedRects follow */ +} SVGA3dCmdBlitSurfaceToScreen; /* SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN */ + + +/* + * Capability query index. + * + * Notes: + * + * 1. SVGA3D_DEVCAP_MAX_TEXTURES reflects the maximum number of + * fixed-function texture units available. Each of these units + * work in both FFP and Shader modes, and they support texture + * transforms and texture coordinates. The host may have additional + * texture image units that are only usable with shaders. + * + * 2. The BUFFER_FORMAT capabilities are deprecated, and they always + * return TRUE. Even on physical hardware that does not support + * these formats natively, the SVGA3D device will provide an emulation + * which should be invisible to the guest OS. + * + * In general, the SVGA3D device should support any operation on + * any surface format, it just may perform some of these + * operations in software depending on the capabilities of the + * available physical hardware. + * + * XXX: In the future, we will add capabilities that describe in + * detail what formats are supported in hardware for what kinds + * of operations. + */ + +typedef enum { + SVGA3D_DEVCAP_3D = 0, + SVGA3D_DEVCAP_MAX_LIGHTS = 1, + SVGA3D_DEVCAP_MAX_TEXTURES = 2, /* See note (1) */ + SVGA3D_DEVCAP_MAX_CLIP_PLANES = 3, + SVGA3D_DEVCAP_VERTEX_SHADER_VERSION = 4, + SVGA3D_DEVCAP_VERTEX_SHADER = 5, + SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION = 6, + SVGA3D_DEVCAP_FRAGMENT_SHADER = 7, + SVGA3D_DEVCAP_MAX_RENDER_TARGETS = 8, + SVGA3D_DEVCAP_S23E8_TEXTURES = 9, + SVGA3D_DEVCAP_S10E5_TEXTURES = 10, + SVGA3D_DEVCAP_MAX_FIXED_VERTEXBLEND = 11, + SVGA3D_DEVCAP_D16_BUFFER_FORMAT = 12, /* See note (2) */ + SVGA3D_DEVCAP_D24S8_BUFFER_FORMAT = 13, /* See note (2) */ + SVGA3D_DEVCAP_D24X8_BUFFER_FORMAT = 14, /* See note (2) */ + SVGA3D_DEVCAP_QUERY_TYPES = 15, + SVGA3D_DEVCAP_TEXTURE_GRADIENT_SAMPLING = 16, + SVGA3D_DEVCAP_MAX_POINT_SIZE = 17, + SVGA3D_DEVCAP_MAX_SHADER_TEXTURES = 18, + SVGA3D_DEVCAP_MAX_TEXTURE_WIDTH = 19, + SVGA3D_DEVCAP_MAX_TEXTURE_HEIGHT = 20, + SVGA3D_DEVCAP_MAX_VOLUME_EXTENT = 21, + SVGA3D_DEVCAP_MAX_TEXTURE_REPEAT = 22, + SVGA3D_DEVCAP_MAX_TEXTURE_ASPECT_RATIO = 23, + SVGA3D_DEVCAP_MAX_TEXTURE_ANISOTROPY = 24, + SVGA3D_DEVCAP_MAX_PRIMITIVE_COUNT = 25, + SVGA3D_DEVCAP_MAX_VERTEX_INDEX = 26, + SVGA3D_DEVCAP_MAX_VERTEX_SHADER_INSTRUCTIONS = 27, + SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_INSTRUCTIONS = 28, + SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEMPS = 29, + SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS = 30, + SVGA3D_DEVCAP_TEXTURE_OPS = 31, + SVGA3D_DEVCAP_SURFACEFMT_X8R8G8B8 = 32, + SVGA3D_DEVCAP_SURFACEFMT_A8R8G8B8 = 33, + SVGA3D_DEVCAP_SURFACEFMT_A2R10G10B10 = 34, + SVGA3D_DEVCAP_SURFACEFMT_X1R5G5B5 = 35, + SVGA3D_DEVCAP_SURFACEFMT_A1R5G5B5 = 36, + SVGA3D_DEVCAP_SURFACEFMT_A4R4G4B4 = 37, + SVGA3D_DEVCAP_SURFACEFMT_R5G6B5 = 38, + SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE16 = 39, + SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8_ALPHA8 = 40, + SVGA3D_DEVCAP_SURFACEFMT_ALPHA8 = 41, + SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8 = 42, + SVGA3D_DEVCAP_SURFACEFMT_Z_D16 = 43, + SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8 = 44, + SVGA3D_DEVCAP_SURFACEFMT_Z_D24X8 = 45, + SVGA3D_DEVCAP_SURFACEFMT_DXT1 = 46, + SVGA3D_DEVCAP_SURFACEFMT_DXT2 = 47, + SVGA3D_DEVCAP_SURFACEFMT_DXT3 = 48, + SVGA3D_DEVCAP_SURFACEFMT_DXT4 = 49, + SVGA3D_DEVCAP_SURFACEFMT_DXT5 = 50, + SVGA3D_DEVCAP_SURFACEFMT_BUMPX8L8V8U8 = 51, + SVGA3D_DEVCAP_SURFACEFMT_A2W10V10U10 = 52, + SVGA3D_DEVCAP_SURFACEFMT_BUMPU8V8 = 53, + SVGA3D_DEVCAP_SURFACEFMT_Q8W8V8U8 = 54, + SVGA3D_DEVCAP_SURFACEFMT_CxV8U8 = 55, + SVGA3D_DEVCAP_SURFACEFMT_R_S10E5 = 56, + SVGA3D_DEVCAP_SURFACEFMT_R_S23E8 = 57, + SVGA3D_DEVCAP_SURFACEFMT_RG_S10E5 = 58, + SVGA3D_DEVCAP_SURFACEFMT_RG_S23E8 = 59, + SVGA3D_DEVCAP_SURFACEFMT_ARGB_S10E5 = 60, + SVGA3D_DEVCAP_SURFACEFMT_ARGB_S23E8 = 61, + SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEXTURES = 63, + + /* + * Note that MAX_SIMULTANEOUS_RENDER_TARGETS is a maximum count of color + * render targets. This does no include the depth or stencil targets. + */ + SVGA3D_DEVCAP_MAX_SIMULTANEOUS_RENDER_TARGETS = 64, + + SVGA3D_DEVCAP_SURFACEFMT_V16U16 = 65, + SVGA3D_DEVCAP_SURFACEFMT_G16R16 = 66, + SVGA3D_DEVCAP_SURFACEFMT_A16B16G16R16 = 67, + SVGA3D_DEVCAP_SURFACEFMT_UYVY = 68, + SVGA3D_DEVCAP_SURFACEFMT_YUY2 = 69, + + /* + * Don't add new caps into the previous section; the values in this + * enumeration must not change. You can put new values right before + * SVGA3D_DEVCAP_MAX. + */ + SVGA3D_DEVCAP_MAX /* This must be the last index. */ +} SVGA3dDevCapIndex; + +typedef union { + Bool b; + uint32 u; + int32 i; + float f; +} SVGA3dDevCapResult; + +#endif /* _SVGA3D_REG_H_ */ diff --git a/src/gallium/drivers/svga/include/svga3d_shaderdefs.h b/src/gallium/drivers/svga/include/svga3d_shaderdefs.h new file mode 100644 index 0000000000..2078c4a8a4 --- /dev/null +++ b/src/gallium/drivers/svga/include/svga3d_shaderdefs.h @@ -0,0 +1,519 @@ +/********************************************************** + * Copyright 2007-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/* + * svga3d_shaderdefs.h -- + * + * SVGA3D byte code format and limit definitions. + * + * The format of the byte code directly corresponds to that defined + * by Microsoft DirectX SDK 9.0c (file d3d9types.h). The format can + * also be extended so that different shader formats can be supported + * for example GLSL, ARB vp/fp, NV/ATI shader formats, etc. + * + */ + +#ifndef __SVGA3D_SHADER_DEFS__ +#define __SVGA3D_SHADER_DEFS__ + +/* SVGA3D shader hardware limits. */ + +#define SVGA3D_INPUTREG_MAX 16 +#define SVGA3D_OUTPUTREG_MAX 12 +#define SVGA3D_VERTEX_SAMPLERREG_MAX 4 +#define SVGA3D_PIXEL_SAMPLERREG_MAX 16 +#define SVGA3D_SAMPLERREG_MAX (SVGA3D_PIXEL_SAMPLERREG_MAX+\ + SVGA3D_VERTEX_SAMPLERREG_MAX) +#define SVGA3D_TEMPREG_MAX 32 +#define SVGA3D_CONSTREG_MAX 256 +#define SVGA3D_CONSTINTREG_MAX 16 +#define SVGA3D_CONSTBOOLREG_MAX 16 +#define SVGA3D_ADDRREG_MAX 1 +#define SVGA3D_PREDREG_MAX 1 + +/* SVGA3D byte code specific limits */ + +#define SVGA3D_MAX_SRC_REGS 4 +#define SVGA3D_MAX_NESTING_LEVEL 32 + +/* SVGA3D version information. */ + +#define SVGA3D_VS_TYPE 0xFFFE +#define SVGA3D_PS_TYPE 0xFFFF + +typedef struct { + union { + struct { + uint32 minor : 8; + uint32 major : 8; + uint32 type : 16; + }; + + uint32 value; + }; +} SVGA3dShaderVersion; + +#define SVGA3D_VS_10 ((SVGA3D_VS_TYPE << 16) | 1 << 8) +#define SVGA3D_VS_11 (SVGA3D_VS_10 | 1) +#define SVGA3D_VS_20 ((SVGA3D_VS_TYPE << 16) | 2 << 8) +#define SVGA3D_VS_30 ((SVGA3D_VS_TYPE << 16) | 3 << 8) + +#define SVGA3D_PS_10 ((SVGA3D_PS_TYPE << 16) | 1 << 8) +#define SVGA3D_PS_11 (SVGA3D_PS_10 | 1) +#define SVGA3D_PS_12 (SVGA3D_PS_10 | 2) +#define SVGA3D_PS_13 (SVGA3D_PS_10 | 3) +#define SVGA3D_PS_14 (SVGA3D_PS_10 | 4) +#define SVGA3D_PS_20 ((SVGA3D_PS_TYPE << 16) | 2 << 8) +#define SVGA3D_PS_30 ((SVGA3D_PS_TYPE << 16) | 3 << 8) + +/* The *_ENABLED are for backwards compatibility with old drivers */ +typedef enum { + SVGA3DPSVERSION_NONE = 0, + SVGA3DPSVERSION_ENABLED = 1, + SVGA3DPSVERSION_11 = 3, + SVGA3DPSVERSION_12 = 5, + SVGA3DPSVERSION_13 = 7, + SVGA3DPSVERSION_14 = 9, + SVGA3DPSVERSION_20 = 11, + SVGA3DPSVERSION_30 = 13, + SVGA3DPSVERSION_40 = 15, + SVGA3DPSVERSION_MAX +} SVGA3dPixelShaderVersion; + +typedef enum { + SVGA3DVSVERSION_NONE = 0, + SVGA3DVSVERSION_ENABLED = 1, + SVGA3DVSVERSION_11 = 3, + SVGA3DVSVERSION_20 = 5, + SVGA3DVSVERSION_30 = 7, + SVGA3DVSVERSION_40 = 9, + SVGA3DVSVERSION_MAX +} SVGA3dVertexShaderVersion; + +/* SVGA3D instruction op codes. */ + +typedef enum { + SVGA3DOP_NOP = 0, + SVGA3DOP_MOV, + SVGA3DOP_ADD, + SVGA3DOP_SUB, + SVGA3DOP_MAD, + SVGA3DOP_MUL, + SVGA3DOP_RCP, + SVGA3DOP_RSQ, + SVGA3DOP_DP3, + SVGA3DOP_DP4, + SVGA3DOP_MIN, + SVGA3DOP_MAX, + SVGA3DOP_SLT, + SVGA3DOP_SGE, + SVGA3DOP_EXP, + SVGA3DOP_LOG, + SVGA3DOP_LIT, + SVGA3DOP_DST, + SVGA3DOP_LRP, + SVGA3DOP_FRC, + SVGA3DOP_M4x4, + SVGA3DOP_M4x3, + SVGA3DOP_M3x4, + SVGA3DOP_M3x3, + SVGA3DOP_M3x2, + SVGA3DOP_CALL, + SVGA3DOP_CALLNZ, + SVGA3DOP_LOOP, + SVGA3DOP_RET, + SVGA3DOP_ENDLOOP, + SVGA3DOP_LABEL, + SVGA3DOP_DCL, + SVGA3DOP_POW, + SVGA3DOP_CRS, + SVGA3DOP_SGN, + SVGA3DOP_ABS, + SVGA3DOP_NRM, + SVGA3DOP_SINCOS, + SVGA3DOP_REP, + SVGA3DOP_ENDREP, + SVGA3DOP_IF, + SVGA3DOP_IFC, + SVGA3DOP_ELSE, + SVGA3DOP_ENDIF, + SVGA3DOP_BREAK, + SVGA3DOP_BREAKC, + SVGA3DOP_MOVA, + SVGA3DOP_DEFB, + SVGA3DOP_DEFI, + SVGA3DOP_TEXCOORD = 64, + SVGA3DOP_TEXKILL, + SVGA3DOP_TEX, + SVGA3DOP_TEXBEM, + SVGA3DOP_TEXBEML, + SVGA3DOP_TEXREG2AR, + SVGA3DOP_TEXREG2GB = 70, + SVGA3DOP_TEXM3x2PAD, + SVGA3DOP_TEXM3x2TEX, + SVGA3DOP_TEXM3x3PAD, + SVGA3DOP_TEXM3x3TEX, + SVGA3DOP_RESERVED0, + SVGA3DOP_TEXM3x3SPEC, + SVGA3DOP_TEXM3x3VSPEC, + SVGA3DOP_EXPP, + SVGA3DOP_LOGP, + SVGA3DOP_CND = 80, + SVGA3DOP_DEF, + SVGA3DOP_TEXREG2RGB, + SVGA3DOP_TEXDP3TEX, + SVGA3DOP_TEXM3x2DEPTH, + SVGA3DOP_TEXDP3, + SVGA3DOP_TEXM3x3, + SVGA3DOP_TEXDEPTH, + SVGA3DOP_CMP, + SVGA3DOP_BEM, + SVGA3DOP_DP2ADD = 90, + SVGA3DOP_DSX, + SVGA3DOP_DSY, + SVGA3DOP_TEXLDD, + SVGA3DOP_SETP, + SVGA3DOP_TEXLDL, + SVGA3DOP_BREAKP = 96, + SVGA3DOP_LAST_INST, + SVGA3DOP_PHASE = 0xFFFD, + SVGA3DOP_COMMENT = 0xFFFE, + SVGA3DOP_END = 0xFFFF, +} SVGA3dShaderOpCodeType; + +/* SVGA3D operation control/comparison function types */ + +typedef enum { + SVGA3DOPCONT_NONE, + SVGA3DOPCONT_PROJECT, /* Projective texturing */ + SVGA3DOPCONT_BIAS, /* Texturing with a LOD bias */ +} SVGA3dShaderOpCodeControlFnType; + +typedef enum { + SVGA3DOPCOMP_RESERVED0 = 0, + SVGA3DOPCOMP_GT, + SVGA3DOPCOMP_EQ, + SVGA3DOPCOMP_GE, + SVGA3DOPCOMP_LT, + SVGA3DOPCOMPC_NE, + SVGA3DOPCOMP_LE, + SVGA3DOPCOMP_RESERVED1 +} SVGA3dShaderOpCodeCompFnType; + +/* SVGA3D register types */ + +typedef enum { + SVGA3DREG_TEMP = 0, /* Temporary register file */ + SVGA3DREG_INPUT, /* Input register file */ + SVGA3DREG_CONST, /* Constant register file */ + SVGA3DREG_ADDR, /* Address register for VS */ + SVGA3DREG_TEXTURE = 3, /* Texture register file for PS */ + SVGA3DREG_RASTOUT, /* Rasterizer register file */ + SVGA3DREG_ATTROUT, /* Attribute output register file */ + SVGA3DREG_TEXCRDOUT, /* Texture coordinate output register file */ + SVGA3DREG_OUTPUT = 6, /* Output register file for VS 3.0+ */ + SVGA3DREG_CONSTINT, /* Constant integer vector register file */ + SVGA3DREG_COLOROUT, /* Color output register file */ + SVGA3DREG_DEPTHOUT, /* Depth output register file */ + SVGA3DREG_SAMPLER, /* Sampler state register file */ + SVGA3DREG_CONST2, /* Constant register file 2048 - 4095 */ + SVGA3DREG_CONST3, /* Constant register file 4096 - 6143 */ + SVGA3DREG_CONST4, /* Constant register file 6144 - 8191 */ + SVGA3DREG_CONSTBOOL, /* Constant boolean register file */ + SVGA3DREG_LOOP, /* Loop counter register file */ + SVGA3DREG_TEMPFLOAT16, /* 16-bit float temp register file */ + SVGA3DREG_MISCTYPE, /* Miscellaneous (single) registers */ + SVGA3DREG_LABEL, /* Label */ + SVGA3DREG_PREDICATE, /* Predicate register */ +} SVGA3dShaderRegType; + +/* SVGA3D rasterizer output register types */ + +typedef enum { + SVGA3DRASTOUT_POSITION = 0, + SVGA3DRASTOUT_FOG, + SVGA3DRASTOUT_PSIZE +} SVGA3dShaderRastOutRegType; + +/* SVGA3D miscellaneous register types */ + +typedef enum { + SVGA3DMISCREG_POSITION = 0, /* Input position x,y,z,rhw (PS) */ + SVGA3DMISCREG_FACE /* Floating point primitive area (PS) */ +} SVGA3DShaderMiscRegType; + +/* SVGA3D sampler types */ + +typedef enum { + SVGA3DSAMP_UNKNOWN = 0, /* Uninitialized value */ + SVGA3DSAMP_2D = 2, /* dcl_2d s# (for declaring a 2-D texture) */ + SVGA3DSAMP_CUBE, /* dcl_cube s# (for declaring a cube texture) */ + SVGA3DSAMP_VOLUME, /* dcl_volume s# (for declaring a volume texture) */ +} SVGA3dShaderSamplerType; + +/* SVGA3D sampler format classes */ + +typedef enum { + SVGA3DSAMPFORMAT_ARGB, /* ARGB formats */ + SVGA3DSAMPFORMAT_V8U8, /* Sign and normalize (SNORM) V & U */ + SVGA3DSAMPFORMAT_Q8W8V8U8, /* SNORM all */ + SVGA3DSAMPFORMAT_CxV8U8, /* SNORM V & U, C=SQRT(1-U^2-V^2) */ + SVGA3DSAMPFORMAT_X8L8V8U8, /* SNORM V & U */ + SVGA3DSAMPFORMAT_A2W10V10U10, /* SNORM W, V & U */ + SVGA3DSAMPFORMAT_DXT_PMA, /* DXT pre-multiplied alpha */ + SVGA3DSAMPFORMAT_YUV, /* YUV video format */ + SVGA3DSAMPFORMAT_UYVY, /* UYVY video format */ + SVGA3DSAMPFORMAT_Rx, /* R16F/32F */ + SVGA3DSAMPFORMAT_RxGx, /* R16FG16F, R32FG32F */ + SVGA3DSAMPFORMAT_V16U16, /* SNORM all */ +} SVGA3DShaderSamplerFormatClass; + +/* SVGA3D write mask */ + +#define SVGA3DWRITEMASK_0 1 /* Component 0 (X;Red) */ +#define SVGA3DWRITEMASK_1 2 /* Component 1 (Y;Green) */ +#define SVGA3DWRITEMASK_2 4 /* Component 2 (Z;Blue) */ +#define SVGA3DWRITEMASK_3 8 /* Component 3 (W;Alpha) */ +#define SVGA3DWRITEMASK_ALL 15 /* All components */ + +/* SVGA3D destination modifiers */ + +#define SVGA3DDSTMOD_NONE 0 /* nop */ +#define SVGA3DDSTMOD_SATURATE 1 /* clamp to [0, 1] */ +#define SVGA3DDSTMOD_PARTIALPRECISION 2 /* Partial precision hint */ + +/* + * Relevant to multisampling only: + * When the pixel center is not covered, sample + * attribute or compute gradients/LOD + * using multisample "centroid" location. + * "Centroid" is some location within the covered + * region of the pixel. + */ + +#define SVGA3DDSTMOD_MSAMPCENTROID 4 + +/* SVGA3D source swizzle */ + +#define SVGA3DSWIZZLE_REPLICATEX 0x00 +#define SVGA3DSWIZZLE_REPLICATEY 0x55 +#define SVGA3DSWIZZLE_REPLICATEZ 0xAA +#define SVGA3DSWIZZLE_REPLICATEW 0xFF +#define SVGA3DSWIZZLE_NONE 0xE4 +#define SVGA3DSWIZZLE_YZXW 0xC9 +#define SVGA3DSWIZZLE_ZXYW 0xD2 +#define SVGA3DSWIZZLE_WXYZ 0x1B + +/* SVGA3D source modifiers */ + +typedef enum { + SVGA3DSRCMOD_NONE = 0, /* nop */ + SVGA3DSRCMOD_NEG, /* negate */ + SVGA3DSRCMOD_BIAS, /* bias */ + SVGA3DSRCMOD_BIASNEG, /* bias and negate */ + SVGA3DSRCMOD_SIGN, /* sign */ + SVGA3DSRCMOD_SIGNNEG, /* sign and negate */ + SVGA3DSRCMOD_COMP, /* complement */ + SVGA3DSRCMOD_X2, /* x2 */ + SVGA3DSRCMOD_X2NEG, /* x2 and negate */ + SVGA3DSRCMOD_DZ, /* divide through by z component */ + SVGA3DSRCMOD_DW, /* divide through by w component */ + SVGA3DSRCMOD_ABS, /* abs() */ + SVGA3DSRCMOD_ABSNEG, /* -abs() */ + SVGA3DSRCMOD_NOT, /* ! (for predicate register) */ +} SVGA3dShaderSrcModType; + +/* SVGA3D instruction token */ + +typedef struct { + union { + struct { + uint32 comment_op : 16; + uint32 comment_size : 16; + }; + + struct { + uint32 op : 16; + uint32 control : 3; + uint32 reserved2 : 5; + uint32 size : 4; + uint32 predicated : 1; + uint32 reserved1 : 1; + uint32 coissue : 1; + uint32 reserved0 : 1; + }; + + uint32 value; + }; +} SVGA3dShaderInstToken; + +/* SVGA3D destination parameter token */ + +typedef struct { + union { + struct { + uint32 num : 11; + uint32 type_upper : 2; + uint32 relAddr : 1; + uint32 reserved1 : 2; + uint32 mask : 4; + uint32 dstMod : 4; + uint32 shfScale : 4; + uint32 type_lower : 3; + uint32 reserved0 : 1; + }; + + uint32 value; + }; +} SVGA3dShaderDestToken; + +/* SVGA3D source parameter token */ + +typedef struct { + union { + struct { + uint32 num : 11; + uint32 type_upper : 2; + uint32 relAddr : 1; + uint32 reserved1 : 2; + uint32 swizzle : 8; + uint32 srcMod : 4; + uint32 type_lower : 3; + uint32 reserved0 : 1; + }; + + uint32 value; + }; +} SVGA3dShaderSrcToken; + +/* SVGA3DOP_DCL parameter tokens */ + +typedef struct { + union { + struct { + union { + struct { + uint32 usage : 5; + uint32 reserved1 : 11; + uint32 index : 4; + uint32 reserved0 : 12; + }; /* input / output declaration */ + + struct { + uint32 reserved3 : 27; + uint32 type : 4; + uint32 reserved2 : 1; + }; /* sampler declaration */ + }; + + SVGA3dShaderDestToken dst; + }; + + uint32 values[2]; + }; +} SVGA3DOpDclArgs; + +/* SVGA3DOP_DEF parameter tokens */ + +typedef struct { + union { + struct { + SVGA3dShaderDestToken dst; + + union { + float constValues[4]; + int constIValues[4]; + Bool constBValue; + }; + }; + + uint32 values[5]; + }; +} SVGA3DOpDefArgs; + +/* SVGA3D shader token */ + +typedef union { + uint32 value; + SVGA3dShaderInstToken inst; + SVGA3dShaderDestToken dest; + SVGA3dShaderSrcToken src; +} SVGA3dShaderToken; + +/* SVGA3D shader program */ + +typedef struct { + SVGA3dShaderVersion version; + /* SVGA3dShaderToken stream */ +} SVGA3dShaderProgram; + +/* SVGA3D version specific register assignments */ + +static const uint32 SVGA3D_INPUT_REG_POSITION_VS11 = 0; +static const uint32 SVGA3D_INPUT_REG_PSIZE_VS11 = 1; +static const uint32 SVGA3D_INPUT_REG_FOG_VS11 = 3; +static const uint32 SVGA3D_INPUT_REG_FOG_MASK_VS11 = SVGA3DWRITEMASK_3; +static const uint32 SVGA3D_INPUT_REG_COLOR_BASE_VS11 = 2; +static const uint32 SVGA3D_INPUT_REG_TEXCOORD_BASE_VS11 = 4; + +static const uint32 SVGA3D_INPUT_REG_COLOR_BASE_PS11 = 0; +static const uint32 SVGA3D_INPUT_REG_TEXCOORD_BASE_PS11 = 2; +static const uint32 SVGA3D_OUTPUT_REG_DEPTH_PS11 = 0; +static const uint32 SVGA3D_OUTPUT_REG_COLOR_PS11 = 1; + +static const uint32 SVGA3D_INPUT_REG_COLOR_BASE_PS20 = 0; +static const uint32 SVGA3D_INPUT_REG_COLOR_NUM_PS20 = 2; +static const uint32 SVGA3D_INPUT_REG_TEXCOORD_BASE_PS20 = 2; +static const uint32 SVGA3D_INPUT_REG_TEXCOORD_NUM_PS20 = 8; +static const uint32 SVGA3D_OUTPUT_REG_COLOR_BASE_PS20 = 1; +static const uint32 SVGA3D_OUTPUT_REG_COLOR_NUM_PS20 = 4; +static const uint32 SVGA3D_OUTPUT_REG_DEPTH_BASE_PS20 = 0; +static const uint32 SVGA3D_OUTPUT_REG_DEPTH_NUM_PS20 = 1; + +/* + *---------------------------------------------------------------------- + * + * SVGA3dShaderGetRegType -- + * + * As the register type is split into two non sequential fields, + * this function provides an useful way of accessing the actual + * register type without having to manually concatenate the + * type_upper and type_lower fields. + * + * Results: + * Returns the register type. + * + *---------------------------------------------------------------------- + */ + +static INLINE SVGA3dShaderRegType +SVGA3dShaderGetRegType(uint32 token) +{ + SVGA3dShaderSrcToken src; + src.value = token; + return (SVGA3dShaderRegType)(src.type_upper << 3 | src.type_lower); +} + +#endif /* __SVGA3D_SHADER_DEFS__ */ diff --git a/src/gallium/drivers/svga/include/svga_reg.h b/src/gallium/drivers/svga/include/svga_reg.h new file mode 100644 index 0000000000..1b96c2ec07 --- /dev/null +++ b/src/gallium/drivers/svga/include/svga_reg.h @@ -0,0 +1,1346 @@ +/********************************************************** + * Copyright 1998-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/* + * svga_reg.h -- + * + * Virtual hardware definitions for the VMware SVGA II device. + */ + +#ifndef _SVGA_REG_H_ +#define _SVGA_REG_H_ + +/* + * PCI device IDs. + */ +#define PCI_VENDOR_ID_VMWARE 0x15AD +#define PCI_DEVICE_ID_VMWARE_SVGA2 0x0405 + +/* + * Legal values for the SVGA_REG_CURSOR_ON register in old-fashioned + * cursor bypass mode. This is still supported, but no new guest + * drivers should use it. + */ +#define SVGA_CURSOR_ON_HIDE 0x0 /* Must be 0 to maintain backward compatibility */ +#define SVGA_CURSOR_ON_SHOW 0x1 /* Must be 1 to maintain backward compatibility */ +#define SVGA_CURSOR_ON_REMOVE_FROM_FB 0x2 /* Remove the cursor from the framebuffer because we need to see what's under it */ +#define SVGA_CURSOR_ON_RESTORE_TO_FB 0x3 /* Put the cursor back in the framebuffer so the user can see it */ + +/* + * The maximum framebuffer size that can traced for e.g. guests in VESA mode. + * The changeMap in the monitor is proportional to this number. Therefore, we'd + * like to keep it as small as possible to reduce monitor overhead (using + * SVGA_VRAM_MAX_SIZE for this increases the size of the shared area by over + * 4k!). + * + * NB: For compatibility reasons, this value must be greater than 0xff0000. + * See bug 335072. + */ +#define SVGA_FB_MAX_TRACEABLE_SIZE 0x1000000 + +#define SVGA_MAX_PSEUDOCOLOR_DEPTH 8 +#define SVGA_MAX_PSEUDOCOLORS (1 << SVGA_MAX_PSEUDOCOLOR_DEPTH) +#define SVGA_NUM_PALETTE_REGS (3 * SVGA_MAX_PSEUDOCOLORS) + +#define SVGA_MAGIC 0x900000UL +#define SVGA_MAKE_ID(ver) (SVGA_MAGIC << 8 | (ver)) + +/* Version 2 let the address of the frame buffer be unsigned on Win32 */ +#define SVGA_VERSION_2 2 +#define SVGA_ID_2 SVGA_MAKE_ID(SVGA_VERSION_2) + +/* Version 1 has new registers starting with SVGA_REG_CAPABILITIES so + PALETTE_BASE has moved */ +#define SVGA_VERSION_1 1 +#define SVGA_ID_1 SVGA_MAKE_ID(SVGA_VERSION_1) + +/* Version 0 is the initial version */ +#define SVGA_VERSION_0 0 +#define SVGA_ID_0 SVGA_MAKE_ID(SVGA_VERSION_0) + +/* "Invalid" value for all SVGA IDs. (Version ID, screen object ID, surface ID...) */ +#define SVGA_ID_INVALID 0xFFFFFFFF + +/* Port offsets, relative to BAR0 */ +#define SVGA_INDEX_PORT 0x0 +#define SVGA_VALUE_PORT 0x1 +#define SVGA_BIOS_PORT 0x2 +#define SVGA_IRQSTATUS_PORT 0x8 + +/* + * Interrupt source flags for IRQSTATUS_PORT and IRQMASK. + * + * Interrupts are only supported when the + * SVGA_CAP_IRQMASK capability is present. + */ +#define SVGA_IRQFLAG_ANY_FENCE 0x1 /* Any fence was passed */ +#define SVGA_IRQFLAG_FIFO_PROGRESS 0x2 /* Made forward progress in the FIFO */ +#define SVGA_IRQFLAG_FENCE_GOAL 0x4 /* SVGA_FIFO_FENCE_GOAL reached */ + +/* + * Registers + */ + +enum { + SVGA_REG_ID = 0, + SVGA_REG_ENABLE = 1, + SVGA_REG_WIDTH = 2, + SVGA_REG_HEIGHT = 3, + SVGA_REG_MAX_WIDTH = 4, + SVGA_REG_MAX_HEIGHT = 5, + SVGA_REG_DEPTH = 6, + SVGA_REG_BITS_PER_PIXEL = 7, /* Current bpp in the guest */ + SVGA_REG_PSEUDOCOLOR = 8, + SVGA_REG_RED_MASK = 9, + SVGA_REG_GREEN_MASK = 10, + SVGA_REG_BLUE_MASK = 11, + SVGA_REG_BYTES_PER_LINE = 12, + SVGA_REG_FB_START = 13, /* (Deprecated) */ + SVGA_REG_FB_OFFSET = 14, + SVGA_REG_VRAM_SIZE = 15, + SVGA_REG_FB_SIZE = 16, + + /* ID 0 implementation only had the above registers, then the palette */ + + SVGA_REG_CAPABILITIES = 17, + SVGA_REG_MEM_START = 18, /* (Deprecated) */ + SVGA_REG_MEM_SIZE = 19, + SVGA_REG_CONFIG_DONE = 20, /* Set when memory area configured */ + SVGA_REG_SYNC = 21, /* See "FIFO Synchronization Registers" */ + SVGA_REG_BUSY = 22, /* See "FIFO Synchronization Registers" */ + SVGA_REG_GUEST_ID = 23, /* Set guest OS identifier */ + SVGA_REG_CURSOR_ID = 24, /* (Deprecated) */ + SVGA_REG_CURSOR_X = 25, /* (Deprecated) */ + SVGA_REG_CURSOR_Y = 26, /* (Deprecated) */ + SVGA_REG_CURSOR_ON = 27, /* (Deprecated) */ + SVGA_REG_HOST_BITS_PER_PIXEL = 28, /* (Deprecated) */ + SVGA_REG_SCRATCH_SIZE = 29, /* Number of scratch registers */ + SVGA_REG_MEM_REGS = 30, /* Number of FIFO registers */ + SVGA_REG_NUM_DISPLAYS = 31, /* (Deprecated) */ + SVGA_REG_PITCHLOCK = 32, /* Fixed pitch for all modes */ + SVGA_REG_IRQMASK = 33, /* Interrupt mask */ + + /* Legacy multi-monitor support */ + SVGA_REG_NUM_GUEST_DISPLAYS = 34,/* Number of guest displays in X/Y direction */ + SVGA_REG_DISPLAY_ID = 35, /* Display ID for the following display attributes */ + SVGA_REG_DISPLAY_IS_PRIMARY = 36,/* Whether this is a primary display */ + SVGA_REG_DISPLAY_POSITION_X = 37,/* The display position x */ + SVGA_REG_DISPLAY_POSITION_Y = 38,/* The display position y */ + SVGA_REG_DISPLAY_WIDTH = 39, /* The display's width */ + SVGA_REG_DISPLAY_HEIGHT = 40, /* The display's height */ + + /* See "Guest memory regions" below. */ + SVGA_REG_GMR_ID = 41, + SVGA_REG_GMR_DESCRIPTOR = 42, + SVGA_REG_GMR_MAX_IDS = 43, + SVGA_REG_GMR_MAX_DESCRIPTOR_LENGTH = 44, + + SVGA_REG_TRACES = 45, /* Enable trace-based updates even when FIFO is on */ + SVGA_REG_TOP = 46, /* Must be 1 more than the last register */ + + SVGA_PALETTE_BASE = 1024, /* Base of SVGA color map */ + /* Next 768 (== 256*3) registers exist for colormap */ + + SVGA_SCRATCH_BASE = SVGA_PALETTE_BASE + SVGA_NUM_PALETTE_REGS + /* Base of scratch registers */ + /* Next reg[SVGA_REG_SCRATCH_SIZE] registers exist for scratch usage: + First 4 are reserved for VESA BIOS Extension; any remaining are for + the use of the current SVGA driver. */ +}; + + +/* + * Guest memory regions (GMRs): + * + * This is a new memory mapping feature available in SVGA devices + * which have the SVGA_CAP_GMR bit set. Previously, there were two + * fixed memory regions available with which to share data between the + * device and the driver: the FIFO ('MEM') and the framebuffer. GMRs + * are our name for an extensible way of providing arbitrary DMA + * buffers for use between the driver and the SVGA device. They are a + * new alternative to framebuffer memory, usable for both 2D and 3D + * graphics operations. + * + * Since GMR mapping must be done synchronously with guest CPU + * execution, we use a new pair of SVGA registers: + * + * SVGA_REG_GMR_ID -- + * + * Read/write. + * This register holds the 32-bit ID (a small positive integer) + * of a GMR to create, delete, or redefine. Writing this register + * has no side-effects. + * + * SVGA_REG_GMR_DESCRIPTOR -- + * + * Write-only. + * Writing this register will create, delete, or redefine the GMR + * specified by the above ID register. If this register is zero, + * the GMR is deleted. Any pointers into this GMR (including those + * currently being processed by FIFO commands) will be + * synchronously invalidated. + * + * If this register is nonzero, it must be the physical page + * number (PPN) of a data structure which describes the physical + * layout of the memory region this GMR should describe. The + * descriptor structure will be read synchronously by the SVGA + * device when this register is written. The descriptor need not + * remain allocated for the lifetime of the GMR. + * + * The guest driver should write SVGA_REG_GMR_ID first, then + * SVGA_REG_GMR_DESCRIPTOR. + * + * SVGA_REG_GMR_MAX_IDS -- + * + * Read-only. + * The SVGA device may choose to support a maximum number of + * user-defined GMR IDs. This register holds the number of supported + * IDs. (The maximum supported ID plus 1) + * + * SVGA_REG_GMR_MAX_DESCRIPTOR_LENGTH -- + * + * Read-only. + * The SVGA device may choose to put a limit on the total number + * of SVGAGuestMemDescriptor structures it will read when defining + * a single GMR. + * + * The descriptor structure is an array of SVGAGuestMemDescriptor + * structures. Each structure may do one of three things: + * + * - Terminate the GMR descriptor list. + * (ppn==0, numPages==0) + * + * - Add a PPN or range of PPNs to the GMR's virtual address space. + * (ppn != 0, numPages != 0) + * + * - Provide the PPN of the next SVGAGuestMemDescriptor, in order to + * support multi-page GMR descriptor tables without forcing the + * driver to allocate physically contiguous memory. + * (ppn != 0, numPages == 0) + * + * Note that each physical page of SVGAGuestMemDescriptor structures + * can describe at least 2MB of guest memory. If the driver needs to + * use more than one page of descriptor structures, it must use one of + * its SVGAGuestMemDescriptors to point to an additional page. The + * device will never automatically cross a page boundary. + * + * Once the driver has described a GMR, it is immediately available + * for use via any FIFO command that uses an SVGAGuestPtr structure. + * These pointers include a GMR identifier plus an offset into that + * GMR. + * + * The driver must check the SVGA_CAP_GMR bit before using the GMR + * registers. + */ + +/* + * Special GMR IDs, allowing SVGAGuestPtrs to point to framebuffer + * memory as well. In the future, these IDs could even be used to + * allow legacy memory regions to be redefined by the guest as GMRs. + * + * Using the guest framebuffer (GFB) at BAR1 for general purpose DMA + * is being phased out. Please try to use user-defined GMRs whenever + * possible. + */ +#define SVGA_GMR_NULL ((uint32) -1) +#define SVGA_GMR_FRAMEBUFFER ((uint32) -2) // Guest Framebuffer (GFB) + +typedef +struct SVGAGuestMemDescriptor { + uint32 ppn; + uint32 numPages; +} SVGAGuestMemDescriptor; + +typedef +struct SVGAGuestPtr { + uint32 gmrId; + uint32 offset; +} SVGAGuestPtr; + + +/* + * SVGAGMRImageFormat -- + * + * This is a packed representation of the source 2D image format + * for a GMR-to-screen blit. Currently it is defined as an encoding + * of the screen's color depth and bits-per-pixel, however, 16 bits + * are reserved for future use to identify other encodings (such as + * RGBA or higher-precision images). + * + * Currently supported formats: + * + * bpp depth Format Name + * --- ----- ----------- + * 32 24 32-bit BGRX + * 24 24 24-bit BGR + * 16 16 RGB 5-6-5 + * 16 15 RGB 5-5-5 + * + */ + +typedef +struct SVGAGMRImageFormat { + union { + struct { + uint32 bitsPerPixel : 8; + uint32 colorDepth : 8; + uint32 reserved : 16; // Must be zero + }; + + uint32 value; + }; +} SVGAGMRImageFormat; + +/* + * SVGAColorBGRX -- + * + * A 24-bit color format (BGRX), which does not depend on the + * format of the legacy guest framebuffer (GFB) or the current + * GMRFB state. + */ + +typedef +struct SVGAColorBGRX { + union { + struct { + uint32 b : 8; + uint32 g : 8; + uint32 r : 8; + uint32 x : 8; // Unused + }; + + uint32 value; + }; +} SVGAColorBGRX; + + +/* + * SVGASignedRect -- + * SVGASignedPoint -- + * + * Signed rectangle and point primitives. These are used by the new + * 2D primitives for drawing to Screen Objects, which can occupy a + * signed virtual coordinate space. + * + * SVGASignedRect specifies a half-open interval: the (left, top) + * pixel is part of the rectangle, but the (right, bottom) pixel is + * not. + */ + +typedef +struct SVGASignedRect { + int32 left; + int32 top; + int32 right; + int32 bottom; +} SVGASignedRect; + +typedef +struct SVGASignedPoint { + int32 x; + int32 y; +} SVGASignedPoint; + + +/* + * Capabilities + * + * Note the holes in the bitfield. Missing bits have been deprecated, + * and must not be reused. Those capabilities will never be reported + * by new versions of the SVGA device. + */ + +#define SVGA_CAP_NONE 0x00000000 +#define SVGA_CAP_RECT_COPY 0x00000002 +#define SVGA_CAP_CURSOR 0x00000020 +#define SVGA_CAP_CURSOR_BYPASS 0x00000040 // Legacy (Use Cursor Bypass 3 instead) +#define SVGA_CAP_CURSOR_BYPASS_2 0x00000080 // Legacy (Use Cursor Bypass 3 instead) +#define SVGA_CAP_8BIT_EMULATION 0x00000100 +#define SVGA_CAP_ALPHA_CURSOR 0x00000200 +#define SVGA_CAP_3D 0x00004000 +#define SVGA_CAP_EXTENDED_FIFO 0x00008000 +#define SVGA_CAP_MULTIMON 0x00010000 // Legacy multi-monitor support +#define SVGA_CAP_PITCHLOCK 0x00020000 +#define SVGA_CAP_IRQMASK 0x00040000 +#define SVGA_CAP_DISPLAY_TOPOLOGY 0x00080000 // Legacy multi-monitor support +#define SVGA_CAP_GMR 0x00100000 +#define SVGA_CAP_TRACES 0x00200000 + + +/* + * FIFO register indices. + * + * The FIFO is a chunk of device memory mapped into guest physmem. It + * is always treated as 32-bit words. + * + * The guest driver gets to decide how to partition it between + * - FIFO registers (there are always at least 4, specifying where the + * following data area is and how much data it contains; there may be + * more registers following these, depending on the FIFO protocol + * version in use) + * - FIFO data, written by the guest and slurped out by the VMX. + * These indices are 32-bit word offsets into the FIFO. + */ + +enum { + /* + * Block 1 (basic registers): The originally defined FIFO registers. + * These exist and are valid for all versions of the FIFO protocol. + */ + + SVGA_FIFO_MIN = 0, + SVGA_FIFO_MAX, /* The distance from MIN to MAX must be at least 10K */ + SVGA_FIFO_NEXT_CMD, + SVGA_FIFO_STOP, + + /* + * Block 2 (extended registers): Mandatory registers for the extended + * FIFO. These exist if the SVGA caps register includes + * SVGA_CAP_EXTENDED_FIFO; some of them are valid only if their + * associated capability bit is enabled. + * + * Note that when originally defined, SVGA_CAP_EXTENDED_FIFO implied + * support only for (FIFO registers) CAPABILITIES, FLAGS, and FENCE. + * This means that the guest has to test individually (in most cases + * using FIFO caps) for the presence of registers after this; the VMX + * can define "extended FIFO" to mean whatever it wants, and currently + * won't enable it unless there's room for that set and much more. + */ + + SVGA_FIFO_CAPABILITIES = 4, + SVGA_FIFO_FLAGS, + // Valid with SVGA_FIFO_CAP_FENCE: + SVGA_FIFO_FENCE, + + /* + * Block 3a (optional extended registers): Additional registers for the + * extended FIFO, whose presence isn't actually implied by + * SVGA_CAP_EXTENDED_FIFO; these exist if SVGA_FIFO_MIN is high enough to + * leave room for them. + * + * These in block 3a, the VMX currently considers mandatory for the + * extended FIFO. + */ + + // Valid if exists (i.e. if extended FIFO enabled): + SVGA_FIFO_3D_HWVERSION, /* See SVGA3dHardwareVersion in svga3d_reg.h */ + // Valid with SVGA_FIFO_CAP_PITCHLOCK: + SVGA_FIFO_PITCHLOCK, + + // Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3: + SVGA_FIFO_CURSOR_ON, /* Cursor bypass 3 show/hide register */ + SVGA_FIFO_CURSOR_X, /* Cursor bypass 3 x register */ + SVGA_FIFO_CURSOR_Y, /* Cursor bypass 3 y register */ + SVGA_FIFO_CURSOR_COUNT, /* Incremented when any of the other 3 change */ + SVGA_FIFO_CURSOR_LAST_UPDATED,/* Last time the host updated the cursor */ + + // Valid with SVGA_FIFO_CAP_RESERVE: + SVGA_FIFO_RESERVED, /* Bytes past NEXT_CMD with real contents */ + + /* + * Valid with SVGA_FIFO_CAP_SCREEN_OBJECT: + * + * By default this is SVGA_ID_INVALID, to indicate that the cursor + * coordinates are specified relative to the virtual root. If this + * is set to a specific screen ID, cursor position is reinterpreted + * as a signed offset relative to that screen's origin. This is the + * only way to place the cursor on a non-rooted screen. + */ + SVGA_FIFO_CURSOR_SCREEN_ID, + + /* + * XXX: The gap here, up until SVGA_FIFO_3D_CAPS, can be used for new + * registers, but this must be done carefully and with judicious use of + * capability bits, since comparisons based on SVGA_FIFO_MIN aren't + * enough to tell you whether the register exists: we've shipped drivers + * and products that used SVGA_FIFO_3D_CAPS but didn't know about some of + * the earlier ones. The actual order of introduction was: + * - PITCHLOCK + * - 3D_CAPS + * - CURSOR_* (cursor bypass 3) + * - RESERVED + * So, code that wants to know whether it can use any of the + * aforementioned registers, or anything else added after PITCHLOCK and + * before 3D_CAPS, needs to reason about something other than + * SVGA_FIFO_MIN. + */ + + /* + * 3D caps block space; valid with 3D hardware version >= + * SVGA3D_HWVERSION_WS6_B1. + */ + SVGA_FIFO_3D_CAPS = 32, + SVGA_FIFO_3D_CAPS_LAST = 32 + 255, + + /* + * End of VMX's current definition of "extended-FIFO registers". + * Registers before here are always enabled/disabled as a block; either + * the extended FIFO is enabled and includes all preceding registers, or + * it's disabled entirely. + * + * Block 3b (truly optional extended registers): Additional registers for + * the extended FIFO, which the VMX already knows how to enable and + * disable with correct granularity. + * + * Registers after here exist if and only if the guest SVGA driver + * sets SVGA_FIFO_MIN high enough to leave room for them. + */ + + // Valid if register exists: + SVGA_FIFO_GUEST_3D_HWVERSION, /* Guest driver's 3D version */ + SVGA_FIFO_FENCE_GOAL, /* Matching target for SVGA_IRQFLAG_FENCE_GOAL */ + SVGA_FIFO_BUSY, /* See "FIFO Synchronization Registers" */ + + /* + * Always keep this last. This defines the maximum number of + * registers we know about. At power-on, this value is placed in + * the SVGA_REG_MEM_REGS register, and we expect the guest driver + * to allocate this much space in FIFO memory for registers. + */ + SVGA_FIFO_NUM_REGS +}; + + +/* + * Definition of registers included in extended FIFO support. + * + * The guest SVGA driver gets to allocate the FIFO between registers + * and data. It must always allocate at least 4 registers, but old + * drivers stopped there. + * + * The VMX will enable extended FIFO support if and only if the guest + * left enough room for all registers defined as part of the mandatory + * set for the extended FIFO. + * + * Note that the guest drivers typically allocate the FIFO only at + * initialization time, not at mode switches, so it's likely that the + * number of FIFO registers won't change without a reboot. + * + * All registers less than this value are guaranteed to be present if + * svgaUser->fifo.extended is set. Any later registers must be tested + * individually for compatibility at each use (in the VMX). + * + * This value is used only by the VMX, so it can change without + * affecting driver compatibility; keep it that way? + */ +#define SVGA_FIFO_EXTENDED_MANDATORY_REGS (SVGA_FIFO_3D_CAPS_LAST + 1) + + +/* + * FIFO Synchronization Registers + * + * This explains the relationship between the various FIFO + * sync-related registers in IOSpace and in FIFO space. + * + * SVGA_REG_SYNC -- + * + * The SYNC register can be used in two different ways by the guest: + * + * 1. If the guest wishes to fully sync (drain) the FIFO, + * it will write once to SYNC then poll on the BUSY + * register. The FIFO is sync'ed once BUSY is zero. + * + * 2. If the guest wants to asynchronously wake up the host, + * it will write once to SYNC without polling on BUSY. + * Ideally it will do this after some new commands have + * been placed in the FIFO, and after reading a zero + * from SVGA_FIFO_BUSY. + * + * (1) is the original behaviour that SYNC was designed to + * support. Originally, a write to SYNC would implicitly + * trigger a read from BUSY. This causes us to synchronously + * process the FIFO. + * + * This behaviour has since been changed so that writing SYNC + * will *not* implicitly cause a read from BUSY. Instead, it + * makes a channel call which asynchronously wakes up the MKS + * thread. + * + * New guests can use this new behaviour to implement (2) + * efficiently. This lets guests get the host's attention + * without waiting for the MKS to poll, which gives us much + * better CPU utilization on SMP hosts and on UP hosts while + * we're blocked on the host GPU. + * + * Old guests shouldn't notice the behaviour change. SYNC was + * never guaranteed to process the entire FIFO, since it was + * bounded to a particular number of CPU cycles. Old guests will + * still loop on the BUSY register until the FIFO is empty. + * + * Writing to SYNC currently has the following side-effects: + * + * - Sets SVGA_REG_BUSY to TRUE (in the monitor) + * - Asynchronously wakes up the MKS thread for FIFO processing + * - The value written to SYNC is recorded as a "reason", for + * stats purposes. + * + * If SVGA_FIFO_BUSY is available, drivers are advised to only + * write to SYNC if SVGA_FIFO_BUSY is FALSE. Drivers should set + * SVGA_FIFO_BUSY to TRUE after writing to SYNC. The MKS will + * eventually set SVGA_FIFO_BUSY on its own, but this approach + * lets the driver avoid sending multiple asynchronous wakeup + * messages to the MKS thread. + * + * SVGA_REG_BUSY -- + * + * This register is set to TRUE when SVGA_REG_SYNC is written, + * and it reads as FALSE when the FIFO has been completely + * drained. + * + * Every read from this register causes us to synchronously + * process FIFO commands. There is no guarantee as to how many + * commands each read will process. + * + * CPU time spent processing FIFO commands will be billed to + * the guest. + * + * New drivers should avoid using this register unless they + * need to guarantee that the FIFO is completely drained. It + * is overkill for performing a sync-to-fence. Older drivers + * will use this register for any type of synchronization. + * + * SVGA_FIFO_BUSY -- + * + * This register is a fast way for the guest driver to check + * whether the FIFO is already being processed. It reads and + * writes at normal RAM speeds, with no monitor intervention. + * + * If this register reads as TRUE, the host is guaranteeing that + * any new commands written into the FIFO will be noticed before + * the MKS goes back to sleep. + * + * If this register reads as FALSE, no such guarantee can be + * made. + * + * The guest should use this register to quickly determine + * whether or not it needs to wake up the host. If the guest + * just wrote a command or group of commands that it would like + * the host to begin processing, it should: + * + * 1. Read SVGA_FIFO_BUSY. If it reads as TRUE, no further + * action is necessary. + * + * 2. Write TRUE to SVGA_FIFO_BUSY. This informs future guest + * code that we've already sent a SYNC to the host and we + * don't need to send a duplicate. + * + * 3. Write a reason to SVGA_REG_SYNC. This will send an + * asynchronous wakeup to the MKS thread. + */ + + +/* + * FIFO Capabilities + * + * Fence -- Fence register and command are supported + * Accel Front -- Front buffer only commands are supported + * Pitch Lock -- Pitch lock register is supported + * Video -- SVGA Video overlay units are supported + * Escape -- Escape command is supported + * + * XXX: Add longer descriptions for each capability, including a list + * of the new features that each capability provides. + * + * SVGA_FIFO_CAP_SCREEN_OBJECT -- + * + * Provides dynamic multi-screen rendering, for improved Unity and + * multi-monitor modes. With Screen Object, the guest can + * dynamically create and destroy 'screens', which can represent + * Unity windows or virtual monitors. Screen Object also provides + * strong guarantees that DMA operations happen only when + * guest-initiated. Screen Object deprecates the BAR1 guest + * framebuffer (GFB) and all commands that work only with the GFB. + * + * New registers: + * FIFO_CURSOR_SCREEN_ID, VIDEO_DATA_GMRID, VIDEO_DST_SCREEN_ID + * + * New 2D commands: + * DEFINE_SCREEN, DESTROY_SCREEN, DEFINE_GMRFB, BLIT_GMRFB_TO_SCREEN, + * BLIT_SCREEN_TO_GMRFB, ANNOTATION_FILL, ANNOTATION_COPY + * + * New 3D commands: + * BLIT_SURFACE_TO_SCREEN + * + * New guarantees: + * + * - The host will not read or write guest memory, including the GFB, + * except when explicitly initiated by a DMA command. + * + * - All DMA, including legacy DMA like UPDATE and PRESENT_READBACK, + * is guaranteed to complete before any subsequent FENCEs. + * + * - All legacy commands which affect a Screen (UPDATE, PRESENT, + * PRESENT_READBACK) as well as new Screen blit commands will + * all behave consistently as blits, and memory will be read + * or written in FIFO order. + * + * For example, if you PRESENT from one SVGA3D surface to multiple + * places on the screen, the data copied will always be from the + * SVGA3D surface at the time the PRESENT was issued in the FIFO. + * This was not necessarily true on devices without Screen Object. + * + * This means that on devices that support Screen Object, the + * PRESENT_READBACK command should not be necessary unless you + * actually want to read back the results of 3D rendering into + * system memory. (And for that, the BLIT_SCREEN_TO_GMRFB + * command provides a strict superset of functionality.) + * + * - When a screen is resized, either using Screen Object commands or + * legacy multimon registers, its contents are preserved. + */ + +#define SVGA_FIFO_CAP_NONE 0 +#define SVGA_FIFO_CAP_FENCE (1<<0) +#define SVGA_FIFO_CAP_ACCELFRONT (1<<1) +#define SVGA_FIFO_CAP_PITCHLOCK (1<<2) +#define SVGA_FIFO_CAP_VIDEO (1<<3) +#define SVGA_FIFO_CAP_CURSOR_BYPASS_3 (1<<4) +#define SVGA_FIFO_CAP_ESCAPE (1<<5) +#define SVGA_FIFO_CAP_RESERVE (1<<6) +#define SVGA_FIFO_CAP_SCREEN_OBJECT (1<<7) + + +/* + * FIFO Flags + * + * Accel Front -- Driver should use front buffer only commands + */ + +#define SVGA_FIFO_FLAG_NONE 0 +#define SVGA_FIFO_FLAG_ACCELFRONT (1<<0) +#define SVGA_FIFO_FLAG_RESERVED (1<<31) // Internal use only + +/* + * FIFO reservation sentinel value + */ + +#define SVGA_FIFO_RESERVED_UNKNOWN 0xffffffff + + +/* + * Video overlay support + */ + +#define SVGA_NUM_OVERLAY_UNITS 32 + + +/* + * Video capabilities that the guest is currently using + */ + +#define SVGA_VIDEO_FLAG_COLORKEY 0x0001 + + +/* + * Offsets for the video overlay registers + */ + +enum { + SVGA_VIDEO_ENABLED = 0, + SVGA_VIDEO_FLAGS, + SVGA_VIDEO_DATA_OFFSET, + SVGA_VIDEO_FORMAT, + SVGA_VIDEO_COLORKEY, + SVGA_VIDEO_SIZE, // Deprecated + SVGA_VIDEO_WIDTH, + SVGA_VIDEO_HEIGHT, + SVGA_VIDEO_SRC_X, + SVGA_VIDEO_SRC_Y, + SVGA_VIDEO_SRC_WIDTH, + SVGA_VIDEO_SRC_HEIGHT, + SVGA_VIDEO_DST_X, // Signed int32 + SVGA_VIDEO_DST_Y, // Signed int32 + SVGA_VIDEO_DST_WIDTH, + SVGA_VIDEO_DST_HEIGHT, + SVGA_VIDEO_PITCH_1, + SVGA_VIDEO_PITCH_2, + SVGA_VIDEO_PITCH_3, + SVGA_VIDEO_DATA_GMRID, // Optional, defaults to SVGA_GMR_FRAMEBUFFER + SVGA_VIDEO_DST_SCREEN_ID, // Optional, defaults to virtual coords (SVGA_ID_INVALID) + SVGA_VIDEO_NUM_REGS +}; + + +/* + * SVGA Overlay Units + * + * width and height relate to the entire source video frame. + * srcX, srcY, srcWidth and srcHeight represent subset of the source + * video frame to be displayed. + */ + +typedef struct SVGAOverlayUnit { + uint32 enabled; + uint32 flags; + uint32 dataOffset; + uint32 format; + uint32 colorKey; + uint32 size; + uint32 width; + uint32 height; + uint32 srcX; + uint32 srcY; + uint32 srcWidth; + uint32 srcHeight; + int32 dstX; + int32 dstY; + uint32 dstWidth; + uint32 dstHeight; + uint32 pitches[3]; + uint32 dataGMRId; + uint32 dstScreenId; +} SVGAOverlayUnit; + + +/* + * SVGAScreenObject -- + * + * This is a new way to represent a guest's multi-monitor screen or + * Unity window. Screen objects are only supported if the + * SVGA_FIFO_CAP_SCREEN_OBJECT capability bit is set. + * + * If Screen Objects are supported, they can be used to fully + * replace the functionality provided by the framebuffer registers + * (SVGA_REG_WIDTH, HEIGHT, etc.) and by SVGA_CAP_DISPLAY_TOPOLOGY. + * + * The screen object is a struct with guaranteed binary + * compatibility. New flags can be added, and the struct may grow, + * but existing fields must retain their meaning. + * + */ + +#define SVGA_SCREEN_HAS_ROOT (1 << 0) // Screen is present in the virtual coord space +#define SVGA_SCREEN_IS_PRIMARY (1 << 1) // Guest considers this screen to be 'primary' +#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2) // Guest is running a fullscreen app here + +typedef +struct SVGAScreenObject { + uint32 structSize; // sizeof(SVGAScreenObject) + uint32 id; + uint32 flags; + struct { + uint32 width; + uint32 height; + } size; + struct { + int32 x; + int32 y; + } root; // Only used if SVGA_SCREEN_HAS_ROOT is set. +} SVGAScreenObject; + + +/* + * Commands in the command FIFO: + * + * Command IDs defined below are used for the traditional 2D FIFO + * communication (not all commands are available for all versions of the + * SVGA FIFO protocol). + * + * Note the holes in the command ID numbers: These commands have been + * deprecated, and the old IDs must not be reused. + * + * Command IDs from 1000 to 1999 are reserved for use by the SVGA3D + * protocol. + * + * Each command's parameters are described by the comments and + * structs below. + */ + +typedef enum { + SVGA_CMD_INVALID_CMD = 0, + SVGA_CMD_UPDATE = 1, + SVGA_CMD_RECT_COPY = 3, + SVGA_CMD_DEFINE_CURSOR = 19, + SVGA_CMD_DEFINE_ALPHA_CURSOR = 22, + SVGA_CMD_UPDATE_VERBOSE = 25, + SVGA_CMD_FRONT_ROP_FILL = 29, + SVGA_CMD_FENCE = 30, + SVGA_CMD_ESCAPE = 33, + SVGA_CMD_DEFINE_SCREEN = 34, + SVGA_CMD_DESTROY_SCREEN = 35, + SVGA_CMD_DEFINE_GMRFB = 36, + SVGA_CMD_BLIT_GMRFB_TO_SCREEN = 37, + SVGA_CMD_BLIT_SCREEN_TO_GMRFB = 38, + SVGA_CMD_ANNOTATION_FILL = 39, + SVGA_CMD_ANNOTATION_COPY = 40, + SVGA_CMD_MAX +} SVGAFifoCmdId; + +#define SVGA_CMD_MAX_ARGS 64 + + +/* + * SVGA_CMD_UPDATE -- + * + * This is a DMA transfer which copies from the Guest Framebuffer + * (GFB) at BAR1 + SVGA_REG_FB_OFFSET to any screens which + * intersect with the provided virtual rectangle. + * + * This command does not support using arbitrary guest memory as a + * data source- it only works with the pre-defined GFB memory. + * This command also does not support signed virtual coordinates. + * If you have defined screens (using SVGA_CMD_DEFINE_SCREEN) with + * negative root x/y coordinates, the negative portion of those + * screens will not be reachable by this command. + * + * This command is not necessary when using framebuffer + * traces. Traces are automatically enabled if the SVGA FIFO is + * disabled, and you may explicitly enable/disable traces using + * SVGA_REG_TRACES. With traces enabled, any write to the GFB will + * automatically act as if a subsequent SVGA_CMD_UPDATE was issued. + * + * Traces and SVGA_CMD_UPDATE are the only supported ways to render + * pseudocolor screen updates. The newer Screen Object commands + * only support true color formats. + * + * Availability: + * Always available. + */ + +typedef +struct { + uint32 x; + uint32 y; + uint32 width; + uint32 height; +} SVGAFifoCmdUpdate; + + +/* + * SVGA_CMD_RECT_COPY -- + * + * Perform a rectangular DMA transfer from one area of the GFB to + * another, and copy the result to any screens which intersect it. + * + * Availability: + * SVGA_CAP_RECT_COPY + */ + +typedef +struct { + uint32 srcX; + uint32 srcY; + uint32 destX; + uint32 destY; + uint32 width; + uint32 height; +} SVGAFifoCmdRectCopy; + + +/* + * SVGA_CMD_DEFINE_CURSOR -- + * + * Provide a new cursor image, as an AND/XOR mask. + * + * The recommended way to position the cursor overlay is by using + * the SVGA_FIFO_CURSOR_* registers, supported by the + * SVGA_FIFO_CAP_CURSOR_BYPASS_3 capability. + * + * Availability: + * SVGA_CAP_CURSOR + */ + +typedef +struct { + uint32 id; // Reserved, must be zero. + uint32 hotspotX; + uint32 hotspotY; + uint32 width; + uint32 height; + uint32 andMaskDepth; // Value must be 1 or equal to BITS_PER_PIXEL + uint32 xorMaskDepth; // Value must be 1 or equal to BITS_PER_PIXEL + /* + * Followed by scanline data for AND mask, then XOR mask. + * Each scanline is padded to a 32-bit boundary. + */ +} SVGAFifoCmdDefineCursor; + + +/* + * SVGA_CMD_DEFINE_ALPHA_CURSOR -- + * + * Provide a new cursor image, in 32-bit BGRA format. + * + * The recommended way to position the cursor overlay is by using + * the SVGA_FIFO_CURSOR_* registers, supported by the + * SVGA_FIFO_CAP_CURSOR_BYPASS_3 capability. + * + * Availability: + * SVGA_CAP_ALPHA_CURSOR + */ + +typedef +struct { + uint32 id; // Reserved, must be zero. + uint32 hotspotX; + uint32 hotspotY; + uint32 width; + uint32 height; + /* Followed by scanline data */ +} SVGAFifoCmdDefineAlphaCursor; + + +/* + * SVGA_CMD_UPDATE_VERBOSE -- + * + * Just like SVGA_CMD_UPDATE, but also provide a per-rectangle + * 'reason' value, an opaque cookie which is used by internal + * debugging tools. Third party drivers should not use this + * command. + * + * Availability: + * SVGA_CAP_EXTENDED_FIFO + */ + +typedef +struct { + uint32 x; + uint32 y; + uint32 width; + uint32 height; + uint32 reason; +} SVGAFifoCmdUpdateVerbose; + + +/* + * SVGA_CMD_FRONT_ROP_FILL -- + * + * This is a hint which tells the SVGA device that the driver has + * just filled a rectangular region of the GFB with a solid + * color. Instead of reading these pixels from the GFB, the device + * can assume that they all equal 'color'. This is primarily used + * for remote desktop protocols. + * + * Availability: + * SVGA_FIFO_CAP_ACCELFRONT + */ + +#define SVGA_ROP_COPY 0x03 + +typedef +struct { + uint32 color; // In the same format as the GFB + uint32 x; + uint32 y; + uint32 width; + uint32 height; + uint32 rop; // Must be SVGA_ROP_COPY +} SVGAFifoCmdFrontRopFill; + + +/* + * SVGA_CMD_FENCE -- + * + * Insert a synchronization fence. When the SVGA device reaches + * this command, it will copy the 'fence' value into the + * SVGA_FIFO_FENCE register. It will also compare the fence against + * SVGA_FIFO_FENCE_GOAL. If the fence matches the goal and the + * SVGA_IRQFLAG_FENCE_GOAL interrupt is enabled, the device will + * raise this interrupt. + * + * Availability: + * SVGA_FIFO_FENCE for this command, + * SVGA_CAP_IRQMASK for SVGA_FIFO_FENCE_GOAL. + */ + +typedef +struct { + uint32 fence; +} SVGAFifoCmdFence; + + +/* + * SVGA_CMD_ESCAPE -- + * + * Send an extended or vendor-specific variable length command. + * This is used for video overlay, third party plugins, and + * internal debugging tools. See svga_escape.h + * + * Availability: + * SVGA_FIFO_CAP_ESCAPE + */ + +typedef +struct { + uint32 nsid; + uint32 size; + /* followed by 'size' bytes of data */ +} SVGAFifoCmdEscape; + + +/* + * SVGA_CMD_DEFINE_SCREEN -- + * + * Define or redefine an SVGAScreenObject. See the description of + * SVGAScreenObject above. The video driver is responsible for + * generating new screen IDs. They should be small positive + * integers. The virtual device will have an implementation + * specific upper limit on the number of screen IDs + * supported. Drivers are responsible for recycling IDs. The first + * valid ID is zero. + * + * - Interaction with other registers: + * + * For backwards compatibility, when the GFB mode registers (WIDTH, + * HEIGHT, PITCHLOCK, BITS_PER_PIXEL) are modified, the SVGA device + * deletes all screens other than screen #0, and redefines screen + * #0 according to the specified mode. Drivers that use + * SVGA_CMD_DEFINE_SCREEN should destroy or redefine screen #0. + * + * If you use screen objects, do not use the legacy multi-mon + * registers (SVGA_REG_NUM_GUEST_DISPLAYS, SVGA_REG_DISPLAY_*). + * + * Availability: + * SVGA_FIFO_CAP_SCREEN_OBJECT + */ + +typedef +struct { + SVGAScreenObject screen; // Variable-length according to version +} SVGAFifoCmdDefineScreen; + + +/* + * SVGA_CMD_DESTROY_SCREEN -- + * + * Destroy an SVGAScreenObject. Its ID is immediately available for + * re-use. + * + * Availability: + * SVGA_FIFO_CAP_SCREEN_OBJECT + */ + +typedef +struct { + uint32 screenId; +} SVGAFifoCmdDestroyScreen; + + +/* + * SVGA_CMD_DEFINE_GMRFB -- + * + * This command sets a piece of SVGA device state called the + * Guest Memory Region Framebuffer, or GMRFB. The GMRFB is a + * piece of light-weight state which identifies the location and + * format of an image in guest memory or in BAR1. The GMRFB has + * an arbitrary size, and it doesn't need to match the geometry + * of the GFB or any screen object. + * + * The GMRFB can be redefined as often as you like. You could + * always use the same GMRFB, you could redefine it before + * rendering from a different guest screen, or you could even + * redefine it before every blit. + * + * There are multiple ways to use this command. The simplest way is + * to use it to move the framebuffer either to elsewhere in the GFB + * (BAR1) memory region, or to a user-defined GMR. This lets a + * driver use a framebuffer allocated entirely out of normal system + * memory, which we encourage. + * + * Another way to use this command is to set up a ring buffer of + * updates in GFB memory. If a driver wants to ensure that no + * frames are skipped by the SVGA device, it is important that the + * driver not modify the source data for a blit until the device is + * done processing the command. One efficient way to accomplish + * this is to use a ring of small DMA buffers. Each buffer is used + * for one blit, then we move on to the next buffer in the + * ring. The FENCE mechanism is used to protect each buffer from + * re-use until the device is finished with that buffer's + * corresponding blit. + * + * This command does not affect the meaning of SVGA_CMD_UPDATE. + * UPDATEs always occur from the legacy GFB memory area. This + * command has no support for pseudocolor GMRFBs. Currently only + * true-color 15, 16, and 24-bit depths are supported. Future + * devices may expose capabilities for additional framebuffer + * formats. + * + * The default GMRFB value is undefined. Drivers must always send + * this command at least once before performing any blit from the + * GMRFB. + * + * Availability: + * SVGA_FIFO_CAP_SCREEN_OBJECT + */ + +typedef +struct { + SVGAGuestPtr ptr; + uint32 bytesPerLine; + SVGAGMRImageFormat format; +} SVGAFifoCmdDefineGMRFB; + + +/* + * SVGA_CMD_BLIT_GMRFB_TO_SCREEN -- + * + * This is a guest-to-host blit. It performs a DMA operation to + * copy a rectangular region of pixels from the current GMRFB to + * one or more Screen Objects. + * + * The destination coordinate may be specified relative to a + * screen's origin (if a screen ID is specified) or relative to the + * virtual coordinate system's origin (if the screen ID is + * SVGA_ID_INVALID). The actual destination may span zero or more + * screens, in the case of a virtual destination rect or a rect + * which extends off the edge of the specified screen. + * + * This command writes to the screen's "base layer": the underlying + * framebuffer which exists below any cursor or video overlays. No + * action is necessary to explicitly hide or update any overlays + * which exist on top of the updated region. + * + * The SVGA device is guaranteed to finish reading from the GMRFB + * by the time any subsequent FENCE commands are reached. + * + * This command consumes an annotation. See the + * SVGA_CMD_ANNOTATION_* commands for details. + * + * Availability: + * SVGA_FIFO_CAP_SCREEN_OBJECT + */ + +typedef +struct { + SVGASignedPoint srcOrigin; + SVGASignedRect destRect; + uint32 destScreenId; +} SVGAFifoCmdBlitGMRFBToScreen; + + +/* + * SVGA_CMD_BLIT_SCREEN_TO_GMRFB -- + * + * This is a host-to-guest blit. It performs a DMA operation to + * copy a rectangular region of pixels from a single Screen Object + * back to the current GMRFB. + * + * Usage note: This command should be used rarely. It will + * typically be inefficient, but it is necessary for some types of + * synchronization between 3D (GPU) and 2D (CPU) rendering into + * overlapping areas of a screen. + * + * The source coordinate is specified relative to a screen's + * origin. The provided screen ID must be valid. If any parameters + * are invalid, the resulting pixel values are undefined. + * + * This command reads the screen's "base layer". Overlays like + * video and cursor are not included, but any data which was sent + * using a blit-to-screen primitive will be available, no matter + * whether the data's original source was the GMRFB or the 3D + * acceleration hardware. + * + * Note that our guest-to-host blits and host-to-guest blits aren't + * symmetric in their current implementation. While the parameters + * are identical, host-to-guest blits are a lot less featureful. + * They do not support clipping: If the source parameters don't + * fully fit within a screen, the blit fails. They must originate + * from exactly one screen. Virtual coordinates are not directly + * supported. + * + * Host-to-guest blits do support the same set of GMRFB formats + * offered by guest-to-host blits. + * + * The SVGA device is guaranteed to finish writing to the GMRFB by + * the time any subsequent FENCE commands are reached. + * + * Availability: + * SVGA_FIFO_CAP_SCREEN_OBJECT + */ + +typedef +struct { + SVGASignedPoint destOrigin; + SVGASignedRect srcRect; + uint32 srcScreenId; +} SVGAFifoCmdBlitScreenToGMRFB; + + +/* + * SVGA_CMD_ANNOTATION_FILL -- + * + * This is a blit annotation. This command stores a small piece of + * device state which is consumed by the next blit-to-screen + * command. The state is only cleared by commands which are + * specifically documented as consuming an annotation. Other + * commands (such as ESCAPEs for debugging) may intervene between + * the annotation and its associated blit. + * + * This annotation is a promise about the contents of the next + * blit: The video driver is guaranteeing that all pixels in that + * blit will have the same value, specified here as a color in + * SVGAColorBGRX format. + * + * The SVGA device can still render the blit correctly even if it + * ignores this annotation, but the annotation may allow it to + * perform the blit more efficiently, for example by ignoring the + * source data and performing a fill in hardware. + * + * This annotation is most important for performance when the + * user's display is being remoted over a network connection. + * + * Availability: + * SVGA_FIFO_CAP_SCREEN_OBJECT + */ + +typedef +struct { + SVGAColorBGRX color; +} SVGAFifoCmdAnnotationFill; + + +/* + * SVGA_CMD_ANNOTATION_COPY -- + * + * This is a blit annotation. See SVGA_CMD_ANNOTATION_FILL for more + * information about annotations. + * + * This annotation is a promise about the contents of the next + * blit: The video driver is guaranteeing that all pixels in that + * blit will have the same value as those which already exist at an + * identically-sized region on the same or a different screen. + * + * Note that the source pixels for the COPY in this annotation are + * sampled before applying the anqnotation's associated blit. They + * are allowed to overlap with the blit's destination pixels. + * + * The copy source rectangle is specified the same way as the blit + * destination: it can be a rectangle which spans zero or more + * screens, specified relative to either a screen or to the virtual + * coordinate system's origin. If the source rectangle includes + * pixels which are not from exactly one screen, the results are + * undefined. + * + * Availability: + * SVGA_FIFO_CAP_SCREEN_OBJECT + */ + +typedef +struct { + SVGASignedPoint srcOrigin; + uint32 srcScreenId; +} SVGAFifoCmdAnnotationCopy; + +#endif diff --git a/src/gallium/drivers/svga/include/svga_types.h b/src/gallium/drivers/svga/include/svga_types.h new file mode 100644 index 0000000000..7fd9bab03a --- /dev/null +++ b/src/gallium/drivers/svga/include/svga_types.h @@ -0,0 +1,46 @@ +/********************************************************** + * Copyright 1998-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef _SVGA_TYPES_H_ +#define _SVGA_TYPES_H_ + +#include "pipe/p_compiler.h" + +typedef int64_t int64; +typedef uint64_t uint64; + +typedef int32_t int32; +typedef uint32_t uint32; + +typedef int16_t int16; +typedef uint16_t uint16; + +typedef int8_t int8; +typedef uint8_t uint8; + +typedef uint8_t Bool; + +#endif /* _SVGA_TYPES_H_ */ + diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c new file mode 100644 index 0000000000..a0da7d7e5d --- /dev/null +++ b/src/gallium/drivers/svga/svga_cmd.c @@ -0,0 +1,1427 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * svga_cmd.c -- + * + * Command construction utility for the SVGA3D protocol used by + * the VMware SVGA device, based on the svgautil library. + */ + +#include "svga_winsys.h" +#include "svga_screen_buffer.h" +#include "svga_screen_texture.h" +#include "svga_cmd.h" + +/* + *---------------------------------------------------------------------- + * + * surface_to_surfaceid -- + * + * Utility function for surface ids. + * Can handle null surface. Does a surface_reallocation so you need + * to have allocated the fifo space before converting. + * + * Results: + * id is filld out. + * + * Side effects: + * One surface relocation is preformed for texture handle. + * + *---------------------------------------------------------------------- + */ + +static INLINE +void surface_to_surfaceid(struct svga_winsys_context *swc, // IN + struct pipe_surface *surface, // IN + SVGA3dSurfaceImageId *id, // OUT + unsigned flags) // IN +{ + if(surface) { + struct svga_surface *s = svga_surface(surface); + swc->surface_relocation(swc, &id->sid, s->handle, flags); + id->face = s->real_face; /* faces have the same order */ + id->mipmap = s->real_level; + } + else { + id->sid = SVGA3D_INVALID_ID; + id->face = 0; + id->mipmap = 0; + } +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_FIFOReserve -- + * + * Reserve space for an SVGA3D FIFO command. + * + * The 2D SVGA commands have been around for a while, so they + * have a rather asymmetric structure. The SVGA3D protocol is + * more uniform: each command begins with a header containing the + * command number and the full size. + * + * This is a convenience wrapper around SVGA_FIFOReserve. We + * reserve space for the whole command, and write the header. + * + * This function must be paired with SVGA_FIFOCommitAll(). + * + * Results: + * Returns a pointer to the space reserved for command-specific + * data. It must be 'cmdSize' bytes long. + * + * Side effects: + * Begins a FIFO reservation. + * + *---------------------------------------------------------------------- + */ + +void * +SVGA3D_FIFOReserve(struct svga_winsys_context *swc, + uint32 cmd, // IN + uint32 cmdSize, // IN + uint32 nr_relocs) // IN +{ + SVGA3dCmdHeader *header; + + header = swc->reserve(swc, sizeof *header + cmdSize, nr_relocs); + if(!header) + return NULL; + + header->id = cmd; + header->size = cmdSize; + + return &header[1]; +} + + +void +SVGA_FIFOCommitAll(struct svga_winsys_context *swc) +{ + swc->commit(swc); +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_DefineContext -- + * + * Create a new context, to be referred to with the provided ID. + * + * Context objects encapsulate all render state, and shader + * objects are per-context. + * + * Surfaces are not per-context. The same surface can be shared + * between multiple contexts, and surface operations can occur + * without a context. + * + * If the provided context ID already existed, it is redefined. + * + * Context IDs are arbitrary small non-negative integers, + * global to the entire SVGA device. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_DefineContext(struct svga_winsys_context *swc) // IN +{ + SVGA3dCmdDefineContext *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_CONTEXT_DEFINE, sizeof *cmd, 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + + swc->commit(swc); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_DestroyContext -- + * + * Delete a context created with SVGA3D_DefineContext. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_DestroyContext(struct svga_winsys_context *swc) // IN +{ + SVGA3dCmdDestroyContext *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_CONTEXT_DESTROY, sizeof *cmd, 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + + swc->commit(swc); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_BeginDefineSurface -- + * + * Begin a SURFACE_DEFINE command. This reserves space for it in + * the FIFO, and returns pointers to the command's faces and + * mipsizes arrays. + * + * This function must be paired with SVGA_FIFOCommitAll(). + * The faces and mipSizes arrays are initialized to zero. + * + * This creates a "surface" object in the SVGA3D device, + * with the provided surface ID (sid). Surfaces are generic + * containers for host VRAM objects like textures, vertex + * buffers, and depth/stencil buffers. + * + * Surfaces are hierarchial: + * + * - Surface may have multiple faces (for cube maps) + * + * - Each face has a list of mipmap levels + * + * - Each mipmap image may have multiple volume + * slices, if the image is three dimensional. + * + * - Each slice is a 2D array of 'blocks' + * + * - Each block may be one or more pixels. + * (Usually 1, more for DXT or YUV formats.) + * + * Surfaces are generic host VRAM objects. The SVGA3D device + * may optimize surfaces according to the format they were + * created with, but this format does not limit the ways in + * which the surface may be used. For example, a depth surface + * can be used as a texture, or a floating point image may + * be used as a vertex buffer. Some surface usages may be + * lower performance, due to software emulation, but any + * usage should work with any surface. + * + * If 'sid' is already defined, the old surface is deleted + * and this new surface replaces it. + * + * Surface IDs are arbitrary small non-negative integers, + * global to the entire SVGA device. + * + * Results: + * Returns pointers to arrays allocated in the FIFO for 'faces' + * and 'mipSizes'. + * + * Side effects: + * Begins a FIFO reservation. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_BeginDefineSurface(struct svga_winsys_context *swc, + struct svga_winsys_surface *sid, // IN + SVGA3dSurfaceFlags flags, // IN + SVGA3dSurfaceFormat format, // IN + SVGA3dSurfaceFace **faces, // OUT + SVGA3dSize **mipSizes, // OUT + uint32 numMipSizes) // IN +{ + SVGA3dCmdDefineSurface *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SURFACE_DEFINE, sizeof *cmd + + sizeof **mipSizes * numMipSizes, 1); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->surface_relocation(swc, &cmd->sid, sid, PIPE_BUFFER_USAGE_GPU_WRITE); + cmd->surfaceFlags = flags; + cmd->format = format; + + *faces = &cmd->face[0]; + *mipSizes = (SVGA3dSize*) &cmd[1]; + + memset(*faces, 0, sizeof **faces * SVGA3D_MAX_SURFACE_FACES); + memset(*mipSizes, 0, sizeof **mipSizes * numMipSizes); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_DefineSurface2D -- + * + * This is a simplified version of SVGA3D_BeginDefineSurface(), + * which does not support cube maps, mipmaps, or volume textures. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_DefineSurface2D(struct svga_winsys_context *swc, // IN + struct svga_winsys_surface *sid, // IN + uint32 width, // IN + uint32 height, // IN + SVGA3dSurfaceFormat format) // IN +{ + SVGA3dSize *mipSizes; + SVGA3dSurfaceFace *faces; + enum pipe_error ret; + + ret = SVGA3D_BeginDefineSurface(swc, + sid, 0, format, &faces, &mipSizes, 1); + if(ret != PIPE_OK) + return ret; + + faces[0].numMipLevels = 1; + + mipSizes[0].width = width; + mipSizes[0].height = height; + mipSizes[0].depth = 1; + + swc->commit(swc);; + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_DestroySurface -- + * + * Release the host VRAM encapsulated by a particular surface ID. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_DestroySurface(struct svga_winsys_context *swc, + struct svga_winsys_surface *sid) // IN +{ + SVGA3dCmdDestroySurface *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SURFACE_DESTROY, sizeof *cmd, 1); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->surface_relocation(swc, &cmd->sid, sid, PIPE_BUFFER_USAGE_GPU_READ); + swc->commit(swc);; + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_BeginSurfaceDMA-- + * + * Begin a SURFACE_DMA command. This reserves space for it in + * the FIFO, and returns a pointer to the command's box array. + * This function must be paired with SVGA_FIFOCommitAll(). + * + * When the SVGA3D device asynchronously processes this FIFO + * command, a DMA operation is performed between host VRAM and + * a generic SVGAGuestPtr. The guest pointer may refer to guest + * VRAM (provided by the SVGA PCI device) or to guest system + * memory that has been set up as a Guest Memory Region (GMR) + * by the SVGA device. + * + * The guest's DMA buffer must remain valid (not freed, paged out, + * or overwritten) until the host has finished processing this + * command. The guest can determine that the host has finished + * by using the SVGA device's FIFO Fence mechanism. + * + * The guest's image buffer can be an arbitrary size and shape. + * Guest image data is interpreted according to the SVGA3D surface + * format specified when the surface was defined. + * + * The caller may optionally define the guest image's pitch. + * guestImage->pitch can either be zero (assume image is tightly + * packed) or it must be the number of bytes between vertically + * adjacent image blocks. + * + * The provided copybox list specifies which regions of the source + * image are to be copied, and where they appear on the destination. + * + * NOTE: srcx/srcy are always on the guest image and x/y are + * always on the host image, regardless of the actual transfer + * direction! + * + * For efficiency, the SVGA3D device is free to copy more data + * than specified. For example, it may round copy boxes outwards + * such that they lie on particular alignment boundaries. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_SurfaceDMA(struct svga_winsys_context *swc, + struct svga_transfer *st, // IN + SVGA3dTransferType transfer, // IN + const SVGA3dCopyBox *boxes, // IN + uint32 numBoxes) // IN +{ + struct svga_texture *texture = svga_texture(st->base.texture); + SVGA3dCmdSurfaceDMA *cmd; + SVGA3dCmdSurfaceDMASuffix *pSuffix; + uint32 boxesSize = sizeof *boxes * numBoxes; + unsigned region_flags; + unsigned surface_flags; + + if(transfer == SVGA3D_WRITE_HOST_VRAM) { + region_flags = PIPE_BUFFER_USAGE_GPU_READ; + surface_flags = PIPE_BUFFER_USAGE_GPU_WRITE; + } + else if(transfer == SVGA3D_READ_HOST_VRAM) { + region_flags = PIPE_BUFFER_USAGE_GPU_WRITE; + surface_flags = PIPE_BUFFER_USAGE_GPU_READ; + } + else { + assert(0); + return PIPE_ERROR_BAD_INPUT; + } + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SURFACE_DMA, + sizeof *cmd + boxesSize + sizeof *pSuffix, + 2); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->region_relocation(swc, &cmd->guest.ptr, st->hwbuf, 0, region_flags); + cmd->guest.pitch = st->base.stride; + + swc->surface_relocation(swc, &cmd->host.sid, texture->handle, surface_flags); + cmd->host.face = st->base.face; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */ + cmd->host.mipmap = st->base.level; + + cmd->transfer = transfer; + + memcpy(&cmd[1], boxes, boxesSize); + + pSuffix = (SVGA3dCmdSurfaceDMASuffix *)((uint8_t*)cmd + sizeof *cmd + boxesSize); + pSuffix->suffixSize = sizeof *pSuffix; + pSuffix->maximumOffset = st->hw_nblocksy*st->base.stride; + memset(&pSuffix->flags, 0, sizeof pSuffix->flags); + + swc->commit(swc); + + return PIPE_OK; +} + + +enum pipe_error +SVGA3D_BufferDMA(struct svga_winsys_context *swc, + struct svga_winsys_buffer *guest, + struct svga_winsys_surface *host, + SVGA3dTransferType transfer, // IN + uint32 size, // IN + uint32 offset, // IN + SVGA3dSurfaceDMAFlags flags) // IN +{ + SVGA3dCmdSurfaceDMA *cmd; + SVGA3dCopyBox *box; + SVGA3dCmdSurfaceDMASuffix *pSuffix; + unsigned region_flags; + unsigned surface_flags; + + if(transfer == SVGA3D_WRITE_HOST_VRAM) { + region_flags = PIPE_BUFFER_USAGE_GPU_READ; + surface_flags = PIPE_BUFFER_USAGE_GPU_WRITE; + } + else if(transfer == SVGA3D_READ_HOST_VRAM) { + region_flags = PIPE_BUFFER_USAGE_GPU_WRITE; + surface_flags = PIPE_BUFFER_USAGE_GPU_READ; + } + else { + assert(0); + return PIPE_ERROR_BAD_INPUT; + } + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SURFACE_DMA, + sizeof *cmd + sizeof *box + sizeof *pSuffix, + 2); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->region_relocation(swc, &cmd->guest.ptr, guest, 0, region_flags); + cmd->guest.pitch = 0; + + swc->surface_relocation(swc, &cmd->host.sid, host, surface_flags); + cmd->host.face = 0; + cmd->host.mipmap = 0; + + cmd->transfer = transfer; + + box = (SVGA3dCopyBox *)&cmd[1]; + box->x = offset; + box->y = 0; + box->z = 0; + box->w = size; + box->h = 1; + box->d = 1; + box->srcx = offset; + box->srcy = 0; + box->srcz = 0; + + pSuffix = (SVGA3dCmdSurfaceDMASuffix *)((uint8_t*)cmd + sizeof *cmd + sizeof *box); + pSuffix->suffixSize = sizeof *pSuffix; + pSuffix->maximumOffset = offset + size; + pSuffix->flags = flags; + + swc->commit(swc); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_SetRenderTarget -- + * + * Bind a surface object to a particular render target attachment + * point on the current context. Render target attachment points + * exist for color buffers, a depth buffer, and a stencil buffer. + * + * The SVGA3D device is quite lenient about the types of surfaces + * that may be used as render targets. The color buffers must + * all be the same size, but the depth and stencil buffers do not + * have to be the same size as the color buffer. All attachments + * are optional. + * + * Some combinations of render target formats may require software + * emulation, depending on the capabilities of the host graphics + * API and graphics hardware. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_SetRenderTarget(struct svga_winsys_context *swc, + SVGA3dRenderTargetType type, // IN + struct pipe_surface *surface) // IN +{ + SVGA3dCmdSetRenderTarget *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SETRENDERTARGET, sizeof *cmd, 1); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + + cmd->cid = swc->cid; + + cmd->type = type; + + surface_to_surfaceid(swc, surface, &cmd->target, PIPE_BUFFER_USAGE_GPU_WRITE); + + swc->commit(swc); + + return PIPE_OK; +} + + + + + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_DefineShader -- + * + * Upload the bytecode for a new shader. The bytecode is "SVGA3D + * format", which is theoretically a binary-compatible superset + * of Microsoft's DirectX shader bytecode. In practice, the + * SVGA3D bytecode doesn't yet have any extensions to DirectX's + * bytecode format. + * + * The SVGA3D device supports shader models 1.1 through 2.0. + * + * The caller chooses a shader ID (small positive integer) by + * which this shader will be identified in future commands. This + * ID is in a namespace which is per-context and per-shader-type. + * + * 'bytecodeLen' is specified in bytes. It must be a multiple of 4. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_DefineShader(struct svga_winsys_context *swc, + uint32 shid, // IN + SVGA3dShaderType type, // IN + const uint32 *bytecode, // IN + uint32 bytecodeLen) // IN +{ + SVGA3dCmdDefineShader *cmd; + + assert(bytecodeLen % 4 == 0); + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SHADER_DEFINE, sizeof *cmd + bytecodeLen, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->shid = shid; + cmd->type = type; + memcpy(&cmd[1], bytecode, bytecodeLen); + swc->commit(swc); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_DestroyShader -- + * + * Delete a shader that was created by SVGA3D_DefineShader. If + * the shader was the current vertex or pixel shader for its + * context, rendering results are undefined until a new shader is + * bound. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_DestroyShader(struct svga_winsys_context *swc, + uint32 shid, // IN + SVGA3dShaderType type) // IN +{ + SVGA3dCmdDestroyShader *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SHADER_DESTROY, sizeof *cmd, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->shid = shid; + cmd->type = type; + swc->commit(swc); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_SetShaderConst -- + * + * Set the value of a shader constant. + * + * Shader constants are analogous to uniform variables in GLSL, + * except that they belong to the render context rather than to + * an individual shader. + * + * Constants may have one of three types: A 4-vector of floats, + * a 4-vector of integers, or a single boolean flag. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_SetShaderConst(struct svga_winsys_context *swc, + uint32 reg, // IN + SVGA3dShaderType type, // IN + SVGA3dShaderConstType ctype, // IN + const void *value) // IN +{ + SVGA3dCmdSetShaderConst *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SET_SHADER_CONST, sizeof *cmd, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->reg = reg; + cmd->type = type; + cmd->ctype = ctype; + + switch (ctype) { + + case SVGA3D_CONST_TYPE_FLOAT: + case SVGA3D_CONST_TYPE_INT: + memcpy(&cmd->values, value, sizeof cmd->values); + break; + + case SVGA3D_CONST_TYPE_BOOL: + memset(&cmd->values, 0, sizeof cmd->values); + cmd->values[0] = *(uint32*)value; + break; + + default: + assert(0); + break; + + } + swc->commit(swc); + + return PIPE_OK; +} + + + + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_SetShader -- + * + * Switch active shaders. This binds a new vertex or pixel shader + * to the specified context. + * + * A shader ID of SVGA3D_INVALID_ID unbinds any shader, switching + * back to the fixed function vertex or pixel pipeline. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_SetShader(struct svga_winsys_context *swc, + SVGA3dShaderType type, // IN + uint32 shid) // IN +{ + SVGA3dCmdSetShader *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SET_SHADER, sizeof *cmd, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->type = type; + cmd->shid = shid; + swc->commit(swc); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_BeginClear -- + * + * Begin a CLEAR command. This reserves space for it in the FIFO, + * and returns a pointer to the command's rectangle array. This + * function must be paired with SVGA_FIFOCommitAll(). + * + * Clear is a rendering operation which fills a list of + * rectangles with constant values on all render target types + * indicated by 'flags'. + * + * Clear is not affected by clipping, depth test, or other + * render state which affects the fragment pipeline. + * + * Results: + * None. + * + * Side effects: + * May write to attached render target surfaces. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_BeginClear(struct svga_winsys_context *swc, + SVGA3dClearFlag flags, // IN + uint32 color, // IN + float depth, // IN + uint32 stencil, // IN + SVGA3dRect **rects, // OUT + uint32 numRects) // IN +{ + SVGA3dCmdClear *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_CLEAR, + sizeof *cmd + sizeof **rects * numRects, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->clearFlag = flags; + cmd->color = color; + cmd->depth = depth; + cmd->stencil = stencil; + *rects = (SVGA3dRect*) &cmd[1]; + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_ClearRect -- + * + * This is a simplified version of SVGA3D_BeginClear(). + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_ClearRect(struct svga_winsys_context *swc, + SVGA3dClearFlag flags, // IN + uint32 color, // IN + float depth, // IN + uint32 stencil, // IN + uint32 x, // IN + uint32 y, // IN + uint32 w, // IN + uint32 h) // IN +{ + SVGA3dRect *rect; + enum pipe_error ret; + + ret = SVGA3D_BeginClear(swc, flags, color, depth, stencil, &rect, 1); + if(ret != PIPE_OK) + return PIPE_ERROR_OUT_OF_MEMORY; + + memset(rect, 0, sizeof *rect); + rect->x = x; + rect->y = y; + rect->w = w; + rect->h = h; + swc->commit(swc); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_BeginDrawPrimitives -- + * + * Begin a DRAW_PRIMITIVES command. This reserves space for it in + * the FIFO, and returns a pointer to the command's arrays. + * This function must be paired with SVGA_FIFOCommitAll(). + * + * Drawing commands consist of two variable-length arrays: + * SVGA3dVertexDecl elements declare a set of vertex buffers to + * use while rendering, and SVGA3dPrimitiveRange elements specify + * groups of primitives each with an optional index buffer. + * + * The decls and ranges arrays are initialized to zero. + * + * Results: + * None. + * + * Side effects: + * May write to attached render target surfaces. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_BeginDrawPrimitives(struct svga_winsys_context *swc, + SVGA3dVertexDecl **decls, // OUT + uint32 numVertexDecls, // IN + SVGA3dPrimitiveRange **ranges, // OUT + uint32 numRanges) // IN +{ + SVGA3dCmdDrawPrimitives *cmd; + SVGA3dVertexDecl *declArray; + SVGA3dPrimitiveRange *rangeArray; + uint32 declSize = sizeof **decls * numVertexDecls; + uint32 rangeSize = sizeof **ranges * numRanges; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_DRAW_PRIMITIVES, + sizeof *cmd + declSize + rangeSize, + numVertexDecls + numRanges); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->numVertexDecls = numVertexDecls; + cmd->numRanges = numRanges; + + declArray = (SVGA3dVertexDecl*) &cmd[1]; + rangeArray = (SVGA3dPrimitiveRange*) &declArray[numVertexDecls]; + + memset(declArray, 0, declSize); + memset(rangeArray, 0, rangeSize); + + *decls = declArray; + *ranges = rangeArray; + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_BeginSurfaceCopy -- + * + * Begin a SURFACE_COPY command. This reserves space for it in + * the FIFO, and returns a pointer to the command's arrays. This + * function must be paired with SVGA_FIFOCommitAll(). + * + * The box array is initialized with zeroes. + * + * Results: + * None. + * + * Side effects: + * Asynchronously copies a list of boxes from surface to surface. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_BeginSurfaceCopy(struct svga_winsys_context *swc, + struct pipe_surface *src, // IN + struct pipe_surface *dest, // IN + SVGA3dCopyBox **boxes, // OUT + uint32 numBoxes) // IN +{ + SVGA3dCmdSurfaceCopy *cmd; + uint32 boxesSize = sizeof **boxes * numBoxes; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SURFACE_COPY, sizeof *cmd + boxesSize, + 2); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + surface_to_surfaceid(swc, src, &cmd->src, PIPE_BUFFER_USAGE_GPU_READ); + surface_to_surfaceid(swc, dest, &cmd->dest, PIPE_BUFFER_USAGE_GPU_WRITE); + *boxes = (SVGA3dCopyBox*) &cmd[1]; + + memset(*boxes, 0, boxesSize); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_SurfaceStretchBlt -- + * + * Issue a SURFACE_STRETCHBLT command: an asynchronous + * surface-to-surface blit, with scaling. + * + * Results: + * None. + * + * Side effects: + * Asynchronously copies one box from surface to surface. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_SurfaceStretchBlt(struct svga_winsys_context *swc, + struct pipe_surface *src, // IN + struct pipe_surface *dest, // IN + SVGA3dBox *boxSrc, // IN + SVGA3dBox *boxDest, // IN + SVGA3dStretchBltMode mode) // IN +{ + SVGA3dCmdSurfaceStretchBlt *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SURFACE_STRETCHBLT, sizeof *cmd, + 2); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + surface_to_surfaceid(swc, src, &cmd->src, PIPE_BUFFER_USAGE_GPU_READ); + surface_to_surfaceid(swc, dest, &cmd->dest, PIPE_BUFFER_USAGE_GPU_WRITE); + cmd->boxSrc = *boxSrc; + cmd->boxDest = *boxDest; + cmd->mode = mode; + swc->commit(swc); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_SetViewport -- + * + * Set the current context's viewport rectangle. The viewport + * is clipped to the dimensions of the current render target, + * then all rendering is clipped to the viewport. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_SetViewport(struct svga_winsys_context *swc, + SVGA3dRect *rect) // IN +{ + SVGA3dCmdSetViewport *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SETVIEWPORT, sizeof *cmd, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->rect = *rect; + swc->commit(swc); + + return PIPE_OK; +} + + + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_SetScissorRect -- + * + * Set the current context's scissor rectangle. If scissor + * is enabled then all rendering is clipped to the scissor. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_SetScissorRect(struct svga_winsys_context *swc, + SVGA3dRect *rect) // IN +{ + SVGA3dCmdSetScissorRect *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SETSCISSORRECT, sizeof *cmd, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->rect = *rect; + swc->commit(swc); + + return PIPE_OK; +} + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_SetClipPlane -- + * + * Set one of the current context's clip planes. If the clip + * plane is enabled then all 3d rendering is clipped to against + * the plane. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error SVGA3D_SetClipPlane(struct svga_winsys_context *swc, + uint32 index, const float *plane) +{ + SVGA3dCmdSetClipPlane *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SETCLIPPLANE, sizeof *cmd, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->index = index; + cmd->plane[0] = plane[0]; + cmd->plane[1] = plane[1]; + cmd->plane[2] = plane[2]; + cmd->plane[3] = plane[3]; + swc->commit(swc); + + return PIPE_OK; +} + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_SetZRange -- + * + * Set the range of the depth buffer to use. 'min' and 'max' + * are values between 0.0 and 1.0. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_SetZRange(struct svga_winsys_context *swc, + float zMin, // IN + float zMax) // IN +{ + SVGA3dCmdSetZRange *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SETZRANGE, sizeof *cmd, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->zRange.min = zMin; + cmd->zRange.max = zMax; + swc->commit(swc); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_BeginSetTextureState -- + * + * Begin a SETTEXTURESTATE command. This reserves space for it in + * the FIFO, and returns a pointer to the command's texture state + * array. This function must be paired with SVGA_FIFOCommitAll(). + * + * This command sets rendering state which is per-texture-unit. + * + * XXX: Individual texture states need documentation. However, + * they are very similar to the texture states defined by + * Direct3D. The D3D documentation is a good starting point + * for understanding SVGA3D texture states. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_BeginSetTextureState(struct svga_winsys_context *swc, + SVGA3dTextureState **states, // OUT + uint32 numStates) // IN +{ + SVGA3dCmdSetTextureState *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SETTEXTURESTATE, + sizeof *cmd + sizeof **states * numStates, + numStates); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + *states = (SVGA3dTextureState*) &cmd[1]; + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_BeginSetRenderState -- + * + * Begin a SETRENDERSTATE command. This reserves space for it in + * the FIFO, and returns a pointer to the command's texture state + * array. This function must be paired with SVGA_FIFOCommitAll(). + * + * This command sets rendering state which is global to the context. + * + * XXX: Individual render states need documentation. However, + * they are very similar to the render states defined by + * Direct3D. The D3D documentation is a good starting point + * for understanding SVGA3D render states. + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_BeginSetRenderState(struct svga_winsys_context *swc, + SVGA3dRenderState **states, // OUT + uint32 numStates) // IN +{ + SVGA3dCmdSetRenderState *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SETRENDERSTATE, + sizeof *cmd + sizeof **states * numStates, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + *states = (SVGA3dRenderState*) &cmd[1]; + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_BeginQuery-- + * + * Issues a SVGA_3D_CMD_BEGIN_QUERY command. + * + * Results: + * None. + * + * Side effects: + * Commits space in the FIFO memory. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_BeginQuery(struct svga_winsys_context *swc, + SVGA3dQueryType type) // IN +{ + SVGA3dCmdBeginQuery *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_BEGIN_QUERY, + sizeof *cmd, + 0); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->type = type; + + swc->commit(swc); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_EndQuery-- + * + * Issues a SVGA_3D_CMD_END_QUERY command. + * + * Results: + * None. + * + * Side effects: + * Commits space in the FIFO memory. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_EndQuery(struct svga_winsys_context *swc, + SVGA3dQueryType type, // IN + struct svga_winsys_buffer *buffer) // IN/OUT +{ + SVGA3dCmdEndQuery *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_END_QUERY, + sizeof *cmd, + 1); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->type = type; + + swc->region_relocation(swc, &cmd->guestResult, buffer, 0, + PIPE_BUFFER_USAGE_GPU_WRITE); + + swc->commit(swc); + + return PIPE_OK; +} + + +/* + *---------------------------------------------------------------------- + * + * SVGA3D_WaitForQuery-- + * + * Issues a SVGA_3D_CMD_WAIT_FOR_QUERY command. This reserves space + * for it in the FIFO. This doesn't actually wait for the query to + * finish but instead tells the host to start a wait at the driver + * level. The caller can wait on the status variable in the + * guestPtr memory or send an insert fence instruction after this + * command and wait on the fence. + * + * Results: + * None. + * + * Side effects: + * Commits space in the FIFO memory. + * + *---------------------------------------------------------------------- + */ + +enum pipe_error +SVGA3D_WaitForQuery(struct svga_winsys_context *swc, + SVGA3dQueryType type, // IN + struct svga_winsys_buffer *buffer) // IN/OUT +{ + SVGA3dCmdWaitForQuery *cmd; + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_WAIT_FOR_QUERY, + sizeof *cmd, + 1); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + cmd->cid = swc->cid; + cmd->type = type; + + swc->region_relocation(swc, &cmd->guestResult, buffer, 0, + PIPE_BUFFER_USAGE_GPU_WRITE); + + swc->commit(swc); + + return PIPE_OK; +} diff --git a/src/gallium/drivers/svga/svga_cmd.h b/src/gallium/drivers/svga/svga_cmd.h new file mode 100644 index 0000000000..8041054769 --- /dev/null +++ b/src/gallium/drivers/svga/svga_cmd.h @@ -0,0 +1,235 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/* + * svga_cmd.h -- + * + * Command construction utility for the SVGA3D protocol used by + * the VMware SVGA device, based on the svgautil library. + */ + +#ifndef __SVGA3D_H__ +#define __SVGA3D_H__ + + +#include "svga_types.h" +#include "svga_reg.h" +#include "svga3d_reg.h" + +#include "pipe/p_defines.h" + + +struct pipe_buffer; +struct pipe_surface; +struct svga_transfer; +struct svga_winsys_context; +struct svga_winsys_buffer; +struct svga_winsys_surface; + + +/* + * SVGA Device Interoperability + */ + +void * +SVGA3D_FIFOReserve(struct svga_winsys_context *swc, uint32 cmd, uint32 cmdSize, uint32 nr_relocs); + +void +SVGA_FIFOCommitAll(struct svga_winsys_context *swc); + + +/* + * Context Management + */ + +enum pipe_error +SVGA3D_DefineContext(struct svga_winsys_context *swc); + +enum pipe_error +SVGA3D_DestroyContext(struct svga_winsys_context *swc); + + +/* + * Surface Management + */ + +enum pipe_error +SVGA3D_BeginDefineSurface(struct svga_winsys_context *swc, + struct svga_winsys_surface *sid, + SVGA3dSurfaceFlags flags, + SVGA3dSurfaceFormat format, + SVGA3dSurfaceFace **faces, + SVGA3dSize **mipSizes, + uint32 numMipSizes); +enum pipe_error +SVGA3D_DefineSurface2D(struct svga_winsys_context *swc, + struct svga_winsys_surface *sid, + uint32 width, + uint32 height, + SVGA3dSurfaceFormat format); +enum pipe_error +SVGA3D_DestroySurface(struct svga_winsys_context *swc, + struct svga_winsys_surface *sid); + + +/* + * Surface Operations + */ + +enum pipe_error +SVGA3D_SurfaceDMA(struct svga_winsys_context *swc, + struct svga_transfer *st, + SVGA3dTransferType transfer, + const SVGA3dCopyBox *boxes, + uint32 numBoxes); + +enum pipe_error +SVGA3D_BufferDMA(struct svga_winsys_context *swc, + struct svga_winsys_buffer *guest, + struct svga_winsys_surface *host, + SVGA3dTransferType transfer, + uint32 size, + uint32 offset, + SVGA3dSurfaceDMAFlags flags); + +/* + * Drawing Operations + */ + + +enum pipe_error +SVGA3D_BeginClear(struct svga_winsys_context *swc, + SVGA3dClearFlag flags, + uint32 color, float depth, uint32 stencil, + SVGA3dRect **rects, uint32 numRects); + +enum pipe_error +SVGA3D_ClearRect(struct svga_winsys_context *swc, + SVGA3dClearFlag flags, uint32 color, float depth, + uint32 stencil, uint32 x, uint32 y, uint32 w, uint32 h); + +enum pipe_error +SVGA3D_BeginDrawPrimitives(struct svga_winsys_context *swc, + SVGA3dVertexDecl **decls, + uint32 numVertexDecls, + SVGA3dPrimitiveRange **ranges, + uint32 numRanges); + +/* + * Blits + */ + +enum pipe_error +SVGA3D_BeginSurfaceCopy(struct svga_winsys_context *swc, + struct pipe_surface *src, + struct pipe_surface *dest, + SVGA3dCopyBox **boxes, uint32 numBoxes); + + +enum pipe_error +SVGA3D_SurfaceStretchBlt(struct svga_winsys_context *swc, + struct pipe_surface *src, + struct pipe_surface *dest, + SVGA3dBox *boxSrc, SVGA3dBox *boxDest, + SVGA3dStretchBltMode mode); + +/* + * Shared FFP/Shader Render State + */ + +enum pipe_error +SVGA3D_SetRenderTarget(struct svga_winsys_context *swc, + SVGA3dRenderTargetType type, + struct pipe_surface *surface); + +enum pipe_error +SVGA3D_SetZRange(struct svga_winsys_context *swc, + float zMin, float zMax); + +enum pipe_error +SVGA3D_SetViewport(struct svga_winsys_context *swc, + SVGA3dRect *rect); + +enum pipe_error +SVGA3D_SetScissorRect(struct svga_winsys_context *swc, + SVGA3dRect *rect); + +enum pipe_error +SVGA3D_SetClipPlane(struct svga_winsys_context *swc, + uint32 index, const float *plane); + +enum pipe_error +SVGA3D_BeginSetTextureState(struct svga_winsys_context *swc, + SVGA3dTextureState **states, + uint32 numStates); + +enum pipe_error +SVGA3D_BeginSetRenderState(struct svga_winsys_context *swc, + SVGA3dRenderState **states, + uint32 numStates); + + +/* + * Shaders + */ + +enum pipe_error +SVGA3D_DefineShader(struct svga_winsys_context *swc, + uint32 shid, SVGA3dShaderType type, + const uint32 *bytecode, uint32 bytecodeLen); + +enum pipe_error +SVGA3D_DestroyShader(struct svga_winsys_context *swc, + uint32 shid, SVGA3dShaderType type); + +enum pipe_error +SVGA3D_SetShaderConst(struct svga_winsys_context *swc, + uint32 reg, SVGA3dShaderType type, + SVGA3dShaderConstType ctype, const void *value); + +enum pipe_error +SVGA3D_SetShader(struct svga_winsys_context *swc, + SVGA3dShaderType type, uint32 shid); + + +/* + * Queries + */ + +enum pipe_error +SVGA3D_BeginQuery(struct svga_winsys_context *swc, + SVGA3dQueryType type); + +enum pipe_error +SVGA3D_EndQuery(struct svga_winsys_context *swc, + SVGA3dQueryType type, + struct svga_winsys_buffer *buffer); + +enum pipe_error +SVGA3D_WaitForQuery(struct svga_winsys_context *swc, + SVGA3dQueryType type, + struct svga_winsys_buffer *buffer); + +#endif /* __SVGA3D_H__ */ diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c new file mode 100644 index 0000000000..73233957f3 --- /dev/null +++ b/src/gallium/drivers/svga/svga_context.c @@ -0,0 +1,269 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "svga_cmd.h" + +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "util/u_upload_mgr.h" + +#include "svga_context.h" +#include "svga_screen.h" +#include "svga_screen_texture.h" +#include "svga_screen_buffer.h" +#include "svga_winsys.h" +#include "svga_swtnl.h" +#include "svga_draw.h" +#include "svga_debug.h" +#include "svga_state.h" + + +static void svga_destroy( struct pipe_context *pipe ) +{ + struct svga_context *svga = svga_context( pipe ); + unsigned shader; + + svga_cleanup_framebuffer( svga ); + svga_cleanup_tss_binding( svga ); + + svga_hwtnl_destroy( svga->hwtnl ); + + svga_cleanup_vertex_state(svga); + + svga->swc->destroy(svga->swc); + + svga_destroy_swtnl( svga ); + + u_upload_destroy( svga->upload_vb ); + u_upload_destroy( svga->upload_ib ); + + for(shader = 0; shader < PIPE_SHADER_TYPES; ++shader) + pipe_buffer_reference( &svga->curr.cb[shader], NULL ); + + FREE( svga ); +} + +static unsigned int +svga_is_texture_referenced( struct pipe_context *pipe, + struct pipe_texture *texture, + unsigned face, unsigned level) +{ + struct svga_texture *tex = svga_texture(texture); + struct svga_screen *ss = svga_screen(pipe->screen); + + /** + * The screen does not cache texture writes. + */ + + if (!tex->handle || ss->sws->surface_is_flushed(ss->sws, tex->handle)) + return PIPE_UNREFERENCED; + + /** + * sws->surface_is_flushed() does not distinguish between read references + * and write references. So assume a reference is both. + */ + + return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; +} + +static unsigned int +svga_is_buffer_referenced( struct pipe_context *pipe, + struct pipe_buffer *buf) + +{ + struct svga_screen *ss = svga_screen(pipe->screen); + struct svga_buffer *sbuf = svga_buffer(buf); + + /** + * XXX: Check this. + * The screen may cache buffer writes, but when we map, we map out + * of those cached writes, so we don't need to set a + * PIPE_REFERENCED_FOR_WRITE flag for cached buffers. + */ + + if (!sbuf->handle || ss->sws->surface_is_flushed(ss->sws, sbuf->handle)) + return PIPE_UNREFERENCED; + + /** + * sws->surface_is_flushed() does not distinguish between read references + * and write references. So assume a reference is both, + * however, we make an exception for index- and vertex buffers, to avoid + * a flush in st_bufferobj_get_subdata, during display list replay. + */ + + if (sbuf->base.usage & (PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_INDEX)) + return PIPE_REFERENCED_FOR_READ; + + return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; +} + + +struct pipe_context *svga_context_create( struct pipe_screen *screen ) +{ + struct svga_screen *svgascreen = svga_screen(screen); + struct svga_context *svga = NULL; + enum pipe_error ret; + + svga = CALLOC_STRUCT(svga_context); + if (svga == NULL) + goto error1; + + svga->pipe.winsys = screen->winsys; + svga->pipe.screen = screen; + svga->pipe.destroy = svga_destroy; + svga->pipe.clear = svga_clear; + + svga->pipe.is_texture_referenced = svga_is_texture_referenced; + svga->pipe.is_buffer_referenced = svga_is_buffer_referenced; + + svga->swc = svgascreen->sws->context_create(svgascreen->sws); + if(!svga->swc) + goto error2; + + svga_init_blend_functions(svga); + svga_init_blit_functions(svga); + svga_init_depth_stencil_functions(svga); + svga_init_draw_functions(svga); + svga_init_flush_functions(svga); + svga_init_misc_functions(svga); + svga_init_rasterizer_functions(svga); + svga_init_sampler_functions(svga); + svga_init_fs_functions(svga); + svga_init_vs_functions(svga); + svga_init_vertex_functions(svga); + svga_init_constbuffer_functions(svga); + svga_init_query_functions(svga); + + /* debug */ + svga->debug.no_swtnl = debug_get_bool_option("SVGA_NO_SWTNL", FALSE); + svga->debug.force_swtnl = debug_get_bool_option("SVGA_FORCE_SWTNL", FALSE); + svga->debug.use_min_mipmap = debug_get_bool_option("SVGA_USE_MIN_MIPMAP", FALSE); + svga->debug.disable_shader = debug_get_num_option("SVGA_DISABLE_SHADER", ~0); + + if (!svga_init_swtnl(svga)) + goto error3; + + svga->upload_ib = u_upload_create( svga->pipe.screen, + 32 * 1024, + 16, + PIPE_BUFFER_USAGE_INDEX ); + if (svga->upload_ib == NULL) + goto error4; + + svga->upload_vb = u_upload_create( svga->pipe.screen, + 128 * 1024, + 16, + PIPE_BUFFER_USAGE_VERTEX ); + if (svga->upload_vb == NULL) + goto error5; + + svga->hwtnl = svga_hwtnl_create( svga, + svga->upload_ib, + svga->swc ); + if (svga->hwtnl == NULL) + goto error6; + + + ret = svga_emit_initial_state( svga ); + if (ret) + goto error7; + + /* Avoid shortcircuiting state with initial value of zero. + */ + memset(&svga->state.hw_clear, 0xcd, sizeof(svga->state.hw_clear)); + memset(&svga->state.hw_clear.framebuffer, 0x0, + sizeof(svga->state.hw_clear.framebuffer)); + + memset(&svga->state.hw_draw, 0xcd, sizeof(svga->state.hw_draw)); + memset(&svga->state.hw_draw.views, 0x0, sizeof(svga->state.hw_draw.views)); + svga->state.hw_draw.num_views = 0; + + svga->dirty = ~0; + svga->state.white_fs_id = SVGA3D_INVALID_ID; + + LIST_INITHEAD(&svga->dirty_buffers); + + return &svga->pipe; + +error7: + svga_hwtnl_destroy( svga->hwtnl ); +error6: + u_upload_destroy( svga->upload_vb ); +error5: + u_upload_destroy( svga->upload_ib ); +error4: + svga_destroy_swtnl(svga); +error3: + svga->swc->destroy(svga->swc); +error2: + FREE(svga); +error1: + return NULL; +} + + +void svga_context_flush( struct svga_context *svga, + struct pipe_fence_handle **pfence ) +{ + struct svga_screen *svgascreen = svga_screen(svga->pipe.screen); + + /* Unmap upload manager buffers: + */ + u_upload_flush(svga->upload_vb); + u_upload_flush(svga->upload_ib); + + /* Flush screen, to ensure that texture dma uploads are processed + * before submitting commands. + */ + svga_screen_flush(svgascreen, NULL); + + svga_context_flush_buffers(svga); + + /* Flush pending commands to hardware: + */ + svga->swc->flush(svga->swc, pfence); + + if (SVGA_DEBUG & DEBUG_SYNC) { + if (pfence && *pfence) + svga->pipe.screen->fence_finish( svga->pipe.screen, *pfence, 0); + } +} + + +void svga_hwtnl_flush_retry( struct svga_context *svga ) +{ + enum pipe_error ret = PIPE_OK; + + ret = svga_hwtnl_flush( svga->hwtnl ); + if (ret == PIPE_ERROR_OUT_OF_MEMORY) { + svga_context_flush( svga, NULL ); + ret = svga_hwtnl_flush( svga->hwtnl ); + } + + assert(ret == 0); +} + diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h new file mode 100644 index 0000000000..9a3e92fd8d --- /dev/null +++ b/src/gallium/drivers/svga/svga_context.h @@ -0,0 +1,443 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_CONTEXT_H +#define SVGA_CONTEXT_H + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "util/u_double_list.h" + +#include "tgsi/tgsi_scan.h" + + +#define SVGA_TEX_UNITS 8 + +struct draw_vertex_shader; +struct svga_shader_result; +struct SVGACmdMemory; +struct u_upload_mgr; + + +struct svga_shader +{ + const struct tgsi_token *tokens; + + struct tgsi_shader_info info; + + struct svga_shader_result *results; + + unsigned id; + + boolean use_sm30; +}; + +struct svga_fragment_shader +{ + struct svga_shader base; +}; + +struct svga_vertex_shader +{ + struct svga_shader base; + + struct draw_vertex_shader *draw_shader; +}; + + +struct svga_cache_context; +struct svga_tracked_state; + +struct svga_blend_state { + + boolean need_white_fragments; + + /* Should be per-render-target: + */ + struct { + uint8_t writemask; + + boolean blend_enable; + uint8_t srcblend; + uint8_t dstblend; + uint8_t blendeq; + + boolean separate_alpha_blend_enable; + uint8_t srcblend_alpha; + uint8_t dstblend_alpha; + uint8_t blendeq_alpha; + + } rt[1]; +}; + +struct svga_depth_stencil_state { + unsigned zfunc:8; + unsigned zenable:1; + unsigned zwriteenable:1; + + unsigned alphatestenable:1; + unsigned alphafunc:8; + + struct { + unsigned enabled:1; + unsigned func:8; + unsigned fail:8; + unsigned zfail:8; + unsigned pass:8; + } stencil[2]; + + /* SVGA3D has one ref/mask/writemask triple shared between front & + * back face stencil. We really need two: + */ + unsigned stencil_ref:8; + unsigned stencil_mask:8; + unsigned stencil_writemask:8; + + float alpharef; +}; + +#define SVGA_UNFILLED_DISABLE 0 +#define SVGA_UNFILLED_LINE 1 +#define SVGA_UNFILLED_POINT 2 + +#define SVGA_PIPELINE_FLAG_POINTS (1<svga = svga; + hwtnl->upload_ib = upload_ib; + + hwtnl->cmd.swc = swc; + + return hwtnl; + +fail: + return NULL; +} + +void svga_hwtnl_destroy( struct svga_hwtnl *hwtnl ) +{ + int i, j; + + for (i = 0; i < PIPE_PRIM_MAX; i++) { + for (j = 0; j < IDX_CACHE_MAX; j++) { + pipe_buffer_reference( &hwtnl->index_cache[i][j].buffer, + NULL ); + } + } + + for (i = 0; i < hwtnl->cmd.vdecl_count; i++) + pipe_buffer_reference(&hwtnl->cmd.vdecl_vb[i], NULL); + + for (i = 0; i < hwtnl->cmd.prim_count; i++) + pipe_buffer_reference(&hwtnl->cmd.prim_ib[i], NULL); + + + FREE(hwtnl); +} + + +void svga_hwtnl_set_flatshade( struct svga_hwtnl *hwtnl, + boolean flatshade, + boolean flatshade_first ) +{ + hwtnl->hw_pv = PV_FIRST; + hwtnl->api_pv = (flatshade && !flatshade_first) ? PV_LAST : PV_FIRST; +} + +void svga_hwtnl_set_unfilled( struct svga_hwtnl *hwtnl, + unsigned mode ) +{ + hwtnl->api_fillmode = mode; +} + +void svga_hwtnl_reset_vdecl( struct svga_hwtnl *hwtnl, + unsigned count ) +{ + unsigned i; + + assert(hwtnl->cmd.prim_count == 0); + + for (i = count; i < hwtnl->cmd.vdecl_count; i++) { + pipe_buffer_reference(&hwtnl->cmd.vdecl_vb[i], + NULL); + } + + hwtnl->cmd.vdecl_count = count; +} + + +void svga_hwtnl_vdecl( struct svga_hwtnl *hwtnl, + unsigned i, + const SVGA3dVertexDecl *decl, + struct pipe_buffer *vb) +{ + assert(hwtnl->cmd.prim_count == 0); + + assert( i < hwtnl->cmd.vdecl_count ); + + hwtnl->cmd.vdecl[i] = *decl; + + pipe_buffer_reference(&hwtnl->cmd.vdecl_vb[i], + vb); +} + + + +enum pipe_error +svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) +{ + struct svga_winsys_context *swc = hwtnl->cmd.swc; + struct svga_context *svga = hwtnl->svga; + enum pipe_error ret; + + if (hwtnl->cmd.prim_count) { + struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX]; + struct svga_winsys_surface *ib_handle[QSZ]; + struct svga_winsys_surface *handle; + SVGA3dVertexDecl *vdecl; + SVGA3dPrimitiveRange *prim; + unsigned i; + + for (i = 0; i < hwtnl->cmd.vdecl_count; i++) { + handle = svga_buffer_handle(svga, hwtnl->cmd.vdecl_vb[i]); + if (handle == NULL) + return PIPE_ERROR_OUT_OF_MEMORY; + + vb_handle[i] = handle; + } + + for (i = 0; i < hwtnl->cmd.prim_count; i++) { + if (hwtnl->cmd.prim_ib[i]) { + handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]); + if (handle == NULL) + return PIPE_ERROR_OUT_OF_MEMORY; + } + else + handle = NULL; + + ib_handle[i] = handle; + } + + ret = SVGA3D_BeginDrawPrimitives(swc, + &vdecl, + hwtnl->cmd.vdecl_count, + &prim, + hwtnl->cmd.prim_count); + if (ret != PIPE_OK) + return ret; + + + memcpy( vdecl, + hwtnl->cmd.vdecl, + hwtnl->cmd.vdecl_count * sizeof hwtnl->cmd.vdecl[0]); + + for (i = 0; i < hwtnl->cmd.vdecl_count; i++) { + /* Given rangeHint is considered to be relative to indexBias, and + * indexBias varies per primitive, we cannot accurately supply an + * rangeHint when emitting more than one primitive per draw command. + */ + if (hwtnl->cmd.prim_count == 1) { + vdecl[i].rangeHint.first = hwtnl->cmd.min_index[0]; + vdecl[i].rangeHint.last = hwtnl->cmd.max_index[0] + 1; + } + else { + vdecl[i].rangeHint.first = 0; + vdecl[i].rangeHint.last = 0; + } + + swc->surface_relocation(swc, + &vdecl[i].array.surfaceId, + vb_handle[i], + PIPE_BUFFER_USAGE_GPU_READ); + } + + memcpy( prim, + hwtnl->cmd.prim, + hwtnl->cmd.prim_count * sizeof hwtnl->cmd.prim[0]); + + for (i = 0; i < hwtnl->cmd.prim_count; i++) { + swc->surface_relocation(swc, + &prim[i].indexArray.surfaceId, + ib_handle[i], + PIPE_BUFFER_USAGE_GPU_READ); + pipe_buffer_reference(&hwtnl->cmd.prim_ib[i], NULL); + } + + SVGA_FIFOCommitAll( swc ); + hwtnl->cmd.prim_count = 0; + } + + return PIPE_OK; +} + + + + + +/*********************************************************************** + * Internal functions: + */ + +enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl, + const SVGA3dPrimitiveRange *range, + unsigned min_index, + unsigned max_index, + struct pipe_buffer *ib ) +{ + int ret = PIPE_OK; + +#ifdef DEBUG + { + unsigned i; + for (i = 0; i < hwtnl->cmd.vdecl_count; i++) { + struct pipe_buffer *vb = hwtnl->cmd.vdecl_vb[i]; + unsigned size = vb ? vb->size : 0; + unsigned offset = hwtnl->cmd.vdecl[i].array.offset; + unsigned stride = hwtnl->cmd.vdecl[i].array.stride; + unsigned index_bias = range->indexBias; + unsigned width; + + assert(vb); + assert(size); + assert(offset < size); + assert(index_bias >= 0); + assert(min_index <= max_index); + assert(offset + index_bias*stride < size); + assert(offset + (index_bias + min_index)*stride < size); + + switch (hwtnl->cmd.vdecl[i].identity.type) { + case SVGA3D_DECLTYPE_FLOAT1: + width = 4; + break; + case SVGA3D_DECLTYPE_FLOAT2: + width = 4*2; + break; + case SVGA3D_DECLTYPE_FLOAT3: + width = 4*3; + break; + case SVGA3D_DECLTYPE_FLOAT4: + width = 4*4; + break; + case SVGA3D_DECLTYPE_D3DCOLOR: + width = 4; + break; + case SVGA3D_DECLTYPE_UBYTE4: + width = 1*4; + break; + case SVGA3D_DECLTYPE_SHORT2: + width = 2*2; + break; + case SVGA3D_DECLTYPE_SHORT4: + width = 2*4; + break; + case SVGA3D_DECLTYPE_UBYTE4N: + width = 1*4; + break; + case SVGA3D_DECLTYPE_SHORT2N: + width = 2*2; + break; + case SVGA3D_DECLTYPE_SHORT4N: + width = 2*4; + break; + case SVGA3D_DECLTYPE_USHORT2N: + width = 2*2; + break; + case SVGA3D_DECLTYPE_USHORT4N: + width = 2*4; + break; + case SVGA3D_DECLTYPE_UDEC3: + width = 4; + break; + case SVGA3D_DECLTYPE_DEC3N: + width = 4; + break; + case SVGA3D_DECLTYPE_FLOAT16_2: + width = 2*2; + break; + case SVGA3D_DECLTYPE_FLOAT16_4: + width = 2*4; + break; + default: + assert(0); + width = 0; + break; + } + + assert(!stride || width <= stride); + assert(offset + (index_bias + max_index)*stride + width <= size); + } + + assert(range->indexWidth == range->indexArray.stride); + + if(ib) { + unsigned size = ib->size; + unsigned offset = range->indexArray.offset; + unsigned stride = range->indexArray.stride; + unsigned count; + + assert(size); + assert(offset < size); + assert(stride); + + switch (range->primType) { + case SVGA3D_PRIMITIVE_POINTLIST: + count = range->primitiveCount; + break; + case SVGA3D_PRIMITIVE_LINELIST: + count = range->primitiveCount * 2; + break; + case SVGA3D_PRIMITIVE_LINESTRIP: + count = range->primitiveCount + 1; + break; + case SVGA3D_PRIMITIVE_TRIANGLELIST: + count = range->primitiveCount * 3; + break; + case SVGA3D_PRIMITIVE_TRIANGLESTRIP: + count = range->primitiveCount + 2; + break; + case SVGA3D_PRIMITIVE_TRIANGLEFAN: + count = range->primitiveCount + 2; + break; + default: + assert(0); + count = 0; + break; + } + + assert(offset + count*stride <= size); + } + } +#endif + + if (hwtnl->cmd.prim_count+1 >= QSZ) { + ret = svga_hwtnl_flush( hwtnl ); + if (ret != PIPE_OK) + return ret; + } + + /* min/max indices are relative to bias */ + hwtnl->cmd.min_index[hwtnl->cmd.prim_count] = min_index; + hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index; + + hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range; + + pipe_buffer_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib); + hwtnl->cmd.prim_count++; + + return ret; +} diff --git a/src/gallium/drivers/svga/svga_draw.h b/src/gallium/drivers/svga/svga_draw.h new file mode 100644 index 0000000000..14553b17b5 --- /dev/null +++ b/src/gallium/drivers/svga/svga_draw.h @@ -0,0 +1,83 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_DRAW_H +#define SVGA_DRAW_H + +#include "pipe/p_compiler.h" + +#include "svga_hw_reg.h" + +struct svga_hwtnl; +struct svga_winsys_context; +struct svga_screen; +struct svga_context; +struct pipe_buffer; +struct u_upload_mgr; + +struct svga_hwtnl *svga_hwtnl_create( struct svga_context *svga, + struct u_upload_mgr *upload_ib, + struct svga_winsys_context *swc ); + +void svga_hwtnl_destroy( struct svga_hwtnl *hwtnl ); + +void svga_hwtnl_set_flatshade( struct svga_hwtnl *hwtnl, + boolean flatshade, + boolean flatshade_first ); + +void svga_hwtnl_set_unfilled( struct svga_hwtnl *hwtnl, + unsigned mode ); + +void svga_hwtnl_vdecl( struct svga_hwtnl *hwtnl, + unsigned i, + const SVGA3dVertexDecl *decl, + struct pipe_buffer *vb); + +void svga_hwtnl_reset_vdecl( struct svga_hwtnl *hwtnl, + unsigned count ); + + +enum pipe_error +svga_hwtnl_draw_arrays( struct svga_hwtnl *hwtnl, + unsigned prim, + unsigned start, + unsigned count); + +enum pipe_error +svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl, + struct pipe_buffer *indexBuffer, + unsigned index_size, + unsigned min_index, + unsigned max_index, + unsigned prim, + unsigned start, + unsigned count, + unsigned bias ); + +enum pipe_error +svga_hwtnl_flush( struct svga_hwtnl *hwtnl ); + + +#endif /* SVGA_DRAW_H_ */ diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c new file mode 100644 index 0000000000..75492dffca --- /dev/null +++ b/src/gallium/drivers/svga/svga_draw_arrays.c @@ -0,0 +1,297 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "svga_cmd.h" + +#include "pipe/p_inlines.h" +#include "util/u_prim.h" +#include "indices/u_indices.h" + +#include "svga_hw_reg.h" +#include "svga_draw.h" +#include "svga_draw_private.h" +#include "svga_context.h" + + +#define DBG 0 + + + + +static enum pipe_error generate_indices( struct svga_hwtnl *hwtnl, + unsigned nr, + unsigned index_size, + u_generate_func generate, + struct pipe_buffer **out_buf ) +{ + struct pipe_screen *screen = hwtnl->svga->pipe.screen; + unsigned size = index_size * nr; + struct pipe_buffer *dst = NULL; + void *dst_map = NULL; + + dst = screen->buffer_create( screen, 32, + PIPE_BUFFER_USAGE_INDEX | + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ, + size ); + if (dst == NULL) + goto fail; + + dst_map = pipe_buffer_map( screen, dst, PIPE_BUFFER_USAGE_CPU_WRITE ); + if (dst_map == NULL) + goto fail; + + generate( nr, + dst_map ); + + pipe_buffer_unmap( screen, dst ); + + *out_buf = dst; + return PIPE_OK; + +fail: + if (dst_map) + screen->buffer_unmap( screen, dst ); + + if (dst) + screen->buffer_destroy( dst ); + + return PIPE_ERROR_OUT_OF_MEMORY; +} + +static boolean compare( unsigned cached_nr, + unsigned nr, + unsigned type ) +{ + if (type == U_GENERATE_REUSABLE) + return cached_nr >= nr; + else + return cached_nr == nr; +} + +static enum pipe_error retrieve_or_generate_indices( struct svga_hwtnl *hwtnl, + unsigned prim, + unsigned gen_type, + unsigned gen_nr, + unsigned gen_size, + u_generate_func generate, + struct pipe_buffer **out_buf ) +{ + enum pipe_error ret = PIPE_OK; + int i; + + for (i = 0; i < IDX_CACHE_MAX; i++) { + if (hwtnl->index_cache[prim][i].buffer != NULL && + hwtnl->index_cache[prim][i].generate == generate) + { + if (compare(hwtnl->index_cache[prim][i].gen_nr, gen_nr, gen_type)) + { + pipe_buffer_reference( out_buf, + hwtnl->index_cache[prim][i].buffer ); + + if (DBG) + debug_printf("%s retrieve %d/%d\n", __FUNCTION__, i, gen_nr); + + return PIPE_OK; + } + else if (gen_type == U_GENERATE_REUSABLE) + { + pipe_buffer_reference( &hwtnl->index_cache[prim][i].buffer, + NULL ); + + if (DBG) + debug_printf("%s discard %d/%d\n", __FUNCTION__, + i, hwtnl->index_cache[prim][i].gen_nr); + + break; + } + } + } + + if (i == IDX_CACHE_MAX) + { + unsigned smallest = 0; + unsigned smallest_size = ~0; + + for (i = 0; i < IDX_CACHE_MAX && smallest_size; i++) { + if (hwtnl->index_cache[prim][i].buffer == NULL) + { + smallest = i; + smallest_size = 0; + } + else if (hwtnl->index_cache[prim][i].gen_nr < smallest) + { + smallest = i; + smallest_size = hwtnl->index_cache[prim][i].gen_nr; + } + } + + assert (smallest != IDX_CACHE_MAX); + + pipe_buffer_reference( &hwtnl->index_cache[prim][smallest].buffer, + NULL ); + + if (DBG) + debug_printf("%s discard smallest %d/%d\n", __FUNCTION__, + smallest, smallest_size); + + i = smallest; + } + + + ret = generate_indices( hwtnl, + gen_nr, + gen_size, + generate, + out_buf ); + if (ret != PIPE_OK) + return ret; + + + hwtnl->index_cache[prim][i].generate = generate; + hwtnl->index_cache[prim][i].gen_nr = gen_nr; + pipe_buffer_reference( &hwtnl->index_cache[prim][i].buffer, + *out_buf ); + + if (DBG) + debug_printf("%s cache %d/%d\n", __FUNCTION__, + i, hwtnl->index_cache[prim][i].gen_nr); + + return PIPE_OK; +} + + + +static enum pipe_error +simple_draw_arrays( struct svga_hwtnl *hwtnl, + unsigned prim, unsigned start, unsigned count ) +{ + SVGA3dPrimitiveRange range; + unsigned hw_prim; + unsigned hw_count; + + hw_prim = svga_translate_prim(prim, count, &hw_count); + if (hw_count == 0) + return PIPE_ERROR_BAD_INPUT; + + range.primType = hw_prim; + range.primitiveCount = hw_count; + range.indexArray.surfaceId = SVGA3D_INVALID_ID; + range.indexArray.offset = 0; + range.indexArray.stride = 0; + range.indexWidth = 0; + range.indexBias = start; + + /* Min/max index should be calculated prior to applying bias, so we + * end up with min_index = 0, max_index = count - 1 and everybody + * looking at those numbers knows to adjust them by + * range.indexBias. + */ + return svga_hwtnl_prim( hwtnl, &range, 0, count - 1, NULL ); +} + + + + + + + + + + +enum pipe_error +svga_hwtnl_draw_arrays( struct svga_hwtnl *hwtnl, + unsigned prim, + unsigned start, + unsigned count) +{ + unsigned gen_prim, gen_size, gen_nr, gen_type; + u_generate_func gen_func; + enum pipe_error ret = PIPE_OK; + + if (hwtnl->api_fillmode != PIPE_POLYGON_MODE_FILL && + prim >= PIPE_PRIM_TRIANGLES) + { + gen_type = u_unfilled_generator( prim, + start, + count, + hwtnl->api_fillmode, + &gen_prim, + &gen_size, + &gen_nr, + &gen_func ); + } + else { + gen_type = u_index_generator( svga_hw_prims, + prim, + start, + count, + hwtnl->api_pv, + hwtnl->hw_pv, + &gen_prim, + &gen_size, + &gen_nr, + &gen_func ); + } + + if (gen_type == U_GENERATE_LINEAR) { + return simple_draw_arrays( hwtnl, gen_prim, start, count ); + } + else { + struct pipe_buffer *gen_buf = NULL; + + /* Need to draw as indexed primitive. + * Potentially need to run the gen func to build an index buffer. + */ + ret = retrieve_or_generate_indices( hwtnl, + prim, + gen_type, + gen_nr, + gen_size, + gen_func, + &gen_buf ); + if (ret) + goto done; + + ret = svga_hwtnl_simple_draw_range_elements( hwtnl, + gen_buf, + gen_size, + 0, + count - 1, + gen_prim, + 0, + gen_nr, + start ); + if (ret) + goto done; + + done: + if (gen_buf) + pipe_buffer_reference( &gen_buf, NULL ); + + return ret; + } +} + diff --git a/src/gallium/drivers/svga/svga_draw_elements.c b/src/gallium/drivers/svga/svga_draw_elements.c new file mode 100644 index 0000000000..167d817831 --- /dev/null +++ b/src/gallium/drivers/svga/svga_draw_elements.c @@ -0,0 +1,255 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "util/u_prim.h" +#include "util/u_upload_mgr.h" +#include "indices/u_indices.h" + +#include "svga_cmd.h" +#include "svga_draw.h" +#include "svga_draw_private.h" +#include "svga_screen_buffer.h" +#include "svga_winsys.h" +#include "svga_context.h" + +#include "svga_hw_reg.h" + + +static enum pipe_error +translate_indices( struct svga_hwtnl *hwtnl, + struct pipe_buffer *src, + unsigned offset, + unsigned nr, + unsigned index_size, + u_translate_func translate, + struct pipe_buffer **out_buf ) +{ + struct pipe_screen *screen = hwtnl->svga->pipe.screen; + unsigned size = index_size * nr; + const void *src_map = NULL; + struct pipe_buffer *dst = NULL; + void *dst_map = NULL; + + dst = screen->buffer_create( screen, 32, + PIPE_BUFFER_USAGE_INDEX | + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ, + size ); + if (dst == NULL) + goto fail; + + src_map = pipe_buffer_map( screen, src, PIPE_BUFFER_USAGE_CPU_READ ); + if (src_map == NULL) + goto fail; + + dst_map = pipe_buffer_map( screen, dst, PIPE_BUFFER_USAGE_CPU_WRITE ); + if (dst_map == NULL) + goto fail; + + translate( (const char *)src_map + offset, + nr, + dst_map ); + + pipe_buffer_unmap( screen, src ); + pipe_buffer_unmap( screen, dst ); + + *out_buf = dst; + return PIPE_OK; + +fail: + if (src_map) + screen->buffer_unmap( screen, src ); + + if (dst_map) + screen->buffer_unmap( screen, dst ); + + if (dst) + screen->buffer_destroy( dst ); + + return PIPE_ERROR_OUT_OF_MEMORY; +} + + + + + +enum pipe_error +svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl, + struct pipe_buffer *index_buffer, + unsigned index_size, + unsigned min_index, + unsigned max_index, + unsigned prim, + unsigned start, + unsigned count, + unsigned bias ) +{ + struct pipe_buffer *upload_buffer = NULL; + SVGA3dPrimitiveRange range; + unsigned hw_prim; + unsigned hw_count; + unsigned index_offset = start * index_size; + int ret = PIPE_OK; + + hw_prim = svga_translate_prim(prim, count, &hw_count); + if (hw_count == 0) + goto done; + + if (index_buffer && + svga_buffer_is_user_buffer(index_buffer)) + { + assert( index_buffer->size >= index_offset + count * index_size ); + + ret = u_upload_buffer( hwtnl->upload_ib, + index_offset, + count * index_size, + index_buffer, + &index_offset, + &upload_buffer ); + if (ret) + goto done; + + /* Don't need to worry about refcounting index_buffer as this is + * just a stack variable without a counted reference of its own. + * The caller holds the reference. + */ + index_buffer = upload_buffer; + } + + range.primType = hw_prim; + range.primitiveCount = hw_count; + range.indexArray.offset = index_offset; + range.indexArray.stride = index_size; + range.indexWidth = index_size; + range.indexBias = bias; + + ret = svga_hwtnl_prim( hwtnl, &range, min_index, max_index, index_buffer ); + if (ret) + goto done; + +done: + if (upload_buffer) + pipe_buffer_reference( &upload_buffer, NULL ); + + return ret; +} + + + + +enum pipe_error +svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl, + struct pipe_buffer *index_buffer, + unsigned index_size, + unsigned min_index, + unsigned max_index, + unsigned prim, unsigned start, unsigned count, + unsigned bias) +{ + unsigned gen_prim, gen_size, gen_nr, gen_type; + u_translate_func gen_func; + enum pipe_error ret = PIPE_OK; + + if (hwtnl->api_fillmode != PIPE_POLYGON_MODE_FILL && + prim >= PIPE_PRIM_TRIANGLES) + { + gen_type = u_unfilled_translator( prim, + index_size, + count, + hwtnl->api_fillmode, + &gen_prim, + &gen_size, + &gen_nr, + &gen_func ); + } + else + { + gen_type = u_index_translator( svga_hw_prims, + prim, + index_size, + count, + hwtnl->api_pv, + hwtnl->hw_pv, + &gen_prim, + &gen_size, + &gen_nr, + &gen_func ); + } + + + if (gen_type == U_TRANSLATE_MEMCPY) { + /* No need for translation, just pass through to hardware: + */ + return svga_hwtnl_simple_draw_range_elements( hwtnl, index_buffer, + index_size, + min_index, + max_index, + gen_prim, start, count, bias ); + } + else { + struct pipe_buffer *gen_buf = NULL; + + /* Need to allocate a new index buffer and run the translate + * func to populate it. Could potentially cache this translated + * index buffer with the original to avoid future + * re-translations. Not much point if we're just accelerating + * GL though, as index buffers are typically used only once + * there. + */ + ret = translate_indices( hwtnl, + index_buffer, + start * index_size, + gen_nr, + gen_size, + gen_func, + &gen_buf ); + if (ret) + goto done; + + ret = svga_hwtnl_simple_draw_range_elements( hwtnl, + gen_buf, + gen_size, + min_index, + max_index, + gen_prim, + 0, + gen_nr, + bias ); + if (ret) + goto done; + + done: + if (gen_buf) + pipe_buffer_reference( &gen_buf, NULL ); + + return ret; + } +} + + + + + diff --git a/src/gallium/drivers/svga/svga_draw_private.h b/src/gallium/drivers/svga/svga_draw_private.h new file mode 100644 index 0000000000..9aa40e1664 --- /dev/null +++ b/src/gallium/drivers/svga/svga_draw_private.h @@ -0,0 +1,158 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_DRAW_H_ +#define SVGA_DRAW_H_ + +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" +#include "indices/u_indices.h" +#include "svga_hw_reg.h" +#include "svga3d_shaderdefs.h" + +struct svga_context; +struct u_upload_mgr; + +/* Should include polygon? + */ +static const unsigned svga_hw_prims = + ((1 << PIPE_PRIM_POINTS) | + (1 << PIPE_PRIM_LINES) | + (1 << PIPE_PRIM_LINE_STRIP) | + (1 << PIPE_PRIM_TRIANGLES) | + (1 << PIPE_PRIM_TRIANGLE_STRIP) | + (1 << PIPE_PRIM_TRIANGLE_FAN)); + + +static INLINE unsigned svga_translate_prim(unsigned mode, + unsigned count, + unsigned *out_count) +{ + switch (mode) { + case PIPE_PRIM_POINTS: + *out_count = count; + return SVGA3D_PRIMITIVE_POINTLIST; + + case PIPE_PRIM_LINES: + *out_count = count / 2; + return SVGA3D_PRIMITIVE_LINELIST; + + case PIPE_PRIM_LINE_STRIP: + *out_count = count - 1; + return SVGA3D_PRIMITIVE_LINESTRIP; + + case PIPE_PRIM_TRIANGLES: + *out_count = count / 3; + return SVGA3D_PRIMITIVE_TRIANGLELIST; + + case PIPE_PRIM_TRIANGLE_STRIP: + *out_count = count - 2; + return SVGA3D_PRIMITIVE_TRIANGLESTRIP; + + case PIPE_PRIM_TRIANGLE_FAN: + *out_count = count - 2; + return SVGA3D_PRIMITIVE_TRIANGLEFAN; + + default: + assert(0); + *out_count = 0; + return 0; + } +} + + +struct index_cache { + u_generate_func generate; + unsigned gen_nr; + + /* If non-null, this buffer is filled by calling + * generate(nr, map(buffer)) + */ + struct pipe_buffer *buffer; +}; + +#define QSZ 32 + +struct draw_cmd { + struct svga_winsys_context *swc; + + SVGA3dVertexDecl vdecl[SVGA3D_INPUTREG_MAX]; + struct pipe_buffer *vdecl_vb[SVGA3D_INPUTREG_MAX]; + unsigned vdecl_count; + + SVGA3dPrimitiveRange prim[QSZ]; + struct pipe_buffer *prim_ib[QSZ]; + unsigned prim_count; + unsigned min_index[QSZ]; + unsigned max_index[QSZ]; +}; + +#define IDX_CACHE_MAX 8 + +struct svga_hwtnl { + struct svga_context *svga; + struct u_upload_mgr *upload_ib; + + /* Flatshade information: + */ + unsigned api_pv; + unsigned hw_pv; + unsigned api_fillmode; + + /* Cache the results of running a particular generate func on each + * primitive type. + */ + struct index_cache index_cache[PIPE_PRIM_MAX][IDX_CACHE_MAX]; + + /* Try to build the maximal draw command packet before emitting: + */ + struct draw_cmd cmd; +}; + + + +/*********************************************************************** + * Internal functions + */ +enum pipe_error +svga_hwtnl_prim( struct svga_hwtnl *hwtnl, + const SVGA3dPrimitiveRange *range, + unsigned min_index, + unsigned max_index, + struct pipe_buffer *ib ); + +enum pipe_error +svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl, + struct pipe_buffer *indexBuffer, + unsigned index_size, + unsigned min_index, + unsigned max_index, + unsigned prim, + unsigned start, + unsigned count, + unsigned bias ); + + +#endif diff --git a/src/gallium/drivers/svga/svga_hw_reg.h b/src/gallium/drivers/svga/svga_hw_reg.h new file mode 100644 index 0000000000..183f4b918e --- /dev/null +++ b/src/gallium/drivers/svga/svga_hw_reg.h @@ -0,0 +1,42 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_HW_REG_H +#define SVGA_HW_REG_H + +#include "pipe/p_compiler.h" + +#if defined(PIPE_CC_GCC) +#ifndef HAVE_STDINT_H +#define HAVE_STDINT_H +#endif +#endif + +#include "svga_types.h" + +#include "svga3d_reg.h" + + +#endif diff --git a/src/gallium/drivers/svga/svga_pipe_blend.c b/src/gallium/drivers/svga/svga_pipe_blend.c new file mode 100644 index 0000000000..855d228755 --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_blend.c @@ -0,0 +1,246 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "svga_context.h" +#include "svga_state.h" + +#include "svga_hw_reg.h" + + +static INLINE unsigned +svga_translate_blend_factor(unsigned factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ZERO: return SVGA3D_BLENDOP_ZERO; + case PIPE_BLENDFACTOR_SRC_ALPHA: return SVGA3D_BLENDOP_SRCALPHA; + case PIPE_BLENDFACTOR_ONE: return SVGA3D_BLENDOP_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: return SVGA3D_BLENDOP_SRCCOLOR; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: return SVGA3D_BLENDOP_INVSRCCOLOR; + case PIPE_BLENDFACTOR_DST_COLOR: return SVGA3D_BLENDOP_DESTCOLOR; + case PIPE_BLENDFACTOR_INV_DST_COLOR: return SVGA3D_BLENDOP_INVDESTCOLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return SVGA3D_BLENDOP_INVSRCALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: return SVGA3D_BLENDOP_DESTALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: return SVGA3D_BLENDOP_INVDESTALPHA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return SVGA3D_BLENDOP_SRCALPHASAT; + case PIPE_BLENDFACTOR_CONST_COLOR: return SVGA3D_BLENDOP_BLENDFACTOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: return SVGA3D_BLENDOP_INVBLENDFACTOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: return SVGA3D_BLENDOP_BLENDFACTOR; /* ? */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return SVGA3D_BLENDOP_INVBLENDFACTOR; /* ? */ + default: + assert(0); + return SVGA3D_BLENDOP_ZERO; + } +} + +static INLINE unsigned +svga_translate_blend_func(unsigned mode) +{ + switch (mode) { + case PIPE_BLEND_ADD: return SVGA3D_BLENDEQ_ADD; + case PIPE_BLEND_SUBTRACT: return SVGA3D_BLENDEQ_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: return SVGA3D_BLENDEQ_REVSUBTRACT; + case PIPE_BLEND_MIN: return SVGA3D_BLENDEQ_MINIMUM; + case PIPE_BLEND_MAX: return SVGA3D_BLENDEQ_MAXIMUM; + default: + assert(0); + return SVGA3D_BLENDEQ_ADD; + } +} + + +static void * +svga_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *templ) +{ + struct svga_blend_state *blend = CALLOC_STRUCT( svga_blend_state ); + unsigned i; + + + /* Fill in the per-rendertarget blend state. We currently only + * have one rendertarget. + */ + for (i = 0; i < 1; i++) { + /* No way to set this in SVGA3D, and no way to correctly implement it on + * top of D3D9 API. Instead we try to simulate with various blend modes. + */ + if (templ->logicop_enable) { + switch (templ->logicop_func) { + case PIPE_LOGICOP_XOR: + blend->need_white_fragments = TRUE; + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE; + blend->rt[i].dstblend = SVGA3D_BLENDOP_ONE; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_SUBTRACT; + break; + case PIPE_LOGICOP_CLEAR: + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_ZERO; + blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM; + break; + case PIPE_LOGICOP_COPY: + blend->rt[i].blend_enable = FALSE; + break; + case PIPE_LOGICOP_COPY_INVERTED: + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR; + blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD; + break; + case PIPE_LOGICOP_NOOP: + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_ZERO; + blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD; + break; + case PIPE_LOGICOP_SET: + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE; + blend->rt[i].dstblend = SVGA3D_BLENDOP_ONE; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM; + break; + case PIPE_LOGICOP_INVERT: + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR; + blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD; + break; + case PIPE_LOGICOP_AND: + /* Approximate with minimum - works for the 0 & anything case: */ + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR; + blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM; + break; + case PIPE_LOGICOP_AND_REVERSE: + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR; + blend->rt[i].dstblend = SVGA3D_BLENDOP_INVDESTCOLOR; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM; + break; + case PIPE_LOGICOP_AND_INVERTED: + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR; + blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM; + break; + case PIPE_LOGICOP_OR: + /* Approximate with maximum - works for the 1 | anything case: */ + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR; + blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM; + break; + case PIPE_LOGICOP_OR_REVERSE: + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR; + blend->rt[i].dstblend = SVGA3D_BLENDOP_INVDESTCOLOR; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM; + break; + case PIPE_LOGICOP_OR_INVERTED: + blend->rt[i].blend_enable = TRUE; + blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR; + blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR; + blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM; + break; + case PIPE_LOGICOP_NAND: + case PIPE_LOGICOP_NOR: + case PIPE_LOGICOP_EQUIV: + /* Fill these in with plausible values */ + blend->rt[i].blend_enable = FALSE; + break; + default: + assert(0); + break; + } + } + else { + blend->rt[i].blend_enable = templ->blend_enable; + + if (templ->blend_enable) { + blend->rt[i].srcblend = svga_translate_blend_factor(templ->rgb_src_factor); + blend->rt[i].dstblend = svga_translate_blend_factor(templ->rgb_dst_factor); + blend->rt[i].blendeq = svga_translate_blend_func(templ->rgb_func); + blend->rt[i].srcblend_alpha = svga_translate_blend_factor(templ->alpha_src_factor); + blend->rt[i].dstblend_alpha = svga_translate_blend_factor(templ->alpha_dst_factor); + blend->rt[i].blendeq_alpha = svga_translate_blend_func(templ->alpha_func); + + if (blend->rt[i].srcblend_alpha != blend->rt[i].srcblend || + blend->rt[i].dstblend_alpha != blend->rt[i].dstblend || + blend->rt[i].blendeq_alpha != blend->rt[i].blendeq) + { + blend->rt[i].separate_alpha_blend_enable = TRUE; + } + } + } + + blend->rt[i].writemask = templ->colormask; + } + + return blend; +} + +static void svga_bind_blend_state(struct pipe_context *pipe, + void *blend) +{ + struct svga_context *svga = svga_context(pipe); + + svga->curr.blend = (struct svga_blend_state*)blend; + svga->dirty |= SVGA_NEW_BLEND; +} + + +static void svga_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + FREE(blend); +} + +static void svga_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct svga_context *svga = svga_context(pipe); + + svga->curr.blend_color = *blend_color; + + svga->dirty |= SVGA_NEW_BLEND; +} + + +void svga_init_blend_functions( struct svga_context *svga ) +{ + svga->pipe.create_blend_state = svga_create_blend_state; + svga->pipe.bind_blend_state = svga_bind_blend_state; + svga->pipe.delete_blend_state = svga_delete_blend_state; + + svga->pipe.set_blend_color = svga_set_blend_color; +} + + + diff --git a/src/gallium/drivers/svga/svga_pipe_blit.c b/src/gallium/drivers/svga/svga_pipe_blit.c new file mode 100644 index 0000000000..5a4a8c0f5f --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_blit.c @@ -0,0 +1,84 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "svga_screen_texture.h" +#include "svga_context.h" +#include "svga_cmd.h" + +#define FILE_DEBUG_FLAG DEBUG_BLIT + + +static void svga_surface_copy(struct pipe_context *pipe, + struct pipe_surface *dest, + unsigned destx, unsigned desty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct svga_context *svga = svga_context(pipe); + SVGA3dCopyBox *box; + enum pipe_error ret; + + svga_hwtnl_flush_retry( svga ); + + ret = SVGA3D_BeginSurfaceCopy(svga->swc, + src, + dest, + &box, + 1); + if(ret != PIPE_OK) { + + svga_context_flush(svga, NULL); + + ret = SVGA3D_BeginSurfaceCopy(svga->swc, + src, + dest, + &box, + 1); + assert(ret == PIPE_OK); + } + + box->x = destx; + box->y = desty; + box->z = 0; + box->w = width; + box->h = height; + box->d = 1; + box->srcx = srcx; + box->srcy = srcy; + box->srcz = 0; + + SVGA_FIFOCommitAll(svga->swc); + + svga_surface(dest)->dirty = TRUE; + svga_propagate_surface(pipe, dest); +} + + +void +svga_init_blit_functions(struct svga_context *svga) +{ + svga->pipe.surface_copy = svga_surface_copy; +} diff --git a/src/gallium/drivers/svga/svga_pipe_clear.c b/src/gallium/drivers/svga/svga_pipe_clear.c new file mode 100644 index 0000000000..8977d26541 --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_clear.c @@ -0,0 +1,119 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "svga_cmd.h" + +#include "pipe/p_defines.h" +#include "util/u_pack_color.h" + +#include "svga_context.h" +#include "svga_state.h" + + +static enum pipe_error +try_clear(struct svga_context *svga, + unsigned buffers, + const float *rgba, + double depth, + unsigned stencil) +{ + int ret = PIPE_OK; + SVGA3dRect rect = { 0, 0, 0, 0 }; + boolean restore_viewport = FALSE; + SVGA3dClearFlag flags = 0; + struct pipe_framebuffer_state *fb = &svga->curr.framebuffer; + unsigned color = 0; + + ret = svga_update_state(svga, SVGA_STATE_HW_CLEAR); + if (ret) + return ret; + + if ((buffers & PIPE_CLEAR_COLOR) && fb->cbufs[0]) { + flags |= SVGA3D_CLEAR_COLOR; + util_pack_color(rgba, PIPE_FORMAT_A8R8G8B8_UNORM, &color); + + rect.w = fb->cbufs[0]->width; + rect.h = fb->cbufs[0]->height; + } + + if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && fb->zsbuf) { + flags |= SVGA3D_CLEAR_DEPTH; + + if (svga->curr.framebuffer.zsbuf->format == PIPE_FORMAT_Z24S8_UNORM) + flags |= SVGA3D_CLEAR_STENCIL; + + rect.w = MAX2(rect.w, fb->zsbuf->width); + rect.h = MAX2(rect.h, fb->zsbuf->height); + } + + if (memcmp(&rect, &svga->state.hw_clear.viewport, sizeof(rect)) != 0) { + restore_viewport = TRUE; + ret = SVGA3D_SetViewport(svga->swc, &rect); + if (ret) + return ret; + } + + ret = SVGA3D_ClearRect(svga->swc, flags, color, depth, stencil, + rect.x, rect.y, rect.w, rect.h); + if (ret != PIPE_OK) + return ret; + + if (restore_viewport) { + memcpy(&rect, &svga->state.hw_clear.viewport, sizeof rect); + ret = SVGA3D_SetViewport(svga->swc, &rect); + } + + return ret; +} + +/** + * Clear the given surface to the specified value. + * No masking, no scissor (clear entire buffer). + */ +void +svga_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, + double depth, unsigned stencil) +{ + struct svga_context *svga = svga_context( pipe ); + int ret; + + ret = try_clear( svga, buffers, rgba, depth, stencil ); + + if (ret == PIPE_ERROR_OUT_OF_MEMORY) { + /* Flush command buffer and retry: + */ + svga_context_flush( svga, NULL ); + + ret = try_clear( svga, buffers, rgba, depth, stencil ); + } + + /* + * Mark target surfaces as dirty + * TODO Mark only cleared surfaces. + */ + svga_mark_surfaces_dirty(svga); + + assert (ret == PIPE_OK); +} diff --git a/src/gallium/drivers/svga/svga_pipe_constants.c b/src/gallium/drivers/svga/svga_pipe_constants.c new file mode 100644 index 0000000000..10e7a12189 --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_constants.c @@ -0,0 +1,74 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_parse.h" + +#include "svga_context.h" +#include "svga_state.h" +#include "svga_hw_reg.h" +#include "svga_cmd.h" + +/*********************************************************************** + * Constant buffers + */ + +struct svga_constbuf +{ + unsigned type; + float (*data)[4]; + unsigned count; +}; + + + +static void svga_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct svga_context *svga = svga_context(pipe); + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + pipe_buffer_reference( &svga->curr.cb[shader], + buf->buffer ); + + if (shader == PIPE_SHADER_FRAGMENT) + svga->dirty |= SVGA_NEW_FS_CONST_BUFFER; + else + svga->dirty |= SVGA_NEW_VS_CONST_BUFFER; +} + + + +void svga_init_constbuffer_functions( struct svga_context *svga ) +{ + svga->pipe.set_constant_buffer = svga_set_constant_buffer; +} + diff --git a/src/gallium/drivers/svga/svga_pipe_depthstencil.c b/src/gallium/drivers/svga/svga_pipe_depthstencil.c new file mode 100644 index 0000000000..df636c08a0 --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_depthstencil.c @@ -0,0 +1,153 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "svga_context.h" +#include "svga_state.h" +#include "svga_hw_reg.h" + + +static INLINE unsigned +svga_translate_compare_func(unsigned func) +{ + switch (func) { + case PIPE_FUNC_NEVER: return SVGA3D_CMP_NEVER; + case PIPE_FUNC_LESS: return SVGA3D_CMP_LESS; + case PIPE_FUNC_LEQUAL: return SVGA3D_CMP_LESSEQUAL; + case PIPE_FUNC_GREATER: return SVGA3D_CMP_GREATER; + case PIPE_FUNC_GEQUAL: return SVGA3D_CMP_GREATEREQUAL; + case PIPE_FUNC_NOTEQUAL: return SVGA3D_CMP_NOTEQUAL; + case PIPE_FUNC_EQUAL: return SVGA3D_CMP_EQUAL; + case PIPE_FUNC_ALWAYS: return SVGA3D_CMP_ALWAYS; + default: + assert(0); + return SVGA3D_CMP_ALWAYS; + } +} + +static INLINE unsigned +svga_translate_stencil_op(unsigned op) +{ + switch (op) { + case PIPE_STENCIL_OP_KEEP: return SVGA3D_STENCILOP_KEEP; + case PIPE_STENCIL_OP_ZERO: return SVGA3D_STENCILOP_ZERO; + case PIPE_STENCIL_OP_REPLACE: return SVGA3D_STENCILOP_REPLACE; + case PIPE_STENCIL_OP_INCR: return SVGA3D_STENCILOP_INCR; + case PIPE_STENCIL_OP_DECR: return SVGA3D_STENCILOP_DECR; + case PIPE_STENCIL_OP_INCR_WRAP: return SVGA3D_STENCILOP_INCRSAT; /* incorrect? */ + case PIPE_STENCIL_OP_DECR_WRAP: return SVGA3D_STENCILOP_DECRSAT; /* incorrect? */ + case PIPE_STENCIL_OP_INVERT: return SVGA3D_STENCILOP_INVERT; + default: + assert(0); + return SVGA3D_STENCILOP_KEEP; + } +} + + +static void * +svga_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *templ) +{ + struct svga_depth_stencil_state *ds = CALLOC_STRUCT( svga_depth_stencil_state ); + + /* Don't try to figure out CW/CCW correspondence with + * stencil[0]/[1] at this point. Presumably this can change as + * back/front face are modified. + */ + ds->stencil[0].enabled = templ->stencil[0].enabled; + if (ds->stencil[0].enabled) { + ds->stencil[0].func = svga_translate_compare_func(templ->stencil[0].func); + ds->stencil[0].fail = svga_translate_stencil_op(templ->stencil[0].fail_op); + ds->stencil[0].zfail = svga_translate_stencil_op(templ->stencil[0].zfail_op); + ds->stencil[0].pass = svga_translate_stencil_op(templ->stencil[0].zpass_op); + + /* SVGA3D has one ref/mask/writemask triple shared between front & + * back face stencil. We really need two: + */ + ds->stencil_ref = templ->stencil[0].ref_value & 0xff; + ds->stencil_mask = templ->stencil[0].valuemask & 0xff; + ds->stencil_writemask = templ->stencil[0].writemask & 0xff; + } + + + ds->stencil[1].enabled = templ->stencil[1].enabled; + if (templ->stencil[1].enabled) { + ds->stencil[1].func = svga_translate_compare_func(templ->stencil[1].func); + ds->stencil[1].fail = svga_translate_stencil_op(templ->stencil[1].fail_op); + ds->stencil[1].zfail = svga_translate_stencil_op(templ->stencil[1].zfail_op); + ds->stencil[1].pass = svga_translate_stencil_op(templ->stencil[1].zpass_op); + + ds->stencil_ref = templ->stencil[1].ref_value & 0xff; + ds->stencil_mask = templ->stencil[1].valuemask & 0xff; + ds->stencil_writemask = templ->stencil[1].writemask & 0xff; + } + + + ds->zenable = templ->depth.enabled; + if (ds->zenable) { + ds->zfunc = svga_translate_compare_func(templ->depth.func); + ds->zwriteenable = templ->depth.writemask; + } + + ds->alphatestenable = templ->alpha.enabled; + if (ds->alphatestenable) { + ds->alphafunc = svga_translate_compare_func(templ->alpha.func); + ds->alpharef = templ->alpha.ref_value; + } + + return ds; +} + +static void svga_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct svga_context *svga = svga_context(pipe); + + svga->curr.depth = (const struct svga_depth_stencil_state *)depth_stencil; + svga->dirty |= SVGA_NEW_DEPTH_STENCIL; +} + +static void svga_delete_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + FREE(depth_stencil); +} + + + +void svga_init_depth_stencil_functions( struct svga_context *svga ) +{ + svga->pipe.create_depth_stencil_alpha_state = svga_create_depth_stencil_state; + svga->pipe.bind_depth_stencil_alpha_state = svga_bind_depth_stencil_state; + svga->pipe.delete_depth_stencil_alpha_state = svga_delete_depth_stencil_state; +} + + + + diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c new file mode 100644 index 0000000000..71a552862e --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -0,0 +1,261 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "svga_cmd.h" + +#include "pipe/p_inlines.h" +#include "util/u_prim.h" +#include "util/u_time.h" +#include "indices/u_indices.h" + +#include "svga_hw_reg.h" +#include "svga_context.h" +#include "svga_screen.h" +#include "svga_winsys.h" +#include "svga_draw.h" +#include "svga_state.h" +#include "svga_swtnl.h" +#include "svga_debug.h" + + + +static enum pipe_error +retry_draw_range_elements( struct svga_context *svga, + struct pipe_buffer *index_buffer, + unsigned index_size, + unsigned min_index, + unsigned max_index, + unsigned prim, + unsigned start, + unsigned count, + boolean do_retry ) +{ + enum pipe_error ret = 0; + + svga_hwtnl_set_unfilled( svga->hwtnl, + svga->curr.rast->hw_unfilled ); + + svga_hwtnl_set_flatshade( svga->hwtnl, + svga->curr.rast->templ.flatshade, + svga->curr.rast->templ.flatshade_first ); + + + ret = svga_update_state( svga, SVGA_STATE_HW_DRAW ); + if (ret) + goto retry; + + ret = svga_hwtnl_draw_range_elements( svga->hwtnl, + index_buffer, index_size, + min_index, max_index, + prim, start, count, 0 ); + if (ret) + goto retry; + + if (svga->curr.any_user_vertex_buffers) { + ret = svga_hwtnl_flush( svga->hwtnl ); + if (ret) + goto retry; + } + + return PIPE_OK; + +retry: + svga_context_flush( svga, NULL ); + + if (do_retry) + { + return retry_draw_range_elements( svga, + index_buffer, index_size, + min_index, max_index, + prim, start, count, + FALSE ); + } + + return ret; +} + + +static enum pipe_error +retry_draw_arrays( struct svga_context *svga, + unsigned prim, + unsigned start, + unsigned count, + boolean do_retry ) +{ + enum pipe_error ret; + + svga_hwtnl_set_unfilled( svga->hwtnl, + svga->curr.rast->hw_unfilled ); + + svga_hwtnl_set_flatshade( svga->hwtnl, + svga->curr.rast->templ.flatshade, + svga->curr.rast->templ.flatshade_first ); + + ret = svga_update_state( svga, SVGA_STATE_HW_DRAW ); + if (ret) + goto retry; + + ret = svga_hwtnl_draw_arrays( svga->hwtnl, prim, + start, count ); + if (ret) + goto retry; + + if (svga->curr.any_user_vertex_buffers) { + ret = svga_hwtnl_flush( svga->hwtnl ); + if (ret) + goto retry; + } + + return 0; + +retry: + if (ret == PIPE_ERROR_OUT_OF_MEMORY && do_retry) + { + svga_context_flush( svga, NULL ); + + return retry_draw_arrays( svga, + prim, + start, + count, + FALSE ); + } + + return ret; +} + + + + + +static boolean +svga_draw_range_elements( struct pipe_context *pipe, + struct pipe_buffer *index_buffer, + unsigned index_size, + unsigned min_index, + unsigned max_index, + unsigned prim, unsigned start, unsigned count) +{ + struct svga_context *svga = svga_context( pipe ); + unsigned reduced_prim = u_reduced_prim(prim); + enum pipe_error ret = 0; + + if (!u_trim_pipe_prim( prim, &count )) + return TRUE; + + /* + * Mark currently bound target surfaces as dirty + * doesn't really matter if it is done before drawing. + * + * TODO If we ever normaly return something other then + * true we should not mark it as dirty then. + */ + svga_mark_surfaces_dirty(svga_context(pipe)); + + if (svga->curr.reduced_prim != reduced_prim) { + svga->curr.reduced_prim = reduced_prim; + svga->dirty |= SVGA_NEW_REDUCED_PRIMITIVE; + } + + svga_update_state_retry( svga, SVGA_STATE_NEED_SWTNL ); + +#ifdef DEBUG + if (svga->curr.vs->base.id == svga->debug.disable_shader || + svga->curr.fs->base.id == svga->debug.disable_shader) + return 0; +#endif + + if (svga->state.sw.need_swtnl) + { + ret = svga_swtnl_draw_range_elements( svga, + index_buffer, + index_size, + min_index, max_index, + prim, + start, count ); + } + else { + if (index_buffer) { + ret = retry_draw_range_elements( svga, + index_buffer, + index_size, + min_index, + max_index, + prim, + start, + count, + TRUE ); + } + else { + ret = retry_draw_arrays( svga, + prim, + start, + count, + TRUE ); + } + } + + if (SVGA_DEBUG & DEBUG_FLUSH) { + static unsigned id; + debug_printf("%s %d\n", __FUNCTION__, id++); + if (id > 1300) + util_time_sleep( 2000 ); + + svga_hwtnl_flush_retry( svga ); + svga_context_flush(svga, NULL); + } + + return ret == PIPE_OK; +} + + +static boolean +svga_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *index_buffer, + unsigned index_size, + unsigned prim, unsigned start, unsigned count) +{ + return svga_draw_range_elements( pipe, index_buffer, + index_size, + 0, 0xffffffff, + prim, start, count ); +} + +static boolean +svga_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) +{ + return svga_draw_range_elements(pipe, NULL, 0, + start, start + count - 1, + prim, + start, count); +} + + +void svga_init_draw_functions( struct svga_context *svga ) +{ + svga->pipe.draw_arrays = svga_draw_arrays; + svga->pipe.draw_elements = svga_draw_elements; + svga->pipe.draw_range_elements = svga_draw_range_elements; +} diff --git a/src/gallium/drivers/svga/svga_pipe_flush.c b/src/gallium/drivers/svga/svga_pipe_flush.c new file mode 100644 index 0000000000..942366de72 --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_flush.c @@ -0,0 +1,68 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_defines.h" +#include "svga_screen.h" +#include "svga_screen_texture.h" +#include "svga_context.h" +#include "svga_winsys.h" +#include "svga_draw.h" +#include "svga_debug.h" + +#include "svga_hw_reg.h" + + + + +static void svga_flush( struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence ) +{ + struct svga_context *svga = svga_context(pipe); + int i; + + /* Emit buffered drawing commands. + */ + svga_hwtnl_flush_retry( svga ); + + /* Emit back-copy from render target view to texture. + */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (svga->curr.framebuffer.cbufs[i]) + svga_propagate_surface(pipe, svga->curr.framebuffer.cbufs[i]); + } + if (svga->curr.framebuffer.zsbuf) + svga_propagate_surface(pipe, svga->curr.framebuffer.zsbuf); + + /* Flush command queue. + */ + svga_context_flush(svga, fence); +} + + +void svga_init_flush_functions( struct svga_context *svga ) +{ + svga->pipe.flush = svga_flush; +} diff --git a/src/gallium/drivers/svga/svga_pipe_fs.c b/src/gallium/drivers/svga/svga_pipe_fs.c new file mode 100644 index 0000000000..e3be840d92 --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_fs.c @@ -0,0 +1,124 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_text.h" + +#include "svga_screen.h" +#include "svga_context.h" +#include "svga_state.h" +#include "svga_tgsi.h" +#include "svga_hw_reg.h" +#include "svga_cmd.h" +#include "svga_draw.h" +#include "svga_debug.h" + + +/*********************************************************************** + * Fragment shaders + */ + +static void * +svga_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_screen *svgascreen = svga_screen(pipe->screen); + struct svga_fragment_shader *fs; + + fs = CALLOC_STRUCT(svga_fragment_shader); + if (!fs) + return NULL; + + fs->base.tokens = tgsi_dup_tokens(templ->tokens); + + /* Collect basic info that we'll need later: + */ + tgsi_scan_shader(fs->base.tokens, &fs->base.info); + + fs->base.id = svga->debug.shader_id++; + fs->base.use_sm30 = svgascreen->use_ps30; + + if (SVGA_DEBUG & DEBUG_TGSI || 0) { + debug_printf("%s id: %u, inputs: %u, outputs: %u\n", + __FUNCTION__, fs->base.id, + fs->base.info.num_inputs, fs->base.info.num_outputs); + } + + return fs; +} + +static void +svga_bind_fs_state(struct pipe_context *pipe, void *shader) +{ + struct svga_fragment_shader *fs = (struct svga_fragment_shader *) shader; + struct svga_context *svga = svga_context(pipe); + + svga->curr.fs = fs; + svga->dirty |= SVGA_NEW_FS; +} + +static +void svga_delete_fs_state(struct pipe_context *pipe, void *shader) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_fragment_shader *fs = (struct svga_fragment_shader *) shader; + struct svga_shader_result *result, *tmp; + enum pipe_error ret; + + svga_hwtnl_flush_retry( svga ); + + for (result = fs->base.results; result; result = tmp ) { + tmp = result->next; + + ret = SVGA3D_DestroyShader(svga->swc, + result->id, + SVGA3D_SHADERTYPE_PS ); + if(ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_DestroyShader(svga->swc, + result->id, + SVGA3D_SHADERTYPE_PS ); + assert(ret == PIPE_OK); + } + + svga_destroy_shader_result( result ); + } + + FREE((void *)fs->base.tokens); + FREE(fs); +} + + +void svga_init_fs_functions( struct svga_context *svga ) +{ + svga->pipe.create_fs_state = svga_create_fs_state; + svga->pipe.bind_fs_state = svga_bind_fs_state; + svga->pipe.delete_fs_state = svga_delete_fs_state; +} + diff --git a/src/gallium/drivers/svga/svga_pipe_misc.c b/src/gallium/drivers/svga/svga_pipe_misc.c new file mode 100644 index 0000000000..58cb1e6e23 --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_misc.c @@ -0,0 +1,187 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "svga_cmd.h" + +#include "svga_context.h" +#include "svga_screen_texture.h" +#include "svga_state.h" +#include "svga_winsys.h" + +#include "svga_hw_reg.h" + + + + +static void svga_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct svga_context *svga = svga_context(pipe); + + memcpy( &svga->curr.scissor, scissor, sizeof(*scissor) ); + svga->dirty |= SVGA_NEW_SCISSOR; +} + + +static void svga_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ + /* overridden by the draw module */ +} + + +void svga_cleanup_framebuffer(struct svga_context *svga) +{ + struct pipe_framebuffer_state *curr = &svga->curr.framebuffer; + struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer; + int i; + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + pipe_surface_reference(&curr->cbufs[i], NULL); + pipe_surface_reference(&hw->cbufs[i], NULL); + } + + pipe_surface_reference(&curr->zsbuf, NULL); + pipe_surface_reference(&hw->zsbuf, NULL); +} + + +#define DEPTH_BIAS_SCALE_FACTOR_D16 ((float)(1<<15)) +#define DEPTH_BIAS_SCALE_FACTOR_D24S8 ((float)(1<<23)) +#define DEPTH_BIAS_SCALE_FACTOR_D32 ((float)(1<<31)) + + +static void svga_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct svga_context *svga = svga_context(pipe); + struct pipe_framebuffer_state *dst = &svga->curr.framebuffer; + boolean propagate = FALSE; + int i; + + dst->width = fb->width; + dst->height = fb->height; + dst->nr_cbufs = fb->nr_cbufs; + + /* check if we need to propaget any of the target surfaces */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (dst->cbufs[i] && dst->cbufs[i] != fb->cbufs[i]) + if (svga_surface_needs_propagation(dst->cbufs[i])) + propagate = TRUE; + } + + if (propagate) { + /* make sure that drawing calls comes before propagation calls */ + svga_hwtnl_flush_retry( svga ); + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + if (dst->cbufs[i] && dst->cbufs[i] != fb->cbufs[i]) + svga_propagate_surface(pipe, dst->cbufs[i]); + } + + /* XXX: Actually the virtual hardware may support rendertargets with + * different size, depending on the host API and driver, but since we cannot + * know that make no such assumption here. */ + for(i = 0; i < fb->nr_cbufs; ++i) { + if (fb->zsbuf && fb->cbufs[i]) { + assert(fb->zsbuf->width == fb->cbufs[i]->width); + assert(fb->zsbuf->height == fb->cbufs[i]->height); + } + } + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + pipe_surface_reference(&dst->cbufs[i], fb->cbufs[i]); + pipe_surface_reference(&dst->zsbuf, fb->zsbuf); + + + if (svga->curr.framebuffer.zsbuf) + { + switch (svga->curr.framebuffer.zsbuf->format) { + case PIPE_FORMAT_Z16_UNORM: + svga->curr.depthscale = 1.0f / DEPTH_BIAS_SCALE_FACTOR_D16; + break; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + svga->curr.depthscale = 1.0f / DEPTH_BIAS_SCALE_FACTOR_D24S8; + break; + case PIPE_FORMAT_Z32_UNORM: + svga->curr.depthscale = 1.0f / DEPTH_BIAS_SCALE_FACTOR_D32; + break; + case PIPE_FORMAT_Z32_FLOAT: + svga->curr.depthscale = 1.0f / ((float)(1<<23)); + break; + default: + svga->curr.depthscale = 0.0f; + break; + } + } + else { + svga->curr.depthscale = 0.0f; + } + + svga->dirty |= SVGA_NEW_FRAME_BUFFER; +} + + + +static void svga_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct svga_context *svga = svga_context(pipe); + + svga->curr.clip = *clip; /* struct copy */ + + svga->dirty |= SVGA_NEW_CLIP; +} + + + +/* Called when driver state tracker notices changes to the viewport + * matrix: + */ +static void svga_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct svga_context *svga = svga_context(pipe); + + svga->curr.viewport = *viewport; /* struct copy */ + + svga->dirty |= SVGA_NEW_VIEWPORT; +} + + + +void svga_init_misc_functions( struct svga_context *svga ) +{ + svga->pipe.set_scissor_state = svga_set_scissor_state; + svga->pipe.set_polygon_stipple = svga_set_polygon_stipple; + svga->pipe.set_framebuffer_state = svga_set_framebuffer_state; + svga->pipe.set_clip_state = svga_set_clip_state; + svga->pipe.set_viewport_state = svga_set_viewport_state; +} + + diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c new file mode 100644 index 0000000000..01336b0a2c --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_query.c @@ -0,0 +1,267 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "util/u_memory.h" + +#include "svga_cmd.h" +#include "svga_context.h" +#include "svga_screen.h" +#include "svga_screen_buffer.h" +#include "svga_winsys.h" +#include "svga_draw.h" +#include "svga_debug.h" + + +/* Fixme: want a public base class for all pipe structs, even if there + * isn't much in them. + */ +struct pipe_query { + int dummy; +}; + +struct svga_query { + struct pipe_query base; + SVGA3dQueryType type; + struct svga_winsys_buffer *hwbuf; + volatile SVGA3dQueryResult *queryResult; + struct pipe_fence_handle *fence; +}; + +/*********************************************************************** + * Inline conversion functions. These are better-typed than the + * macros used previously: + */ +static INLINE struct svga_query * +svga_query( struct pipe_query *q ) +{ + return (struct svga_query *)q; +} + +static boolean svga_get_query_result(struct pipe_context *pipe, + struct pipe_query *q, + boolean wait, + uint64_t *result); + +static struct pipe_query *svga_create_query( struct pipe_context *pipe, + unsigned query_type ) +{ + struct svga_screen *svgascreen = svga_screen(pipe->screen); + struct svga_winsys_screen *sws = svgascreen->sws; + struct svga_query *sq; + + SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); + + sq = CALLOC_STRUCT(svga_query); + if (!sq) + goto no_sq; + + sq->type = SVGA3D_QUERYTYPE_OCCLUSION; + + sq->hwbuf = svga_winsys_buffer_create(svgascreen, + 1, + SVGA_BUFFER_USAGE_PINNED, + sizeof *sq->queryResult); + if(!sq->hwbuf) + goto no_hwbuf; + + sq->queryResult = (SVGA3dQueryResult *)sws->buffer_map(sws, + sq->hwbuf, + PIPE_BUFFER_USAGE_CPU_WRITE); + if(!sq->queryResult) + goto no_query_result; + + sq->queryResult->totalSize = sizeof *sq->queryResult; + sq->queryResult->state = SVGA3D_QUERYSTATE_NEW; + + /* + * We request the buffer to be pinned and assume it is always mapped. + * + * The reason is that we don't want to wait for fences when checking the + * query status. + */ + sws->buffer_unmap(sws, sq->hwbuf); + + return &sq->base; + +no_query_result: + sws->buffer_destroy(sws, sq->hwbuf); +no_hwbuf: + FREE(sq); +no_sq: + return NULL; +} + +static void svga_destroy_query(struct pipe_context *pipe, + struct pipe_query *q) +{ + struct svga_screen *svgascreen = svga_screen(pipe->screen); + struct svga_winsys_screen *sws = svgascreen->sws; + struct svga_query *sq = svga_query( q ); + + SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); + sws->buffer_destroy(sws, sq->hwbuf); + sws->fence_reference(sws, &sq->fence, NULL); + FREE(sq); +} + +static void svga_begin_query(struct pipe_context *pipe, + struct pipe_query *q) +{ + struct svga_screen *svgascreen = svga_screen(pipe->screen); + struct svga_winsys_screen *sws = svgascreen->sws; + struct svga_context *svga = svga_context( pipe ); + struct svga_query *sq = svga_query( q ); + enum pipe_error ret; + + SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); + + assert(!svga->sq); + + /* Need to flush out buffered drawing commands so that they don't + * get counted in the query results. + */ + svga_hwtnl_flush_retry(svga); + + if(sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) { + /* The application doesn't care for the pending query result. We cannot + * let go the existing buffer and just get a new one because its storage + * may be reused for other purposes and clobbered by the host when it + * determines the query result. So the only option here is to wait for + * the existing query's result -- not a big deal, given that no sane + * application would do this. + */ + uint64_t result; + + svga_get_query_result(pipe, q, TRUE, &result); + + assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING); + } + + sq->queryResult->state = SVGA3D_QUERYSTATE_NEW; + sws->fence_reference(sws, &sq->fence, NULL); + + ret = SVGA3D_BeginQuery(svga->swc, sq->type); + if(ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_BeginQuery(svga->swc, sq->type); + assert(ret == PIPE_OK); + } + + svga->sq = sq; +} + +static void svga_end_query(struct pipe_context *pipe, + struct pipe_query *q) +{ + struct svga_context *svga = svga_context( pipe ); + struct svga_query *sq = svga_query( q ); + enum pipe_error ret; + + SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); + assert(svga->sq == sq); + + svga_hwtnl_flush_retry(svga); + + /* Set to PENDING before sending EndQuery. */ + sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING; + + ret = SVGA3D_EndQuery( svga->swc, sq->type, sq->hwbuf); + if(ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_EndQuery( svga->swc, sq->type, sq->hwbuf); + assert(ret == PIPE_OK); + } + + /* TODO: Delay flushing. We don't really need to flush here, just ensure + * that there is one flush before svga_get_query_result attempts to get the + * result */ + svga_context_flush(svga, NULL); + + svga->sq = NULL; +} + +static boolean svga_get_query_result(struct pipe_context *pipe, + struct pipe_query *q, + boolean wait, + uint64_t *result) +{ + struct svga_context *svga = svga_context( pipe ); + struct svga_screen *svgascreen = svga_screen( pipe->screen ); + struct svga_winsys_screen *sws = svgascreen->sws; + struct svga_query *sq = svga_query( q ); + SVGA3dQueryState state; + + SVGA_DBG(DEBUG_QUERY, "%s wait: %d\n", __FUNCTION__); + + /* The query status won't be updated by the host unless + * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause a + * synchronous wait on the host */ + if(!sq->fence) { + enum pipe_error ret; + + ret = SVGA3D_WaitForQuery( svga->swc, sq->type, sq->hwbuf); + if(ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_WaitForQuery( svga->swc, sq->type, sq->hwbuf); + assert(ret == PIPE_OK); + } + + svga_context_flush(svga, &sq->fence); + + assert(sq->fence); + } + + state = sq->queryResult->state; + if(state == SVGA3D_QUERYSTATE_PENDING) { + if(!wait) + return FALSE; + + sws->fence_finish(sws, sq->fence, 0); + + state = sq->queryResult->state; + } + + assert(state == SVGA3D_QUERYSTATE_SUCCEEDED || + state == SVGA3D_QUERYSTATE_FAILED); + + *result = (uint64_t)sq->queryResult->result32; + + SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, (unsigned)*result); + + return TRUE; +} + + + +void svga_init_query_functions( struct svga_context *svga ) +{ + svga->pipe.create_query = svga_create_query; + svga->pipe.destroy_query = svga_destroy_query; + svga->pipe.begin_query = svga_begin_query; + svga->pipe.end_query = svga_end_query; + svga->pipe.get_query_result = svga_get_query_result; +} diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c new file mode 100644 index 0000000000..b03f8eb9cf --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c @@ -0,0 +1,250 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "draw/draw_context.h" +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "svga_context.h" +#include "svga_state.h" + +#include "svga_hw_reg.h" + +/* Hardware frontwinding is always set up as SVGA3D_FRONTWINDING_CW. + */ +static SVGA3dFace svga_translate_cullmode( unsigned mode, + unsigned front_winding ) +{ + switch (mode) { + case PIPE_WINDING_NONE: + return SVGA3D_FACE_NONE; + case PIPE_WINDING_CCW: + return SVGA3D_FACE_BACK; + case PIPE_WINDING_CW: + return SVGA3D_FACE_FRONT; + case PIPE_WINDING_BOTH: + return SVGA3D_FACE_FRONT_BACK; + default: + assert(0); + return SVGA3D_FACE_NONE; + } +} + +static SVGA3dShadeMode svga_translate_flatshade( unsigned mode ) +{ + return mode ? SVGA3D_SHADEMODE_FLAT : SVGA3D_SHADEMODE_SMOOTH; +} + + +static void * +svga_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *templ) +{ + struct svga_rasterizer_state *rast = CALLOC_STRUCT( svga_rasterizer_state ); + /* need this for draw module. */ + rast->templ = *templ; + + /* light_twoside - XXX: need fragment shader varient */ + /* poly_smooth - XXX: no fallback available */ + /* poly_stipple_enable - draw module */ + /* point_sprite - ? */ + /* point_size_per_vertex - ? */ + /* sprite_coord_mode - ??? */ + /* bypass_vs_viewport_and_clip - handled by viewport setup */ + /* flatshade_first - handled by index translation */ + /* gl_rasterization_rules - XXX - viewport code */ + /* line_width - draw module */ + /* fill_cw, fill_ccw - draw module or index translation */ + + rast->shademode = svga_translate_flatshade( templ->flatshade ); + rast->cullmode = svga_translate_cullmode( templ->cull_mode, + templ->front_winding ); + rast->scissortestenable = templ->scissor; + rast->multisampleantialias = templ->multisample; + rast->antialiasedlineenable = templ->line_smooth; + rast->lastpixel = templ->line_last_pixel; + rast->pointsize = templ->point_size; + rast->pointsize_min = templ->point_size_min; + rast->pointsize_max = templ->point_size_max; + rast->hw_unfilled = PIPE_POLYGON_MODE_FILL; + + /* Use swtnl + decomposition implement these: + */ + if (templ->poly_stipple_enable) + rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + + if (templ->line_width != 1.0 && + templ->line_width != 0.0) + rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES; + + if (templ->line_stipple_enable) { + /* LinePattern not implemented on all backends. + */ + if (0) { + SVGA3dLinePattern lp; + lp.repeat = templ->line_stipple_factor + 1; + lp.pattern = templ->line_stipple_pattern; + rast->linepattern = lp.uintValue; + } + else { + rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES; + } + } + + if (templ->point_smooth) + rast->need_pipeline |= SVGA_PIPELINE_FLAG_POINTS; + + { + boolean offset_cw = templ->offset_cw; + boolean offset_ccw = templ->offset_ccw; + boolean offset = 0; + int fill_cw = templ->fill_cw; + int fill_ccw = templ->fill_ccw; + int fill = PIPE_POLYGON_MODE_FILL; + + switch (templ->cull_mode) { + case PIPE_WINDING_BOTH: + offset = 0; + fill = PIPE_POLYGON_MODE_FILL; + break; + + case PIPE_WINDING_CW: + offset = offset_ccw; + fill = fill_ccw; + break; + + case PIPE_WINDING_CCW: + offset = offset_cw; + fill = fill_cw; + break; + + case PIPE_WINDING_NONE: + if (fill_cw != fill_ccw || offset_cw != offset_ccw) + { + /* Always need the draw module to work out different + * front/back fill modes: + */ + rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + } + else { + offset = offset_ccw; + fill = fill_ccw; + } + break; + + default: + assert(0); + break; + } + + /* Unfilled primitive modes aren't implemented on all virtual + * hardware. We can do some unfilled processing with index + * translation, but otherwise need the draw module: + */ + if (fill != PIPE_POLYGON_MODE_FILL && + (templ->flatshade || + templ->light_twoside || + offset || + templ->cull_mode != PIPE_WINDING_NONE)) + { + fill = PIPE_POLYGON_MODE_FILL; + rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + } + + /* If we are decomposing to lines, and lines need the pipeline, + * then we also need the pipeline for tris. + */ + if (fill == PIPE_POLYGON_MODE_LINE && + (rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES)) + { + fill = PIPE_POLYGON_MODE_FILL; + rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + } + + /* Similarly for points: + */ + if (fill == PIPE_POLYGON_MODE_POINT && + (rast->need_pipeline & SVGA_PIPELINE_FLAG_POINTS)) + { + fill = PIPE_POLYGON_MODE_FILL; + rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS; + } + + if (offset) { + rast->slopescaledepthbias = templ->offset_scale; + rast->depthbias = templ->offset_units; + } + + rast->hw_unfilled = fill; + } + + + + + if (rast->need_pipeline & SVGA_PIPELINE_FLAG_TRIS) { + /* Turn off stuff which will get done in the draw module: + */ + rast->hw_unfilled = PIPE_POLYGON_MODE_FILL; + rast->slopescaledepthbias = 0; + rast->depthbias = 0; + } + + return rast; +} + +static void svga_bind_rasterizer_state( struct pipe_context *pipe, + void *state ) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_rasterizer_state *raster = (struct svga_rasterizer_state *)state; + + svga->curr.rast = raster; + + draw_set_rasterizer_state(svga->swtnl.draw, raster ? &raster->templ : NULL); + + svga->dirty |= SVGA_NEW_RAST; +} + +static void svga_delete_rasterizer_state(struct pipe_context *pipe, + void *raster) +{ + FREE(raster); +} + + +void svga_init_rasterizer_functions( struct svga_context *svga ) +{ + svga->pipe.create_rasterizer_state = svga_create_rasterizer_state; + svga->pipe.bind_rasterizer_state = svga_bind_rasterizer_state; + svga->pipe.delete_rasterizer_state = svga_delete_rasterizer_state; +} + + +/*********************************************************************** + * Hardware state update + */ + diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c new file mode 100644 index 0000000000..3eeca6b784 --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_sampler.c @@ -0,0 +1,243 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_pack_color.h" +#include "tgsi/tgsi_parse.h" + +#include "svga_context.h" +#include "svga_screen_texture.h" +#include "svga_state.h" + +#include "svga_hw_reg.h" + +#include "svga_debug.h" + +static INLINE unsigned +translate_wrap_mode(unsigned wrap) +{ + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + return SVGA3D_TEX_ADDRESS_WRAP; + + case PIPE_TEX_WRAP_CLAMP: + return SVGA3D_TEX_ADDRESS_CLAMP; + + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + /* Unfortunately SVGA3D_TEX_ADDRESS_EDGE not respected by + * hardware. + */ + return SVGA3D_TEX_ADDRESS_CLAMP; + + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return SVGA3D_TEX_ADDRESS_BORDER; + + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return SVGA3D_TEX_ADDRESS_MIRROR; + + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return SVGA3D_TEX_ADDRESS_MIRRORONCE; + + default: + assert(0); + return SVGA3D_TEX_ADDRESS_WRAP; + } +} + +static INLINE unsigned translate_img_filter( unsigned filter ) +{ + switch (filter) { + case PIPE_TEX_FILTER_NEAREST: return SVGA3D_TEX_FILTER_NEAREST; + case PIPE_TEX_FILTER_LINEAR: return SVGA3D_TEX_FILTER_LINEAR; + case PIPE_TEX_FILTER_ANISO: return SVGA3D_TEX_FILTER_ANISOTROPIC; + default: + assert(0); + return SVGA3D_TEX_FILTER_NEAREST; + } +} + +static INLINE unsigned translate_mip_filter( unsigned filter ) +{ + switch (filter) { + case PIPE_TEX_MIPFILTER_NONE: return SVGA3D_TEX_FILTER_NONE; + case PIPE_TEX_MIPFILTER_NEAREST: return SVGA3D_TEX_FILTER_NEAREST; + case PIPE_TEX_MIPFILTER_LINEAR: return SVGA3D_TEX_FILTER_LINEAR; + default: + assert(0); + return SVGA3D_TEX_FILTER_NONE; + } +} + +static void * +svga_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_sampler_state *cso = CALLOC_STRUCT( svga_sampler_state ); + + cso->mipfilter = translate_mip_filter(sampler->min_mip_filter); + cso->magfilter = translate_img_filter( sampler->mag_img_filter ); + cso->minfilter = translate_img_filter( sampler->min_img_filter ); + cso->aniso_level = MAX2( (unsigned) sampler->max_anisotropy, 1 ); + cso->lod_bias = sampler->lod_bias; + cso->addressu = translate_wrap_mode(sampler->wrap_s); + cso->addressv = translate_wrap_mode(sampler->wrap_t); + cso->addressw = translate_wrap_mode(sampler->wrap_r); + cso->normalized_coords = sampler->normalized_coords; + cso->compare_mode = sampler->compare_mode; + cso->compare_func = sampler->compare_func; + + { + ubyte r = float_to_ubyte(sampler->border_color[0]); + ubyte g = float_to_ubyte(sampler->border_color[1]); + ubyte b = float_to_ubyte(sampler->border_color[2]); + ubyte a = float_to_ubyte(sampler->border_color[3]); + + util_pack_color_ub( r, g, b, a, + PIPE_FORMAT_B8G8R8A8_UNORM, + &cso->bordercolor ); + } + + /* No SVGA3D support for: + * - min/max LOD clamping + */ + cso->min_lod = 0; + cso->view_min_lod = MAX2(sampler->min_lod, 0); + cso->view_max_lod = MAX2(sampler->max_lod, 0); + + /* Use min_mipmap */ + if (svga->debug.use_min_mipmap) { + if (cso->view_min_lod == cso->view_max_lod) { + cso->min_lod = cso->view_min_lod; + cso->view_min_lod = 0; + cso->view_max_lod = 1000; /* Just a high number */ + cso->mipfilter = SVGA3D_TEX_FILTER_NONE; + } + } + + SVGA_DBG(DEBUG_VIEWS, "min %u, view(min %u, max %u) lod, mipfilter %s\n", + cso->min_lod, cso->view_min_lod, cso->view_max_lod, + cso->mipfilter == SVGA3D_TEX_FILTER_NONE ? "SVGA3D_TEX_FILTER_NONE" : "SOMETHING"); + + return cso; +} + +static void svga_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct svga_context *svga = svga_context(pipe); + unsigned i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == svga->curr.num_samplers && + !memcmp(svga->curr.sampler, sampler, num * sizeof(void *))) { + debug_printf("sampler noop\n"); + return; + } + + for (i = 0; i < num; i++) + svga->curr.sampler[i] = sampler[i]; + + for (i = num; i < svga->curr.num_samplers; i++) + svga->curr.sampler[i] = NULL; + + svga->curr.num_samplers = num; + svga->dirty |= SVGA_NEW_SAMPLER; +} + +static void svga_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + FREE(sampler); +} + + +static void svga_set_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) +{ + struct svga_context *svga = svga_context(pipe); + unsigned flag_1d = 0; + unsigned flag_srgb = 0; + uint i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == svga->curr.num_textures && + !memcmp(svga->curr.texture, texture, num * sizeof(struct pipe_texture *))) { + if (0) debug_printf("texture noop\n"); + return; + } + + for (i = 0; i < num; i++) { + pipe_texture_reference(&svga->curr.texture[i], + texture[i]); + + if (!texture[i]) + continue; + + if (texture[i]->format == PIPE_FORMAT_A8R8G8B8_SRGB) + flag_srgb |= 1 << i; + + if (texture[i]->target == PIPE_TEXTURE_1D) + flag_1d |= 1 << i; + } + + for (i = num; i < svga->curr.num_textures; i++) + pipe_texture_reference(&svga->curr.texture[i], + NULL); + + svga->curr.num_textures = num; + svga->dirty |= SVGA_NEW_TEXTURE_BINDING; + + if (flag_srgb != svga->curr.tex_flags.flag_srgb || + flag_1d != svga->curr.tex_flags.flag_1d) + { + svga->dirty |= SVGA_NEW_TEXTURE_FLAGS; + svga->curr.tex_flags.flag_1d = flag_1d; + svga->curr.tex_flags.flag_srgb = flag_srgb; + } +} + + + +void svga_init_sampler_functions( struct svga_context *svga ) +{ + svga->pipe.create_sampler_state = svga_create_sampler_state; + svga->pipe.bind_sampler_states = svga_bind_sampler_states; + svga->pipe.delete_sampler_state = svga_delete_sampler_state; + svga->pipe.set_sampler_textures = svga_set_sampler_textures; +} + + + diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c new file mode 100644 index 0000000000..28e2787e0d --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_vertex.c @@ -0,0 +1,115 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_parse.h" + +#include "svga_screen.h" +#include "svga_screen_buffer.h" +#include "svga_context.h" +#include "svga_state.h" +#include "svga_winsys.h" + +#include "svga_hw_reg.h" + + +static void svga_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct svga_context *svga = svga_context(pipe); + unsigned i; + boolean any_user_buffer = FALSE; + + /* Check for no change */ + if (count == svga->curr.num_vertex_buffers && + memcmp(svga->curr.vb, buffers, count * sizeof buffers[0]) == 0) + return; + + /* Adjust refcounts */ + for (i = 0; i < count; i++) { + pipe_buffer_reference(&svga->curr.vb[i].buffer, buffers[i].buffer); + if (svga_buffer(buffers[i].buffer)->user) + any_user_buffer = TRUE; + } + + for ( ; i < svga->curr.num_vertex_buffers; i++) + pipe_buffer_reference(&svga->curr.vb[i].buffer, NULL); + + /* Copy remaining data */ + memcpy(svga->curr.vb, buffers, count * sizeof buffers[0]); + svga->curr.num_vertex_buffers = count; + svga->curr.any_user_vertex_buffers = any_user_buffer; + + svga->dirty |= SVGA_NEW_VBUFFER; +} + +static void svga_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *elements) +{ + struct svga_context *svga = svga_context(pipe); + unsigned i; + + for (i = 0; i < count; i++) + svga->curr.ve[i] = elements[i]; + + svga->curr.num_vertex_elements = count; + svga->dirty |= SVGA_NEW_VELEMENT; +} + + +static void svga_set_edgeflags(struct pipe_context *pipe, + const unsigned *bitfield) +{ + struct svga_context *svga = svga_context(pipe); + + if (bitfield != NULL || svga->curr.edgeflags != NULL) { + svga->curr.edgeflags = bitfield; + svga->dirty |= SVGA_NEW_EDGEFLAGS; + } +} + + +void svga_cleanup_vertex_state( struct svga_context *svga ) +{ + unsigned i; + + for (i = 0 ; i < svga->curr.num_vertex_buffers; i++) + pipe_buffer_reference(&svga->curr.vb[i].buffer, NULL); +} + + +void svga_init_vertex_functions( struct svga_context *svga ) +{ + svga->pipe.set_vertex_buffers = svga_set_vertex_buffers; + svga->pipe.set_vertex_elements = svga_set_vertex_elements; + svga->pipe.set_edgeflags = svga_set_edgeflags; +} + + diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c new file mode 100644 index 0000000000..e5ffe668c3 --- /dev/null +++ b/src/gallium/drivers/svga/svga_pipe_vs.c @@ -0,0 +1,189 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "draw/draw_context.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_text.h" + +#include "svga_screen.h" +#include "svga_context.h" +#include "svga_state.h" +#include "svga_tgsi.h" +#include "svga_hw_reg.h" +#include "svga_cmd.h" +#include "svga_debug.h" + + +static const struct tgsi_token *substitute_vs( + unsigned shader_id, + const struct tgsi_token *old_tokens ) +{ +#if 0 + if (shader_id == 12) { + static struct tgsi_token tokens[300]; + + const char *text = + "VERT1.1\n" + "DCL IN[0]\n" + "DCL IN[1]\n" + "DCL IN[2]\n" + "DCL OUT[0], POSITION\n" + "DCL TEMP[0..4]\n" + "IMM FLT32 { 1.0000, 1.0000, 1.0000, 1.0000 }\n" + "IMM FLT32 { 0.45, 1.0000, 1.0000, 1.0000 }\n" + "IMM FLT32 { 1.297863, 0.039245, 0.035993, 0.035976}\n" + "IMM FLT32 { -0.019398, 1.696131, -0.202151, -0.202050 }\n" + "IMM FLT32 { 0.051711, -0.348713, -0.979204, -0.978714 }\n" + "IMM FLT32 { 0.000000, 0.000003, 139.491577, 141.421356 }\n" + "DCL CONST[0..7]\n" + "DCL CONST[9..16]\n" + " MOV TEMP[2], IMM[0]\n" + + " MOV TEMP[2].xyz, IN[2]\n" + " MOV TEMP[2].xyz, IN[0]\n" + " MOV TEMP[2].xyz, IN[1]\n" + + " MUL TEMP[1], IMM[3], TEMP[2].yyyy\n" + " MAD TEMP[3], IMM[2], TEMP[2].xxxx, TEMP[1]\n" + " MAD TEMP[1], IMM[4], TEMP[2].zzzz, TEMP[3]\n" + " MAD TEMP[4], IMM[5], TEMP[2].wwww, TEMP[1]\n" + + " MOV OUT[0], TEMP[4]\n" + " END\n"; + + if (!tgsi_text_translate( text, + tokens, + Elements(tokens) )) + { + assert(0); + return NULL; + } + + return tokens; + } +#endif + + return old_tokens; +} + + +/*********************************************************************** + * Vertex shaders + */ + +static void * +svga_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_screen *svgascreen = svga_screen(pipe->screen); + struct svga_vertex_shader *vs = CALLOC_STRUCT(svga_vertex_shader); + if (!vs) + return NULL; + + /* substitute a debug shader? + */ + vs->base.tokens = tgsi_dup_tokens(substitute_vs(svga->debug.shader_id, + templ->tokens)); + + + /* Collect basic info that we'll need later: + */ + tgsi_scan_shader(vs->base.tokens, &vs->base.info); + + { + /* Need to do construct a new template in case we substitued a + * debug shader. + */ + struct pipe_shader_state tmp2 = *templ; + tmp2.tokens = vs->base.tokens; + vs->draw_shader = draw_create_vertex_shader(svga->swtnl.draw, &tmp2); + } + + vs->base.id = svga->debug.shader_id++; + vs->base.use_sm30 = svgascreen->use_vs30; + + if (SVGA_DEBUG & DEBUG_TGSI || 0) { + debug_printf("%s id: %u, inputs: %u, outputs: %u\n", + __FUNCTION__, vs->base.id, + vs->base.info.num_inputs, vs->base.info.num_outputs); + } + + return vs; +} + +static void svga_bind_vs_state(struct pipe_context *pipe, void *shader) +{ + struct svga_vertex_shader *vs = (struct svga_vertex_shader *)shader; + struct svga_context *svga = svga_context(pipe); + + svga->curr.vs = vs; + svga->dirty |= SVGA_NEW_VS; +} + + +static void svga_delete_vs_state(struct pipe_context *pipe, void *shader) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_vertex_shader *vs = (struct svga_vertex_shader *)shader; + struct svga_shader_result *result, *tmp; + enum pipe_error ret; + + svga_hwtnl_flush_retry( svga ); + + draw_delete_vertex_shader(svga->swtnl.draw, vs->draw_shader); + + for (result = vs->base.results; result; result = tmp ) { + tmp = result->next; + + ret = SVGA3D_DestroyShader(svga->swc, + result->id, + SVGA3D_SHADERTYPE_VS ); + if(ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_DestroyShader(svga->swc, + result->id, + SVGA3D_SHADERTYPE_VS ); + assert(ret == PIPE_OK); + } + + svga_destroy_shader_result( result ); + } + + FREE((void *)vs->base.tokens); + FREE(vs); +} + + +void svga_init_vs_functions( struct svga_context *svga ) +{ + svga->pipe.create_vs_state = svga_create_vs_state; + svga->pipe.bind_vs_state = svga_bind_vs_state; + svga->pipe.delete_vs_state = svga_delete_vs_state; +} + diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c new file mode 100644 index 0000000000..3afcaffff5 --- /dev/null +++ b/src/gallium/drivers/svga/svga_screen.c @@ -0,0 +1,435 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "util/u_string.h" +#include "util/u_math.h" + +#include "svga_winsys.h" +#include "svga_context.h" +#include "svga_screen.h" +#include "svga_screen_texture.h" +#include "svga_screen_buffer.h" +#include "svga_cmd.h" +#include "svga_debug.h" + +#include "svga_hw_reg.h" +#include "svga3d_shaderdefs.h" + + +#ifdef DEBUG +int SVGA_DEBUG = 0; + +static const struct debug_named_value svga_debug_flags[] = { + { "dma", DEBUG_DMA }, + { "tgsi", DEBUG_TGSI }, + { "pipe", DEBUG_PIPE }, + { "state", DEBUG_STATE }, + { "screen", DEBUG_SCREEN }, + { "tex", DEBUG_TEX }, + { "swtnl", DEBUG_SWTNL }, + { "const", DEBUG_CONSTS }, + { "viewport", DEBUG_VIEWPORT }, + { "views", DEBUG_VIEWS }, + { "perf", DEBUG_PERF }, + { "flush", DEBUG_FLUSH }, + { "sync", DEBUG_SYNC }, + {NULL, 0} +}; +#endif + +static const char * +svga_get_vendor( struct pipe_screen *pscreen ) +{ + return "VMware, Inc."; +} + + +static const char * +svga_get_name( struct pipe_screen *pscreen ) +{ +#ifdef DEBUG + /* Only return internal details in the DEBUG version: + */ + return "SVGA3D; build: DEBUG; mutex: " PIPE_ATOMIC; +#else + return "SVGA3D; build: RELEASE; "; +#endif +} + + + + +static float +svga_get_paramf(struct pipe_screen *screen, int param) +{ + struct svga_screen *svgascreen = svga_screen(screen); + struct svga_winsys_screen *sws = svgascreen->sws; + SVGA3dDevCapResult result; + + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 7.0; + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + /* Keep this to a reasonable size to avoid failures in + * conform/pntaa.c: + */ + return 80.0; + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 4.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 16; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return svgascreen->use_ps30 && svgascreen->use_vs30; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + if(!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_RENDER_TARGETS, &result)) + return 1; + if(!result.u) + return 1; + return MIN2(result.u, PIPE_MAX_COLOR_BUFS); + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return SVGA_MAX_TEXTURE_LEVELS; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return SVGA_MAX_TEXTURE_LEVELS; + + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: /* req. for GL 1.4 */ + return 1; + + case PIPE_CAP_BLEND_EQUATION_SEPARATE: /* req. for GL 1.5 */ + return 1; + + default: + return 0; + } +} + + +/* This is a fairly pointless interface + */ +static int +svga_get_param(struct pipe_screen *screen, int param) +{ + return (int) svga_get_paramf( screen, param ); +} + + +static INLINE SVGA3dDevCapIndex +svga_translate_format_cap(enum pipe_format format) +{ + switch(format) { + + case PIPE_FORMAT_A8R8G8B8_UNORM: + return SVGA3D_DEVCAP_SURFACEFMT_A8R8G8B8; + case PIPE_FORMAT_X8R8G8B8_UNORM: + return SVGA3D_DEVCAP_SURFACEFMT_X8R8G8B8; + + case PIPE_FORMAT_R5G6B5_UNORM: + return SVGA3D_DEVCAP_SURFACEFMT_R5G6B5; + case PIPE_FORMAT_A1R5G5B5_UNORM: + return SVGA3D_DEVCAP_SURFACEFMT_A1R5G5B5; + case PIPE_FORMAT_A4R4G4B4_UNORM: + return SVGA3D_DEVCAP_SURFACEFMT_A4R4G4B4; + + case PIPE_FORMAT_Z16_UNORM: + return SVGA3D_DEVCAP_SURFACEFMT_Z_D16; + case PIPE_FORMAT_Z24S8_UNORM: + return SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8; + case PIPE_FORMAT_Z24X8_UNORM: + return SVGA3D_DEVCAP_SURFACEFMT_Z_D24X8; + + case PIPE_FORMAT_A8_UNORM: + return SVGA3D_DEVCAP_SURFACEFMT_ALPHA8; + case PIPE_FORMAT_L8_UNORM: + return SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8; + + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + return SVGA3D_DEVCAP_SURFACEFMT_DXT1; + case PIPE_FORMAT_DXT3_RGBA: + return SVGA3D_DEVCAP_SURFACEFMT_DXT3; + case PIPE_FORMAT_DXT5_RGBA: + return SVGA3D_DEVCAP_SURFACEFMT_DXT5; + + default: + return SVGA3D_DEVCAP_MAX; + } +} + + +static boolean +svga_is_format_supported( struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ) +{ + struct svga_winsys_screen *sws = svga_screen(screen)->sws; + SVGA3dDevCapIndex index; + SVGA3dDevCapResult result; + + assert(tex_usage); + + /* Override host capabilities */ + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + switch(format) { + + /* Often unsupported/problematic. This means we end up with the same + * visuals for all virtual hardware implementations. + */ + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + return FALSE; + + /* Simulate ability to render into compressed textures */ + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + return TRUE; + + default: + break; + } + } + + /* Try to query the host */ + index = svga_translate_format_cap(format); + if( index < SVGA3D_DEVCAP_MAX && + sws->get_cap(sws, index, &result) ) + { + SVGA3dSurfaceFormatCaps mask; + + mask.value = 0; + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) + mask.offscreenRenderTarget = 1; + if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) + mask.zStencil = 1; + if (tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) + mask.texture = 1; + + if ((result.u & mask.value) == mask.value) + return TRUE; + else + return FALSE; + } + + /* Use our translate functions directly rather than relying on a + * duplicated list of supported formats which is prone to getting + * out of sync: + */ + if(tex_usage & (PIPE_TEXTURE_USAGE_RENDER_TARGET | PIPE_TEXTURE_USAGE_DEPTH_STENCIL)) + return svga_translate_format_render(format) != SVGA3D_FORMAT_INVALID; + else + return svga_translate_format(format) != SVGA3D_FORMAT_INVALID; +} + + +static void +svga_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + struct svga_winsys_screen *sws = svga_screen(screen)->sws; + sws->fence_reference(sws, ptr, fence); +} + + +static int +svga_fence_signalled(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flag) +{ + struct svga_winsys_screen *sws = svga_screen(screen)->sws; + return sws->fence_signalled(sws, fence, flag); +} + + +static int +svga_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flag) +{ + struct svga_winsys_screen *sws = svga_screen(screen)->sws; + return sws->fence_finish(sws, fence, flag); +} + + +static void +svga_destroy_screen( struct pipe_screen *screen ) +{ + struct svga_screen *svgascreen = svga_screen(screen); + + svga_screen_cache_cleanup(svgascreen); + + pipe_mutex_destroy(svgascreen->swc_mutex); + pipe_mutex_destroy(svgascreen->tex_mutex); + + svgascreen->swc->destroy(svgascreen->swc); + + svgascreen->sws->destroy(svgascreen->sws); + + FREE(svgascreen); +} + + +/** + * Create a new svga_screen object + */ +struct pipe_screen * +svga_screen_create(struct svga_winsys_screen *sws) +{ + struct svga_screen *svgascreen; + struct pipe_screen *screen; + SVGA3dDevCapResult result; + +#ifdef DEBUG + SVGA_DEBUG = debug_get_flags_option("SVGA_DEBUG", svga_debug_flags, 0 ); +#endif + + svgascreen = CALLOC_STRUCT(svga_screen); + if (!svgascreen) + goto error1; + + svgascreen->debug.force_level_surface_view = + debug_get_bool_option("SVGA_FORCE_LEVEL_SURFACE_VIEW", FALSE); + svgascreen->debug.force_surface_view = + debug_get_bool_option("SVGA_FORCE_SURFACE_VIEW", FALSE); + svgascreen->debug.force_sampler_view = + debug_get_bool_option("SVGA_FORCE_SAMPLER_VIEW", FALSE); + svgascreen->debug.no_surface_view = + debug_get_bool_option("SVGA_NO_SURFACE_VIEW", FALSE); + svgascreen->debug.no_sampler_view = + debug_get_bool_option("SVGA_NO_SAMPLER_VIEW", FALSE); + + screen = &svgascreen->screen; + + screen->destroy = svga_destroy_screen; + screen->get_name = svga_get_name; + screen->get_vendor = svga_get_vendor; + screen->get_param = svga_get_param; + screen->get_paramf = svga_get_paramf; + screen->is_format_supported = svga_is_format_supported; + screen->fence_reference = svga_fence_reference; + screen->fence_signalled = svga_fence_signalled; + screen->fence_finish = svga_fence_finish; + svgascreen->sws = sws; + + svga_screen_init_texture_functions(screen); + svga_screen_init_buffer_functions(screen); + + svgascreen->use_ps30 = + sws->get_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION, &result) && + result.u >= SVGA3DPSVERSION_30 ? TRUE : FALSE; + + svgascreen->use_vs30 = + sws->get_cap(sws, SVGA3D_DEVCAP_VERTEX_SHADER_VERSION, &result) && + result.u >= SVGA3DVSVERSION_30 ? TRUE : FALSE; + +#if 1 + /* Shader model 2.0 is unsupported at the moment. */ + if(!svgascreen->use_ps30 || !svgascreen->use_vs30) + goto error2; +#else + if(debug_get_bool_option("SVGA_NO_SM30", FALSE)) + svgascreen->use_vs30 = svgascreen->use_ps30 = FALSE; +#endif + + svgascreen->swc = sws->context_create(sws); + if(!svgascreen->swc) + goto error2; + + pipe_mutex_init(svgascreen->tex_mutex); + pipe_mutex_init(svgascreen->swc_mutex); + + LIST_INITHEAD(&svgascreen->cached_buffers); + + svga_screen_cache_init(svgascreen); + + return screen; +error2: + FREE(svgascreen); +error1: + return NULL; +} + +void svga_screen_flush( struct svga_screen *svgascreen, + struct pipe_fence_handle **pfence ) +{ + struct pipe_fence_handle *fence = NULL; + + SVGA_DBG(DEBUG_PERF, "%s\n", __FUNCTION__); + + pipe_mutex_lock(svgascreen->swc_mutex); + svgascreen->swc->flush(svgascreen->swc, &fence); + pipe_mutex_unlock(svgascreen->swc_mutex); + + svga_screen_cache_flush(svgascreen, fence); + + if(pfence) + *pfence = fence; + else + svgascreen->sws->fence_reference(svgascreen->sws, &fence, NULL); +} + +struct svga_winsys_screen * +svga_winsys_screen(struct pipe_screen *screen) +{ + return svga_screen(screen)->sws; +} + +#ifdef DEBUG +struct svga_screen * +svga_screen(struct pipe_screen *screen) +{ + assert(screen); + assert(screen->destroy == svga_destroy_screen); + return (struct svga_screen *)screen; +} +#endif diff --git a/src/gallium/drivers/svga/svga_screen.h b/src/gallium/drivers/svga/svga_screen.h new file mode 100644 index 0000000000..b94ca7fc1c --- /dev/null +++ b/src/gallium/drivers/svga/svga_screen.h @@ -0,0 +1,95 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_SCREEN_H +#define SVGA_SCREEN_H + + +#include "pipe/p_screen.h" +#include "pipe/p_thread.h" + +#include "util/u_double_list.h" + +#include "svga_screen_cache.h" + + +struct svga_winsys_screen; +struct svga_winsys_context; +struct SVGACmdMemory; + +#define SVGA_COMBINE_USERBUFFERS 1 + +/** + * Subclass of pipe_screen + */ +struct svga_screen +{ + struct pipe_screen screen; + struct svga_winsys_screen *sws; + + unsigned use_ps30; + unsigned use_vs30; + + struct { + boolean force_level_surface_view; + boolean force_surface_view; + boolean no_surface_view; + boolean force_sampler_view; + boolean no_sampler_view; + } debug; + + /* The screen needs its own context */ + struct svga_winsys_context *swc; + struct SVGACmdMemory *fifo; + + unsigned texture_timestamp; + pipe_mutex tex_mutex; + pipe_mutex swc_mutex; /* Protects the use of swc and dirty_buffers */ + + /** + * List of buffers with cached GMR. Ordered from the most recently used to + * the least recently used + */ + struct list_head cached_buffers; + + struct svga_host_surface_cache cache; +}; + +#ifndef DEBUG +/** cast wrapper */ +static INLINE struct svga_screen * +svga_screen(struct pipe_screen *pscreen) +{ + return (struct svga_screen *) pscreen; +} +#else +struct svga_screen * +svga_screen(struct pipe_screen *screen); +#endif + +void svga_screen_flush( struct svga_screen *svga_screen, + struct pipe_fence_handle **pfence ); + +#endif /* SVGA_SCREEN_H */ diff --git a/src/gallium/drivers/svga/svga_screen_buffer.c b/src/gallium/drivers/svga/svga_screen_buffer.c new file mode 100644 index 0000000000..3b7811734e --- /dev/null +++ b/src/gallium/drivers/svga/svga_screen_buffer.c @@ -0,0 +1,820 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "svga_cmd.h" + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_thread.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "svga_context.h" +#include "svga_screen.h" +#include "svga_screen_buffer.h" +#include "svga_winsys.h" +#include "svga_debug.h" + + +/** + * Vertex and index buffers have to be treated slightly differently from + * regular guest memory regions because the SVGA device sees them as + * surfaces, and the state tracker can create/destroy without the pipe + * driver, therefore we must do the uploads from the vws. + */ +static INLINE boolean +svga_buffer_needs_hw_storage(unsigned usage) +{ + return usage & (PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_INDEX); +} + + +static INLINE enum pipe_error +svga_buffer_create_host_surface(struct svga_screen *ss, + struct svga_buffer *sbuf) +{ + if(!sbuf->handle) { + sbuf->key.flags = 0; + + sbuf->key.format = SVGA3D_BUFFER; + if(sbuf->base.usage & PIPE_BUFFER_USAGE_VERTEX) + sbuf->key.flags |= SVGA3D_SURFACE_HINT_VERTEXBUFFER; + if(sbuf->base.usage & PIPE_BUFFER_USAGE_INDEX) + sbuf->key.flags |= SVGA3D_SURFACE_HINT_INDEXBUFFER; + + sbuf->key.size.width = sbuf->base.size; + sbuf->key.size.height = 1; + sbuf->key.size.depth = 1; + + sbuf->key.numFaces = 1; + sbuf->key.numMipLevels = 1; + + sbuf->handle = svga_screen_surface_create(ss, &sbuf->key); + if(!sbuf->handle) + return PIPE_ERROR_OUT_OF_MEMORY; + + /* Always set the discard flag on the first time the buffer is written + * as svga_screen_surface_create might have passed a recycled host + * buffer. + */ + sbuf->hw.flags.discard = TRUE; + + SVGA_DBG(DEBUG_DMA, " grab sid %p sz %d\n", sbuf->handle, sbuf->base.size); + } + + return PIPE_OK; +} + + +static INLINE void +svga_buffer_destroy_host_surface(struct svga_screen *ss, + struct svga_buffer *sbuf) +{ + if(sbuf->handle) { + SVGA_DBG(DEBUG_DMA, " ungrab sid %p sz %d\n", sbuf->handle, sbuf->base.size); + svga_screen_surface_destroy(ss, &sbuf->key, &sbuf->handle); + } +} + + +static INLINE void +svga_buffer_destroy_hw_storage(struct svga_screen *ss, struct svga_buffer *sbuf) +{ + struct svga_winsys_screen *sws = ss->sws; + + assert(!sbuf->map.count); + assert(sbuf->hw.buf); + if(sbuf->hw.buf) { + sws->buffer_destroy(sws, sbuf->hw.buf); + sbuf->hw.buf = NULL; + assert(sbuf->head.prev && sbuf->head.next); + LIST_DEL(&sbuf->head); +#ifdef DEBUG + sbuf->head.next = sbuf->head.prev = NULL; +#endif + } +} + +static INLINE enum pipe_error +svga_buffer_backup(struct svga_screen *ss, struct svga_buffer *sbuf) +{ + if (sbuf->hw.buf && sbuf->hw.num_ranges) { + void *src; + + if (!sbuf->swbuf) + sbuf->swbuf = align_malloc(sbuf->base.size, sbuf->base.alignment); + if (!sbuf->swbuf) + return PIPE_ERROR_OUT_OF_MEMORY; + + src = ss->sws->buffer_map(ss->sws, sbuf->hw.buf, + PIPE_BUFFER_USAGE_CPU_READ); + if (!src) + return PIPE_ERROR; + + memcpy(sbuf->swbuf, src, sbuf->base.size); + ss->sws->buffer_unmap(ss->sws, sbuf->hw.buf); + } + + return PIPE_OK; +} + +/** + * Try to make GMR space available by freeing the hardware storage of + * unmapped + */ +boolean +svga_buffer_free_cached_hw_storage(struct svga_screen *ss) +{ + struct list_head *curr; + struct svga_buffer *sbuf; + enum pipe_error ret = PIPE_OK; + + curr = ss->cached_buffers.prev; + + /* free the least recently used buffer's hw storage which is not mapped */ + do { + if(curr == &ss->cached_buffers) + return FALSE; + + sbuf = LIST_ENTRY(struct svga_buffer, curr, head); + + curr = curr->prev; + if (sbuf->map.count == 0) + ret = svga_buffer_backup(ss, sbuf); + + } while(sbuf->map.count != 0 || ret != PIPE_OK); + + svga_buffer_destroy_hw_storage(ss, sbuf); + + return TRUE; +} + +struct svga_winsys_buffer * +svga_winsys_buffer_create( struct svga_screen *ss, + unsigned alignment, + unsigned usage, + unsigned size ) +{ + struct svga_winsys_screen *sws = ss->sws; + struct svga_winsys_buffer *buf; + + /* Just try */ + buf = sws->buffer_create(sws, alignment, usage, size); + if(!buf) { + + SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "flushing screen to find %d bytes GMR\n", + size); + + /* Try flushing all pending DMAs */ + svga_screen_flush(ss, NULL); + buf = sws->buffer_create(sws, alignment, usage, size); + + SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "evicting buffers to find %d bytes GMR\n", + size); + + /* Try evicing all buffer storage */ + while(!buf && svga_buffer_free_cached_hw_storage(ss)) + buf = sws->buffer_create(sws, alignment, usage, size); + } + + return buf; +} + + +/** + * Allocate DMA'ble storage for the buffer. + * + * Called before mapping a buffer. + */ +static INLINE enum pipe_error +svga_buffer_create_hw_storage(struct svga_screen *ss, + struct svga_buffer *sbuf) +{ + if(!sbuf->hw.buf) { + unsigned alignment = sbuf->base.alignment; + unsigned usage = 0; + unsigned size = sbuf->base.size; + + sbuf->hw.buf = svga_winsys_buffer_create(ss, alignment, usage, size); + if(!sbuf->hw.buf) + return PIPE_ERROR_OUT_OF_MEMORY; + + assert(!sbuf->needs_flush); + assert(!sbuf->head.prev && !sbuf->head.next); + LIST_ADD(&sbuf->head, &ss->cached_buffers); + } + + return PIPE_OK; +} + + +/** + * Variant of SVGA3D_BufferDMA which leaves the copy box temporarily in blank. + */ +static enum pipe_error +svga_buffer_upload_command(struct svga_context *svga, + struct svga_buffer *sbuf) +{ + struct svga_winsys_context *swc = svga->swc; + struct svga_winsys_buffer *guest = sbuf->hw.buf; + struct svga_winsys_surface *host = sbuf->handle; + SVGA3dTransferType transfer = SVGA3D_WRITE_HOST_VRAM; + SVGA3dSurfaceDMAFlags flags = sbuf->hw.flags; + SVGA3dCmdSurfaceDMA *cmd; + uint32 numBoxes = sbuf->hw.num_ranges; + SVGA3dCopyBox *boxes; + SVGA3dCmdSurfaceDMASuffix *pSuffix; + unsigned region_flags; + unsigned surface_flags; + struct pipe_buffer *dummy; + + if(transfer == SVGA3D_WRITE_HOST_VRAM) { + region_flags = PIPE_BUFFER_USAGE_GPU_READ; + surface_flags = PIPE_BUFFER_USAGE_GPU_WRITE; + } + else if(transfer == SVGA3D_READ_HOST_VRAM) { + region_flags = PIPE_BUFFER_USAGE_GPU_WRITE; + surface_flags = PIPE_BUFFER_USAGE_GPU_READ; + } + else { + assert(0); + return PIPE_ERROR_BAD_INPUT; + } + + assert(numBoxes); + + cmd = SVGA3D_FIFOReserve(swc, + SVGA_3D_CMD_SURFACE_DMA, + sizeof *cmd + numBoxes * sizeof *boxes + sizeof *pSuffix, + 2); + if(!cmd) + return PIPE_ERROR_OUT_OF_MEMORY; + + swc->region_relocation(swc, &cmd->guest.ptr, guest, 0, region_flags); + cmd->guest.pitch = 0; + + swc->surface_relocation(swc, &cmd->host.sid, host, surface_flags); + cmd->host.face = 0; + cmd->host.mipmap = 0; + + cmd->transfer = transfer; + + sbuf->hw.boxes = (SVGA3dCopyBox *)&cmd[1]; + sbuf->hw.svga = svga; + + /* Increment reference count */ + dummy = NULL; + pipe_buffer_reference(&dummy, &sbuf->base); + + pSuffix = (SVGA3dCmdSurfaceDMASuffix *)((uint8_t*)cmd + sizeof *cmd + numBoxes * sizeof *boxes); + pSuffix->suffixSize = sizeof *pSuffix; + pSuffix->maximumOffset = sbuf->base.size; + pSuffix->flags = flags; + + swc->commit(swc); + + return PIPE_OK; +} + + +/** + * Patch up the upload DMA command reserved by svga_buffer_upload_command + * with the final ranges. + */ +static void +svga_buffer_upload_flush(struct svga_context *svga, + struct svga_buffer *sbuf) +{ + struct svga_screen *ss = svga_screen(svga->pipe.screen); + SVGA3dCopyBox *boxes; + unsigned i; + + assert(sbuf->handle); + assert(sbuf->hw.buf); + assert(sbuf->hw.num_ranges); + assert(sbuf->hw.svga == svga); + assert(sbuf->hw.boxes); + + /* + * Patch the DMA command with the final copy box. + */ + + SVGA_DBG(DEBUG_DMA, "dma to sid %p\n", sbuf->handle); + + boxes = sbuf->hw.boxes; + for(i = 0; i < sbuf->hw.num_ranges; ++i) { + SVGA_DBG(DEBUG_DMA, " bytes %u - %u\n", + sbuf->hw.ranges[i].start, sbuf->hw.ranges[i].end); + + boxes[i].x = sbuf->hw.ranges[i].start; + boxes[i].y = 0; + boxes[i].z = 0; + boxes[i].w = sbuf->hw.ranges[i].end - sbuf->hw.ranges[i].start; + boxes[i].h = 1; + boxes[i].d = 1; + boxes[i].srcx = sbuf->hw.ranges[i].start; + boxes[i].srcy = 0; + boxes[i].srcz = 0; + } + + sbuf->hw.num_ranges = 0; + memset(&sbuf->hw.flags, 0, sizeof sbuf->hw.flags); + + assert(sbuf->head.prev && sbuf->head.next); + LIST_DEL(&sbuf->head); + sbuf->needs_flush = FALSE; + /* XXX: do we care about cached_buffers any more ?*/ + LIST_ADD(&sbuf->head, &ss->cached_buffers); + + sbuf->hw.svga = NULL; + sbuf->hw.boxes = NULL; + + /* Decrement reference count */ + pipe_buffer_reference((struct pipe_buffer **)&sbuf, NULL); +} + + +/** + * Queue a DMA upload of a range of this buffer to the host. + * + * This function only notes the range down. It doesn't actually emit a DMA + * upload command. That only happens when a context tries to refer to this + * buffer, and the DMA upload command is added to that context's command buffer. + * + * We try to lump as many contiguous DMA transfers together as possible. + */ +static void +svga_buffer_upload_queue(struct svga_buffer *sbuf, + unsigned start, + unsigned end) +{ + unsigned i; + + assert(sbuf->hw.buf); + assert(end > start); + + /* + * Try to grow one of the ranges. + * + * Note that it is not this function task to care about overlapping ranges, + * as the GMR was already given so it is too late to do anything. Situations + * where overlapping ranges may pose a problem should be detected via + * pipe_context::is_buffer_referenced and the context that refers to the + * buffer should be flushed. + */ + + for(i = 0; i < sbuf->hw.num_ranges; ++i) { + if(start <= sbuf->hw.ranges[i].end && sbuf->hw.ranges[i].start <= end) { + sbuf->hw.ranges[i].start = MIN2(sbuf->hw.ranges[i].start, start); + sbuf->hw.ranges[i].end = MAX2(sbuf->hw.ranges[i].end, end); + return; + } + } + + /* + * We cannot add a new range to an existing DMA command, so patch-up the + * pending DMA upload and start clean. + */ + + if(sbuf->needs_flush) + svga_buffer_upload_flush(sbuf->hw.svga, sbuf); + + assert(!sbuf->needs_flush); + assert(!sbuf->hw.svga); + assert(!sbuf->hw.boxes); + + /* + * Add a new range. + */ + + sbuf->hw.ranges[sbuf->hw.num_ranges].start = start; + sbuf->hw.ranges[sbuf->hw.num_ranges].end = end; + ++sbuf->hw.num_ranges; +} + + +static void * +svga_buffer_map_range( struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned offset, unsigned length, + unsigned usage ) +{ + struct svga_screen *ss = svga_screen(screen); + struct svga_winsys_screen *sws = ss->sws; + struct svga_buffer *sbuf = svga_buffer( buf ); + void *map; + + if(sbuf->swbuf) { + /* User/malloc buffer */ + map = sbuf->swbuf; + } + else { + if(!sbuf->hw.buf) { + struct svga_winsys_surface *handle = sbuf->handle; + + if(svga_buffer_create_hw_storage(ss, sbuf) != PIPE_OK) + return NULL; + + /* Populate the hardware storage if the host surface pre-existed */ + if((usage & PIPE_BUFFER_USAGE_CPU_READ) && handle) { + SVGA3dSurfaceDMAFlags flags; + enum pipe_error ret; + struct pipe_fence_handle *fence = NULL; + + SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "dma from sid %p, bytes %u - %u\n", + sbuf->handle, 0, sbuf->base.size); + + memset(&flags, 0, sizeof flags); + + ret = SVGA3D_BufferDMA(ss->swc, + sbuf->hw.buf, + sbuf->handle, + SVGA3D_READ_HOST_VRAM, + sbuf->base.size, + 0, + flags); + if(ret != PIPE_OK) { + ss->swc->flush(ss->swc, NULL); + + ret = SVGA3D_BufferDMA(ss->swc, + sbuf->hw.buf, + sbuf->handle, + SVGA3D_READ_HOST_VRAM, + sbuf->base.size, + 0, + flags); + assert(ret == PIPE_OK); + } + + ss->swc->flush(ss->swc, &fence); + sws->fence_finish(sws, fence, 0); + sws->fence_reference(sws, &fence, NULL); + } + } + else { + if((usage & PIPE_BUFFER_USAGE_CPU_READ) && !sbuf->needs_flush) { + /* We already had the hardware storage but we would have to issue + * a download if we hadn't, so move the buffer to the begginning + * of the LRU list. + */ + assert(sbuf->head.prev && sbuf->head.next); + LIST_DEL(&sbuf->head); + LIST_ADD(&sbuf->head, &ss->cached_buffers); + } + } + + map = sws->buffer_map(sws, sbuf->hw.buf, usage); + } + + if(map) { + pipe_mutex_lock(ss->swc_mutex); + + ++sbuf->map.count; + + if (usage & PIPE_BUFFER_USAGE_CPU_WRITE) { + assert(sbuf->map.count <= 1); + sbuf->map.writing = TRUE; + if (usage & PIPE_BUFFER_USAGE_FLUSH_EXPLICIT) + sbuf->map.flush_explicit = TRUE; + } + + pipe_mutex_unlock(ss->swc_mutex); + } + + return map; +} + +static void +svga_buffer_flush_mapped_range( struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned offset, unsigned length) +{ + struct svga_buffer *sbuf = svga_buffer( buf ); + struct svga_screen *ss = svga_screen(screen); + + pipe_mutex_lock(ss->swc_mutex); + assert(sbuf->map.writing); + if(sbuf->map.writing) { + assert(sbuf->map.flush_explicit); + if(sbuf->hw.buf) + svga_buffer_upload_queue(sbuf, offset, offset + length); + } + pipe_mutex_unlock(ss->swc_mutex); +} + +static void +svga_buffer_unmap( struct pipe_screen *screen, + struct pipe_buffer *buf) +{ + struct svga_screen *ss = svga_screen(screen); + struct svga_winsys_screen *sws = ss->sws; + struct svga_buffer *sbuf = svga_buffer( buf ); + + pipe_mutex_lock(ss->swc_mutex); + + assert(sbuf->map.count); + if(sbuf->map.count) + --sbuf->map.count; + + if(sbuf->hw.buf) + sws->buffer_unmap(sws, sbuf->hw.buf); + + if(sbuf->map.writing) { + if(!sbuf->map.flush_explicit) { + /* No mapped range was flushed -- flush the whole buffer */ + SVGA_DBG(DEBUG_DMA, "flushing the whole buffer\n"); + + if(sbuf->hw.buf) + svga_buffer_upload_queue(sbuf, 0, sbuf->base.size); + } + + sbuf->map.writing = FALSE; + sbuf->map.flush_explicit = FALSE; + } + + pipe_mutex_unlock(ss->swc_mutex); +} + +static void +svga_buffer_destroy( struct pipe_buffer *buf ) +{ + struct svga_screen *ss = svga_screen(buf->screen); + struct svga_buffer *sbuf = svga_buffer( buf ); + + assert(!p_atomic_read(&buf->reference.count)); + + assert(!sbuf->needs_flush); + + if(sbuf->handle) { + SVGA_DBG(DEBUG_DMA, "release sid %p sz %d\n", sbuf->handle, sbuf->base.size); + svga_screen_surface_destroy(ss, &sbuf->key, &sbuf->handle); + } + + if(sbuf->hw.buf) + svga_buffer_destroy_hw_storage(ss, sbuf); + + if(sbuf->swbuf && !sbuf->user) + align_free(sbuf->swbuf); + + FREE(sbuf); +} + +static struct pipe_buffer * +svga_buffer_create(struct pipe_screen *screen, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct svga_screen *ss = svga_screen(screen); + struct svga_buffer *sbuf; + + sbuf = CALLOC_STRUCT(svga_buffer); + if(!sbuf) + goto error1; + + sbuf->magic = SVGA_BUFFER_MAGIC; + + pipe_reference_init(&sbuf->base.reference, 1); + sbuf->base.screen = screen; + sbuf->base.alignment = alignment; + sbuf->base.usage = usage; + sbuf->base.size = size; + + if(svga_buffer_needs_hw_storage(usage)) { + if(svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK) + goto error2; + } + else { + if(alignment < sizeof(void*)) + alignment = sizeof(void*); + + usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + + sbuf->swbuf = align_malloc(size, alignment); + if(!sbuf->swbuf) + goto error2; + } + + return &sbuf->base; + +error2: + FREE(sbuf); +error1: + return NULL; +} + +static struct pipe_buffer * +svga_user_buffer_create(struct pipe_screen *screen, + void *ptr, + unsigned bytes) +{ + struct svga_buffer *sbuf; + + sbuf = CALLOC_STRUCT(svga_buffer); + if(!sbuf) + goto no_sbuf; + + sbuf->magic = SVGA_BUFFER_MAGIC; + + sbuf->swbuf = ptr; + sbuf->user = TRUE; + + pipe_reference_init(&sbuf->base.reference, 1); + sbuf->base.screen = screen; + sbuf->base.alignment = 1; + sbuf->base.usage = 0; + sbuf->base.size = bytes; + + return &sbuf->base; + +no_sbuf: + return NULL; +} + + +void +svga_screen_init_buffer_functions(struct pipe_screen *screen) +{ + screen->buffer_create = svga_buffer_create; + screen->user_buffer_create = svga_user_buffer_create; + screen->buffer_map_range = svga_buffer_map_range; + screen->buffer_flush_mapped_range = svga_buffer_flush_mapped_range; + screen->buffer_unmap = svga_buffer_unmap; + screen->buffer_destroy = svga_buffer_destroy; +} + + +/** + * Copy the contents of the user buffer / malloc buffer to a hardware buffer. + */ +static INLINE enum pipe_error +svga_buffer_update_hw(struct svga_screen *ss, struct svga_buffer *sbuf) +{ + if(!sbuf->hw.buf) { + enum pipe_error ret; + void *map; + + assert(sbuf->swbuf); + if(!sbuf->swbuf) + return PIPE_ERROR; + + ret = svga_buffer_create_hw_storage(ss, sbuf); + assert(ret == PIPE_OK); + if(ret != PIPE_OK) + return ret; + + pipe_mutex_lock(ss->swc_mutex); + map = ss->sws->buffer_map(ss->sws, sbuf->hw.buf, PIPE_BUFFER_USAGE_CPU_WRITE); + assert(map); + if(!map) { + pipe_mutex_unlock(ss->swc_mutex); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + memcpy(map, sbuf->swbuf, sbuf->base.size); + ss->sws->buffer_unmap(ss->sws, sbuf->hw.buf); + + /* This user/malloc buffer is now indistinguishable from a gpu buffer */ + assert(!sbuf->map.count); + if(!sbuf->map.count) { + if(sbuf->user) + sbuf->user = FALSE; + else + align_free(sbuf->swbuf); + sbuf->swbuf = NULL; + } + + svga_buffer_upload_queue(sbuf, 0, sbuf->base.size); + } + + pipe_mutex_unlock(ss->swc_mutex); + return PIPE_OK; +} + + +struct svga_winsys_surface * +svga_buffer_handle(struct svga_context *svga, + struct pipe_buffer *buf) +{ + struct pipe_screen *screen = svga->pipe.screen; + struct svga_screen *ss = svga_screen(screen); + struct svga_buffer *sbuf; + enum pipe_error ret; + + if(!buf) + return NULL; + + sbuf = svga_buffer(buf); + + assert(!sbuf->map.count); + + if(!sbuf->handle) { + ret = svga_buffer_create_host_surface(ss, sbuf); + if(ret != PIPE_OK) + return NULL; + + ret = svga_buffer_update_hw(ss, sbuf); + if(ret != PIPE_OK) + return NULL; + } + + if(!sbuf->needs_flush && sbuf->hw.num_ranges) { + /* Queue the buffer for flushing */ + ret = svga_buffer_upload_command(svga, sbuf); + if(ret != PIPE_OK) + /* XXX: Should probably have a richer return value */ + return NULL; + + assert(sbuf->hw.svga == svga); + + sbuf->needs_flush = TRUE; + assert(sbuf->head.prev && sbuf->head.next); + LIST_DEL(&sbuf->head); + LIST_ADDTAIL(&sbuf->head, &svga->dirty_buffers); + } + + return sbuf->handle; +} + +struct pipe_buffer * +svga_screen_buffer_wrap_surface(struct pipe_screen *screen, + enum SVGA3dSurfaceFormat format, + struct svga_winsys_surface *srf) +{ + struct pipe_buffer *buf; + struct svga_buffer *sbuf; + struct svga_winsys_screen *sws = svga_winsys_screen(screen); + + buf = svga_buffer_create(screen, 0, SVGA_BUFFER_USAGE_WRAPPED, 0); + if (!buf) + return NULL; + + sbuf = svga_buffer(buf); + + /* + * We are not the creator of this surface and therefore we must not + * cache it for reuse. The caching code only caches SVGA3D_BUFFER surfaces + * so make sure this isn't one of those. + */ + + assert(format != SVGA3D_BUFFER); + sbuf->key.format = format; + sws->surface_reference(sws, &sbuf->handle, srf); + + return buf; +} + + +struct svga_winsys_surface * +svga_screen_buffer_get_winsys_surface(struct pipe_buffer *buffer) +{ + struct svga_winsys_screen *sws = svga_winsys_screen(buffer->screen); + struct svga_winsys_surface *vsurf = NULL; + + sws->surface_reference(sws, &vsurf, svga_buffer(buffer)->handle); + return vsurf; +} + +void +svga_context_flush_buffers(struct svga_context *svga) +{ + struct list_head *curr, *next; + struct svga_buffer *sbuf; + + curr = svga->dirty_buffers.next; + next = curr->next; + while(curr != &svga->dirty_buffers) { + sbuf = LIST_ENTRY(struct svga_buffer, curr, head); + + assert(p_atomic_read(&sbuf->base.reference.count) != 0); + assert(sbuf->needs_flush); + + svga_buffer_upload_flush(svga, sbuf); + + curr = next; + next = curr->next; + } +} diff --git a/src/gallium/drivers/svga/svga_screen_buffer.h b/src/gallium/drivers/svga/svga_screen_buffer.h new file mode 100644 index 0000000000..5d7af5a7c5 --- /dev/null +++ b/src/gallium/drivers/svga/svga_screen_buffer.h @@ -0,0 +1,190 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_BUFFER_H +#define SVGA_BUFFER_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" + +#include "util/u_double_list.h" + +#include "svga_screen_cache.h" + + +#define SVGA_BUFFER_MAGIC 0x344f9005 + +/** + * Maximum number of discontiguous ranges + */ +#define SVGA_BUFFER_MAX_RANGES 32 + + +struct svga_screen; +struct svga_context; +struct svga_winsys_buffer; +struct svga_winsys_surface; + + +struct svga_buffer_range +{ + unsigned start; + unsigned end; +}; + + +/** + * Describe a + * + * This holds the information to emit a SVGA3dCmdSurfaceDMA. + */ +struct svga_buffer_upload +{ + /** + * Guest memory region. + */ + struct svga_winsys_buffer *buf; + + struct svga_buffer_range ranges[SVGA_BUFFER_MAX_RANGES]; + unsigned num_ranges; + + SVGA3dSurfaceDMAFlags flags; + + /** + * Pointer to the DMA copy box *inside* the command buffer. + */ + SVGA3dCopyBox *boxes; + + /** + * Context that has the pending DMA to this buffer. + */ + struct svga_context *svga; +}; + + +/** + * SVGA pipe buffer. + */ +struct svga_buffer +{ + struct pipe_buffer base; + + /** + * Marker to detect bad casts in runtime. + */ + uint32_t magic; + + /** + * Regular (non DMA'able) memory. + * + * Used for user buffers or for buffers which we know before hand that can + * never be used by the virtual hardware directly, such as constant buffers. + */ + void *swbuf; + + /** + * Whether swbuf was created by the user or not. + */ + boolean user; + + /** + * DMA'ble memory. + * + * A piece of GMR memory. It is created when mapping the buffer, and will be + * used to upload/download vertex data from the host. + */ + struct svga_buffer_upload hw; + + /** + * Creation key for the host surface handle. + * + * This structure describes all the host surface characteristics so that it + * can be looked up in cache, since creating a host surface is often a slow + * operation. + */ + struct svga_host_surface_cache_key key; + + /** + * Host surface handle. + * + * This is a platform independent abstraction for host SID. We create when + * trying to bind + */ + struct svga_winsys_surface *handle; + + struct { + unsigned count; + boolean writing; + boolean flush_explicit; + } map; + + boolean needs_flush; + struct list_head head; +}; + + +static INLINE struct svga_buffer * +svga_buffer(struct pipe_buffer *buffer) +{ + if (buffer) { + assert(((struct svga_buffer *)buffer)->magic == SVGA_BUFFER_MAGIC); + return (struct svga_buffer *)buffer; + } + return NULL; +} + + +/** + * Returns TRUE for user buffers. We may + * decide to use an alternate upload path for these buffers. + */ +static INLINE boolean +svga_buffer_is_user_buffer( struct pipe_buffer *buffer ) +{ + return svga_buffer(buffer)->user; +} + + +void +svga_screen_init_buffer_functions(struct pipe_screen *screen); + +struct svga_winsys_surface * +svga_buffer_handle(struct svga_context *svga, + struct pipe_buffer *buf); + +void +svga_context_flush_buffers(struct svga_context *svga); + +boolean +svga_buffer_free_cached_hw_storage(struct svga_screen *ss); + +struct svga_winsys_buffer * +svga_winsys_buffer_create(struct svga_screen *ss, + unsigned alignment, + unsigned usage, + unsigned size); + +#endif /* SVGA_BUFFER_H */ diff --git a/src/gallium/drivers/svga/svga_screen_cache.c b/src/gallium/drivers/svga/svga_screen_cache.c new file mode 100644 index 0000000000..7360c1688b --- /dev/null +++ b/src/gallium/drivers/svga/svga_screen_cache.c @@ -0,0 +1,307 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "util/u_memory.h" + +#include "svga_debug.h" +#include "svga_winsys.h" +#include "svga_screen.h" +#include "svga_screen_cache.h" + + +#define SVGA_SURFACE_CACHE_ENABLED 1 + + +/** + * Compute the bucket for this key. + * + * We simply compute log2(width) for now, but + */ +static INLINE unsigned +svga_screen_cache_bucket(const struct svga_host_surface_cache_key *key) +{ + unsigned bucket = 0; + unsigned size = key->size.width; + + while ((size >>= 1)) + ++bucket; + + if(key->flags & SVGA3D_SURFACE_HINT_INDEXBUFFER) + bucket += 32; + + assert(bucket < SVGA_HOST_SURFACE_CACHE_BUCKETS); + + return bucket; +} + + +static INLINE struct svga_winsys_surface * +svga_screen_cache_lookup(struct svga_screen *svgascreen, + const struct svga_host_surface_cache_key *key) +{ + struct svga_host_surface_cache *cache = &svgascreen->cache; + struct svga_winsys_screen *sws = svgascreen->sws; + struct svga_host_surface_cache_entry *entry; + struct svga_winsys_surface *handle = NULL; + struct list_head *curr, *next; + unsigned bucket; + unsigned tries = 0; + + bucket = svga_screen_cache_bucket(key); + + pipe_mutex_lock(cache->mutex); + + curr = cache->bucket[bucket].next; + next = curr->next; + while(curr != &cache->bucket[bucket]) { + ++tries; + + entry = LIST_ENTRY(struct svga_host_surface_cache_entry, curr, bucket_head); + + assert(entry->handle); + + if(memcmp(&entry->key, key, sizeof *key) == 0 && + sws->fence_signalled( sws, entry->fence, 0 ) == 0) { + assert(sws->surface_is_flushed(sws, entry->handle)); + + handle = entry->handle; // Reference is transfered here. + entry->handle = NULL; + + LIST_DEL(&entry->bucket_head); + + LIST_DEL(&entry->head); + + LIST_ADD(&entry->head, &cache->empty); + + break; + } + + curr = next; + next = curr->next; + } + + pipe_mutex_unlock(cache->mutex); + +#if 0 + _debug_printf("%s: cache %s after %u tries\n", __FUNCTION__, handle ? "hit" : "miss", tries); +#else + (void)tries; +#endif + + return handle; +} + + +/* + * Transfers a handle reference. + */ + +static INLINE void +svga_screen_cache_add(struct svga_screen *svgascreen, + const struct svga_host_surface_cache_key *key, + struct svga_winsys_surface **p_handle) +{ + struct svga_host_surface_cache *cache = &svgascreen->cache; + struct svga_winsys_screen *sws = svgascreen->sws; + struct svga_host_surface_cache_entry *entry = NULL; + struct svga_winsys_surface *handle = *p_handle; + + + assert(handle); + if(!handle) + return; + + *p_handle = NULL; + pipe_mutex_lock(cache->mutex); + + if(!LIST_IS_EMPTY(&cache->empty)) { + /* use the first empty entry */ + entry = LIST_ENTRY(struct svga_host_surface_cache_entry, cache->empty.next, head); + + LIST_DEL(&entry->head); + } + else if(!LIST_IS_EMPTY(&cache->unused)) { + /* free the last used buffer and reuse its entry */ + entry = LIST_ENTRY(struct svga_host_surface_cache_entry, cache->unused.prev, head); + SVGA_DBG(DEBUG_DMA, "unref sid %p\n", entry->handle); + sws->surface_reference(sws, &entry->handle, NULL); + + LIST_DEL(&entry->bucket_head); + + LIST_DEL(&entry->head); + } + + if(entry) { + entry->handle = handle; + memcpy(&entry->key, key, sizeof entry->key); + + LIST_ADD(&entry->head, &cache->validated); + } + else { + /* Couldn't cache the buffer -- this really shouldn't happen */ + SVGA_DBG(DEBUG_DMA, "unref sid %p\n", handle); + sws->surface_reference(sws, &handle, NULL); + } + + pipe_mutex_unlock(cache->mutex); +} + + +/** + * Called during the screen flush to move all buffers not in a validate list + * into the unused list. + */ +void +svga_screen_cache_flush(struct svga_screen *svgascreen, + struct pipe_fence_handle *fence) +{ + struct svga_host_surface_cache *cache = &svgascreen->cache; + struct svga_winsys_screen *sws = svgascreen->sws; + struct svga_host_surface_cache_entry *entry; + struct list_head *curr, *next; + unsigned bucket; + + pipe_mutex_lock(cache->mutex); + + curr = cache->validated.next; + next = curr->next; + while(curr != &cache->validated) { + entry = LIST_ENTRY(struct svga_host_surface_cache_entry, curr, head); + + assert(entry->handle); + + if(sws->surface_is_flushed(sws, entry->handle)) { + LIST_DEL(&entry->head); + + svgascreen->sws->fence_reference(svgascreen->sws, &entry->fence, fence); + + LIST_ADD(&entry->head, &cache->unused); + + bucket = svga_screen_cache_bucket(&entry->key); + LIST_ADD(&entry->bucket_head, &cache->bucket[bucket]); + } + + curr = next; + next = curr->next; + } + + pipe_mutex_unlock(cache->mutex); +} + + +void +svga_screen_cache_cleanup(struct svga_screen *svgascreen) +{ + struct svga_host_surface_cache *cache = &svgascreen->cache; + struct svga_winsys_screen *sws = svgascreen->sws; + unsigned i; + + for(i = 0; i < SVGA_HOST_SURFACE_CACHE_SIZE; ++i) { + if(cache->entries[i].handle) { + SVGA_DBG(DEBUG_DMA, "unref sid %p\n", cache->entries[i].handle); + sws->surface_reference(sws, &cache->entries[i].handle, NULL); + } + + if(cache->entries[i].fence) + svgascreen->sws->fence_reference(svgascreen->sws, &cache->entries[i].fence, NULL); + } + + pipe_mutex_destroy(cache->mutex); +} + + +enum pipe_error +svga_screen_cache_init(struct svga_screen *svgascreen) +{ + struct svga_host_surface_cache *cache = &svgascreen->cache; + unsigned i; + + pipe_mutex_init(cache->mutex); + + for(i = 0; i < SVGA_HOST_SURFACE_CACHE_BUCKETS; ++i) + LIST_INITHEAD(&cache->bucket[i]); + + LIST_INITHEAD(&cache->unused); + + LIST_INITHEAD(&cache->validated); + + LIST_INITHEAD(&cache->empty); + for(i = 0; i < SVGA_HOST_SURFACE_CACHE_SIZE; ++i) + LIST_ADDTAIL(&cache->entries[i].head, &cache->empty); + + return PIPE_OK; +} + + +struct svga_winsys_surface * +svga_screen_surface_create(struct svga_screen *svgascreen, + struct svga_host_surface_cache_key *key) +{ + struct svga_winsys_screen *sws = svgascreen->sws; + struct svga_winsys_surface *handle = NULL; + + if (SVGA_SURFACE_CACHE_ENABLED && key->format == SVGA3D_BUFFER) { + /* round the buffer size up to the nearest power of two to increase the + * probability of cache hits */ + uint32_t size = 1; + while(size < key->size.width) + size <<= 1; + key->size.width = size; + + handle = svga_screen_cache_lookup(svgascreen, key); + if (handle) + SVGA_DBG(DEBUG_DMA, " reuse sid %p sz %d\n", handle, size); + } + + if (!handle) { + handle = sws->surface_create(sws, + key->flags, + key->format, + key->size, + key->numFaces, + key->numMipLevels); + if (handle) + SVGA_DBG(DEBUG_DMA, "create sid %p sz %d\n", handle, key->size); + } + + return handle; +} + + +void +svga_screen_surface_destroy(struct svga_screen *svgascreen, + const struct svga_host_surface_cache_key *key, + struct svga_winsys_surface **p_handle) +{ + struct svga_winsys_screen *sws = svgascreen->sws; + + if(SVGA_SURFACE_CACHE_ENABLED && key->format == SVGA3D_BUFFER) { + svga_screen_cache_add(svgascreen, key, p_handle); + } + else { + SVGA_DBG(DEBUG_DMA, "unref sid %p\n", *p_handle); + sws->surface_reference(sws, p_handle, NULL); + } +} diff --git a/src/gallium/drivers/svga/svga_screen_cache.h b/src/gallium/drivers/svga/svga_screen_cache.h new file mode 100644 index 0000000000..1bbe987768 --- /dev/null +++ b/src/gallium/drivers/svga/svga_screen_cache.h @@ -0,0 +1,135 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_SCREEN_CACHE_H_ +#define SVGA_SCREEN_CACHE_H_ + + +#include "svga_types.h" +#include "svga_reg.h" +#include "svga3d_reg.h" + +#include "pipe/p_thread.h" + +#include "util/u_double_list.h" + + +/* TODO: Reduce this once we don't allocate an index buffer per draw call */ +#define SVGA_HOST_SURFACE_CACHE_SIZE 1024 + +#define SVGA_HOST_SURFACE_CACHE_BUCKETS 64 + + +struct svga_winsys_surface; +struct svga_screen; + +/** + * Same as svga_winsys_screen::surface_create. + */ +struct svga_host_surface_cache_key +{ + SVGA3dSurfaceFlags flags; + SVGA3dSurfaceFormat format; + SVGA3dSize size; + uint32_t numFaces; + uint32_t numMipLevels; +}; + + +struct svga_host_surface_cache_entry +{ + /** + * Head for the LRU list, svga_host_surface_cache::unused, and + * svga_host_surface_cache::empty + */ + struct list_head head; + + /** Head for the bucket lists. */ + struct list_head bucket_head; + + struct svga_host_surface_cache_key key; + struct svga_winsys_surface *handle; + + struct pipe_fence_handle *fence; +}; + + +/** + * Cache of the host surfaces. + * + * A cache entry can be in the following stages: + * 1. empty + * 2. holding a buffer in a validate list + * 3. holding a flushed buffer (not in any validate list) with an active fence + * 4. holding a flushed buffer with an expired fence + * + * An entry progresses from 1 -> 2 -> 3 -> 4. When we need an entry to put a + * buffer into we preferencial take from 1, or from the least recentely used + * buffer from 3/4. + */ +struct svga_host_surface_cache +{ + pipe_mutex mutex; + + /* Unused buffers are put in buckets to speed up lookups */ + struct list_head bucket[SVGA_HOST_SURFACE_CACHE_BUCKETS]; + + /* Entries with unused buffers, ordered from most to least recently used + * (3 and 4) */ + struct list_head unused; + + /* Entries with buffers still in validate lists (2) */ + struct list_head validated; + + /** Empty entries (1) */ + struct list_head empty; + + /** The actual storage for the entries */ + struct svga_host_surface_cache_entry entries[SVGA_HOST_SURFACE_CACHE_SIZE]; +}; + + +void +svga_screen_cache_cleanup(struct svga_screen *svgascreen); + +void +svga_screen_cache_flush(struct svga_screen *svgascreen, + struct pipe_fence_handle *fence); + +enum pipe_error +svga_screen_cache_init(struct svga_screen *svgascreen); + + +struct svga_winsys_surface * +svga_screen_surface_create(struct svga_screen *svgascreen, + struct svga_host_surface_cache_key *key); + +void +svga_screen_surface_destroy(struct svga_screen *svgascreen, + const struct svga_host_surface_cache_key *key, + struct svga_winsys_surface **handle); + + +#endif /* SVGA_SCREEN_CACHE_H_ */ diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c new file mode 100644 index 0000000000..8472dea04d --- /dev/null +++ b/src/gallium/drivers/svga/svga_screen_texture.c @@ -0,0 +1,1065 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "svga_cmd.h" + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_thread.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "svga_screen.h" +#include "svga_context.h" +#include "svga_screen_texture.h" +#include "svga_screen_buffer.h" +#include "svga_winsys.h" +#include "svga_debug.h" +#include "svga_screen_buffer.h" + +#include + + +/* XXX: This isn't a real hardware flag, but just a hack for kernel to + * know about primary surfaces. Find a better way to accomplish this. + */ +#define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9) + + +/* + * Helper function and arrays + */ + +SVGA3dSurfaceFormat +svga_translate_format(enum pipe_format format) +{ + switch(format) { + + case PIPE_FORMAT_A8R8G8B8_UNORM: + return SVGA3D_A8R8G8B8; + case PIPE_FORMAT_X8R8G8B8_UNORM: + return SVGA3D_X8R8G8B8; + + /* Required for GL2.1: + */ + case PIPE_FORMAT_A8R8G8B8_SRGB: + return SVGA3D_A8R8G8B8; + + case PIPE_FORMAT_R5G6B5_UNORM: + return SVGA3D_R5G6B5; + case PIPE_FORMAT_A1R5G5B5_UNORM: + return SVGA3D_A1R5G5B5; + case PIPE_FORMAT_A4R4G4B4_UNORM: + return SVGA3D_A4R4G4B4; + + + /* XXX: Doesn't seem to work properly. + case PIPE_FORMAT_Z32_UNORM: + return SVGA3D_Z_D32; + */ + case PIPE_FORMAT_Z16_UNORM: + return SVGA3D_Z_D16; + case PIPE_FORMAT_Z24S8_UNORM: + return SVGA3D_Z_D24S8; + case PIPE_FORMAT_Z24X8_UNORM: + return SVGA3D_Z_D24X8; + + case PIPE_FORMAT_A8_UNORM: + return SVGA3D_ALPHA8; + case PIPE_FORMAT_L8_UNORM: + return SVGA3D_LUMINANCE8; + + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + return SVGA3D_DXT1; + case PIPE_FORMAT_DXT3_RGBA: + return SVGA3D_DXT3; + case PIPE_FORMAT_DXT5_RGBA: + return SVGA3D_DXT5; + + default: + return SVGA3D_FORMAT_INVALID; + } +} + + +SVGA3dSurfaceFormat +svga_translate_format_render(enum pipe_format format) +{ + switch(format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z32_UNORM: + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_L8_UNORM: + return svga_translate_format(format); + +#if 1 + /* For on host conversion */ + case PIPE_FORMAT_DXT1_RGB: + return SVGA3D_X8R8G8B8; + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + return SVGA3D_A8R8G8B8; +#endif + + default: + return SVGA3D_FORMAT_INVALID; + } +} + + +static INLINE void +svga_transfer_dma_band(struct svga_transfer *st, + SVGA3dTransferType transfer, + unsigned y, unsigned h, unsigned srcy) +{ + struct svga_texture *texture = svga_texture(st->base.texture); + struct svga_screen *screen = svga_screen(texture->base.screen); + SVGA3dCopyBox box; + enum pipe_error ret; + + SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - (%u, %u, %u), %ubpp\n", + transfer == SVGA3D_WRITE_HOST_VRAM ? "to" : "from", + texture->handle, + st->base.face, + st->base.x, + y, + st->base.zslice, + st->base.x + st->base.width, + y + h, + st->base.zslice + 1, + texture->base.block.size*8/(texture->base.block.width*texture->base.block.height)); + + box.x = st->base.x; + box.y = y; + box.z = st->base.zslice; + box.w = st->base.width; + box.h = h; + box.d = 1; + box.srcx = 0; + box.srcy = srcy; + box.srcz = 0; + + pipe_mutex_lock(screen->swc_mutex); + ret = SVGA3D_SurfaceDMA(screen->swc, st, transfer, &box, 1); + if(ret != PIPE_OK) { + screen->swc->flush(screen->swc, NULL); + ret = SVGA3D_SurfaceDMA(screen->swc, st, transfer, &box, 1); + assert(ret == PIPE_OK); + } + pipe_mutex_unlock(screen->swc_mutex); +} + + +static INLINE void +svga_transfer_dma(struct svga_transfer *st, + SVGA3dTransferType transfer) +{ + struct svga_texture *texture = svga_texture(st->base.texture); + struct svga_screen *screen = svga_screen(texture->base.screen); + struct svga_winsys_screen *sws = screen->sws; + struct pipe_fence_handle *fence = NULL; + + if (transfer == SVGA3D_READ_HOST_VRAM) { + SVGA_DBG(DEBUG_PERF, "%s: readback transfer\n", __FUNCTION__); + } + + + if(!st->swbuf) { + /* Do the DMA transfer in a single go */ + + svga_transfer_dma_band(st, transfer, st->base.y, st->base.height, 0); + + if(transfer == SVGA3D_READ_HOST_VRAM) { + svga_screen_flush(screen, &fence); + sws->fence_finish(sws, fence, 0); + //sws->fence_reference(sws, &fence, NULL); + } + } + else { + unsigned y, h, srcy; + h = st->hw_nblocksy * st->base.block.height; + srcy = 0; + for(y = 0; y < st->base.height; y += h) { + unsigned offset, length; + void *hw, *sw; + + if (y + h > st->base.height) + h = st->base.height - y; + + /* Transfer band must be aligned to pixel block boundaries */ + assert(y % st->base.block.height == 0); + assert(h % st->base.block.height == 0); + + offset = y * st->base.stride / st->base.block.height; + length = h * st->base.stride / st->base.block.height; + + sw = (uint8_t *)st->swbuf + offset; + + if(transfer == SVGA3D_WRITE_HOST_VRAM) { + /* Wait for the previous DMAs to complete */ + /* TODO: keep one DMA (at half the size) in the background */ + if(y) { + svga_screen_flush(screen, &fence); + sws->fence_finish(sws, fence, 0); + //sws->fence_reference(sws, &fence, NULL); + } + + hw = sws->buffer_map(sws, st->hwbuf, PIPE_BUFFER_USAGE_CPU_WRITE); + assert(hw); + if(hw) { + memcpy(hw, sw, length); + sws->buffer_unmap(sws, st->hwbuf); + } + } + + svga_transfer_dma_band(st, transfer, y, h, srcy); + + if(transfer == SVGA3D_READ_HOST_VRAM) { + svga_screen_flush(screen, &fence); + sws->fence_finish(sws, fence, 0); + + hw = sws->buffer_map(sws, st->hwbuf, PIPE_BUFFER_USAGE_CPU_READ); + assert(hw); + if(hw) { + memcpy(sw, hw, length); + sws->buffer_unmap(sws, st->hwbuf); + } + } + } + } +} + + +static struct pipe_texture * +svga_texture_create(struct pipe_screen *screen, + const struct pipe_texture *templat) +{ + struct svga_screen *svgascreen = svga_screen(screen); + struct svga_winsys_screen *sws = svgascreen->sws; + struct svga_texture *tex = CALLOC_STRUCT(svga_texture); + unsigned width, height, depth; + SVGA3dSurfaceFlags flags = 0; + SVGA3dSurfaceFormat format; + SVGA3dSize size; + uint32 numFaces; + uint32 numMipLevels; + unsigned level; + + if (!tex) + goto error1; + + tex->base = *templat; + pipe_reference_init(&tex->base.reference, 1); + tex->base.screen = screen; + + assert(templat->last_level < SVGA_MAX_TEXTURE_LEVELS); + if(templat->last_level >= SVGA_MAX_TEXTURE_LEVELS) + goto error2; + + width = templat->width[0]; + height = templat->height[0]; + depth = templat->depth[0]; + for(level = 0; level <= templat->last_level; ++level) { + tex->base.width[level] = width; + tex->base.height[level] = height; + tex->base.depth[level] = depth; + tex->base.nblocksx[level] = pf_get_nblocksx(&tex->base.block, width); + tex->base.nblocksy[level] = pf_get_nblocksy(&tex->base.block, height); + width = minify(width); + height = minify(height); + depth = minify(depth); + } + + size.width = templat->width[0]; + size.height = templat->height[0]; + size.depth = templat->depth[0]; + + if(templat->target == PIPE_TEXTURE_CUBE) { + flags |= SVGA3D_SURFACE_CUBEMAP; + numFaces = 6; + } + else { + numFaces = 1; + } + + if(templat->tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) + flags |= SVGA3D_SURFACE_HINT_TEXTURE; + + if(templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) + flags |= SVGA3D_SURFACE_HINT_SCANOUT; + + /* + * XXX: Never pass the SVGA3D_SURFACE_HINT_RENDERTARGET hint. Mesa cannot + * know beforehand whether a texture will be used as a rendertarget or not + * and it always requests PIPE_TEXTURE_USAGE_RENDER_TARGET, therefore + * passing the SVGA3D_SURFACE_HINT_RENDERTARGET here defeats its purpose. + */ +#if 0 + if((templat->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) && + !pf_is_compressed(templat->format)) + flags |= SVGA3D_SURFACE_HINT_RENDERTARGET; +#endif + + if(templat->tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) + flags |= SVGA3D_SURFACE_HINT_DEPTHSTENCIL; + + numMipLevels = templat->last_level + 1; + + format = svga_translate_format(templat->format); + if(format == SVGA3D_FORMAT_INVALID) + goto error2; + + tex->handle = sws->surface_create(sws, flags, format, size, numFaces, numMipLevels); + if (tex->handle) + SVGA_DBG(DEBUG_DMA, "create sid %p (texture)\n", tex->handle); + + return &tex->base; + +error2: + FREE(tex); +error1: + return NULL; +} + + +static struct pipe_texture * +svga_texture_blanket(struct pipe_screen * screen, + const struct pipe_texture *base, + const unsigned *stride, + struct pipe_buffer *buffer) +{ + struct svga_texture *tex; + struct svga_buffer *sbuf = svga_buffer(buffer); + struct svga_winsys_screen *sws = svga_winsys_screen(screen); + assert(screen); + + /* Only supports one type */ + if (base->target != PIPE_TEXTURE_2D || + base->last_level != 0 || + base->depth[0] != 1) { + return NULL; + } + + /** + * We currently can't do texture blanket on + * SVGA3D_BUFFER. Need to blit to a temporary surface? + */ + + assert(sbuf->handle); + if (!sbuf->handle) + return NULL; + + if (svga_translate_format(base->format) != sbuf->key.format) { + unsigned f1 = svga_translate_format(base->format); + unsigned f2 = sbuf->key.format; + + /* It's okay for XRGB and ARGB or depth with/out stencil to get mixed up */ + if ( !( (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_A8R8G8B8) || + (f1 == SVGA3D_A8R8G8B8 && f2 == SVGA3D_X8R8G8B8) || + (f1 == SVGA3D_Z_D24X8 && f2 == SVGA3D_Z_D24S8) ) ) { + debug_printf("%s wrong format %u != %u\n", __FUNCTION__, f1, f2); + return NULL; + } + } + + tex = CALLOC_STRUCT(svga_texture); + if (!tex) + return NULL; + + tex->base = *base; + + if (sbuf->key.format == 1) + tex->base.format = PIPE_FORMAT_X8R8G8B8_UNORM; + else if (sbuf->key.format == 2) + tex->base.format = PIPE_FORMAT_A8R8G8B8_UNORM; + + pipe_reference_init(&tex->base.reference, 1); + tex->base.screen = screen; + + sws->surface_reference(sws, &tex->handle, sbuf->handle); + + return &tex->base; +} + + +static void +svga_texture_destroy(struct pipe_texture *pt) +{ + struct svga_screen *ss = svga_screen(pt->screen); + struct svga_texture *tex = (struct svga_texture *)pt; + + ss->texture_timestamp++; + + svga_sampler_view_reference(&tex->cached_view, NULL); + + /* + DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); + */ + SVGA_DBG(DEBUG_DMA, "unref sid %p (texture)\n", tex->handle); + ss->sws->surface_reference(ss->sws, &tex->handle, NULL); + + FREE(tex); +} + + +static void +svga_texture_copy_handle(struct svga_context *svga, + struct svga_screen *ss, + struct svga_winsys_surface *src_handle, + unsigned src_x, unsigned src_y, unsigned src_z, + unsigned src_level, unsigned src_face, + struct svga_winsys_surface *dst_handle, + unsigned dst_x, unsigned dst_y, unsigned dst_z, + unsigned dst_level, unsigned dst_face, + unsigned width, unsigned height, unsigned depth) +{ + struct svga_surface dst, src; + enum pipe_error ret; + SVGA3dCopyBox box, *boxes; + + assert(svga || ss); + + src.handle = src_handle; + src.real_level = src_level; + src.real_face = src_face; + src.real_zslice = 0; + + dst.handle = dst_handle; + dst.real_level = dst_level; + dst.real_face = dst_face; + dst.real_zslice = 0; + + box.x = dst_x; + box.y = dst_y; + box.z = dst_z; + box.w = width; + box.h = height; + box.d = depth; + box.srcx = src_x; + box.srcy = src_y; + box.srcz = src_z; + +/* + SVGA_DBG(DEBUG_VIEWS, "mipcopy src: %p %u (%ux%ux%u), dst: %p %u (%ux%ux%u)\n", + src_handle, src_level, src_x, src_y, src_z, + dst_handle, dst_level, dst_x, dst_y, dst_z); +*/ + + if (svga) { + ret = SVGA3D_BeginSurfaceCopy(svga->swc, + &src.base, + &dst.base, + &boxes, 1); + if(ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_BeginSurfaceCopy(svga->swc, + &src.base, + &dst.base, + &boxes, 1); + assert(ret == PIPE_OK); + } + *boxes = box; + SVGA_FIFOCommitAll(svga->swc); + } else { + pipe_mutex_lock(ss->swc_mutex); + ret = SVGA3D_BeginSurfaceCopy(ss->swc, + &src.base, + &dst.base, + &boxes, 1); + if(ret != PIPE_OK) { + ss->swc->flush(ss->swc, NULL); + ret = SVGA3D_BeginSurfaceCopy(ss->swc, + &src.base, + &dst.base, + &boxes, 1); + assert(ret == PIPE_OK); + } + *boxes = box; + SVGA_FIFOCommitAll(ss->swc); + pipe_mutex_unlock(ss->swc_mutex); + } +} + +static struct svga_winsys_surface * +svga_texture_view_surface(struct pipe_context *pipe, + struct svga_texture *tex, + SVGA3dSurfaceFormat format, + unsigned start_mip, + unsigned num_mip, + int face_pick, + int zslice_pick) +{ + struct svga_screen *ss = svga_screen(tex->base.screen); + struct svga_winsys_screen *sws = ss->sws; + struct svga_winsys_surface *handle; + int i, j; + SVGA3dSurfaceFlags flags = 0; + SVGA3dSize size; + uint32 numFaces; + uint32 numMipLevels = num_mip; + unsigned z_offset = 0; + + SVGA_DBG(DEBUG_PERF, + "svga: Create surface view: face %d zslice %d mips %d..%d\n", + face_pick, zslice_pick, start_mip, start_mip+num_mip-1); + + size.width = tex->base.width[start_mip]; + size.height = tex->base.height[start_mip]; + size.depth = zslice_pick < 0 ? tex->base.depth[start_mip] : 1; + assert(size.depth == 1); + + if(tex->base.target == PIPE_TEXTURE_CUBE && face_pick < 0) { + flags |= SVGA3D_SURFACE_CUBEMAP; + numFaces = 6; + } else { + numFaces = 1; + } + + if(format == SVGA3D_FORMAT_INVALID) + return NULL; + + handle = sws->surface_create(sws, flags, format, size, numFaces, numMipLevels); + + if (!handle) + return NULL; + + SVGA_DBG(DEBUG_DMA, "create sid %p (texture view)\n", handle); + + if (face_pick < 0) + face_pick = 0; + + if (zslice_pick >= 0) + z_offset = zslice_pick; + + for (i = 0; i < num_mip; i++) { + for (j = 0; j < numFaces; j++) { + if(tex->defined[j + face_pick][i + start_mip]) { + unsigned depth = zslice_pick < 0 ? tex->base.depth[i + start_mip] : 1; + svga_texture_copy_handle(svga_context(pipe), ss, + tex->handle, 0, 0, z_offset, i + start_mip, j + face_pick, + handle, 0, 0, 0, i, j, + tex->base.width[i + start_mip], tex->base.height[i + start_mip], depth); + } + } + } + + return handle; +} + + +static struct pipe_surface * +svga_get_tex_surface(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) +{ + struct svga_texture *tex = svga_texture(pt); + struct svga_surface *s; + struct pipe_surface *ps; + boolean render = flags & PIPE_BUFFER_USAGE_GPU_WRITE ? TRUE : FALSE; + boolean view = FALSE; + SVGA3dSurfaceFormat format; + + s = CALLOC_STRUCT(svga_surface); + ps = &s->base; + if (!ps) + return NULL; + + pipe_reference_init(&ps->reference, 1); + pipe_texture_reference(&ps->texture, pt); + ps->format = pt->format; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->usage = flags; + ps->level = level; + ps->face = face; + ps->zslice = zslice; + + if (!render) + format = svga_translate_format(pt->format); + else + format = svga_translate_format_render(pt->format); + + assert(format != SVGA3D_FORMAT_INVALID); + assert(!(flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE)); + + + if (svga_screen(screen)->debug.force_surface_view) + view = TRUE; + + /* Currently only used for compressed textures */ + if (render && (format != svga_translate_format(pt->format))) { + view = TRUE; + } + + if (level != 0 && svga_screen(screen)->debug.force_level_surface_view) + view = TRUE; + + if (pt->target == PIPE_TEXTURE_3D) + view = TRUE; + + if (svga_screen(screen)->debug.no_surface_view) + view = FALSE; + + if (view) { + SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u face %u z %u, %p\n", + pt, level, face, zslice, ps); + + s->handle = svga_texture_view_surface(NULL, tex, format, level, 1, face, zslice); + s->real_face = 0; + s->real_level = 0; + s->real_zslice = 0; + } else { + struct svga_winsys_screen *sws = svga_winsys_screen(screen); + + SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, face %u, z %u, %p\n", + pt, level, face, zslice, ps); + + sws->surface_reference(sws, &s->handle, tex->handle); + s->real_face = face; + s->real_level = level; + s->real_zslice = zslice; + } + + return ps; +} + + +static void +svga_tex_surface_destroy(struct pipe_surface *surf) +{ + struct svga_surface *s = svga_surface(surf); + struct svga_screen *ss = svga_screen(surf->texture->screen); + + SVGA_DBG(DEBUG_DMA, "unref sid %p (tex surface)\n", s->handle); + ss->sws->surface_reference(ss->sws, &s->handle, NULL); + pipe_texture_reference(&surf->texture, NULL); + FREE(surf); +} + + +static INLINE void +svga_mark_surface_dirty(struct pipe_surface *surf) +{ + struct svga_surface *s = svga_surface(surf); + + if(!s->dirty) { + struct svga_texture *tex = svga_texture(surf->texture); + + s->dirty = TRUE; + + if (s->handle == tex->handle) + tex->defined[surf->face][surf->level] = TRUE; + else { + /* this will happen later in svga_propagate_surface */ + } + } +} + + +void svga_mark_surfaces_dirty(struct svga_context *svga) +{ + unsigned i; + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (svga->curr.framebuffer.cbufs[i]) + svga_mark_surface_dirty(svga->curr.framebuffer.cbufs[i]); + } + if (svga->curr.framebuffer.zsbuf) + svga_mark_surface_dirty(svga->curr.framebuffer.zsbuf); +} + +/** + * Progagate any changes from surfaces to texture. + * pipe is optional context to inline the blit command in. + */ +void +svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf) +{ + struct svga_surface *s = svga_surface(surf); + struct svga_texture *tex = svga_texture(surf->texture); + struct svga_screen *ss = svga_screen(surf->texture->screen); + + if (!s->dirty) + return; + + s->dirty = FALSE; + ss->texture_timestamp++; + tex->view_age[surf->level] = ++(tex->age); + + if (s->handle != tex->handle) { + SVGA_DBG(DEBUG_VIEWS, "svga: Surface propagate: tex %p, level %u, from %p\n", tex, surf->level, surf); + svga_texture_copy_handle(svga_context(pipe), ss, + s->handle, 0, 0, 0, s->real_level, s->real_face, + tex->handle, 0, 0, surf->zslice, surf->level, surf->face, + tex->base.width[surf->level], tex->base.height[surf->level], 1); + tex->defined[surf->face][surf->level] = TRUE; + } +} + +/** + * Check if we should call svga_propagate_surface on the surface. + */ +extern boolean +svga_surface_needs_propagation(struct pipe_surface *surf) +{ + struct svga_surface *s = svga_surface(surf); + struct svga_texture *tex = svga_texture(surf->texture); + + return s->dirty && s->handle != tex->handle; +} + + +static struct pipe_transfer * +svga_get_tex_transfer(struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned face, unsigned level, unsigned zslice, + enum pipe_transfer_usage usage, unsigned x, unsigned y, + unsigned w, unsigned h) +{ + struct svga_screen *ss = svga_screen(screen); + struct svga_winsys_screen *sws = ss->sws; + struct svga_transfer *st; + + /* We can't map texture storage directly */ + if (usage & PIPE_TRANSFER_MAP_DIRECTLY) + return NULL; + + st = CALLOC_STRUCT(svga_transfer); + if (!st) + return NULL; + + st->base.format = texture->format; + st->base.block = texture->block; + st->base.x = x; + st->base.y = y; + st->base.width = w; + st->base.height = h; + st->base.nblocksx = pf_get_nblocksx(&texture->block, w); + st->base.nblocksy = pf_get_nblocksy(&texture->block, h); + st->base.stride = st->base.nblocksx*st->base.block.size; + st->base.usage = usage; + st->base.face = face; + st->base.level = level; + st->base.zslice = zslice; + + st->hw_nblocksy = st->base.nblocksy; + + st->hwbuf = svga_winsys_buffer_create(ss, + 1, + 0, + st->hw_nblocksy*st->base.stride); + while(!st->hwbuf && (st->hw_nblocksy /= 2)) { + st->hwbuf = svga_winsys_buffer_create(ss, + 1, + 0, + st->hw_nblocksy*st->base.stride); + } + + if(!st->hwbuf) + goto no_hwbuf; + + if(st->hw_nblocksy < st->base.nblocksy) { + /* We couldn't allocate a hardware buffer big enough for the transfer, + * so allocate regular malloc memory instead */ + debug_printf("%s: failed to allocate %u KB of DMA, splitting into %u x %u KB DMA transfers\n", + __FUNCTION__, + (st->base.nblocksy*st->base.stride + 1023)/1024, + (st->base.nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy, + (st->hw_nblocksy*st->base.stride + 1023)/1024); + st->swbuf = MALLOC(st->base.nblocksy*st->base.stride); + if(!st->swbuf) + goto no_swbuf; + } + + pipe_texture_reference(&st->base.texture, texture); + + if (usage & PIPE_TRANSFER_READ) + svga_transfer_dma(st, SVGA3D_READ_HOST_VRAM); + + return &st->base; + +no_swbuf: + sws->buffer_destroy(sws, st->hwbuf); +no_hwbuf: + FREE(st); + return NULL; +} + + +static void * +svga_transfer_map( struct pipe_screen *screen, + struct pipe_transfer *transfer ) +{ + struct svga_screen *ss = svga_screen(screen); + struct svga_winsys_screen *sws = ss->sws; + struct svga_transfer *st = svga_transfer(transfer); + + if(st->swbuf) + return st->swbuf; + else + /* The wait for read transfers already happened when svga_transfer_dma + * was called. */ + return sws->buffer_map(sws, st->hwbuf, + pipe_transfer_buffer_flags(transfer)); +} + + +static void +svga_transfer_unmap(struct pipe_screen *screen, + struct pipe_transfer *transfer) +{ + struct svga_screen *ss = svga_screen(screen); + struct svga_winsys_screen *sws = ss->sws; + struct svga_transfer *st = svga_transfer(transfer); + + if(!st->swbuf) + sws->buffer_unmap(sws, st->hwbuf); +} + + +static void +svga_tex_transfer_destroy(struct pipe_transfer *transfer) +{ + struct svga_texture *tex = svga_texture(transfer->texture); + struct svga_screen *ss = svga_screen(transfer->texture->screen); + struct svga_winsys_screen *sws = ss->sws; + struct svga_transfer *st = svga_transfer(transfer); + + if (st->base.usage & PIPE_TRANSFER_WRITE) { + svga_transfer_dma(st, SVGA3D_WRITE_HOST_VRAM); + ss->texture_timestamp++; + tex->view_age[transfer->level] = ++(tex->age); + tex->defined[transfer->face][transfer->level] = TRUE; + } + + pipe_texture_reference(&st->base.texture, NULL); + FREE(st->swbuf); + sws->buffer_destroy(sws, st->hwbuf); + FREE(st); +} + +void +svga_screen_init_texture_functions(struct pipe_screen *screen) +{ + screen->texture_create = svga_texture_create; + screen->texture_destroy = svga_texture_destroy; + screen->get_tex_surface = svga_get_tex_surface; + screen->tex_surface_destroy = svga_tex_surface_destroy; + screen->texture_blanket = svga_texture_blanket; + screen->get_tex_transfer = svga_get_tex_transfer; + screen->transfer_map = svga_transfer_map; + screen->transfer_unmap = svga_transfer_unmap; + screen->tex_transfer_destroy = svga_tex_transfer_destroy; +} + +/*********************************************************************** + */ + +struct svga_sampler_view * +svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt, + unsigned min_lod, unsigned max_lod) +{ + struct svga_screen *ss = svga_screen(pt->screen); + struct svga_winsys_screen *sws = ss->sws; + struct svga_texture *tex = svga_texture(pt); + struct svga_sampler_view *sv = NULL; + SVGA3dSurfaceFormat format = svga_translate_format(pt->format); + boolean view = TRUE; + + assert(pt); + assert(min_lod >= 0); + assert(min_lod <= max_lod); + assert(max_lod <= pt->last_level); + + + /* Is a view needed */ + { + /* + * Can't control max lod. For first level views and when we only + * look at one level we disable mip filtering to achive the same + * results as a view. + */ + if (min_lod == 0 && max_lod >= pt->last_level) + view = FALSE; + + if (pf_is_compressed(pt->format) && view) { + format = svga_translate_format_render(pt->format); + } + + if (ss->debug.no_sampler_view) + view = FALSE; + + if (ss->debug.force_sampler_view) + view = TRUE; + } + + /* First try the cache */ + if (view) { + pipe_mutex_lock(ss->tex_mutex); + if (tex->cached_view && + tex->cached_view->min_lod == min_lod && + tex->cached_view->max_lod == max_lod) { + svga_sampler_view_reference(&sv, tex->cached_view); + pipe_mutex_unlock(ss->tex_mutex); + SVGA_DBG(DEBUG_VIEWS, "svga: Sampler view: reuse %p, %u %u, last %u\n", + pt, min_lod, max_lod, pt->last_level); + svga_validate_sampler_view(svga_context(pipe), sv); + return sv; + } + pipe_mutex_unlock(ss->tex_mutex); + } + + sv = CALLOC_STRUCT(svga_sampler_view); + pipe_reference_init(&sv->reference, 1); + sv->texture = tex; + sv->min_lod = min_lod; + sv->max_lod = max_lod; + + /* No view needed just use the whole texture */ + if (!view) { + SVGA_DBG(DEBUG_VIEWS, + "svga: Sampler view: no %p, mips %u..%u, nr %u, size (%ux%ux%u), last %u\n", + pt, min_lod, max_lod, + max_lod - min_lod + 1, + pt->width[0], + pt->height[0], + pt->depth[0], + pt->last_level); + sws->surface_reference(sws, &sv->handle, tex->handle); + return sv; + } + + SVGA_DBG(DEBUG_VIEWS, + "svga: Sampler view: yes %p, mips %u..%u, nr %u, size (%ux%ux%u), last %u\n", + pt, min_lod, max_lod, + max_lod - min_lod + 1, + pt->width[0], + pt->height[0], + pt->depth[0], + pt->last_level); + + sv->age = tex->age; + sv->handle = svga_texture_view_surface(pipe, tex, format, + min_lod, + max_lod - min_lod + 1, + -1, -1); + + if (!sv->handle) { + assert(0); + sws->surface_reference(sws, &sv->handle, tex->handle); + return sv; + } + + pipe_mutex_lock(ss->tex_mutex); + svga_sampler_view_reference(&tex->cached_view, sv); + pipe_mutex_unlock(ss->tex_mutex); + + return sv; +} + +void +svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *v) +{ + struct svga_texture *tex = v->texture; + unsigned numFaces; + unsigned age = 0; + int i, k; + + assert(svga); + + if (v->handle == v->texture->handle) + return; + + age = tex->age; + + if(tex->base.target == PIPE_TEXTURE_CUBE) + numFaces = 6; + else + numFaces = 1; + + for (i = v->min_lod; i <= v->max_lod; i++) { + for (k = 0; k < numFaces; k++) { + if (v->age < tex->view_age[i]) + svga_texture_copy_handle(svga, NULL, + tex->handle, 0, 0, 0, i, k, + v->handle, 0, 0, 0, i - v->min_lod, k, + tex->base.width[i], + tex->base.height[i], + tex->base.depth[i]); + } + } + + v->age = age; +} + +void +svga_destroy_sampler_view_priv(struct svga_sampler_view *v) +{ + struct svga_screen *ss = svga_screen(v->texture->base.screen); + + SVGA_DBG(DEBUG_DMA, "unref sid %p (sampler view)\n", v->handle); + ss->sws->surface_reference(ss->sws, &v->handle, NULL); + + FREE(v); +} + +boolean +svga_screen_buffer_from_texture(struct pipe_texture *texture, + struct pipe_buffer **buffer, + unsigned *stride) +{ + struct svga_texture *stex = svga_texture(texture); + + *buffer = svga_screen_buffer_wrap_surface + (texture->screen, + svga_translate_format(texture->format), + stex->handle); + + *stride = pf_get_nblocksx(&texture->block, texture->width[0]) * + texture->block.size; + + return *buffer != NULL; +} + + +struct svga_winsys_surface * +svga_screen_texture_get_winsys_surface(struct pipe_texture *texture) +{ + struct svga_winsys_screen *sws = svga_winsys_screen(texture->screen); + struct svga_winsys_surface *vsurf = NULL; + + sws->surface_reference(sws, &vsurf, svga_texture(texture)->handle); + return vsurf; +} diff --git a/src/gallium/drivers/svga/svga_screen_texture.h b/src/gallium/drivers/svga/svga_screen_texture.h new file mode 100644 index 0000000000..1e6fef59a3 --- /dev/null +++ b/src/gallium/drivers/svga/svga_screen_texture.h @@ -0,0 +1,177 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_TEXTURE_H +#define SVGA_TEXTURE_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" + + +struct pipe_context; +struct pipe_screen; +struct svga_context; +struct svga_winsys_surface; +enum SVGA3dSurfaceFormat; + + +#define SVGA_MAX_TEXTURE_LEVELS 12 /* 2048x2048 */ + + +/** + * A sampler's view into a texture + * + * We currently cache one sampler view on + * the texture and in there by holding a reference + * from the texture to the sampler view. + * + * Because of this we can not hold a refernce to the + * texture from the sampler view. So the user + * of the sampler views must make sure that the + * texture has a reference take for as long as + * the sampler view is refrenced. + * + * Just unreferencing the sampler_view before the + * texture is enough. + */ +struct svga_sampler_view +{ + struct pipe_reference reference; + + struct svga_texture *texture; + + int min_lod; + int max_lod; + + unsigned age; + + struct svga_winsys_surface *handle; +}; + + +struct svga_texture +{ + struct pipe_texture base; + + struct svga_winsys_surface *handle; + + boolean defined[6][PIPE_MAX_TEXTURE_LEVELS]; + + struct svga_sampler_view *cached_view; + + unsigned view_age[SVGA_MAX_TEXTURE_LEVELS]; + unsigned age; + + boolean views_modified; +}; + + +struct svga_surface +{ + struct pipe_surface base; + + struct svga_winsys_surface *handle; + + unsigned real_face; + unsigned real_level; + unsigned real_zslice; + + boolean dirty; +}; + + +struct svga_transfer +{ + struct pipe_transfer base; + + struct svga_winsys_buffer *hwbuf; + + /* Height of the hardware buffer in pixel blocks */ + unsigned hw_nblocksy; + + /* Temporary malloc buffer when we can't allocate a hardware buffer + * big enough */ + void *swbuf; +}; + + +static INLINE struct svga_texture * +svga_texture(struct pipe_texture *texture) +{ + return (struct svga_texture *)texture; +} + +static INLINE struct svga_surface * +svga_surface(struct pipe_surface *surface) +{ + assert(surface); + return (struct svga_surface *)surface; +} + +static INLINE struct svga_transfer * +svga_transfer(struct pipe_transfer *transfer) +{ + assert(transfer); + return (struct svga_transfer *)transfer; +} + +extern struct svga_sampler_view * +svga_get_tex_sampler_view(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned min_lod, unsigned max_lod); + +void +svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *v); + +void +svga_destroy_sampler_view_priv(struct svga_sampler_view *v); + +static INLINE void +svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_view *v) +{ + struct svga_sampler_view *old = *ptr; + + if (pipe_reference((struct pipe_reference **)ptr, &v->reference)) + svga_destroy_sampler_view_priv(old); +} + +extern void +svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf); + +extern boolean +svga_surface_needs_propagation(struct pipe_surface *surf); + +extern void +svga_screen_init_texture_functions(struct pipe_screen *screen); + +enum SVGA3dSurfaceFormat +svga_translate_format(enum pipe_format format); + +enum SVGA3dSurfaceFormat +svga_translate_format_render(enum pipe_format format); + + +#endif /* SVGA_TEXTURE_H */ diff --git a/src/gallium/drivers/svga/svga_state.c b/src/gallium/drivers/svga/svga_state.c new file mode 100644 index 0000000000..1c21d3acfe --- /dev/null +++ b/src/gallium/drivers/svga/svga_state.c @@ -0,0 +1,278 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "util/u_debug.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "draw/draw_context.h" + +#include "svga_context.h" +#include "svga_screen.h" +#include "svga_state.h" +#include "svga_draw.h" +#include "svga_cmd.h" +#include "svga_hw_reg.h" + +/* This is just enough to decide whether we need to use the draw + * module (swtnl) or not. + */ +static const struct svga_tracked_state *need_swtnl_state[] = +{ + &svga_update_need_swvfetch, + &svga_update_need_pipeline, + &svga_update_need_swtnl, + NULL +}; + + +/* Atoms to update hardware state prior to emitting a clear or draw + * packet. + */ +static const struct svga_tracked_state *hw_clear_state[] = +{ + &svga_hw_scissor, + &svga_hw_viewport, + &svga_hw_framebuffer, + NULL +}; + + +/* Atoms to update hardware state prior to emitting a draw packet. + */ +static const struct svga_tracked_state *hw_draw_state[] = +{ + &svga_hw_update_zero_stride, + &svga_hw_fs, + &svga_hw_vs, + &svga_hw_rss, + &svga_hw_tss, + &svga_hw_tss_binding, + &svga_hw_clip_planes, + &svga_hw_vdecl, + &svga_hw_fs_parameters, + &svga_hw_vs_parameters, + NULL +}; + + +static const struct svga_tracked_state *swtnl_draw_state[] = +{ + &svga_update_swtnl_draw, + &svga_update_swtnl_vdecl, + NULL +}; + +/* Flattens the graph of state dependencies. Could swap the positions + * of hw_clear_state and need_swtnl_state without breaking anything. + */ +static const struct svga_tracked_state **state_levels[] = +{ + need_swtnl_state, + hw_clear_state, + hw_draw_state, + swtnl_draw_state +}; + + + +static unsigned check_state( unsigned a, + unsigned b ) +{ + return (a & b); +} + +static void accumulate_state( unsigned *a, + unsigned b ) +{ + *a |= b; +} + + +static void xor_states( unsigned *result, + unsigned a, + unsigned b ) +{ + *result = a ^ b; +} + + + +static int update_state( struct svga_context *svga, + const struct svga_tracked_state *atoms[], + unsigned *state ) +{ + boolean debug = TRUE; + enum pipe_error ret = 0; + unsigned i; + + ret = svga_hwtnl_flush( svga->hwtnl ); + if (ret != 0) + return ret; + + if (debug) { + /* Debug version which enforces various sanity checks on the + * state flags which are generated and checked to help ensure + * state atoms are ordered correctly in the list. + */ + unsigned examined, prev; + + examined = 0; + prev = *state; + + for (i = 0; atoms[i] != NULL; i++) { + unsigned generated; + + assert(atoms[i]->dirty); + assert(atoms[i]->update); + + if (check_state(*state, atoms[i]->dirty)) { + if (0) + debug_printf("update: %s\n", atoms[i]->name); + ret = atoms[i]->update( svga, *state ); + if (ret != 0) + return ret; + } + + /* generated = (prev ^ state) + * if (examined & generated) + * fail; + */ + xor_states(&generated, prev, *state); + if (check_state(examined, generated)) { + debug_printf("state atom %s generated state already examined\n", + atoms[i]->name); + assert(0); + } + + prev = *state; + accumulate_state(&examined, atoms[i]->dirty); + } + } + else { + for (i = 0; atoms[i] != NULL; i++) { + if (check_state(*state, atoms[i]->dirty)) { + ret = atoms[i]->update( svga, *state ); + if (ret != 0) + return ret; + } + } + } + + return 0; +} + + + +int svga_update_state( struct svga_context *svga, + unsigned max_level ) +{ + struct svga_screen *screen = svga_screen(svga->pipe.screen); + int ret = 0; + int i; + + /* Check for updates to bound textures. This can't be done in an + * atom as there is no flag which could provoke this test, and we + * cannot create one. + */ + if (svga->state.texture_timestamp != screen->texture_timestamp) { + svga->state.texture_timestamp = screen->texture_timestamp; + svga->dirty |= SVGA_NEW_TEXTURE; + } + + for (i = 0; i <= max_level; i++) { + svga->dirty |= svga->state.dirty[i]; + + if (svga->dirty) { + ret = update_state( svga, + state_levels[i], + &svga->dirty ); + if (ret != 0) + return ret; + + svga->state.dirty[i] = 0; + } + } + + for (; i < SVGA_STATE_MAX; i++) + svga->state.dirty[i] |= svga->dirty; + + svga->dirty = 0; + return 0; +} + + + + +void svga_update_state_retry( struct svga_context *svga, + unsigned max_level ) +{ + int ret; + + ret = svga_update_state( svga, max_level ); + + if (ret == PIPE_ERROR_OUT_OF_MEMORY) { + svga_context_flush(svga, NULL); + ret = svga_update_state( svga, max_level ); + } + + assert( ret == 0 ); +} + + + +#define EMIT_RS(_rs, _count, _name, _value) \ +do { \ + _rs[_count].state = _name; \ + _rs[_count].uintValue = _value; \ + _count++; \ +} while (0) + + +/* Setup any hardware state which will be constant through the life of + * a context. + */ +enum pipe_error svga_emit_initial_state( struct svga_context *svga ) +{ + SVGA3dRenderState *rs; + unsigned count = 0; + const unsigned COUNT = 2; + enum pipe_error ret; + + ret = SVGA3D_BeginSetRenderState( svga->swc, &rs, COUNT ); + if (ret) + return ret; + + /* Always use D3D style coordinate space as this is the only one + * which is implemented on all backends. + */ + EMIT_RS(rs, count, SVGA3D_RS_COORDINATETYPE, SVGA3D_COORDINATE_LEFTHANDED ); + EMIT_RS(rs, count, SVGA3D_RS_FRONTWINDING, SVGA3D_FRONTWINDING_CW ); + + assert( COUNT == count ); + SVGA_FIFOCommitAll( svga->swc ); + + return 0; + +} diff --git a/src/gallium/drivers/svga/svga_state.h b/src/gallium/drivers/svga/svga_state.h new file mode 100644 index 0000000000..22d5a6d552 --- /dev/null +++ b/src/gallium/drivers/svga/svga_state.h @@ -0,0 +1,95 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_STATE_H +#define SVGA_STATE_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" + +struct svga_context; + + +void svga_init_state( struct svga_context *svga ); +void svga_destroy_state( struct svga_context *svga ); + + +struct svga_tracked_state { + const char *name; + unsigned dirty; + int (*update)( struct svga_context *svga, unsigned dirty ); +}; + +/* NEED_SWTNL + */ +extern struct svga_tracked_state svga_update_need_swvfetch; +extern struct svga_tracked_state svga_update_need_pipeline; +extern struct svga_tracked_state svga_update_need_swtnl; + +/* HW_CLEAR + */ +extern struct svga_tracked_state svga_hw_viewport; +extern struct svga_tracked_state svga_hw_scissor; +extern struct svga_tracked_state svga_hw_framebuffer; + +/* HW_DRAW + */ +extern struct svga_tracked_state svga_hw_vs; +extern struct svga_tracked_state svga_hw_fs; +extern struct svga_tracked_state svga_hw_rss; +extern struct svga_tracked_state svga_hw_tss; +extern struct svga_tracked_state svga_hw_tss_binding; +extern struct svga_tracked_state svga_hw_clip_planes; +extern struct svga_tracked_state svga_hw_vdecl; +extern struct svga_tracked_state svga_hw_fs_parameters; +extern struct svga_tracked_state svga_hw_vs_parameters; +extern struct svga_tracked_state svga_hw_update_zero_stride; + +/* SWTNL_DRAW + */ +extern struct svga_tracked_state svga_update_swtnl_draw; +extern struct svga_tracked_state svga_update_swtnl_vdecl; + +/* Bring the hardware fully up-to-date so that we can emit draw + * commands. + */ +#define SVGA_STATE_NEED_SWTNL 0 +#define SVGA_STATE_HW_CLEAR 1 +#define SVGA_STATE_HW_DRAW 2 +#define SVGA_STATE_SWTNL_DRAW 3 +#define SVGA_STATE_MAX 4 + + +enum pipe_error svga_update_state( struct svga_context *svga, + unsigned level ); + +void svga_update_state_retry( struct svga_context *svga, + unsigned level ); + + +enum pipe_error svga_emit_initial_state( struct svga_context *svga ); + +#endif diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c new file mode 100644 index 0000000000..18cce7dde1 --- /dev/null +++ b/src/gallium/drivers/svga/svga_state_constants.c @@ -0,0 +1,239 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" + +#include "svga_context.h" +#include "svga_state.h" +#include "svga_cmd.h" +#include "svga_tgsi.h" +#include "svga_debug.h" + +#include "svga_hw_reg.h" + +/*********************************************************************** + * Hardware update + */ + +/* Convert from PIPE_SHADER_* to SVGA3D_SHADERTYPE_* + */ +static int svga_shader_type( int unit ) +{ + return unit + 1; +} + + +static int emit_const( struct svga_context *svga, + int unit, + int i, + const float *value ) +{ + int ret = PIPE_OK; + + if (memcmp(svga->state.hw_draw.cb[unit][i], value, 4 * sizeof(float)) != 0) { + if (SVGA_DEBUG & DEBUG_CONSTS) + debug_printf("%s %s %d: %f %f %f %f\n", + __FUNCTION__, + unit == PIPE_SHADER_VERTEX ? "VERT" : "FRAG", + i, + value[0], + value[1], + value[2], + value[3]); + + ret = SVGA3D_SetShaderConst( svga->swc, + i, + svga_shader_type(unit), + SVGA3D_CONST_TYPE_FLOAT, + value ); + if (ret) + return ret; + + memcpy(svga->state.hw_draw.cb[unit][i], value, 4 * sizeof(float)); + } + + return ret; +} + +static int emit_consts( struct svga_context *svga, + int offset, + int unit ) +{ + struct pipe_screen *screen = svga->pipe.screen; + unsigned count; + const float (*data)[4] = NULL; + unsigned i; + int ret = PIPE_OK; + + if (svga->curr.cb[unit] == NULL) + goto done; + + count = svga->curr.cb[unit]->size / (4 * sizeof(float)); + + data = (const float (*)[4])pipe_buffer_map(screen, + svga->curr.cb[unit], + PIPE_BUFFER_USAGE_CPU_READ); + if (data == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto done; + } + + for (i = 0; i < count; i++) { + ret = emit_const( svga, unit, offset + i, data[i] ); + if (ret) + goto done; + } + +done: + if (data) + pipe_buffer_unmap(screen, svga->curr.cb[unit]); + + return ret; +} + +static int emit_fs_consts( struct svga_context *svga, + unsigned dirty ) +{ + const struct svga_shader_result *result = svga->state.hw_draw.fs; + const struct svga_fs_compile_key *key = &result->key.fkey; + int ret = 0; + + ret = emit_consts( svga, 0, PIPE_SHADER_FRAGMENT ); + if (ret) + return ret; + + /* The internally generated fragment shader for xor blending + * doesn't have a 'result' struct. It should be fixed to avoid + * this special case, but work around it with a NULL check: + */ + if (result != NULL && + key->num_unnormalized_coords) + { + unsigned offset = result->shader->info.file_max[TGSI_FILE_CONSTANT] + 1; + int i; + + for (i = 0; i < key->num_textures; i++) { + if (key->tex[i].unnormalized) { + struct pipe_texture *tex = svga->curr.texture[i]; + float data[4]; + + data[0] = 1.0 / (float)tex->width[0]; + data[1] = 1.0 / (float)tex->height[0]; + data[2] = 1.0; + data[3] = 1.0; + + ret = emit_const( svga, + PIPE_SHADER_FRAGMENT, + key->tex[i].width_height_idx + offset, + data ); + if (ret) + return ret; + } + } + + offset += key->num_unnormalized_coords; + } + + return 0; +} + + +struct svga_tracked_state svga_hw_fs_parameters = +{ + "hw fs params", + (SVGA_NEW_FS_CONST_BUFFER | + SVGA_NEW_FS_RESULT | + SVGA_NEW_TEXTURE_BINDING), + emit_fs_consts +}; + +/*********************************************************************** + */ + +static int emit_vs_consts( struct svga_context *svga, + unsigned dirty ) +{ + const struct svga_shader_result *result = svga->state.hw_draw.vs; + const struct svga_vs_compile_key *key = &result->key.vkey; + int ret = 0; + unsigned offset; + + /* SVGA_NEW_VS_RESULT + */ + if (result == NULL) + return 0; + + /* SVGA_NEW_VS_CONST_BUFFER + */ + ret = emit_consts( svga, 0, PIPE_SHADER_VERTEX ); + if (ret) + return ret; + + offset = result->shader->info.file_max[TGSI_FILE_CONSTANT] + 1; + + /* SVGA_NEW_VS_RESULT + */ + if (key->need_prescale) { + ret = emit_const( svga, PIPE_SHADER_VERTEX, offset++, + svga->state.hw_clear.prescale.scale ); + if (ret) + return ret; + + ret = emit_const( svga, PIPE_SHADER_VERTEX, offset++, + svga->state.hw_clear.prescale.translate ); + if (ret) + return ret; + } + + /* SVGA_NEW_ZERO_STRIDE + */ + if (key->zero_stride_vertex_elements) { + unsigned i, curr_zero_stride = 0; + for (i = 0; i < PIPE_MAX_ATTRIBS; ++i) { + if (key->zero_stride_vertex_elements & (1 << i)) { + ret = emit_const( svga, PIPE_SHADER_VERTEX, offset++, + svga->curr.zero_stride_constants + + 4 * curr_zero_stride ); + if (ret) + return ret; + ++curr_zero_stride; + } + } + } + + return 0; +} + + +struct svga_tracked_state svga_hw_vs_parameters = +{ + "hw vs params", + (SVGA_NEW_VS_CONST_BUFFER | + SVGA_NEW_ZERO_STRIDE | + SVGA_NEW_VS_RESULT), + emit_vs_consts +}; + diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c b/src/gallium/drivers/svga/svga_state_framebuffer.c new file mode 100644 index 0000000000..7d7f93d8e3 --- /dev/null +++ b/src/gallium/drivers/svga/svga_state_framebuffer.c @@ -0,0 +1,455 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" + +#include "svga_context.h" +#include "svga_state.h" +#include "svga_cmd.h" +#include "svga_debug.h" + +#include "svga_hw_reg.h" + + +/*********************************************************************** + * Hardware state update + */ + + +static int emit_framebuffer( struct svga_context *svga, + unsigned dirty ) +{ + const struct pipe_framebuffer_state *curr = &svga->curr.framebuffer; + struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer; + unsigned i; + enum pipe_error ret; + + /* XXX: Need shadow state in svga->hw to eliminate redundant + * uploads, especially of NULL buffers. + */ + + for(i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) { + if (curr->cbufs[i] != hw->cbufs[i]) { + ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_COLOR0 + i, curr->cbufs[i]); + if (ret != PIPE_OK) + return ret; + + pipe_surface_reference(&hw->cbufs[i], curr->cbufs[i]); + } + } + + + if (curr->zsbuf != hw->zsbuf) { + ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_DEPTH, curr->zsbuf); + if (ret != PIPE_OK) + return ret; + + if (curr->zsbuf && + curr->zsbuf->format == PIPE_FORMAT_Z24S8_UNORM) { + ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, curr->zsbuf); + if (ret != PIPE_OK) + return ret; + } + else { + ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, NULL); + if (ret != PIPE_OK) + return ret; + } + + pipe_surface_reference(&hw->zsbuf, curr->zsbuf); + } + + + return 0; +} + + +struct svga_tracked_state svga_hw_framebuffer = +{ + "hw framebuffer state", + SVGA_NEW_FRAME_BUFFER, + emit_framebuffer +}; + + + + +/*********************************************************************** + */ + +static int emit_viewport( struct svga_context *svga, + unsigned dirty ) +{ + const struct pipe_viewport_state *viewport = &svga->curr.viewport; + struct svga_prescale prescale; + SVGA3dRect rect; + /* Not sure if this state is relevant with POSITIONT. Probably + * not, but setting to 0,1 avoids some state pingponging. + */ + float range_min = 0.0; + float range_max = 1.0; + float flip = -1.0; + boolean degenerate = FALSE; + enum pipe_error ret; + + float fb_width = svga->curr.framebuffer.width; + float fb_height = svga->curr.framebuffer.height; + + memset( &prescale, 0, sizeof(prescale) ); + + if (svga->curr.rast->templ.bypass_vs_clip_and_viewport) { + + /* Avoid POSITIONT as it has a non trivial implementation outside the D3D + * API. Always generate a vertex shader. + */ + rect.x = 0; + rect.y = 0; + rect.w = svga->curr.framebuffer.width; + rect.h = svga->curr.framebuffer.height; + + prescale.scale[0] = 2.0 / (float)rect.w; + prescale.scale[1] = - 2.0 / (float)rect.h; + prescale.scale[2] = 1.0; + prescale.scale[3] = 1.0; + prescale.translate[0] = -1.0f; + prescale.translate[1] = 1.0f; + prescale.translate[2] = 0; + prescale.translate[3] = 0; + prescale.enabled = TRUE; + } else { + + /* Examine gallium viewport transformation and produce a screen + * rectangle and possibly vertex shader pre-transformation to + * get the same results. + */ + float fx = viewport->scale[0] * -1.0 + viewport->translate[0]; + float fy = flip * viewport->scale[1] * -1.0 + viewport->translate[1]; + float fw = viewport->scale[0] * 2; + float fh = flip * viewport->scale[1] * 2; + + SVGA_DBG(DEBUG_VIEWPORT, + "\ninitial %f,%f %fx%f\n", + fx, + fy, + fw, + fh); + + prescale.scale[0] = 1.0; + prescale.scale[1] = 1.0; + prescale.scale[2] = 1.0; + prescale.scale[3] = 1.0; + prescale.translate[0] = 0; + prescale.translate[1] = 0; + prescale.translate[2] = 0; + prescale.translate[3] = 0; + prescale.enabled = TRUE; + + + + if (fw < 0) { + prescale.scale[0] *= -1.0; + prescale.translate[0] += -fw; + fw = -fw; + fx = viewport->scale[0] * 1.0 + viewport->translate[0]; + } + + if (fh < 0) { + prescale.scale[1] *= -1.0; + prescale.translate[1] += -fh; + fh = -fh; + fy = flip * viewport->scale[1] * 1.0 + viewport->translate[1]; + } + + if (fx < 0) { + prescale.translate[0] += fx; + prescale.scale[0] *= fw / (fw + fx); + fw += fx; + fx = 0; + } + + if (fy < 0) { + prescale.translate[1] += fy; + prescale.scale[1] *= fh / (fh + fy); + fh += fy; + fy = 0; + } + + if (fx + fw > fb_width) { + prescale.scale[0] *= fw / (fb_width - fx); + prescale.translate[0] -= fx * (fw / (fb_width - fx)); + prescale.translate[0] += fx; + fw = fb_width - fx; + + } + + if (fy + fh > fb_height) { + prescale.scale[1] *= fh / (fb_height - fy); + prescale.translate[1] -= fy * (fh / (fb_height - fy)); + prescale.translate[1] += fy; + fh = fb_height - fy; + } + + if (fw < 0 || fh < 0) { + fw = fh = fx = fy = 0; + degenerate = TRUE; + goto out; + } + + + /* D3D viewport is integer space. Convert fx,fy,etc. to + * integers. + * + * TODO: adjust pretranslate correct for any subpixel error + * introduced converting to integers. + */ + rect.x = fx; + rect.y = fy; + rect.w = fw; + rect.h = fh; + + SVGA_DBG(DEBUG_VIEWPORT, + "viewport error %f,%f %fx%f\n", + fabs((float)rect.x - fx), + fabs((float)rect.y - fy), + fabs((float)rect.w - fw), + fabs((float)rect.h - fh)); + + SVGA_DBG(DEBUG_VIEWPORT, + "viewport %d,%d %dx%d\n", + rect.x, + rect.y, + rect.w, + rect.h); + + + /* Finally, to get GL rasterization rules, need to tweak the + * screen-space coordinates slightly relative to D3D which is + * what hardware implements natively. + */ + if (svga->curr.rast->templ.gl_rasterization_rules) { + float adjust_x = 0.0; + float adjust_y = 0.0; + + switch (svga->curr.reduced_prim) { + case PIPE_PRIM_LINES: + adjust_x = -0.5; + adjust_y = 0; + break; + case PIPE_PRIM_POINTS: + case PIPE_PRIM_TRIANGLES: + adjust_x = -0.375; + adjust_y = -0.5; + break; + } + + prescale.translate[0] += adjust_x; + prescale.translate[1] += adjust_y; + prescale.translate[2] = 0.5; /* D3D clip space */ + prescale.scale[2] = 0.5; /* D3D clip space */ + } + + + range_min = viewport->scale[2] * -1.0 + viewport->translate[2]; + range_max = viewport->scale[2] * 1.0 + viewport->translate[2]; + + /* D3D (and by implication SVGA) doesn't like dealing with zmax + * less than zmin. Detect that case, flip the depth range and + * invert our z-scale factor to achieve the same effect. + */ + if (range_min > range_max) { + float range_tmp; + range_tmp = range_min; + range_min = range_max; + range_max = range_tmp; + prescale.scale[2] = -prescale.scale[2]; + } + } + + if (prescale.enabled) { + float H[2]; + float J[2]; + int i; + + SVGA_DBG(DEBUG_VIEWPORT, + "prescale %f,%f %fx%f\n", + prescale.translate[0], + prescale.translate[1], + prescale.scale[0], + prescale.scale[1]); + + H[0] = (float)rect.w / 2.0; + H[1] = -(float)rect.h / 2.0; + J[0] = (float)rect.x + (float)rect.w / 2.0; + J[1] = (float)rect.y + (float)rect.h / 2.0; + + SVGA_DBG(DEBUG_VIEWPORT, + "H %f,%f\n" + "J %fx%f\n", + H[0], + H[1], + J[0], + J[1]); + + /* Adjust prescale to take into account the fact that it is + * going to be applied prior to the perspective divide and + * viewport transformation. + * + * Vwin = H(Vc/Vc.w) + J + * + * We want to tweak Vwin with scale and translation from above, + * as in: + * + * Vwin' = S Vwin + T + * + * But we can only modify the values at Vc. Plugging all the + * above together, and rearranging, eventually we get: + * + * Vwin' = H(Vc'/Vc'.w) + J + * where: + * Vc' = SVc + KVc.w + * K = (T + (S-1)J) / H + * + * Overwrite prescale.translate with values for K: + */ + for (i = 0; i < 2; i++) { + prescale.translate[i] = ((prescale.translate[i] + + (prescale.scale[i] - 1.0) * J[i]) / H[i]); + } + + SVGA_DBG(DEBUG_VIEWPORT, + "clipspace %f,%f %fx%f\n", + prescale.translate[0], + prescale.translate[1], + prescale.scale[0], + prescale.scale[1]); + } + +out: + if (degenerate) { + rect.x = 0; + rect.y = 0; + rect.w = 1; + rect.h = 1; + prescale.enabled = FALSE; + } + + if (memcmp(&rect, &svga->state.hw_clear.viewport, sizeof(rect)) != 0) { + ret = SVGA3D_SetViewport(svga->swc, &rect); + if(ret != PIPE_OK) + return ret; + + memcpy(&svga->state.hw_clear.viewport, &rect, sizeof(rect)); + assert(sizeof(rect) == sizeof(svga->state.hw_clear.viewport)); + } + + if (svga->state.hw_clear.depthrange.zmin != range_min || + svga->state.hw_clear.depthrange.zmax != range_max) + { + ret = SVGA3D_SetZRange(svga->swc, range_min, range_max ); + if(ret != PIPE_OK) + return ret; + + svga->state.hw_clear.depthrange.zmin = range_min; + svga->state.hw_clear.depthrange.zmax = range_max; + } + + if (memcmp(&prescale, &svga->state.hw_clear.prescale, sizeof prescale) != 0) { + svga->dirty |= SVGA_NEW_PRESCALE; + svga->state.hw_clear.prescale = prescale; + } + + return 0; +} + + +struct svga_tracked_state svga_hw_viewport = +{ + "hw viewport state", + ( SVGA_NEW_FRAME_BUFFER | + SVGA_NEW_VIEWPORT | + SVGA_NEW_RAST | + SVGA_NEW_REDUCED_PRIMITIVE ), + emit_viewport +}; + + +/*********************************************************************** + * Scissor state + */ +static int emit_scissor_rect( struct svga_context *svga, + unsigned dirty ) +{ + const struct pipe_scissor_state *scissor = &svga->curr.scissor; + SVGA3dRect rect; + + rect.x = scissor->minx; + rect.y = scissor->miny; + rect.w = scissor->maxx - scissor->minx; /* + 1 ?? */ + rect.h = scissor->maxy - scissor->miny; /* + 1 ?? */ + + return SVGA3D_SetScissorRect(svga->swc, &rect); +} + + +struct svga_tracked_state svga_hw_scissor = +{ + "hw scissor state", + SVGA_NEW_SCISSOR, + emit_scissor_rect +}; + + +/*********************************************************************** + * Userclip state + */ + +static int emit_clip_planes( struct svga_context *svga, + unsigned dirty ) +{ + unsigned i; + enum pipe_error ret; + + /* TODO: just emit directly from svga_set_clip_state()? + */ + for (i = 0; i < svga->curr.clip.nr; i++) { + ret = SVGA3D_SetClipPlane( svga->swc, + i, + svga->curr.clip.ucp[i] ); + if(ret != PIPE_OK) + return ret; + } + + return 0; +} + + +struct svga_tracked_state svga_hw_clip_planes = +{ + "hw viewport state", + SVGA_NEW_CLIP, + emit_clip_planes +}; diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c new file mode 100644 index 0000000000..6ec38ed3e4 --- /dev/null +++ b/src/gallium/drivers/svga/svga_state_fs.c @@ -0,0 +1,282 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" + +#include "svga_context.h" +#include "svga_state.h" +#include "svga_cmd.h" +#include "svga_tgsi.h" + +#include "svga_hw_reg.h" + + + +static INLINE int compare_fs_keys( const struct svga_fs_compile_key *a, + const struct svga_fs_compile_key *b ) +{ + unsigned keysize = svga_fs_key_size( a ); + return memcmp( a, b, keysize ); +} + + +static struct svga_shader_result *search_fs_key( struct svga_fragment_shader *fs, + const struct svga_fs_compile_key *key ) +{ + struct svga_shader_result *result = fs->base.results; + + assert(key); + + for ( ; result; result = result->next) { + if (compare_fs_keys( key, &result->key.fkey ) == 0) + return result; + } + + return NULL; +} + + +static enum pipe_error compile_fs( struct svga_context *svga, + struct svga_fragment_shader *fs, + const struct svga_fs_compile_key *key, + struct svga_shader_result **out_result ) +{ + struct svga_shader_result *result; + enum pipe_error ret; + + result = svga_translate_fragment_program( fs, key ); + if (result == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto fail; + } + + + ret = SVGA3D_DefineShader(svga->swc, + svga->state.next_fs_id, + SVGA3D_SHADERTYPE_PS, + result->tokens, + result->nr_tokens * sizeof result->tokens[0]); + if (ret) + goto fail; + + *out_result = result; + result->id = svga->state.next_fs_id++; + result->next = fs->base.results; + fs->base.results = result; + return PIPE_OK; + +fail: + if (result) + svga_destroy_shader_result( result ); + return ret; +} + +/* The blend workaround for simulating logicop xor behaviour requires + * that the incoming fragment color be white. This change achieves + * that by hooking up a hard-wired fragment shader that just emits + * color 1,1,1,1 + * + * This is a slightly incomplete solution as it assumes that the + * actual bound shader has no other effects beyond generating a + * fragment color. In particular shaders containing TEXKIL and/or + * depth-write will not have the correct behaviour, nor will those + * expecting to use alphatest. + * + * These are avoidable issues, but they are not much worse than the + * unavoidable ones associated with this technique, so it's not clear + * how much effort should be expended trying to resolve them - the + * ultimate result will still not be correct in most cases. + * + * Shader below was generated with: + * SVGA_DEBUG=tgsi ./mesa/progs/fp/fp-tri white.txt + */ +static int emit_white_fs( struct svga_context *svga ) +{ + int ret; + + /* ps_3_0 + * def c0, 1.000000, 0.000000, 0.000000, 1.000000 + * mov oC0, c0.x + * end + */ + static const unsigned white_tokens[] = { + 0xffff0300, + 0x05000051, + 0xa00f0000, + 0x3f800000, + 0x00000000, + 0x00000000, + 0x3f800000, + 0x02000001, + 0x800f0800, + 0xa0000000, + 0x0000ffff, + }; + + ret = SVGA3D_DefineShader(svga->swc, + svga->state.next_fs_id, + SVGA3D_SHADERTYPE_PS, + white_tokens, + sizeof(white_tokens)); + if (ret) + return ret; + + svga->state.white_fs_id = svga->state.next_fs_id++; + return 0; +} + + +/* SVGA_NEW_TEXTURE_BINDING + * SVGA_NEW_RAST + * SVGA_NEW_NEED_SWTNL + * SVGA_NEW_SAMPLER + */ +static int make_fs_key( const struct svga_context *svga, + struct svga_fs_compile_key *key ) +{ + int i; + int idx = 0; + + memset(key, 0, sizeof *key); + + /* Only need fragment shader fixup for twoside lighting if doing + * hwtnl. Otherwise the draw module does the whole job for us. + * + * SVGA_NEW_SWTNL + */ + if (!svga->state.sw.need_swtnl) { + /* SVGA_NEW_RAST + */ + key->light_twoside = svga->curr.rast->templ.light_twoside; + key->front_cw = (svga->curr.rast->templ.front_winding == + PIPE_WINDING_CW); + } + + + /* XXX: want to limit this to the textures that the shader actually + * refers to. + * + * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER + */ + for (i = 0; i < svga->curr.num_textures; i++) { + if (svga->curr.texture[i]) { + assert(svga->curr.sampler[i]); + key->tex[i].texture_target = svga->curr.texture[i]->target; + if (!svga->curr.sampler[i]->normalized_coords) { + key->tex[i].width_height_idx = idx++; + key->tex[i].unnormalized = TRUE; + ++key->num_unnormalized_coords; + } + } + } + key->num_textures = svga->curr.num_textures; + + idx = 0; + for (i = 0; i < svga->curr.num_samplers; ++i) { + if (svga->curr.sampler[i]) { + key->tex[i].compare_mode = svga->curr.sampler[i]->compare_mode; + key->tex[i].compare_func = svga->curr.sampler[i]->compare_func; + } + } + + return 0; +} + + + +static int emit_hw_fs( struct svga_context *svga, + unsigned dirty ) +{ + struct svga_shader_result *result = NULL; + unsigned id = SVGA3D_INVALID_ID; + int ret = 0; + + /* SVGA_NEW_BLEND + */ + if (svga->curr.blend->need_white_fragments) { + if (svga->state.white_fs_id == SVGA3D_INVALID_ID) { + ret = emit_white_fs( svga ); + if (ret) + return ret; + } + id = svga->state.white_fs_id; + } + else { + struct svga_fragment_shader *fs = svga->curr.fs; + struct svga_fs_compile_key key; + + /* SVGA_NEW_TEXTURE_BINDING + * SVGA_NEW_RAST + * SVGA_NEW_NEED_SWTNL + * SVGA_NEW_SAMPLER + */ + ret = make_fs_key( svga, &key ); + if (ret) + return ret; + + result = search_fs_key( fs, &key ); + if (!result) { + ret = compile_fs( svga, fs, &key, &result ); + if (ret) + return ret; + } + + assert (result); + id = result->id; + } + + assert(id != SVGA3D_INVALID_ID); + + if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT]) { + ret = SVGA3D_SetShader(svga->swc, + SVGA3D_SHADERTYPE_PS, + id ); + if (ret) + return ret; + + svga->dirty |= SVGA_NEW_FS_RESULT; + svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT] = id; + svga->state.hw_draw.fs = result; + } + + return 0; +} + +struct svga_tracked_state svga_hw_fs = +{ + "fragment shader (hwtnl)", + (SVGA_NEW_FS | + SVGA_NEW_TEXTURE_BINDING | + SVGA_NEW_NEED_SWTNL | + SVGA_NEW_RAST | + SVGA_NEW_SAMPLER | + SVGA_NEW_BLEND), + emit_hw_fs +}; + + + diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c new file mode 100644 index 0000000000..00201b8091 --- /dev/null +++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c @@ -0,0 +1,200 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_state.h" + + +#include "svga_context.h" +#include "svga_state.h" +#include "svga_debug.h" +#include "svga_hw_reg.h" + +/*********************************************************************** + */ + +static INLINE SVGA3dDeclType +svga_translate_vertex_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_R32_FLOAT: return SVGA3D_DECLTYPE_FLOAT1; + case PIPE_FORMAT_R32G32_FLOAT: return SVGA3D_DECLTYPE_FLOAT2; + case PIPE_FORMAT_R32G32B32_FLOAT: return SVGA3D_DECLTYPE_FLOAT3; + case PIPE_FORMAT_R32G32B32A32_FLOAT: return SVGA3D_DECLTYPE_FLOAT4; + case PIPE_FORMAT_B8G8R8A8_UNORM: return SVGA3D_DECLTYPE_D3DCOLOR; + case PIPE_FORMAT_R8G8B8A8_USCALED: return SVGA3D_DECLTYPE_UBYTE4; + case PIPE_FORMAT_R16G16_SSCALED: return SVGA3D_DECLTYPE_SHORT2; + case PIPE_FORMAT_R16G16B16A16_SSCALED: return SVGA3D_DECLTYPE_SHORT4; + case PIPE_FORMAT_R8G8B8A8_UNORM: return SVGA3D_DECLTYPE_UBYTE4N; + case PIPE_FORMAT_R16G16_SNORM: return SVGA3D_DECLTYPE_SHORT2N; + case PIPE_FORMAT_R16G16B16A16_SNORM: return SVGA3D_DECLTYPE_SHORT4N; + case PIPE_FORMAT_R16G16_UNORM: return SVGA3D_DECLTYPE_USHORT2N; + case PIPE_FORMAT_R16G16B16A16_UNORM: return SVGA3D_DECLTYPE_USHORT4N; + + /* These formats don't exist yet: + * + case PIPE_FORMAT_R10G10B10_USCALED: return SVGA3D_DECLTYPE_UDEC3; + case PIPE_FORMAT_R10G10B10_SNORM: return SVGA3D_DECLTYPE_DEC3N; + case PIPE_FORMAT_R16G16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_2; + case PIPE_FORMAT_R16G16B16A16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_4; + */ + + default: + /* There are many formats without hardware support. This case + * will be hit regularly, meaning we'll need swvfetch. + */ + return SVGA3D_DECLTYPE_MAX; + } +} + + +static int update_need_swvfetch( struct svga_context *svga, + unsigned dirty ) +{ + unsigned i; + boolean need_swvfetch = FALSE; + + for (i = 0; i < svga->curr.num_vertex_elements; i++) { + svga->state.sw.ve_format[i] = svga_translate_vertex_format(svga->curr.ve[i].src_format); + if (svga->state.sw.ve_format[i] == SVGA3D_DECLTYPE_MAX) { + need_swvfetch = TRUE; + break; + } + } + + if (need_swvfetch != svga->state.sw.need_swvfetch) { + svga->state.sw.need_swvfetch = need_swvfetch; + svga->dirty |= SVGA_NEW_NEED_SWVFETCH; + } + + return 0; +} + +struct svga_tracked_state svga_update_need_swvfetch = +{ + "update need_swvfetch", + ( SVGA_NEW_VELEMENT ), + update_need_swvfetch +}; + + +/*********************************************************************** + */ + +static int update_need_pipeline( struct svga_context *svga, + unsigned dirty ) +{ + + boolean need_pipeline = FALSE; + + /* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE + */ + if (svga->curr.rast->need_pipeline & (1 << svga->curr.reduced_prim)) { + SVGA_DBG(DEBUG_SWTNL, "%s: rast need_pipeline (%d) & prim (%x)\n", + __FUNCTION__, + svga->curr.rast->need_pipeline, + (1 << svga->curr.reduced_prim) ); + need_pipeline = TRUE; + } + + /* SVGA_NEW_EDGEFLAGS + */ + if (svga->curr.rast->hw_unfilled != PIPE_POLYGON_MODE_FILL && + svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES && + svga->curr.edgeflags != NULL) { + SVGA_DBG(DEBUG_SWTNL, "%s: edgeflags\n", __FUNCTION__); + need_pipeline = TRUE; + } + + /* SVGA_NEW_CLIP + */ + if (!svga->curr.rast->templ.bypass_vs_clip_and_viewport && + svga->curr.clip.nr) { + SVGA_DBG(DEBUG_SWTNL, "%s: userclip\n", __FUNCTION__); + need_pipeline = TRUE; + } + + if (need_pipeline != svga->state.sw.need_pipeline) { + svga->state.sw.need_pipeline = need_pipeline; + svga->dirty |= SVGA_NEW_NEED_PIPELINE; + } + + return 0; +} + + +struct svga_tracked_state svga_update_need_pipeline = +{ + "need pipeline", + (SVGA_NEW_RAST | + SVGA_NEW_CLIP | + SVGA_NEW_REDUCED_PRIMITIVE), + update_need_pipeline +}; + + +/*********************************************************************** + */ + +static int update_need_swtnl( struct svga_context *svga, + unsigned dirty ) +{ + boolean need_swtnl; + + if (svga->debug.no_swtnl) { + svga->state.sw.need_swvfetch = 0; + svga->state.sw.need_pipeline = 0; + } + + need_swtnl = (svga->state.sw.need_swvfetch || + svga->state.sw.need_pipeline); + + if (svga->debug.force_swtnl) { + need_swtnl = 1; + } + + if (need_swtnl != svga->state.sw.need_swtnl) { + SVGA_DBG(DEBUG_SWTNL|DEBUG_PERF, + "%s need_swvfetch: %s, need_pipeline %s\n", + __FUNCTION__, + svga->state.sw.need_swvfetch ? "true" : "false", + svga->state.sw.need_pipeline ? "true" : "false"); + + svga->state.sw.need_swtnl = need_swtnl; + svga->dirty |= SVGA_NEW_NEED_SWTNL; + svga->swtnl.new_vdecl = TRUE; + } + + return 0; +} + + +struct svga_tracked_state svga_update_need_swtnl = +{ + "need swtnl", + (SVGA_NEW_NEED_PIPELINE | + SVGA_NEW_NEED_SWVFETCH), + update_need_swtnl +}; diff --git a/src/gallium/drivers/svga/svga_state_rss.c b/src/gallium/drivers/svga/svga_state_rss.c new file mode 100644 index 0000000000..8b6803a285 --- /dev/null +++ b/src/gallium/drivers/svga/svga_state_rss.c @@ -0,0 +1,268 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" + +#include "svga_context.h" +#include "svga_state.h" +#include "svga_cmd.h" + +#include "svga_hw_reg.h" + + + +struct rs_queue { + unsigned rs_count; + SVGA3dRenderState rs[SVGA3D_RS_MAX]; +}; + + +#define EMIT_RS(svga, value, token, fail) \ +do { \ + if (svga->state.hw_draw.rs[SVGA3D_RS_##token] != value) { \ + svga_queue_rs( &queue, SVGA3D_RS_##token, value ); \ + svga->state.hw_draw.rs[SVGA3D_RS_##token] = value; \ + } \ +} while (0) + +#define EMIT_RS_FLOAT(svga, fvalue, token, fail) \ +do { \ + unsigned value = fui(fvalue); \ + if (svga->state.hw_draw.rs[SVGA3D_RS_##token] != value) { \ + svga_queue_rs( &queue, SVGA3D_RS_##token, value ); \ + svga->state.hw_draw.rs[SVGA3D_RS_##token] = value; \ + } \ +} while (0) + + +static INLINE void +svga_queue_rs( struct rs_queue *q, + unsigned rss, + unsigned value ) +{ + q->rs[q->rs_count].state = rss; + q->rs[q->rs_count].uintValue = value; + q->rs_count++; +} + + +/* Compare old and new render states and emit differences between them + * to hardware. Simplest implementation would be to emit the whole of + * the "to" state. + */ +static int emit_rss( struct svga_context *svga, + unsigned dirty ) +{ + struct rs_queue queue; + + queue.rs_count = 0; + + if (dirty & SVGA_NEW_BLEND) { + const struct svga_blend_state *curr = svga->curr.blend; + + EMIT_RS( svga, curr->rt[0].writemask, COLORWRITEENABLE, fail ); + EMIT_RS( svga, curr->rt[0].blend_enable, BLENDENABLE, fail ); + + if (curr->rt[0].blend_enable) { + EMIT_RS( svga, curr->rt[0].srcblend, SRCBLEND, fail ); + EMIT_RS( svga, curr->rt[0].dstblend, DSTBLEND, fail ); + EMIT_RS( svga, curr->rt[0].blendeq, BLENDEQUATION, fail ); + + EMIT_RS( svga, curr->rt[0].separate_alpha_blend_enable, + SEPARATEALPHABLENDENABLE, fail ); + + if (curr->rt[0].separate_alpha_blend_enable) { + EMIT_RS( svga, curr->rt[0].srcblend_alpha, SRCBLENDALPHA, fail ); + EMIT_RS( svga, curr->rt[0].dstblend_alpha, DSTBLENDALPHA, fail ); + EMIT_RS( svga, curr->rt[0].blendeq_alpha, BLENDEQUATIONALPHA, fail ); + } + } + } + + + if (dirty & (SVGA_NEW_DEPTH_STENCIL | SVGA_NEW_RAST)) { + const struct svga_depth_stencil_state *curr = svga->curr.depth; + const struct svga_rasterizer_state *rast = svga->curr.rast; + + if (!curr->stencil[0].enabled) + { + /* Stencil disabled + */ + EMIT_RS( svga, FALSE, STENCILENABLE, fail ); + EMIT_RS( svga, FALSE, STENCILENABLE2SIDED, fail ); + } + else if (curr->stencil[0].enabled && !curr->stencil[1].enabled) + { + /* Regular stencil + */ + EMIT_RS( svga, TRUE, STENCILENABLE, fail ); + EMIT_RS( svga, FALSE, STENCILENABLE2SIDED, fail ); + + EMIT_RS( svga, curr->stencil[0].func, STENCILFUNC, fail ); + EMIT_RS( svga, curr->stencil[0].fail, STENCILFAIL, fail ); + EMIT_RS( svga, curr->stencil[0].zfail, STENCILZFAIL, fail ); + EMIT_RS( svga, curr->stencil[0].pass, STENCILPASS, fail ); + + EMIT_RS( svga, curr->stencil_ref, STENCILREF, fail ); + EMIT_RS( svga, curr->stencil_mask, STENCILMASK, fail ); + EMIT_RS( svga, curr->stencil_writemask, STENCILWRITEMASK, fail ); + } + else + { + int cw, ccw; + + /* Hardware frontwinding is always CW, so if ours is also CW, + * then our definition of front face agrees with hardware. + * Otherwise need to flip. + */ + if (rast->templ.front_winding == PIPE_WINDING_CW) { + cw = 0; + ccw = 1; + } + else { + cw = 1; + ccw = 0; + } + + /* Twoside stencil + */ + EMIT_RS( svga, TRUE, STENCILENABLE, fail ); + EMIT_RS( svga, TRUE, STENCILENABLE2SIDED, fail ); + + EMIT_RS( svga, curr->stencil[cw].func, STENCILFUNC, fail ); + EMIT_RS( svga, curr->stencil[cw].fail, STENCILFAIL, fail ); + EMIT_RS( svga, curr->stencil[cw].zfail, STENCILZFAIL, fail ); + EMIT_RS( svga, curr->stencil[cw].pass, STENCILPASS, fail ); + + EMIT_RS( svga, curr->stencil[ccw].func, CCWSTENCILFUNC, fail ); + EMIT_RS( svga, curr->stencil[ccw].fail, CCWSTENCILFAIL, fail ); + EMIT_RS( svga, curr->stencil[ccw].zfail, CCWSTENCILZFAIL, fail ); + EMIT_RS( svga, curr->stencil[ccw].pass, CCWSTENCILPASS, fail ); + + EMIT_RS( svga, curr->stencil_ref, STENCILREF, fail ); + EMIT_RS( svga, curr->stencil_mask, STENCILMASK, fail ); + EMIT_RS( svga, curr->stencil_writemask, STENCILWRITEMASK, fail ); + } + + EMIT_RS( svga, curr->zenable, ZENABLE, fail ); + if (curr->zenable) { + EMIT_RS( svga, curr->zfunc, ZFUNC, fail ); + EMIT_RS( svga, curr->zwriteenable, ZWRITEENABLE, fail ); + } + + EMIT_RS( svga, curr->alphatestenable, ALPHATESTENABLE, fail ); + if (curr->alphatestenable) { + EMIT_RS( svga, curr->alphafunc, ALPHAFUNC, fail ); + EMIT_RS_FLOAT( svga, curr->alpharef, ALPHAREF, fail ); + } + } + + + if (dirty & SVGA_NEW_RAST) + { + const struct svga_rasterizer_state *curr = svga->curr.rast; + + /* Shademode: still need to rearrange index list to move + * flat-shading PV first vertex. + */ + EMIT_RS( svga, curr->shademode, SHADEMODE, fail ); + EMIT_RS( svga, curr->cullmode, CULLMODE, fail ); + EMIT_RS( svga, curr->scissortestenable, SCISSORTESTENABLE, fail ); + EMIT_RS( svga, curr->multisampleantialias, MULTISAMPLEANTIALIAS, fail ); + EMIT_RS( svga, curr->lastpixel, LASTPIXEL, fail ); + EMIT_RS( svga, curr->linepattern, LINEPATTERN, fail ); + EMIT_RS_FLOAT( svga, curr->pointsize, POINTSIZE, fail ); + EMIT_RS_FLOAT( svga, curr->pointsize_min, POINTSIZEMIN, fail ); + EMIT_RS_FLOAT( svga, curr->pointsize_max, POINTSIZEMAX, fail ); + } + + if (dirty & (SVGA_NEW_RAST | SVGA_NEW_FRAME_BUFFER | SVGA_NEW_NEED_PIPELINE)) + { + const struct svga_rasterizer_state *curr = svga->curr.rast; + float slope = 0.0; + float bias = 0.0; + + /* Need to modify depth bias according to bound depthbuffer + * format. Don't do hardware depthbias while the software + * pipeline is active. + */ + if (!svga->state.sw.need_pipeline && + svga->curr.framebuffer.zsbuf) + { + slope = curr->slopescaledepthbias; + bias = svga->curr.depthscale * curr->depthbias; + } + + EMIT_RS_FLOAT( svga, slope, SLOPESCALEDEPTHBIAS, fail ); + EMIT_RS_FLOAT( svga, bias, DEPTHBIAS, fail ); + } + + + if (queue.rs_count) { + SVGA3dRenderState *rs; + + if (SVGA3D_BeginSetRenderState( svga->swc, + &rs, + queue.rs_count ) != PIPE_OK) + goto fail; + + memcpy( rs, + queue.rs, + queue.rs_count * sizeof queue.rs[0]); + + SVGA_FIFOCommitAll( svga->swc ); + } + + /* Also blend color: + */ + + return 0; + +fail: + /* XXX: need to poison cached hardware state on failure to ensure + * dirty state gets re-emitted. Fix this by re-instating partial + * FIFOCommit command and only updating cached hw state once the + * initial allocation has succeeded. + */ + memset(svga->state.hw_draw.rs, 0xcd, sizeof(svga->state.hw_draw.rs)); + + return PIPE_ERROR_OUT_OF_MEMORY; +} + + +struct svga_tracked_state svga_hw_rss = +{ + "hw rss state", + + (SVGA_NEW_BLEND | + SVGA_NEW_DEPTH_STENCIL | + SVGA_NEW_RAST | + SVGA_NEW_FRAME_BUFFER | + SVGA_NEW_NEED_PIPELINE), + + emit_rss +}; diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c new file mode 100644 index 0000000000..b313794520 --- /dev/null +++ b/src/gallium/drivers/svga/svga_state_tss.c @@ -0,0 +1,279 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" + +#include "svga_screen_texture.h" +#include "svga_winsys.h" +#include "svga_context.h" +#include "svga_state.h" +#include "svga_cmd.h" + +#include "svga_hw_reg.h" + + +void svga_cleanup_tss_binding(struct svga_context *svga) +{ + int i; + unsigned count = MAX2( svga->curr.num_textures, + svga->state.hw_draw.num_views ); + + for (i = 0; i < count; i++) { + struct svga_hw_view_state *view = &svga->state.hw_draw.views[i]; + + svga_sampler_view_reference(&view->v, NULL); + pipe_texture_reference( &svga->curr.texture[i], NULL ); + pipe_texture_reference( &view->texture, NULL ); + + view->dirty = 1; + } +} + + +static int +update_tss_binding(struct svga_context *svga, + unsigned dirty ) +{ + unsigned i; + unsigned count = MAX2( svga->curr.num_textures, + svga->state.hw_draw.num_views ); + unsigned min_lod; + unsigned max_lod; + + + struct { + struct { + unsigned unit; + struct svga_hw_view_state *view; + } bind[PIPE_MAX_SAMPLERS]; + + unsigned bind_count; + } queue; + + queue.bind_count = 0; + + for (i = 0; i < count; i++) { + const struct svga_sampler_state *s = svga->curr.sampler[i]; + struct svga_hw_view_state *view = &svga->state.hw_draw.views[i]; + + /* get min max lod */ + if (svga->curr.texture[i]) { + min_lod = MAX2(s->view_min_lod, 0); + max_lod = MIN2(s->view_max_lod, svga->curr.texture[i]->last_level); + } else { + min_lod = 0; + max_lod = 0; + } + + if (view->texture != svga->curr.texture[i] || + view->min_lod != min_lod || + view->max_lod != max_lod) { + + svga_sampler_view_reference(&view->v, NULL); + pipe_texture_reference( &view->texture, svga->curr.texture[i] ); + + view->dirty = TRUE; + view->min_lod = min_lod; + view->max_lod = max_lod; + + if (svga->curr.texture[i]) + view->v = svga_get_tex_sampler_view(&svga->pipe, + svga->curr.texture[i], + min_lod, + max_lod); + } + + if (view->dirty) { + queue.bind[queue.bind_count].unit = i; + queue.bind[queue.bind_count].view = view; + queue.bind_count++; + } + else if (view->v) { + svga_validate_sampler_view(svga, view->v); + } + } + + svga->state.hw_draw.num_views = svga->curr.num_textures; + + if (queue.bind_count) { + SVGA3dTextureState *ts; + + if (SVGA3D_BeginSetTextureState( svga->swc, + &ts, + queue.bind_count ) != PIPE_OK) + goto fail; + + for (i = 0; i < queue.bind_count; i++) { + ts[i].stage = queue.bind[i].unit; + ts[i].name = SVGA3D_TS_BIND_TEXTURE; + + if (queue.bind[i].view->v) { + svga->swc->surface_relocation(svga->swc, + &ts[i].value, + queue.bind[i].view->v->handle, + PIPE_BUFFER_USAGE_GPU_READ); + } + else { + ts[i].value = SVGA3D_INVALID_ID; + } + + queue.bind[i].view->dirty = FALSE; + } + + SVGA_FIFOCommitAll( svga->swc ); + } + + return 0; + +fail: + return PIPE_ERROR_OUT_OF_MEMORY; +} + + +struct svga_tracked_state svga_hw_tss_binding = { + "texture binding emit", + SVGA_NEW_TEXTURE_BINDING | + SVGA_NEW_SAMPLER, + update_tss_binding +}; + + +/*********************************************************************** + */ + +struct ts_queue { + unsigned ts_count; + SVGA3dTextureState ts[PIPE_MAX_SAMPLERS*SVGA3D_TS_MAX]; +}; + + +#define EMIT_TS(svga, unit, val, token, fail) \ +do { \ + if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) { \ + svga_queue_tss( &queue, unit, SVGA3D_TS_##token, val ); \ + svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val; \ + } \ +} while (0) + +#define EMIT_TS_FLOAT(svga, unit, fvalue, token, fail) \ +do { \ + unsigned val = fui(fvalue); \ + if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) { \ + svga_queue_tss( &queue, unit, SVGA3D_TS_##token, val ); \ + svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val; \ + } \ +} while (0) + + +static INLINE void +svga_queue_tss( struct ts_queue *q, + unsigned unit, + unsigned tss, + unsigned value ) +{ + assert(q->ts_count < sizeof(q->ts)/sizeof(q->ts[0])); + q->ts[q->ts_count].stage = unit; + q->ts[q->ts_count].name = tss; + q->ts[q->ts_count].value = value; + q->ts_count++; +} + + +static int +update_tss(struct svga_context *svga, + unsigned dirty ) +{ + unsigned i; + struct ts_queue queue; + + queue.ts_count = 0; + for (i = 0; i < svga->curr.num_samplers; i++) { + if (svga->curr.sampler[i]) { + const struct svga_sampler_state *curr = svga->curr.sampler[i]; + + EMIT_TS(svga, i, curr->mipfilter, MIPFILTER, fail); + EMIT_TS(svga, i, curr->min_lod, TEXTURE_MIPMAP_LEVEL, fail); + EMIT_TS(svga, i, curr->magfilter, MAGFILTER, fail); + EMIT_TS(svga, i, curr->minfilter, MINFILTER, fail); + EMIT_TS(svga, i, curr->aniso_level, TEXTURE_ANISOTROPIC_LEVEL, fail); + EMIT_TS_FLOAT(svga, i, curr->lod_bias, TEXTURE_LOD_BIAS, fail); + EMIT_TS(svga, i, curr->addressu, ADDRESSU, fail); + EMIT_TS(svga, i, curr->addressw, ADDRESSW, fail); + EMIT_TS(svga, i, curr->bordercolor, BORDERCOLOR, fail); + // TEXCOORDINDEX -- hopefully not needed + + if (svga->curr.tex_flags.flag_1d & (1 << i)) { + debug_printf("wrap 1d tex %d\n", i); + EMIT_TS(svga, i, SVGA3D_TEX_ADDRESS_WRAP, ADDRESSV, fail); + } + else + EMIT_TS(svga, i, curr->addressv, ADDRESSV, fail); + + if (svga->curr.tex_flags.flag_srgb & (1 << i)) + EMIT_TS_FLOAT(svga, i, 2.2f, GAMMA, fail); + else + EMIT_TS_FLOAT(svga, i, 1.0f, GAMMA, fail); + + } + } + + if (queue.ts_count) { + SVGA3dTextureState *ts; + + if (SVGA3D_BeginSetTextureState( svga->swc, + &ts, + queue.ts_count ) != PIPE_OK) + goto fail; + + memcpy( ts, + queue.ts, + queue.ts_count * sizeof queue.ts[0]); + + SVGA_FIFOCommitAll( svga->swc ); + } + + return 0; + +fail: + /* XXX: need to poison cached hardware state on failure to ensure + * dirty state gets re-emitted. Fix this by re-instating partial + * FIFOCommit command and only updating cached hw state once the + * initial allocation has succeeded. + */ + memset(svga->state.hw_draw.ts, 0xcd, sizeof(svga->state.hw_draw.ts)); + + return PIPE_ERROR_OUT_OF_MEMORY; +} + + +struct svga_tracked_state svga_hw_tss = { + "texture state emit", + (SVGA_NEW_SAMPLER | + SVGA_NEW_TEXTURE_FLAGS), + update_tss +}; + diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c new file mode 100644 index 0000000000..c534308f50 --- /dev/null +++ b/src/gallium/drivers/svga/svga_state_vdecl.c @@ -0,0 +1,182 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "util/u_upload_mgr.h" + +#include "svga_context.h" +#include "svga_state.h" +#include "svga_draw.h" +#include "svga_tgsi.h" +#include "svga_screen.h" +#include "svga_screen_buffer.h" + +#include "svga_hw_reg.h" + + +static int +upload_user_buffers( struct svga_context *svga ) +{ + enum pipe_error ret = PIPE_OK; + int i; + int nr; + + if (0) + debug_printf("%s: %d\n", __FUNCTION__, svga->curr.num_vertex_buffers); + + nr = svga->curr.num_vertex_buffers; + + for (i = 0; i < nr; i++) + { + if (svga_buffer_is_user_buffer(svga->curr.vb[i].buffer)) + { + struct pipe_buffer *upload_buffer = NULL; + unsigned offset = /*svga->curr.vb[i].buffer_offset*/ 0; + unsigned size = svga->curr.vb[i].buffer->size /*- offset*/; + unsigned upload_offset; + + ret = u_upload_buffer( svga->upload_vb, + offset, + size, + svga->curr.vb[i].buffer, + &upload_offset, + &upload_buffer ); + if (ret) + return ret; + + if (0) + debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sz %d\n", + __FUNCTION__, + i, + svga->curr.vb[i].buffer, + upload_buffer, upload_offset, size); + + /* Make sure we release the old buffer and end up with the + * correct refcount on the uploaded buffer. + */ + pipe_buffer_reference( &svga->curr.vb[i].buffer, NULL ); + svga->curr.vb[i].buffer = upload_buffer; + svga->curr.vb[i].buffer_offset = upload_offset; + } + } + + if (0) + debug_printf("%s: DONE\n", __FUNCTION__); + + return ret; +} + + +/*********************************************************************** + */ + + +static int emit_hw_vs_vdecl( struct svga_context *svga, + unsigned dirty ) +{ + const struct pipe_vertex_element *ve = svga->curr.ve; + SVGA3dVertexDecl decl; + unsigned i; + + assert(svga->curr.num_vertex_elements >= + svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]); + + svga_hwtnl_reset_vdecl( svga->hwtnl, + svga->curr.num_vertex_elements ); + + for (i = 0; i < svga->curr.num_vertex_elements; i++) { + const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index]; + unsigned usage, index; + + + svga_generate_vdecl_semantics( i, &usage, &index ); + + /* SVGA_NEW_VELEMENT + */ + decl.identity.type = svga->state.sw.ve_format[i]; + decl.identity.method = SVGA3D_DECLMETHOD_DEFAULT; + decl.identity.usage = usage; + decl.identity.usageIndex = index; + decl.array.stride = vb->stride; + decl.array.offset = (vb->buffer_offset + + ve[i].src_offset); + + svga_hwtnl_vdecl( svga->hwtnl, + i, + &decl, + vb->buffer ); + } + + return 0; +} + + +static int emit_hw_vdecl( struct svga_context *svga, + unsigned dirty ) +{ + int ret = 0; + + /* SVGA_NEW_NEED_SWTNL + */ + if (svga->state.sw.need_swtnl) + return 0; /* Do not emit during swtnl */ + + /* If we get to here, we know that we're going to draw. Upload + * userbuffers now and try to combine multiple userbuffers from + * multiple draw calls into a single host buffer for performance. + */ + if (svga->curr.any_user_vertex_buffers && + SVGA_COMBINE_USERBUFFERS) + { + ret = upload_user_buffers( svga ); + if (ret) + return ret; + + svga->curr.any_user_vertex_buffers = FALSE; + } + + return emit_hw_vs_vdecl( svga, dirty ); +} + + +struct svga_tracked_state svga_hw_vdecl = +{ + "hw vertex decl state (hwtnl version)", + ( SVGA_NEW_NEED_SWTNL | + SVGA_NEW_VELEMENT | + SVGA_NEW_VBUFFER | + SVGA_NEW_RAST | + SVGA_NEW_FS | + SVGA_NEW_VS ), + emit_hw_vdecl +}; + + + + + + diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c new file mode 100644 index 0000000000..a947745732 --- /dev/null +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -0,0 +1,239 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "pipe/p_inlines.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "translate/translate.h" + +#include "svga_context.h" +#include "svga_state.h" +#include "svga_cmd.h" +#include "svga_tgsi.h" + +#include "svga_hw_reg.h" + +/*********************************************************************** + */ + + +static INLINE int compare_vs_keys( const struct svga_vs_compile_key *a, + const struct svga_vs_compile_key *b ) +{ + unsigned keysize = svga_vs_key_size( a ); + return memcmp( a, b, keysize ); +} + + +static struct svga_shader_result *search_vs_key( struct svga_vertex_shader *vs, + const struct svga_vs_compile_key *key ) +{ + struct svga_shader_result *result = vs->base.results; + + assert(key); + + for ( ; result; result = result->next) { + if (compare_vs_keys( key, &result->key.vkey ) == 0) + return result; + } + + return NULL; +} + + +static enum pipe_error compile_vs( struct svga_context *svga, + struct svga_vertex_shader *vs, + const struct svga_vs_compile_key *key, + struct svga_shader_result **out_result ) +{ + struct svga_shader_result *result; + enum pipe_error ret = PIPE_OK; + + result = svga_translate_vertex_program( vs, key ); + if (result == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto fail; + } + + ret = SVGA3D_DefineShader(svga->swc, + svga->state.next_vs_id, + SVGA3D_SHADERTYPE_VS, + result->tokens, + result->nr_tokens * sizeof result->tokens[0]); + if (ret) + goto fail; + + *out_result = result; + result->id = svga->state.next_vs_id++; + result->next = vs->base.results; + vs->base.results = result; + return PIPE_OK; + +fail: + if (result) + svga_destroy_shader_result( result ); + return ret; +} + +/* SVGA_NEW_PRESCALE, SVGA_NEW_RAST, SVGA_NEW_ZERO_STRIDE + */ +static int make_vs_key( struct svga_context *svga, + struct svga_vs_compile_key *key ) +{ + memset(key, 0, sizeof *key); + key->need_prescale = svga->state.hw_clear.prescale.enabled; + key->allow_psiz = svga->curr.rast->templ.point_size_per_vertex; + key->zero_stride_vertex_elements = + svga->curr.zero_stride_vertex_elements; + key->num_zero_stride_vertex_elements = + svga->curr.num_zero_stride_vertex_elements; + return 0; +} + + + +static int emit_hw_vs( struct svga_context *svga, + unsigned dirty ) +{ + struct svga_shader_result *result = NULL; + unsigned id = SVGA3D_INVALID_ID; + int ret = 0; + + /* SVGA_NEW_NEED_SWTNL */ + if (!svga->state.sw.need_swtnl) { + struct svga_vertex_shader *vs = svga->curr.vs; + struct svga_vs_compile_key key; + + ret = make_vs_key( svga, &key ); + if (ret) + return ret; + + result = search_vs_key( vs, &key ); + if (!result) { + ret = compile_vs( svga, vs, &key, &result ); + if (ret) + return ret; + } + + assert (result); + id = result->id; + } + + if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX]) { + ret = SVGA3D_SetShader(svga->swc, + SVGA3D_SHADERTYPE_VS, + id ); + if (ret) + return ret; + + svga->dirty |= SVGA_NEW_VS_RESULT; + svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX] = id; + svga->state.hw_draw.vs = result; + } + + return 0; +} + +struct svga_tracked_state svga_hw_vs = +{ + "vertex shader (hwtnl)", + (SVGA_NEW_VS | + SVGA_NEW_PRESCALE | + SVGA_NEW_NEED_SWTNL | + SVGA_NEW_ZERO_STRIDE), + emit_hw_vs +}; + + +/*********************************************************************** + */ +static int update_zero_stride( struct svga_context *svga, + unsigned dirty ) +{ + unsigned i; + + svga->curr.zero_stride_vertex_elements = 0; + svga->curr.num_zero_stride_vertex_elements = 0; + + for (i = 0; i < svga->curr.num_vertex_elements; i++) { + const struct pipe_vertex_element *vel = &svga->curr.ve[i]; + const struct pipe_vertex_buffer *vbuffer = &svga->curr.vb[ + vel->vertex_buffer_index]; + if (vbuffer->stride == 0) { + unsigned const_idx = + svga->curr.num_zero_stride_vertex_elements; + struct translate *translate; + struct translate_key key; + void *mapped_buffer; + + svga->curr.zero_stride_vertex_elements |= (1 << i); + ++svga->curr.num_zero_stride_vertex_elements; + + key.output_stride = 4 * sizeof(float); + key.nr_elements = 1; + key.element[0].input_format = vel->src_format; + key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + key.element[0].input_buffer = vel->vertex_buffer_index; + key.element[0].input_offset = vel->src_offset; + key.element[0].output_offset = const_idx * 4 * sizeof(float); + + translate_key_sanitize(&key); + /* translate_generic_create is technically private but + * we don't want to code-generate, just want generic + * translation */ + translate = translate_generic_create(&key); + + assert(vel->src_offset == 0); + + mapped_buffer = pipe_buffer_map_range(svga->pipe.screen, + vbuffer->buffer, + vel->src_offset, + pf_get_size(vel->src_format), + PIPE_BUFFER_USAGE_CPU_READ); + translate->set_buffer(translate, vel->vertex_buffer_index, + mapped_buffer, + vbuffer->stride); + translate->run(translate, 0, 1, + svga->curr.zero_stride_constants); + + pipe_buffer_unmap(svga->pipe.screen, + vbuffer->buffer); + translate->release(translate); + } + } + + if (svga->curr.num_zero_stride_vertex_elements) + svga->dirty |= SVGA_NEW_ZERO_STRIDE; + + return 0; +} + +struct svga_tracked_state svga_hw_update_zero_stride = +{ + "update zero_stride", + ( SVGA_NEW_VELEMENT | + SVGA_NEW_VBUFFER ), + update_zero_stride +}; diff --git a/src/gallium/drivers/svga/svga_swtnl.h b/src/gallium/drivers/svga/svga_swtnl.h new file mode 100644 index 0000000000..4882f26b17 --- /dev/null +++ b/src/gallium/drivers/svga/svga_swtnl.h @@ -0,0 +1,52 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_SWTNL_H +#define SVGA_SWTNL_H + +#include "pipe/p_compiler.h" + +struct svga_context; +struct pipe_context; +struct pipe_buffer; +struct vbuf_render; + + +boolean svga_init_swtnl( struct svga_context *svga ); +void svga_destroy_swtnl( struct svga_context *svga ); + + +enum pipe_error +svga_swtnl_draw_range_elements(struct svga_context *svga, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned prim, + unsigned start, + unsigned count); + + +#endif diff --git a/src/gallium/drivers/svga/svga_swtnl_backend.c b/src/gallium/drivers/svga/svga_swtnl_backend.c new file mode 100644 index 0000000000..b4f757a47a --- /dev/null +++ b/src/gallium/drivers/svga/svga_swtnl_backend.c @@ -0,0 +1,349 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "draw/draw_vbuf.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" + +#include "util/u_debug.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_simple_shaders.h" + +#include "svga_context.h" +#include "svga_state.h" +#include "svga_swtnl.h" + +#include "svga_types.h" +#include "svga_reg.h" +#include "svga3d_reg.h" +#include "svga_draw.h" +#include "svga_swtnl_private.h" + + +static const struct vertex_info * +svga_vbuf_render_get_vertex_info( struct vbuf_render *render ) +{ + struct svga_vbuf_render *svga_render = svga_vbuf_render(render); + struct svga_context *svga = svga_render->svga; + + svga_swtnl_update_vdecl(svga); + + return &svga_render->vertex_info; +} + + +static boolean +svga_vbuf_render_allocate_vertices( struct vbuf_render *render, + ushort vertex_size, + ushort nr_vertices ) +{ + struct svga_vbuf_render *svga_render = svga_vbuf_render(render); + struct svga_context *svga = svga_render->svga; + struct pipe_screen *screen = svga->pipe.screen; + size_t size = (size_t)nr_vertices * (size_t)vertex_size; + boolean new_vbuf = FALSE; + boolean new_ibuf = FALSE; + + if (svga_render->vertex_size != vertex_size) + svga->swtnl.new_vdecl = TRUE; + svga_render->vertex_size = (size_t)vertex_size; + + if (svga->swtnl.new_vbuf) + new_ibuf = new_vbuf = TRUE; + svga->swtnl.new_vbuf = FALSE; + + if (svga_render->vbuf_size < svga_render->vbuf_offset + svga_render->vbuf_used + size) + new_vbuf = TRUE; + + if (new_vbuf) + pipe_buffer_reference(&svga_render->vbuf, NULL); + if (new_ibuf) + pipe_buffer_reference(&svga_render->ibuf, NULL); + + if (!svga_render->vbuf) { + svga_render->vbuf_size = MAX2(size, svga_render->vbuf_alloc_size); + svga_render->vbuf = pipe_buffer_create(screen, + 0, + PIPE_BUFFER_USAGE_VERTEX, + svga_render->vbuf_size); + if(!svga_render->vbuf) { + svga_context_flush(svga, NULL); + svga_render->vbuf = pipe_buffer_create(screen, + 0, + PIPE_BUFFER_USAGE_VERTEX, + svga_render->vbuf_size); + assert(svga_render->vbuf); + } + + svga->swtnl.new_vdecl = TRUE; + svga_render->vbuf_offset = 0; + } else { + svga_render->vbuf_offset += svga_render->vbuf_used; + } + + svga_render->vbuf_used = 0; + + if (svga->swtnl.new_vdecl) + svga_render->vdecl_offset = svga_render->vbuf_offset; + + return TRUE; +} + +static void * +svga_vbuf_render_map_vertices( struct vbuf_render *render ) +{ + struct svga_vbuf_render *svga_render = svga_vbuf_render(render); + struct svga_context *svga = svga_render->svga; + struct pipe_screen *screen = svga->pipe.screen; + + char *ptr = (char*)pipe_buffer_map(screen, + svga_render->vbuf, + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_FLUSH_EXPLICIT); + return ptr + svga_render->vbuf_offset; +} + +static void +svga_vbuf_render_unmap_vertices( struct vbuf_render *render, + ushort min_index, + ushort max_index ) +{ + struct svga_vbuf_render *svga_render = svga_vbuf_render(render); + struct svga_context *svga = svga_render->svga; + struct pipe_screen *screen = svga->pipe.screen; + unsigned offset, length; + size_t used = svga_render->vertex_size * ((size_t)max_index + 1); + + offset = svga_render->vbuf_offset + svga_render->vertex_size * min_index; + length = svga_render->vertex_size * (max_index + 1 - min_index); + pipe_buffer_flush_mapped_range(screen, svga_render->vbuf, offset, length); + pipe_buffer_unmap(screen, svga_render->vbuf); + svga_render->min_index = min_index; + svga_render->max_index = max_index; + svga_render->vbuf_used = MAX2(svga_render->vbuf_used, used); +} + +static boolean +svga_vbuf_render_set_primitive( struct vbuf_render *render, + unsigned prim ) +{ + struct svga_vbuf_render *svga_render = svga_vbuf_render(render); + svga_render->prim = prim; + + return TRUE; +} + +static void +svga_vbuf_sumbit_state( struct svga_vbuf_render *svga_render ) +{ + struct svga_context *svga = svga_render->svga; + SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS]; + enum pipe_error ret; + int i; + + /* if the vdecl or vbuf hasn't changed do nothing */ + if (!svga->swtnl.new_vdecl) + return; + + memcpy(vdecl, svga_render->vdecl, sizeof(vdecl)); + + /* flush the hw state */ + ret = svga_hwtnl_flush(svga->hwtnl); + if (ret) { + svga_context_flush(svga, NULL); + ret = svga_hwtnl_flush(svga->hwtnl); + /* if we hit this path we might become synced with hw */ + svga->swtnl.new_vbuf = TRUE; + assert(ret == 0); + } + + svga_hwtnl_reset_vdecl(svga->hwtnl, svga_render->vdecl_count); + + for (i = 0; i < svga_render->vdecl_count; i++) { + vdecl[i].array.offset += svga_render->vdecl_offset; + + svga_hwtnl_vdecl( svga->hwtnl, + i, + &vdecl[i], + svga_render->vbuf ); + } + + /* We have already taken care of flatshading, so let the hwtnl + * module use whatever is most convenient: + */ + if (svga->state.sw.need_pipeline) { + svga_hwtnl_set_flatshade(svga->hwtnl, FALSE, FALSE); + svga_hwtnl_set_unfilled(svga->hwtnl, PIPE_POLYGON_MODE_FILL); + } + else { + svga_hwtnl_set_flatshade( svga->hwtnl, + svga->curr.rast->templ.flatshade, + svga->curr.rast->templ.flatshade_first ); + + svga_hwtnl_set_unfilled( svga->hwtnl, + svga->curr.rast->hw_unfilled ); + } + + svga->swtnl.new_vdecl = FALSE; +} + +static void +svga_vbuf_render_draw_arrays( struct vbuf_render *render, + unsigned start, + uint nr ) +{ + struct svga_vbuf_render *svga_render = svga_vbuf_render(render); + struct svga_context *svga = svga_render->svga; + unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size; + enum pipe_error ret = 0; + + svga_vbuf_sumbit_state(svga_render); + + /* Need to call update_state() again as the draw module may have + * altered some of our state behind our backs. Testcase: + * redbook/polys.c + */ + svga_update_state_retry( svga, SVGA_STATE_HW_DRAW ); + + ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr); + svga->swtnl.new_vbuf = TRUE; + assert(ret == PIPE_OK); + } +} + + +static void +svga_vbuf_render_draw( struct vbuf_render *render, + const ushort *indices, + uint nr_indices) +{ + struct svga_vbuf_render *svga_render = svga_vbuf_render(render); + struct svga_context *svga = svga_render->svga; + struct pipe_screen *screen = svga->pipe.screen; + unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size; + boolean ret; + size_t size = 2 * nr_indices; + + assert(( svga_render->vbuf_offset - svga_render->vdecl_offset) % svga_render->vertex_size == 0); + + if (svga_render->ibuf_size < svga_render->ibuf_offset + size) + pipe_buffer_reference(&svga_render->ibuf, NULL); + + if (!svga_render->ibuf) { + svga_render->ibuf_size = MAX2(size, svga_render->ibuf_alloc_size); + svga_render->ibuf = pipe_buffer_create(screen, + 0, + PIPE_BUFFER_USAGE_VERTEX, + svga_render->ibuf_size); + svga_render->ibuf_offset = 0; + } + + pipe_buffer_write(screen, svga_render->ibuf, + svga_render->ibuf_offset, 2 * nr_indices, indices); + + + /* off to hardware */ + svga_vbuf_sumbit_state(svga_render); + + /* Need to call update_state() again as the draw module may have + * altered some of our state behind our backs. Testcase: + * redbook/polys.c + */ + svga_update_state_retry( svga, SVGA_STATE_HW_DRAW ); + + ret = svga_hwtnl_draw_range_elements(svga->hwtnl, + svga_render->ibuf, + 2, + svga_render->min_index, + svga_render->max_index, + svga_render->prim, + svga_render->ibuf_offset / 2, nr_indices, bias); + if(ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = svga_hwtnl_draw_range_elements(svga->hwtnl, + svga_render->ibuf, + 2, + svga_render->min_index, + svga_render->max_index, + svga_render->prim, + svga_render->ibuf_offset / 2, nr_indices, bias); + svga->swtnl.new_vbuf = TRUE; + assert(ret == PIPE_OK); + } + + svga_render->ibuf_offset += size; +} + + +static void +svga_vbuf_render_release_vertices( struct vbuf_render *render ) +{ + +} + + +static void +svga_vbuf_render_destroy( struct vbuf_render *render ) +{ + struct svga_vbuf_render *svga_render = svga_vbuf_render(render); + + pipe_buffer_reference(&svga_render->vbuf, NULL); + pipe_buffer_reference(&svga_render->ibuf, NULL); + FREE(svga_render); +} + + +/** + * Create a new primitive render. + */ +struct vbuf_render * +svga_vbuf_render_create( struct svga_context *svga ) +{ + struct svga_vbuf_render *svga_render = CALLOC_STRUCT(svga_vbuf_render); + + svga_render->svga = svga; + svga_render->ibuf_size = 0; + svga_render->vbuf_size = 0; + svga_render->ibuf_alloc_size = 4*1024; + svga_render->vbuf_alloc_size = 64*1024; + svga_render->base.max_vertex_buffer_bytes = 64*1024/10; + svga_render->base.max_indices = 65536; + svga_render->base.get_vertex_info = svga_vbuf_render_get_vertex_info; + svga_render->base.allocate_vertices = svga_vbuf_render_allocate_vertices; + svga_render->base.map_vertices = svga_vbuf_render_map_vertices; + svga_render->base.unmap_vertices = svga_vbuf_render_unmap_vertices; + svga_render->base.set_primitive = svga_vbuf_render_set_primitive; + svga_render->base.draw = svga_vbuf_render_draw; + svga_render->base.draw_arrays = svga_vbuf_render_draw_arrays; + svga_render->base.release_vertices = svga_vbuf_render_release_vertices; + svga_render->base.destroy = svga_vbuf_render_destroy; + + return &svga_render->base; +} diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c new file mode 100644 index 0000000000..8b14c913f7 --- /dev/null +++ b/src/gallium/drivers/svga/svga_swtnl_draw.c @@ -0,0 +1,170 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" +#include "pipe/p_inlines.h" +#include "pipe/p_state.h" +#include "util/u_memory.h" + +#include "svga_context.h" +#include "svga_swtnl.h" +#include "svga_state.h" +#include "svga_swtnl_private.h" + + + +enum pipe_error +svga_swtnl_draw_range_elements(struct svga_context *svga, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned prim, unsigned start, unsigned count) +{ + struct draw_context *draw = svga->swtnl.draw; + unsigned i; + const void *map; + enum pipe_error ret; + + assert(!svga->dirty); + assert(svga->state.sw.need_swtnl); + assert(draw); + + ret = svga_update_state(svga, SVGA_STATE_SWTNL_DRAW); + if (ret) { + svga_context_flush(svga, NULL); + ret = svga_update_state(svga, SVGA_STATE_SWTNL_DRAW); + svga->swtnl.new_vbuf = TRUE; + assert(ret == PIPE_OK); + } + + /* + * Map vertex buffers + */ + for (i = 0; i < svga->curr.num_vertex_buffers; i++) { + map = pipe_buffer_map(svga->pipe.screen, + svga->curr.vb[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + + draw_set_mapped_vertex_buffer(draw, i, map); + } + + /* Map index buffer, if present */ + if (indexBuffer) { + map = pipe_buffer_map(svga->pipe.screen, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + + draw_set_mapped_element_buffer_range(draw, + indexSize, + min_index, + max_index, + map); + } + + if (svga->curr.cb[PIPE_SHADER_VERTEX]) { + map = pipe_buffer_map(svga->pipe.screen, + svga->curr.cb[PIPE_SHADER_VERTEX], + PIPE_BUFFER_USAGE_CPU_READ); + assert(map); + draw_set_mapped_constant_buffer( + draw, + map, + svga->curr.cb[PIPE_SHADER_VERTEX]->size); + } + + draw_arrays(svga->swtnl.draw, prim, start, count); + + draw_flush(svga->swtnl.draw); + + /* Ensure the draw module didn't touch this */ + assert(i == svga->curr.num_vertex_buffers); + + /* + * unmap vertex/index buffers + */ + for (i = 0; i < svga->curr.num_vertex_buffers; i++) { + pipe_buffer_unmap(svga->pipe.screen, svga->curr.vb[i].buffer); + draw_set_mapped_vertex_buffer(draw, i, NULL); + } + + if (indexBuffer) { + pipe_buffer_unmap(svga->pipe.screen, indexBuffer); + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + if (svga->curr.cb[PIPE_SHADER_VERTEX]) { + pipe_buffer_unmap(svga->pipe.screen, + svga->curr.cb[PIPE_SHADER_VERTEX]); + } + + return ret; +} + + + + +boolean svga_init_swtnl( struct svga_context *svga ) +{ + svga->swtnl.backend = svga_vbuf_render_create(svga); + if(!svga->swtnl.backend) + goto fail; + + /* + * Create drawing context and plug our rendering stage into it. + */ + svga->swtnl.draw = draw_create(); + if (svga->swtnl.draw == NULL) + goto fail; + + + draw_set_rasterize_stage(svga->swtnl.draw, + draw_vbuf_stage( svga->swtnl.draw, svga->swtnl.backend )); + + draw_set_render(svga->swtnl.draw, svga->swtnl.backend); + + draw_install_aaline_stage(svga->swtnl.draw, &svga->pipe); + draw_install_aapoint_stage(svga->swtnl.draw, &svga->pipe); + draw_install_pstipple_stage(svga->swtnl.draw, &svga->pipe); + + draw_set_driver_clipping(svga->swtnl.draw, debug_get_bool_option("SVGA_SWTNL_FSE", FALSE)); + + return TRUE; + +fail: + if (svga->swtnl.backend) + svga->swtnl.backend->destroy( svga->swtnl.backend ); + + if (svga->swtnl.draw) + draw_destroy( svga->swtnl.draw ); + + return FALSE; +} + + +void svga_destroy_swtnl( struct svga_context *svga ) +{ + draw_destroy( svga->swtnl.draw ); +} diff --git a/src/gallium/drivers/svga/svga_swtnl_private.h b/src/gallium/drivers/svga/svga_swtnl_private.h new file mode 100644 index 0000000000..9bbb42910f --- /dev/null +++ b/src/gallium/drivers/svga/svga_swtnl_private.h @@ -0,0 +1,93 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_SWTNL_PRIVATE_H +#define SVGA_SWTNL_PRIVATE_H + +#include "svga_swtnl.h" +#include "draw/draw_vertex.h" + +#include "svga_types.h" +#include "svga3d_reg.h" + +/** + * Primitive renderer for svga. + */ +struct svga_vbuf_render { + struct vbuf_render base; + + struct svga_context *svga; + struct vertex_info vertex_info; + + unsigned vertex_size; + + unsigned prim; + + struct pipe_buffer *vbuf; + struct pipe_buffer *ibuf; + + /* current size of buffer */ + size_t vbuf_size; + size_t ibuf_size; + + /* size of that the buffer should be */ + size_t vbuf_alloc_size; + size_t ibuf_alloc_size; + + /* current write place */ + size_t vbuf_offset; + size_t ibuf_offset; + + /* currently used */ + size_t vbuf_used; + + SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS]; + unsigned vdecl_offset; + unsigned vdecl_count; + + ushort min_index; + ushort max_index; +}; + +/** + * Basically a cast wrapper. + */ +static INLINE struct svga_vbuf_render * +svga_vbuf_render( struct vbuf_render *render ) +{ + assert(render); + return (struct svga_vbuf_render *)render; +} + + +struct vbuf_render * +svga_vbuf_render_create( struct svga_context *svga ); + + +int +svga_swtnl_update_vdecl( struct svga_context *svga ); + + +#endif diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c new file mode 100644 index 0000000000..1616312113 --- /dev/null +++ b/src/gallium/drivers/svga/svga_swtnl_state.c @@ -0,0 +1,242 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" +#include "pipe/p_inlines.h" +#include "pipe/p_state.h" +#include "util/u_memory.h" + +#include "svga_context.h" +#include "svga_swtnl.h" +#include "svga_state.h" + +#include "svga_swtnl_private.h" + + +#define SVGA_POINT_ADJ_X -0.375 +#define SVGA_POINT_ADJ_Y -0.5 + +#define SVGA_LINE_ADJ_X -0.5 +#define SVGA_LINE_ADJ_Y -0.5 + +#define SVGA_TRIANGLE_ADJ_X -0.375 +#define SVGA_TRIANGLE_ADJ_Y -0.5 + + +static void set_draw_viewport( struct svga_context *svga ) +{ + struct pipe_viewport_state vp = svga->curr.viewport; + float adjx = 0; + float adjy = 0; + + switch (svga->curr.reduced_prim) { + case PIPE_PRIM_POINTS: + adjx = SVGA_POINT_ADJ_X; + adjy = SVGA_POINT_ADJ_Y; + break; + case PIPE_PRIM_LINES: + /* XXX: This is to compensate for the fact that wide lines are + * going to be drawn with triangles, but we're not catching all + * cases where that will happen. + */ + if (svga->curr.rast->templ.line_width > 1.0) + { + adjx = SVGA_LINE_ADJ_X + 0.175; + adjy = SVGA_LINE_ADJ_Y - 0.175; + } + else { + adjx = SVGA_LINE_ADJ_X; + adjy = SVGA_LINE_ADJ_Y; + } + break; + case PIPE_PRIM_TRIANGLES: + adjx += SVGA_TRIANGLE_ADJ_X; + adjy += SVGA_TRIANGLE_ADJ_Y; + break; + } + + vp.translate[0] += adjx; + vp.translate[1] += adjy; + + draw_set_viewport_state(svga->swtnl.draw, &vp); +} + +static int update_swtnl_draw( struct svga_context *svga, + unsigned dirty ) +{ + draw_flush( svga->swtnl.draw ); + + if (dirty & SVGA_NEW_VS) + draw_bind_vertex_shader(svga->swtnl.draw, + svga->curr.vs->draw_shader); + + if (dirty & SVGA_NEW_VBUFFER) + draw_set_vertex_buffers(svga->swtnl.draw, + svga->curr.num_vertex_buffers, + svga->curr.vb); + + if (dirty & SVGA_NEW_VELEMENT) + draw_set_vertex_elements(svga->swtnl.draw, + svga->curr.num_vertex_elements, + svga->curr.ve ); + + if (dirty & SVGA_NEW_CLIP) + draw_set_clip_state(svga->swtnl.draw, + &svga->curr.clip); + + if (dirty & (SVGA_NEW_VIEWPORT | + SVGA_NEW_REDUCED_PRIMITIVE | + SVGA_NEW_RAST)) + set_draw_viewport( svga ); + + if (dirty & SVGA_NEW_RAST) + draw_set_rasterizer_state(svga->swtnl.draw, + &svga->curr.rast->templ); + + if (dirty & SVGA_NEW_FRAME_BUFFER) + draw_set_mrd(svga->swtnl.draw, + svga->curr.depthscale); + + if (dirty & SVGA_NEW_EDGEFLAGS) + draw_set_edgeflags( svga->swtnl.draw, + svga->curr.edgeflags ); + + return 0; +} + + +struct svga_tracked_state svga_update_swtnl_draw = +{ + "update draw module state", + (SVGA_NEW_VS | + SVGA_NEW_VBUFFER | + SVGA_NEW_VELEMENT | + SVGA_NEW_CLIP | + SVGA_NEW_VIEWPORT | + SVGA_NEW_RAST | + SVGA_NEW_FRAME_BUFFER | + SVGA_NEW_REDUCED_PRIMITIVE | + SVGA_NEW_EDGEFLAGS), + update_swtnl_draw +}; + + +int svga_swtnl_update_vdecl( struct svga_context *svga ) +{ + struct svga_vbuf_render *svga_render = svga_vbuf_render(svga->swtnl.backend); + struct draw_context *draw = svga->swtnl.draw; + struct vertex_info *vinfo = &svga_render->vertex_info; + SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS]; + const enum interp_mode colorInterp = + svga->curr.rast->templ.flatshade ? INTERP_CONSTANT : INTERP_LINEAR; + const struct svga_fragment_shader *fs = svga->curr.fs; + int offset = 0; + int nr_decls = 0; + int src, i; + + memset(vinfo, 0, sizeof(*vinfo)); + memset(vdecl, 0, sizeof(vdecl)); + + /* always add position */ + src = draw_find_vs_output(draw, TGSI_SEMANTIC_POSITION, 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src); + vinfo->attrib[0].emit = EMIT_4F; + vdecl[0].array.offset = offset; + vdecl[0].identity.type = SVGA3D_DECLTYPE_FLOAT4; + vdecl[0].identity.usage = SVGA3D_DECLUSAGE_POSITIONT; + vdecl[0].identity.usageIndex = 0; + offset += 16; + nr_decls++; + + for (i = 0; i < fs->base.info.num_inputs; i++) { + unsigned name = fs->base.info.input_semantic_name[i]; + unsigned index = fs->base.info.input_semantic_index[i]; + src = draw_find_vs_output(draw, name, index); + vdecl[nr_decls].array.offset = offset; + vdecl[nr_decls].identity.usageIndex = fs->base.info.input_semantic_index[i]; + + switch (name) { + case TGSI_SEMANTIC_COLOR: + draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); + vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_COLOR; + vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT4; + offset += 16; + nr_decls++; + break; + case TGSI_SEMANTIC_GENERIC: + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_TEXCOORD; + vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT4; + vdecl[nr_decls].identity.usageIndex += 1; + offset += 16; + nr_decls++; + break; + case TGSI_SEMANTIC_FOG: + draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); + vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_TEXCOORD; + vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT1; + assert(vdecl[nr_decls].identity.usageIndex == 0); + offset += 4; + nr_decls++; + break; + case TGSI_SEMANTIC_POSITION: + /* generated internally, not a vertex shader output */ + break; + default: + assert(0); + } + } + + draw_compute_vertex_size(vinfo); + + svga_render->vdecl_count = nr_decls; + for (i = 0; i < svga_render->vdecl_count; i++) + vdecl[i].array.stride = offset; + + if (memcmp(svga_render->vdecl, vdecl, sizeof(vdecl)) == 0) + return 0; + + memcpy(svga_render->vdecl, vdecl, sizeof(vdecl)); + svga->swtnl.new_vdecl = TRUE; + + return 0; +} + + +static int update_swtnl_vdecl( struct svga_context *svga, + unsigned dirty ) +{ + return svga_swtnl_update_vdecl( svga ); +} + + +struct svga_tracked_state svga_update_swtnl_vdecl = +{ + "update draw module vdecl", + (SVGA_NEW_VS | + SVGA_NEW_FS), + update_swtnl_vdecl +}; diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c new file mode 100644 index 0000000000..44d0930bc0 --- /dev/null +++ b/src/gallium/drivers/svga/svga_tgsi.c @@ -0,0 +1,266 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + + +#include "pipe/p_compiler.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_defines.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_scan.h" +#include "util/u_memory.h" + +#include "svgadump/st_shader_dump.h" + +#include "svga_context.h" +#include "svga_tgsi.h" +#include "svga_tgsi_emit.h" +#include "svga_debug.h" + +#include "svga_hw_reg.h" +#include "svga3d_shaderdefs.h" + + +/* Sinkhole used only in error conditions. + */ +static char err_buf[128]; + +#if 0 +static void svga_destroy_shader_emitter( struct svga_shader_emitter *emit ) +{ + if (emit->buf != err_buf) + FREE(emit->buf); +} +#endif + + +static boolean svga_shader_expand( struct svga_shader_emitter *emit ) +{ + char *new_buf; + unsigned newsize = emit->size * 2; + + if(emit->buf != err_buf) + new_buf = REALLOC(emit->buf, emit->size, newsize); + else + new_buf = NULL; + + if (new_buf == NULL) { + emit->ptr = err_buf; + emit->buf = err_buf; + emit->size = sizeof(err_buf); + return FALSE; + } + + emit->size = newsize; + emit->ptr = new_buf + (emit->ptr - emit->buf); + emit->buf = new_buf; + return TRUE; +} + +static INLINE boolean reserve( struct svga_shader_emitter *emit, + unsigned nr_dwords ) +{ + if (emit->ptr - emit->buf + nr_dwords * sizeof(unsigned) >= emit->size) { + if (!svga_shader_expand( emit )) + return FALSE; + } + + return TRUE; +} + +boolean svga_shader_emit_dword( struct svga_shader_emitter *emit, + unsigned dword ) +{ + if (!reserve(emit, 1)) + return FALSE; + + *(unsigned *)emit->ptr = dword; + emit->ptr += sizeof dword; + return TRUE; +} + +boolean svga_shader_emit_dwords( struct svga_shader_emitter *emit, + const unsigned *dwords, + unsigned nr ) +{ + if (!reserve(emit, nr)) + return FALSE; + + memcpy( emit->ptr, dwords, nr * sizeof *dwords ); + emit->ptr += nr * sizeof *dwords; + return TRUE; +} + +boolean svga_shader_emit_opcode( struct svga_shader_emitter *emit, + unsigned opcode ) +{ + SVGA3dShaderInstToken *here; + + if (!reserve(emit, 1)) + return FALSE; + + here = (SVGA3dShaderInstToken *)emit->ptr; + here->value = opcode; + + if (emit->insn_offset) { + SVGA3dShaderInstToken *prev = (SVGA3dShaderInstToken *)(emit->buf + + emit->insn_offset); + prev->size = (here - prev) - 1; + } + + emit->insn_offset = emit->ptr - emit->buf; + emit->ptr += sizeof(unsigned); + return TRUE; +} + +#define SVGA3D_PS_2X (SVGA3D_PS_20 | 1) +#define SVGA3D_VS_2X (SVGA3D_VS_20 | 1) + +static boolean svga_shader_emit_header( struct svga_shader_emitter *emit ) +{ + SVGA3dShaderVersion header; + + memset( &header, 0, sizeof header ); + + switch (emit->unit) { + case PIPE_SHADER_FRAGMENT: + header.value = emit->use_sm30 ? SVGA3D_PS_30 : SVGA3D_PS_2X; + break; + case PIPE_SHADER_VERTEX: + header.value = emit->use_sm30 ? SVGA3D_VS_30 : SVGA3D_VS_2X; + break; + } + + return svga_shader_emit_dword( emit, header.value ); +} + + + + + +/* Parse TGSI shader and translate to SVGA/DX9 serialized + * representation. + * + * In this function SVGA shader is emitted to an in-memory buffer that + * can be dynamically grown. Once we've finished and know how large + * it is, it will be copied to a hardware buffer for upload. + */ +static struct svga_shader_result * +svga_tgsi_translate( const struct svga_shader *shader, + union svga_compile_key key, + unsigned unit ) +{ + struct svga_shader_result *result = NULL; + struct svga_shader_emitter emit; + int ret = 0; + + memset(&emit, 0, sizeof(emit)); + + emit.use_sm30 = shader->use_sm30; + emit.size = 1024; + emit.buf = MALLOC(emit.size); + if (emit.buf == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto fail; + } + + emit.ptr = emit.buf; + emit.unit = unit; + emit.key = key; + + tgsi_scan_shader( shader->tokens, &emit.info); + + emit.imm_start = emit.info.file_max[TGSI_FILE_CONSTANT] + 1; + + if (unit == PIPE_SHADER_FRAGMENT) + emit.imm_start += key.fkey.num_unnormalized_coords; + + if (unit == PIPE_SHADER_VERTEX) { + emit.imm_start += key.vkey.need_prescale ? 2 : 0; + emit.imm_start += key.vkey.num_zero_stride_vertex_elements; + } + + emit.nr_hw_const = (emit.imm_start + emit.info.file_max[TGSI_FILE_IMMEDIATE] + 1); + + emit.nr_hw_temp = emit.info.file_max[TGSI_FILE_TEMPORARY] + 1; + emit.in_main_func = TRUE; + + if (!svga_shader_emit_header( &emit )) + goto fail; + + if (!svga_shader_emit_instructions( &emit, shader->tokens )) + goto fail; + + result = CALLOC_STRUCT(svga_shader_result); + if (result == NULL) + goto fail; + + result->shader = shader; + result->tokens = (const unsigned *)emit.buf; + result->nr_tokens = (emit.ptr - emit.buf) / sizeof(unsigned); + memcpy(&result->key, &key, sizeof key); + + return result; + +fail: + FREE(result); + FREE(emit.buf); + return NULL; +} + + + + +struct svga_shader_result * +svga_translate_fragment_program( const struct svga_fragment_shader *fs, + const struct svga_fs_compile_key *fkey ) +{ + union svga_compile_key key; + memcpy(&key.fkey, fkey, sizeof *fkey); + + return svga_tgsi_translate( &fs->base, + key, + PIPE_SHADER_FRAGMENT ); +} + +struct svga_shader_result * +svga_translate_vertex_program( const struct svga_vertex_shader *vs, + const struct svga_vs_compile_key *vkey ) +{ + union svga_compile_key key; + memcpy(&key.vkey, vkey, sizeof *vkey); + + return svga_tgsi_translate( &vs->base, + key, + PIPE_SHADER_VERTEX ); +} + + +void svga_destroy_shader_result( struct svga_shader_result *result ) +{ + FREE((unsigned *)result->tokens); + FREE(result); +} + diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h new file mode 100644 index 0000000000..896c90a89a --- /dev/null +++ b/src/gallium/drivers/svga/svga_tgsi.h @@ -0,0 +1,139 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_TGSI_H +#define SVGA_TGSI_H + +#include "pipe/p_state.h" + +#include "svga_hw_reg.h" + +struct svga_fragment_shader; +struct svga_vertex_shader; +struct svga_shader; +struct tgsi_shader_info; +struct tgsi_token; + + +struct svga_vs_compile_key +{ + ubyte need_prescale:1; + ubyte allow_psiz:1; + unsigned zero_stride_vertex_elements; + ubyte num_zero_stride_vertex_elements:6; +}; + +struct svga_fs_compile_key +{ + boolean light_twoside:1; + boolean front_cw:1; + ubyte num_textures; + ubyte num_unnormalized_coords; + struct { + ubyte compare_mode : 1; + ubyte compare_func : 3; + ubyte unnormalized : 1; + + ubyte width_height_idx : 7; + + ubyte texture_target; + } tex[PIPE_MAX_SAMPLERS]; +}; + +union svga_compile_key { + struct svga_vs_compile_key vkey; + struct svga_fs_compile_key fkey; +}; + +struct svga_shader_result +{ + const struct svga_shader *shader; + + /* Parameters used to generate this compilation result: + */ + union svga_compile_key key; + + /* Compiled shader tokens: + */ + const unsigned *tokens; + unsigned nr_tokens; + + /* SVGA Shader ID: + */ + unsigned id; + + /* Next compilation result: + */ + struct svga_shader_result *next; +}; + + +/* TGSI doesn't provide use with VS input semantics (they're actually + * pretty meaningless), so we just generate some plausible ones here. + * This is called both from within the TGSI translator and when + * building vdecls to ensure they match up. + * + * The real use of this information is matching vertex elements to + * fragment shader inputs in the case where vertex shader is disabled. + */ +static INLINE void svga_generate_vdecl_semantics( unsigned idx, + unsigned *usage, + unsigned *usage_index ) +{ + if (idx == 0) { + *usage = SVGA3D_DECLUSAGE_POSITION; + *usage_index = 0; + } + else { + *usage = SVGA3D_DECLUSAGE_TEXCOORD; + *usage_index = idx - 1; + } +} + + + +static INLINE unsigned svga_vs_key_size( const struct svga_vs_compile_key *key ) +{ + return sizeof *key; +} + +static INLINE unsigned svga_fs_key_size( const struct svga_fs_compile_key *key ) +{ + return (const char *)&key->tex[key->num_textures].texture_target - + (const char *)key; +} + +struct svga_shader_result * +svga_translate_fragment_program( const struct svga_fragment_shader *fs, + const struct svga_fs_compile_key *fkey ); + +struct svga_shader_result * +svga_translate_vertex_program( const struct svga_vertex_shader *fs, + const struct svga_vs_compile_key *vkey ); + + +void svga_destroy_shader_result( struct svga_shader_result *result ); + +#endif diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c new file mode 100644 index 0000000000..54457082a0 --- /dev/null +++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c @@ -0,0 +1,280 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "util/u_memory.h" + +#include "svga_tgsi_emit.h" +#include "svga_context.h" + + + + +static boolean ps20_input( struct svga_shader_emitter *emit, + struct tgsi_declaration_semantic semantic, + unsigned idx ) +{ + struct src_register reg; + SVGA3DOpDclArgs dcl; + SVGA3dShaderInstToken opcode; + + opcode = inst_token( SVGA3DOP_DCL ); + dcl.values[0] = 0; + dcl.values[1] = 0; + + switch (semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + /* Special case: + */ + reg = src_register( SVGA3DREG_MISCTYPE, + SVGA3DMISCREG_POSITION ); + break; + case TGSI_SEMANTIC_COLOR: + reg = src_register( SVGA3DREG_INPUT, + semantic.SemanticIndex ); + break; + case TGSI_SEMANTIC_FOG: + assert(semantic.SemanticIndex == 0); + reg = src_register( SVGA3DREG_TEXTURE, 0 ); + break; + case TGSI_SEMANTIC_GENERIC: + reg = src_register( SVGA3DREG_TEXTURE, + semantic.SemanticIndex + 1 ); + break; + default: + assert(0); + return TRUE; + } + + emit->input_map[idx] = reg; + + dcl.dst = dst( reg ); + + dcl.usage = 0; + dcl.index = 0; + + dcl.values[0] |= 1<<31; + + return (emit_instruction(emit, opcode) && + svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values))); +} + + +static boolean ps20_output( struct svga_shader_emitter *emit, + struct tgsi_declaration_semantic semantic, + unsigned idx ) +{ + SVGA3dShaderDestToken reg; + + switch (semantic.SemanticName) { + case TGSI_SEMANTIC_COLOR: + if (semantic.SemanticIndex < PIPE_MAX_COLOR_BUFS) { + unsigned cbuf = semantic.SemanticIndex; + + emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, + emit->nr_hw_temp++ ); + emit->temp_col[cbuf] = emit->output_map[idx]; + emit->true_col[cbuf] = dst_register( SVGA3DREG_COLOROUT, + semantic.SemanticIndex ); + } + else { + assert(0); + reg = dst_register( SVGA3DREG_COLOROUT, 0 ); + } + break; + case TGSI_SEMANTIC_POSITION: + emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, + emit->nr_hw_temp++ ); + emit->temp_pos = emit->output_map[idx]; + emit->true_pos = dst_register( SVGA3DREG_DEPTHOUT, + semantic.SemanticIndex ); + break; + default: + assert(0); + reg = dst_register( SVGA3DREG_COLOROUT, 0 ); + break; + } + + return TRUE; +} + + +static boolean vs20_input( struct svga_shader_emitter *emit, + struct tgsi_declaration_semantic semantic, + unsigned idx ) +{ + SVGA3DOpDclArgs dcl; + SVGA3dShaderInstToken opcode; + + opcode = inst_token( SVGA3DOP_DCL ); + dcl.values[0] = 0; + dcl.values[1] = 0; + + emit->input_map[idx] = src_register( SVGA3DREG_INPUT, idx ); + dcl.dst = dst_register( SVGA3DREG_INPUT, idx ); + + assert(dcl.dst.reserved0); + + /* Mesa doesn't provide use with VS input semantics (they're + * actually pretty meaningless), so we just generate some plausible + * ones here. This has to match what we declare in the vdecl code + * in svga_pipe_vertex.c. + */ + if (idx == 0) { + dcl.usage = SVGA3D_DECLUSAGE_POSITION; + dcl.index = 0; + } + else { + dcl.usage = SVGA3D_DECLUSAGE_TEXCOORD; + dcl.index = idx - 1; + } + + dcl.values[0] |= 1<<31; + + return (emit_instruction(emit, opcode) && + svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values))); +} + + +static boolean vs20_output( struct svga_shader_emitter *emit, + struct tgsi_declaration_semantic semantic, + unsigned idx ) +{ + /* Don't emit dcl instruction for vs20 inputs + */ + + /* Just build the register map table: + */ + switch (semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + assert(semantic.SemanticIndex == 0); + emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, + emit->nr_hw_temp++ ); + emit->temp_pos = emit->output_map[idx]; + emit->true_pos = dst_register( SVGA3DREG_RASTOUT, + SVGA3DRASTOUT_POSITION); + break; + case TGSI_SEMANTIC_PSIZE: + assert(semantic.SemanticIndex == 0); + emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, + emit->nr_hw_temp++ ); + emit->temp_psiz = emit->output_map[idx]; + emit->true_psiz = dst_register( SVGA3DREG_RASTOUT, + SVGA3DRASTOUT_PSIZE ); + break; + case TGSI_SEMANTIC_FOG: + assert(semantic.SemanticIndex == 0); + emit->output_map[idx] = dst_register( SVGA3DREG_TEXCRDOUT, 0 ); + break; + case TGSI_SEMANTIC_COLOR: + /* oD0 */ + emit->output_map[idx] = dst_register( SVGA3DREG_ATTROUT, + semantic.SemanticIndex ); + break; + case TGSI_SEMANTIC_GENERIC: + emit->output_map[idx] = dst_register( SVGA3DREG_TEXCRDOUT, + semantic.SemanticIndex + 1 ); + break; + default: + assert(0); + emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, 0 ); + return FALSE; + } + + return TRUE; +} + +static boolean ps20_sampler( struct svga_shader_emitter *emit, + struct tgsi_declaration_semantic semantic, + unsigned idx ) +{ + SVGA3DOpDclArgs dcl; + SVGA3dShaderInstToken opcode; + + opcode = inst_token( SVGA3DOP_DCL ); + dcl.values[0] = 0; + dcl.values[1] = 0; + + dcl.dst = dst_register( SVGA3DREG_SAMPLER, idx ); + dcl.type = svga_tgsi_sampler_type( emit, idx ); + + return (emit_instruction(emit, opcode) && + svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values))); +} + + +boolean svga_translate_decl_sm20( struct svga_shader_emitter *emit, + const struct tgsi_full_declaration *decl ) +{ + unsigned first = decl->DeclarationRange.First; + unsigned last = decl->DeclarationRange.Last; + unsigned semantic = 0; + unsigned semantic_idx = 0; + unsigned idx; + + if (decl->Declaration.Semantic) { + semantic = decl->Semantic.SemanticName; + semantic_idx = decl->Semantic.SemanticIndex; + } + + for( idx = first; idx <= last; idx++ ) { + boolean ok; + + switch (decl->Declaration.File) { + case TGSI_FILE_SAMPLER: + assert (emit->unit == PIPE_SHADER_FRAGMENT); + ok = ps20_sampler( emit, decl->Semantic, idx ); + break; + + case TGSI_FILE_INPUT: + if (emit->unit == PIPE_SHADER_VERTEX) + ok = vs20_input( emit, decl->Semantic, idx ); + else + ok = ps20_input( emit, decl->Semantic, idx ); + break; + + case TGSI_FILE_OUTPUT: + if (emit->unit == PIPE_SHADER_VERTEX) + ok = vs20_output( emit, decl->Semantic, idx ); + else + ok = ps20_output( emit, decl->Semantic, idx ); + break; + + default: + /* don't need to declare other vars */ + ok = TRUE; + } + + if (!ok) + return FALSE; + } + + return TRUE; +} + + + diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c new file mode 100644 index 0000000000..08e7dfb117 --- /dev/null +++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c @@ -0,0 +1,385 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "util/u_memory.h" + +#include "svga_tgsi_emit.h" +#include "svga_context.h" + +static boolean translate_vs_ps_semantic( struct tgsi_declaration_semantic semantic, + unsigned *usage, + unsigned *idx ) +{ + switch (semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + *idx = semantic.SemanticIndex; + *usage = SVGA3D_DECLUSAGE_POSITION; + break; + case TGSI_SEMANTIC_COLOR: + + *idx = semantic.SemanticIndex; + *usage = SVGA3D_DECLUSAGE_COLOR; + break; + case TGSI_SEMANTIC_BCOLOR: + *idx = semantic.SemanticIndex + 2; /* sharing with COLOR */ + *usage = SVGA3D_DECLUSAGE_COLOR; + break; + case TGSI_SEMANTIC_FOG: + *idx = 0; + assert(semantic.SemanticIndex == 0); + *usage = SVGA3D_DECLUSAGE_TEXCOORD; + break; + case TGSI_SEMANTIC_PSIZE: + *idx = semantic.SemanticIndex; + *usage = SVGA3D_DECLUSAGE_PSIZE; + break; + case TGSI_SEMANTIC_GENERIC: + *idx = semantic.SemanticIndex + 1; /* texcoord[0] is reserved for fog */ + *usage = SVGA3D_DECLUSAGE_TEXCOORD; + break; + case TGSI_SEMANTIC_NORMAL: + *idx = semantic.SemanticIndex; + *usage = SVGA3D_DECLUSAGE_NORMAL; + break; + default: + assert(0); + *usage = SVGA3D_DECLUSAGE_TEXCOORD; + *idx = 0; + return FALSE; + } + + return TRUE; +} + + +static boolean emit_decl( struct svga_shader_emitter *emit, + SVGA3dShaderDestToken reg, + unsigned usage, + unsigned index ) +{ + SVGA3DOpDclArgs dcl; + SVGA3dShaderInstToken opcode; + + opcode = inst_token( SVGA3DOP_DCL ); + dcl.values[0] = 0; + dcl.values[1] = 0; + + dcl.dst = reg; + dcl.usage = usage; + dcl.index = index; + dcl.values[0] |= 1<<31; + + return (emit_instruction(emit, opcode) && + svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values))); +} + +static boolean emit_vface_decl( struct svga_shader_emitter *emit ) +{ + if (!emit->emitted_vface) { + SVGA3dShaderDestToken reg = + dst_register( SVGA3DREG_MISCTYPE, + SVGA3DMISCREG_FACE ); + + if (!emit_decl( emit, reg, 0, 0 )) + return FALSE; + + emit->emitted_vface = TRUE; + } + return TRUE; +} + +static boolean ps30_input( struct svga_shader_emitter *emit, + struct tgsi_declaration_semantic semantic, + unsigned idx ) +{ + unsigned usage, index; + SVGA3dShaderDestToken reg; + + if (semantic.SemanticName == TGSI_SEMANTIC_POSITION) { + emit->input_map[idx] = src_register( SVGA3DREG_MISCTYPE, + SVGA3DMISCREG_POSITION ); + + emit->input_map[idx].base.swizzle = TRANSLATE_SWIZZLE( TGSI_SWIZZLE_X, + TGSI_SWIZZLE_Y, + TGSI_SWIZZLE_Y, + TGSI_SWIZZLE_Y ); + + reg = writemask( dst(emit->input_map[idx]), + TGSI_WRITEMASK_XY ); + + return emit_decl( emit, reg, 0, 0 ); + } + else if (emit->key.fkey.light_twoside && + (semantic.SemanticName == TGSI_SEMANTIC_COLOR)) { + + if (!translate_vs_ps_semantic( semantic, &usage, &index )) + return FALSE; + + emit->internal_color_idx[emit->internal_color_count] = idx; + emit->input_map[idx] = src_register( SVGA3DREG_INPUT, emit->ps30_input_count ); + emit->ps30_input_count++; + emit->internal_color_count++; + + reg = dst( emit->input_map[idx] ); + + if (!emit_decl( emit, reg, usage, index )) + return FALSE; + + semantic.SemanticName = TGSI_SEMANTIC_BCOLOR; + if (!translate_vs_ps_semantic( semantic, &usage, &index )) + return FALSE; + + reg = dst_register( SVGA3DREG_INPUT, emit->ps30_input_count++ ); + + if (!emit_decl( emit, reg, usage, index )) + return FALSE; + + if (!emit_vface_decl( emit )) + return FALSE; + + return TRUE; + } + else if (semantic.SemanticName == TGSI_SEMANTIC_FACE) { + if (!emit_vface_decl( emit )) + return FALSE; + emit->emit_frontface = TRUE; + emit->internal_frontface_idx = idx; + return TRUE; + } + else { + + if (!translate_vs_ps_semantic( semantic, &usage, &index )) + return FALSE; + + emit->input_map[idx] = src_register( SVGA3DREG_INPUT, emit->ps30_input_count++ ); + reg = dst( emit->input_map[idx] ); + + return emit_decl( emit, reg, usage, index ); + } + +} + + +/* PS output registers are the same as 2.0 + */ +static boolean ps30_output( struct svga_shader_emitter *emit, + struct tgsi_declaration_semantic semantic, + unsigned idx ) +{ + SVGA3dShaderDestToken reg; + + switch (semantic.SemanticName) { + case TGSI_SEMANTIC_COLOR: + emit->output_map[idx] = dst_register( SVGA3DREG_COLOROUT, + semantic.SemanticIndex ); + break; + case TGSI_SEMANTIC_POSITION: + emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, + emit->nr_hw_temp++ ); + emit->temp_pos = emit->output_map[idx]; + emit->true_pos = dst_register( SVGA3DREG_DEPTHOUT, + semantic.SemanticIndex ); + break; + default: + assert(0); + reg = dst_register( SVGA3DREG_COLOROUT, 0 ); + break; + } + + return TRUE; +} + + +/* We still make up the input semantics the same as in 2.0 + */ +static boolean vs30_input( struct svga_shader_emitter *emit, + struct tgsi_declaration_semantic semantic, + unsigned idx ) +{ + SVGA3DOpDclArgs dcl; + SVGA3dShaderInstToken opcode; + unsigned usage, index; + + opcode = inst_token( SVGA3DOP_DCL ); + dcl.values[0] = 0; + dcl.values[1] = 0; + + if (emit->key.vkey.zero_stride_vertex_elements & (1 << idx)) { + unsigned i; + unsigned offset = 0; + unsigned start_idx = emit->info.file_max[TGSI_FILE_CONSTANT] + 1; + /* adjust for prescale constants */ + start_idx += emit->key.vkey.need_prescale ? 2 : 0; + /* compute the offset from the start of zero stride constants */ + for (i = 0; i < PIPE_MAX_ATTRIBS && i < idx; ++i) { + if (emit->key.vkey.zero_stride_vertex_elements & (1<input_map[idx] = src_register( SVGA3DREG_CONST, + start_idx + offset ); + } else { + emit->input_map[idx] = src_register( SVGA3DREG_INPUT, idx ); + dcl.dst = dst_register( SVGA3DREG_INPUT, idx ); + + assert(dcl.dst.reserved0); + + svga_generate_vdecl_semantics( idx, &usage, &index ); + + dcl.usage = usage; + dcl.index = index; + dcl.values[0] |= 1<<31; + + return (emit_instruction(emit, opcode) && + svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values))); + } + return TRUE; +} + +/* VS3.0 outputs have proper declarations and semantic info for + * matching against PS inputs. + */ +static boolean vs30_output( struct svga_shader_emitter *emit, + struct tgsi_declaration_semantic semantic, + unsigned idx ) +{ + SVGA3DOpDclArgs dcl; + SVGA3dShaderInstToken opcode; + unsigned usage, index; + + opcode = inst_token( SVGA3DOP_DCL ); + dcl.values[0] = 0; + dcl.values[1] = 0; + + if (!translate_vs_ps_semantic( semantic, &usage, &index )) + return FALSE; + + dcl.dst = dst_register( SVGA3DREG_OUTPUT, idx ); + dcl.usage = usage; + dcl.index = index; + dcl.values[0] |= 1<<31; + + if (semantic.SemanticName == TGSI_SEMANTIC_POSITION) { + assert(idx == 0); + emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, + emit->nr_hw_temp++ ); + emit->temp_pos = emit->output_map[idx]; + emit->true_pos = dcl.dst; + } + else if (semantic.SemanticName == TGSI_SEMANTIC_PSIZE) { + emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, + emit->nr_hw_temp++ ); + emit->temp_psiz = emit->output_map[idx]; + + /* This has the effect of not declaring psiz (below) and not + * emitting the final MOV to true_psiz in the postamble. + */ + if (!emit->key.vkey.allow_psiz) + return TRUE; + + emit->true_psiz = dcl.dst; + } + else { + emit->output_map[idx] = dcl.dst; + } + + + return (emit_instruction(emit, opcode) && + svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values))); +} + +static boolean ps30_sampler( struct svga_shader_emitter *emit, + struct tgsi_declaration_semantic semantic, + unsigned idx ) +{ + SVGA3DOpDclArgs dcl; + SVGA3dShaderInstToken opcode; + + opcode = inst_token( SVGA3DOP_DCL ); + dcl.values[0] = 0; + dcl.values[1] = 0; + + dcl.dst = dst_register( SVGA3DREG_SAMPLER, idx ); + dcl.type = svga_tgsi_sampler_type( emit, idx ); + dcl.values[0] |= 1<<31; + + return (emit_instruction(emit, opcode) && + svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values))); +} + + +boolean svga_translate_decl_sm30( struct svga_shader_emitter *emit, + const struct tgsi_full_declaration *decl ) +{ + unsigned first = decl->DeclarationRange.First; + unsigned last = decl->DeclarationRange.Last; + unsigned semantic = 0; + unsigned semantic_idx = 0; + unsigned idx; + + if (decl->Declaration.Semantic) { + semantic = decl->Semantic.SemanticName; + semantic_idx = decl->Semantic.SemanticIndex; + } + + for( idx = first; idx <= last; idx++ ) { + boolean ok; + + switch (decl->Declaration.File) { + case TGSI_FILE_SAMPLER: + assert (emit->unit == PIPE_SHADER_FRAGMENT); + ok = ps30_sampler( emit, decl->Semantic, idx ); + break; + + case TGSI_FILE_INPUT: + if (emit->unit == PIPE_SHADER_VERTEX) + ok = vs30_input( emit, decl->Semantic, idx ); + else + ok = ps30_input( emit, decl->Semantic, idx ); + break; + + case TGSI_FILE_OUTPUT: + if (emit->unit == PIPE_SHADER_VERTEX) + ok = vs30_output( emit, decl->Semantic, idx ); + else + ok = ps30_output( emit, decl->Semantic, idx ); + break; + + default: + /* don't need to declare other vars */ + ok = TRUE; + } + + if (!ok) + return FALSE; + } + + return TRUE; +} + + + diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h new file mode 100644 index 0000000000..2557824293 --- /dev/null +++ b/src/gallium/drivers/svga/svga_tgsi_emit.h @@ -0,0 +1,345 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_TGSI_EMIT_H +#define SVGA_TGSI_EMIT_H + +#include "tgsi/tgsi_scan.h" +#include "svga_hw_reg.h" +#include "svga_tgsi.h" +#include "svga3d_shaderdefs.h" + +struct src_register +{ + SVGA3dShaderSrcToken base; + SVGA3dShaderSrcToken indirect; +}; + + +struct svga_arl_consts { + int number; + int idx; + int swizzle; + int arl_num; +}; + +/* Internal functions: + */ + +struct svga_shader_emitter +{ + boolean use_sm30; + + unsigned size; + char *buf; + char *ptr; + + union svga_compile_key key; + struct tgsi_shader_info info; + int unit; + + int imm_start; + + int nr_hw_const; + int nr_hw_temp; + + int insn_offset; + + int internal_temp_count; + int internal_imm_count; + + int internal_color_idx[2]; /* diffuse, specular */ + int internal_color_count; + + boolean emitted_vface; + boolean emit_frontface; + int internal_frontface_idx; + + int ps30_input_count; + + boolean in_main_func; + + boolean created_zero_immediate; + int zero_immediate_idx; + + boolean created_loop_const; + int loop_const_idx; + + boolean created_sincos_consts; + int sincos_consts_idx; + + unsigned label[32]; + unsigned nr_labels; + + struct src_register input_map[PIPE_MAX_ATTRIBS]; + SVGA3dShaderDestToken output_map[PIPE_MAX_ATTRIBS]; + + struct src_register imm_0055; + SVGA3dShaderDestToken temp_pos; + SVGA3dShaderDestToken true_pos; + + SVGA3dShaderDestToken temp_col[PIPE_MAX_COLOR_BUFS]; + SVGA3dShaderDestToken true_col[PIPE_MAX_COLOR_BUFS]; + + SVGA3dShaderDestToken temp_psiz; + SVGA3dShaderDestToken true_psiz; + + struct svga_arl_consts arl_consts[12]; + int num_arl_consts; + int current_arl; +}; + + +boolean svga_shader_emit_dword( struct svga_shader_emitter *emit, + unsigned dword ); + +boolean svga_shader_emit_dwords( struct svga_shader_emitter *emit, + const unsigned *dwords, + unsigned nr ); + +boolean svga_shader_emit_opcode( struct svga_shader_emitter *emit, + unsigned opcode ); + +boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit, + const struct tgsi_token *tokens ); + +boolean svga_translate_decl_sm20( struct svga_shader_emitter *emit, + const struct tgsi_full_declaration *decl ); + +boolean svga_translate_decl_sm30( struct svga_shader_emitter *emit, + const struct tgsi_full_declaration *decl ); + + +static INLINE boolean emit_dst( struct svga_shader_emitter *emit, + SVGA3dShaderDestToken dest ) +{ + assert(dest.reserved0); + return svga_shader_emit_dword( emit, dest.value ); +} + +static INLINE boolean emit_src( struct svga_shader_emitter *emit, + const struct src_register src ) +{ + if (src.base.relAddr) { + assert(src.base.reserved0); + assert(src.indirect.reserved0); + return (svga_shader_emit_dword( emit, src.base.value ) && + svga_shader_emit_dword( emit, src.indirect.value )); + } + else { + assert(src.base.reserved0); + return svga_shader_emit_dword( emit, src.base.value ); + } +} + + +static INLINE boolean emit_instruction( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken opcode ) +{ + return svga_shader_emit_opcode( emit, opcode.value ); +} + + +static INLINE boolean emit_op1( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken inst, + SVGA3dShaderDestToken dest, + struct src_register src0 ) +{ + return (emit_instruction( emit, inst ) && + emit_dst( emit, dest ) && + emit_src( emit, src0 )); +} + +static INLINE boolean emit_op2( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken inst, + SVGA3dShaderDestToken dest, + struct src_register src0, + struct src_register src1 ) +{ + return (emit_instruction( emit, inst ) && + emit_dst( emit, dest ) && + emit_src( emit, src0 ) && + emit_src( emit, src1 )); +} + +static INLINE boolean emit_op3( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken inst, + SVGA3dShaderDestToken dest, + struct src_register src0, + struct src_register src1, + struct src_register src2 ) +{ + return (emit_instruction( emit, inst ) && + emit_dst( emit, dest ) && + emit_src( emit, src0 ) && + emit_src( emit, src1 ) && + emit_src( emit, src2 )); +} + + +#define TRANSLATE_SWIZZLE(x,y,z,w) ((x) | ((y) << 2) | ((z) << 4) | ((w) << 6)) +#define SWIZZLE_XYZW \ + TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Z,TGSI_SWIZZLE_W) +#define SWIZZLE_XXXX \ + TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,TGSI_SWIZZLE_X,TGSI_SWIZZLE_X,TGSI_SWIZZLE_X) +#define SWIZZLE_YYYY \ + TRANSLATE_SWIZZLE(TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Y) +#define SWIZZLE_ZZZZ \ + TRANSLATE_SWIZZLE(TGSI_SWIZZLE_Z,TGSI_SWIZZLE_Z,TGSI_SWIZZLE_Z,TGSI_SWIZZLE_Z) +#define SWIZZLE_WWWW \ + TRANSLATE_SWIZZLE(TGSI_SWIZZLE_W,TGSI_SWIZZLE_W,TGSI_SWIZZLE_W,TGSI_SWIZZLE_W) + + + +static INLINE SVGA3dShaderInstToken +inst_token( unsigned opcode ) +{ + SVGA3dShaderInstToken inst; + + inst.value = 0; + inst.op = opcode; + + return inst; +} + +static INLINE SVGA3dShaderDestToken +dst_register( unsigned file, + int number ) +{ + SVGA3dShaderDestToken dest; + + dest.value = 0; + dest.num = number; + dest.type_upper = file >> 3; + dest.relAddr = 0; + dest.reserved1 = 0; + dest.mask = 0xf; + dest.dstMod = 0; + dest.shfScale = 0; + dest.type_lower = file & 0x7; + dest.reserved0 = 1; /* is_reg */ + + return dest; +} + +static INLINE SVGA3dShaderDestToken +writemask( SVGA3dShaderDestToken dest, + unsigned mask ) +{ + dest.mask &= mask; + return dest; +} + + +static INLINE SVGA3dShaderSrcToken +src_token( unsigned file, int number ) +{ + SVGA3dShaderSrcToken src; + + src.value = 0; + src.num = number; + src.type_upper = file >> 3; + src.relAddr = 0; + src.reserved1 = 0; + src.swizzle = SWIZZLE_XYZW; + src.srcMod = 0; + src.type_lower = file & 0x7; + src.reserved0 = 1; /* is_reg */ + + return src; +} + + +static INLINE struct src_register +absolute( struct src_register src ) +{ + src.base.srcMod = SVGA3DSRCMOD_ABS; + + return src; +} + + +static INLINE struct src_register +negate( struct src_register src ) +{ + switch (src.base.srcMod) { + case SVGA3DSRCMOD_ABS: + src.base.srcMod = SVGA3DSRCMOD_ABSNEG; + break; + case SVGA3DSRCMOD_ABSNEG: + src.base.srcMod = SVGA3DSRCMOD_ABS; + break; + case SVGA3DSRCMOD_NEG: + src.base.srcMod = SVGA3DSRCMOD_NONE; + break; + case SVGA3DSRCMOD_NONE: + src.base.srcMod = SVGA3DSRCMOD_NEG; + break; + } + return src; +} + + +static INLINE struct src_register +src_register( unsigned file, int number ) +{ + struct src_register src; + + src.base = src_token( file, number ); + src.indirect.value = 0; + + return src; +} + +static INLINE SVGA3dShaderDestToken dst( struct src_register src ) +{ + return dst_register( SVGA3dShaderGetRegType( src.base.value ), + src.base.num ); +} + +static INLINE struct src_register src( SVGA3dShaderDestToken dst ) +{ + return src_register( SVGA3dShaderGetRegType( dst.value ), + dst.num ); +} + +static INLINE ubyte svga_tgsi_sampler_type( struct svga_shader_emitter *emit, + int idx ) +{ + switch (emit->key.fkey.tex[idx].texture_target) { + case PIPE_TEXTURE_1D: + return SVGA3DSAMP_2D; + case PIPE_TEXTURE_2D: + return SVGA3DSAMP_2D; + case PIPE_TEXTURE_3D: + return SVGA3DSAMP_VOLUME; + case PIPE_TEXTURE_CUBE: + return SVGA3DSAMP_CUBE; + } + + return SVGA3DSAMP_UNKNOWN; +} + +#endif diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c new file mode 100644 index 0000000000..ea409b7e16 --- /dev/null +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -0,0 +1,2716 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "util/u_memory.h" + +#include "svga_tgsi_emit.h" +#include "svga_context.h" + + +static boolean emit_vs_postamble( struct svga_shader_emitter *emit ); +static boolean emit_ps_postamble( struct svga_shader_emitter *emit ); + + + + +static unsigned +translate_opcode( + uint opcode ) +{ + switch (opcode) { + case TGSI_OPCODE_ABS: return SVGA3DOP_ABS; + case TGSI_OPCODE_ADD: return SVGA3DOP_ADD; + case TGSI_OPCODE_BREAKC: return SVGA3DOP_BREAKC; + case TGSI_OPCODE_DDX: return SVGA3DOP_DSX; + case TGSI_OPCODE_DDY: return SVGA3DOP_DSY; + case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD; + case TGSI_OPCODE_DP3: return SVGA3DOP_DP3; + case TGSI_OPCODE_DP4: return SVGA3DOP_DP4; + case TGSI_OPCODE_ENDFOR: return SVGA3DOP_ENDLOOP; + case TGSI_OPCODE_FRC: return SVGA3DOP_FRC; + case TGSI_OPCODE_BGNFOR: return SVGA3DOP_LOOP; + case TGSI_OPCODE_MAD: return SVGA3DOP_MAD; + case TGSI_OPCODE_MAX: return SVGA3DOP_MAX; + case TGSI_OPCODE_MIN: return SVGA3DOP_MIN; + case TGSI_OPCODE_MOV: return SVGA3DOP_MOV; + case TGSI_OPCODE_MUL: return SVGA3DOP_MUL; + case TGSI_OPCODE_NOP: return SVGA3DOP_NOP; + case TGSI_OPCODE_NRM4: return SVGA3DOP_NRM; + case TGSI_OPCODE_SSG: return SVGA3DOP_SGN; + default: + debug_printf("Unkown opcode %u\n", opcode); + assert( 0 ); + return SVGA3DOP_LAST_INST; + } +} + + +static unsigned translate_file( unsigned file ) +{ + switch (file) { + case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP; + case TGSI_FILE_INPUT: return SVGA3DREG_INPUT; + case TGSI_FILE_OUTPUT: return SVGA3DREG_OUTPUT; /* VS3.0+ only */ + case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST; + case TGSI_FILE_CONSTANT: return SVGA3DREG_CONST; + case TGSI_FILE_SAMPLER: return SVGA3DREG_SAMPLER; + case TGSI_FILE_ADDRESS: return SVGA3DREG_ADDR; + default: + assert( 0 ); + return SVGA3DREG_TEMP; + } +} + + + + + + +static SVGA3dShaderDestToken +translate_dst_register( struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn, + unsigned idx ) +{ + const struct tgsi_full_dst_register *reg = &insn->FullDstRegisters[idx]; + SVGA3dShaderDestToken dest; + + switch (reg->DstRegister.File) { + case TGSI_FILE_OUTPUT: + /* Output registers encode semantic information in their name. + * Need to lookup a table built at decl time: + */ + dest = emit->output_map[reg->DstRegister.Index]; + break; + + default: + dest = dst_register( translate_file( reg->DstRegister.File ), + reg->DstRegister.Index ); + break; + } + + dest.mask = reg->DstRegister.WriteMask; + + if (insn->Instruction.Saturate) + dest.dstMod = SVGA3DDSTMOD_SATURATE; + + return dest; +} + + +static struct src_register +swizzle( struct src_register src, + int x, + int y, + int z, + int w ) +{ + x = (src.base.swizzle >> (x * 2)) & 0x3; + y = (src.base.swizzle >> (y * 2)) & 0x3; + z = (src.base.swizzle >> (z * 2)) & 0x3; + w = (src.base.swizzle >> (w * 2)) & 0x3; + + src.base.swizzle = TRANSLATE_SWIZZLE(x,y,z,w); + + return src; +} + +static struct src_register +scalar( struct src_register src, + int comp ) +{ + return swizzle( src, comp, comp, comp, comp ); +} + +static INLINE boolean +svga_arl_needs_adjustment( const struct svga_shader_emitter *emit ) +{ + int i; + + for (i = 0; i < emit->num_arl_consts; ++i) { + if (emit->arl_consts[i].arl_num == emit->current_arl) + return TRUE; + } + return FALSE; +} + +static INLINE int +svga_arl_adjustment( const struct svga_shader_emitter *emit ) +{ + int i; + + for (i = 0; i < emit->num_arl_consts; ++i) { + if (emit->arl_consts[i].arl_num == emit->current_arl) + return emit->arl_consts[i].number; + } + return 0; +} + +static struct src_register +translate_src_register( const struct svga_shader_emitter *emit, + const struct tgsi_full_src_register *reg ) +{ + struct src_register src; + + switch (reg->SrcRegister.File) { + case TGSI_FILE_INPUT: + /* Input registers are referred to by their semantic name rather + * than by index. Use the mapping build up from the decls: + */ + src = emit->input_map[reg->SrcRegister.Index]; + break; + + case TGSI_FILE_IMMEDIATE: + /* Immediates are appended after TGSI constants in the D3D + * constant buffer. + */ + src = src_register( translate_file( reg->SrcRegister.File ), + reg->SrcRegister.Index + + emit->imm_start ); + break; + + default: + src = src_register( translate_file( reg->SrcRegister.File ), + reg->SrcRegister.Index ); + + break; + } + + /* Indirect addressing (for coninstant buffer lookups only) + */ + if (reg->SrcRegister.Indirect) + { + /* we shift the offset towards the minimum */ + if (svga_arl_needs_adjustment( emit )) { + src.base.num -= svga_arl_adjustment( emit ); + } + src.base.relAddr = 1; + + /* Not really sure what should go in the second token: + */ + src.indirect = src_token( SVGA3DREG_ADDR, + reg->SrcRegisterInd.Index ); + + src.indirect.swizzle = SWIZZLE_XXXX; + } + + src = swizzle( src, + reg->SrcRegister.SwizzleX, + reg->SrcRegister.SwizzleY, + reg->SrcRegister.SwizzleZ, + reg->SrcRegister.SwizzleW ); + + /* src.mod isn't a bitfield, unfortunately: + * See tgsi_util_get_full_src_register_sign_mode for implementation details. + */ + if (reg->SrcRegisterExtMod.Absolute) { + if (reg->SrcRegisterExtMod.Negate) + src.base.srcMod = SVGA3DSRCMOD_ABSNEG; + else + src.base.srcMod = SVGA3DSRCMOD_ABS; + } + else { + if (reg->SrcRegister.Negate != reg->SrcRegisterExtMod.Negate) + src.base.srcMod = SVGA3DSRCMOD_NEG; + else + src.base.srcMod = SVGA3DSRCMOD_NONE; + } + + return src; +} + + +/* + * Get a temporary register, return -1 if none available + */ +static INLINE SVGA3dShaderDestToken +get_temp( struct svga_shader_emitter *emit ) +{ + int i = emit->nr_hw_temp + emit->internal_temp_count++; + + return dst_register( SVGA3DREG_TEMP, i ); +} + +/* Release a single temp. Currently only effective if it was the last + * allocated temp, otherwise release will be delayed until the next + * call to reset_temp_regs(). + */ +static INLINE void +release_temp( struct svga_shader_emitter *emit, + SVGA3dShaderDestToken temp ) +{ + if (temp.num == emit->internal_temp_count - 1) + emit->internal_temp_count--; +} + +static void reset_temp_regs( struct svga_shader_emitter *emit ) +{ + emit->internal_temp_count = 0; +} + + +static boolean submit_op0( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken inst, + SVGA3dShaderDestToken dest ) +{ + return (emit_instruction( emit, inst ) && + emit_dst( emit, dest )); +} + +static boolean submit_op1( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken inst, + SVGA3dShaderDestToken dest, + struct src_register src0 ) +{ + return emit_op1( emit, inst, dest, src0 ); +} + + +/* SVGA shaders may not refer to >1 constant register in a single + * instruction. This function checks for that usage and inserts a + * move to temporary if detected. + * + * The same applies to input registers -- at most a single input + * register may be read by any instruction. + */ +static boolean submit_op2( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken inst, + SVGA3dShaderDestToken dest, + struct src_register src0, + struct src_register src1 ) +{ + SVGA3dShaderDestToken temp; + SVGA3dShaderRegType type0, type1; + boolean need_temp = FALSE; + + temp.value = 0; + type0 = SVGA3dShaderGetRegType( src0.base.value ); + type1 = SVGA3dShaderGetRegType( src1.base.value ); + + if (type0 == SVGA3DREG_CONST && + type1 == SVGA3DREG_CONST && + src0.base.num != src1.base.num) + need_temp = TRUE; + + if (type0 == SVGA3DREG_INPUT && + type1 == SVGA3DREG_INPUT && + src0.base.num != src1.base.num) + need_temp = TRUE; + + if (need_temp) + { + temp = get_temp( emit ); + + if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 )) + return FALSE; + + src0 = src( temp ); + } + + if (!emit_op2( emit, inst, dest, src0, src1 )) + return FALSE; + + if (need_temp) + release_temp( emit, temp ); + + return TRUE; +} + + +/* SVGA shaders may not refer to >1 constant register in a single + * instruction. This function checks for that usage and inserts a + * move to temporary if detected. + */ +static boolean submit_op3( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken inst, + SVGA3dShaderDestToken dest, + struct src_register src0, + struct src_register src1, + struct src_register src2 ) +{ + SVGA3dShaderDestToken temp0; + SVGA3dShaderDestToken temp1; + boolean need_temp0 = FALSE; + boolean need_temp1 = FALSE; + SVGA3dShaderRegType type0, type1, type2; + + temp0.value = 0; + temp1.value = 0; + type0 = SVGA3dShaderGetRegType( src0.base.value ); + type1 = SVGA3dShaderGetRegType( src1.base.value ); + type2 = SVGA3dShaderGetRegType( src2.base.value ); + + if (inst.op != SVGA3DOP_SINCOS) { + if (type0 == SVGA3DREG_CONST && + ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) || + (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num))) + need_temp0 = TRUE; + + if (type1 == SVGA3DREG_CONST && + (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num)) + need_temp1 = TRUE; + } + + if (type0 == SVGA3DREG_INPUT && + ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) || + (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num))) + need_temp0 = TRUE; + + if (type1 == SVGA3DREG_INPUT && + (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num)) + need_temp1 = TRUE; + + if (need_temp0) + { + temp0 = get_temp( emit ); + + if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 )) + return FALSE; + + src0 = src( temp0 ); + } + + if (need_temp1) + { + temp1 = get_temp( emit ); + + if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp1, src1 )) + return FALSE; + + src1 = src( temp1 ); + } + + if (!emit_op3( emit, inst, dest, src0, src1, src2 )) + return FALSE; + + if (need_temp1) + release_temp( emit, temp1 ); + if (need_temp0) + release_temp( emit, temp0 ); + return TRUE; +} + + +static boolean emit_def_const( struct svga_shader_emitter *emit, + SVGA3dShaderConstType type, + unsigned idx, + float a, + float b, + float c, + float d ) +{ + SVGA3DOpDefArgs def; + SVGA3dShaderInstToken opcode; + + switch (type) { + case SVGA3D_CONST_TYPE_FLOAT: + opcode = inst_token( SVGA3DOP_DEF ); + def.dst = dst_register( SVGA3DREG_CONST, idx ); + def.constValues[0] = a; + def.constValues[1] = b; + def.constValues[2] = c; + def.constValues[3] = d; + break; + case SVGA3D_CONST_TYPE_INT: + opcode = inst_token( SVGA3DOP_DEFI ); + def.dst = dst_register( SVGA3DREG_CONSTINT, idx ); + def.constIValues[0] = (int)a; + def.constIValues[1] = (int)b; + def.constIValues[2] = (int)c; + def.constIValues[3] = (int)d; + break; + default: + assert(0); + break; + } + + if (!emit_instruction(emit, opcode) || + !svga_shader_emit_dwords( emit, def.values, Elements(def.values))) + return FALSE; + + return TRUE; +} + +static INLINE boolean +create_zero_immediate( struct svga_shader_emitter *emit ) +{ + unsigned idx = emit->nr_hw_const++; + + if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, + idx, 0, 0, 0, 1 )) + return FALSE; + + emit->zero_immediate_idx = idx; + emit->created_zero_immediate = TRUE; + + return TRUE; +} + +static INLINE boolean +create_loop_const( struct svga_shader_emitter *emit ) +{ + unsigned idx = emit->nr_hw_const++; + + if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx, + 255, /* iteration count */ + 0, /* initial value */ + 1, /* step size */ + 0 /* not used, must be 0 */)) + return FALSE; + + emit->loop_const_idx = idx; + emit->created_loop_const = TRUE; + + return TRUE; +} + +static INLINE boolean +create_sincos_consts( struct svga_shader_emitter *emit ) +{ + unsigned idx = emit->nr_hw_const++; + + if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, + -1.5500992e-006f, + -2.1701389e-005f, + 0.0026041667f, + 0.00026041668f )) + return FALSE; + + emit->sincos_consts_idx = idx; + idx = emit->nr_hw_const++; + + if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, + -0.020833334f, + -0.12500000f, + 1.0f, + 0.50000000f )) + return FALSE; + + emit->created_sincos_consts = TRUE; + + return TRUE; +} + +static INLINE boolean +create_arl_consts( struct svga_shader_emitter *emit ) +{ + int i; + + for (i = 0; i < emit->num_arl_consts; i += 4) { + int j; + unsigned idx = emit->nr_hw_const++; + float vals[4]; + for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) { + vals[j] = emit->arl_consts[i + j].number; + emit->arl_consts[i + j].idx = idx; + switch (j) { + case 0: + emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X; + break; + case 1: + emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Y; + break; + case 2: + emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Z; + break; + case 3: + emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_W; + break; + } + } + while (j < 4) + vals[j++] = 0; + + if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, + vals[0], vals[1], + vals[2], vals[3])) + return FALSE; + } + + return TRUE; +} + +static INLINE struct src_register +get_vface( struct svga_shader_emitter *emit ) +{ + assert(emit->emitted_vface); + return src_register(SVGA3DREG_MISCTYPE, + SVGA3DMISCREG_FACE); +} + +/* returns {0, 0, 0, 1} immediate */ +static INLINE struct src_register +get_zero_immediate( struct svga_shader_emitter *emit ) +{ + assert(emit->created_zero_immediate); + assert(emit->zero_immediate_idx >= 0); + return src_register( SVGA3DREG_CONST, + emit->zero_immediate_idx ); +} + +/* returns the loop const */ +static INLINE struct src_register +get_loop_const( struct svga_shader_emitter *emit ) +{ + assert(emit->created_loop_const); + assert(emit->loop_const_idx >= 0); + return src_register( SVGA3DREG_CONSTINT, + emit->loop_const_idx ); +} + +/* returns a sincos const */ +static INLINE struct src_register +get_sincos_const( struct svga_shader_emitter *emit, + unsigned index ) +{ + assert(emit->created_sincos_consts); + assert(emit->sincos_consts_idx >= 0); + assert(index == 0 || index == 1); + return src_register( SVGA3DREG_CONST, + emit->sincos_consts_idx + index ); +} + +static INLINE struct src_register +get_fake_arl_const( struct svga_shader_emitter *emit ) +{ + struct src_register reg; + int idx = 0, swizzle = 0, i; + + for (i = 0; i < emit->num_arl_consts; ++ i) { + if (emit->arl_consts[i].arl_num == emit->current_arl) { + idx = emit->arl_consts[i].idx; + swizzle = emit->arl_consts[i].swizzle; + } + } + + reg = src_register( SVGA3DREG_CONST, idx ); + return scalar(reg, swizzle); +} + +static INLINE struct src_register +get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num ) +{ + int idx; + struct src_register reg; + + /* the width/height indexes start right after constants */ + idx = emit->key.fkey.tex[sampler_num].width_height_idx + + emit->info.file_max[TGSI_FILE_CONSTANT] + 1; + + reg = src_register( SVGA3DREG_CONST, idx ); + return reg; +} + +static boolean emit_fake_arl(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + const struct src_register src0 = translate_src_register( + emit, &insn->FullSrcRegisters[0] ); + struct src_register src1 = get_fake_arl_const( emit ); + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + SVGA3dShaderDestToken tmp = get_temp( emit ); + + if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0)) + return FALSE; + + if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ), + src1)) + return FALSE; + + /* replicate the original swizzle */ + src1 = src(tmp); + src1.base.swizzle = src0.base.swizzle; + + return submit_op1( emit, inst_token( SVGA3DOP_MOVA ), + dst, src1 ); +} + +static boolean emit_if(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + const struct src_register src = translate_src_register( + emit, &insn->FullSrcRegisters[0] ); + struct src_register zero = get_zero_immediate( emit ); + SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC ); + + if_token.control = SVGA3DOPCOMPC_NE; + zero = scalar(zero, TGSI_SWIZZLE_X); + + return (emit_instruction( emit, if_token ) && + emit_src( emit, src ) && + emit_src( emit, zero ) ); +} + +static boolean emit_endif(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + return (emit_instruction( emit, + inst_token( SVGA3DOP_ENDIF ))); +} + +static boolean emit_else(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + return (emit_instruction( emit, + inst_token( SVGA3DOP_ELSE ))); +} + +/* Translate the following TGSI FLR instruction. + * FLR DST, SRC + * To the following SVGA3D instruction sequence. + * FRC TMP, SRC + * SUB DST, SRC, TMP + */ +static boolean emit_floor(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + const struct src_register src0 = translate_src_register( + emit, &insn->FullSrcRegisters[0] ); + SVGA3dShaderDestToken temp = get_temp( emit ); + + /* FRC TMP, SRC */ + if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 )) + return FALSE; + + /* SUB DST, SRC, TMP */ + if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0, + negate( src( temp ) ) )) + return FALSE; + + return TRUE; +} + + +/* Translate the following TGSI CMP instruction. + * CMP DST, SRC0, SRC1, SRC2 + * To the following SVGA3D instruction sequence. + * CMP DST, SRC0, SRC2, SRC1 + */ +static boolean emit_cmp(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + const struct src_register src0 = translate_src_register( + emit, &insn->FullSrcRegisters[0] ); + const struct src_register src1 = translate_src_register( + emit, &insn->FullSrcRegisters[1] ); + const struct src_register src2 = translate_src_register( + emit, &insn->FullSrcRegisters[2] ); + + /* CMP DST, SRC0, SRC2, SRC1 */ + return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, src0, src2, src1); +} + + + +/* Translate the following TGSI DIV instruction. + * DIV DST.xy, SRC0, SRC1 + * To the following SVGA3D instruction sequence. + * RCP TMP.x, SRC1.xxxx + * RCP TMP.y, SRC1.yyyy + * MUL DST.xy, SRC0, TMP + */ +static boolean emit_div(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + const struct src_register src0 = translate_src_register( + emit, &insn->FullSrcRegisters[0] ); + const struct src_register src1 = translate_src_register( + emit, &insn->FullSrcRegisters[1] ); + SVGA3dShaderDestToken temp = get_temp( emit ); + int i; + + /* For each enabled element, perform a RCP instruction. Note that + * RCP is scalar in SVGA3D: + */ + for (i = 0; i < 4; i++) { + unsigned channel = 1 << i; + if (dst.mask & channel) { + /* RCP TMP.?, SRC1.???? */ + if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ), + writemask(temp, channel), + scalar(src1, i) )) + return FALSE; + } + } + + /* Then multiply them out with a single mul: + * + * MUL DST, SRC0, TMP + */ + if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0, + src( temp ) )) + return FALSE; + + return TRUE; +} + +/* Translate the following TGSI DP2 instruction. + * DP2 DST, SRC1, SRC2 + * To the following SVGA3D instruction sequence. + * MUL TMP, SRC1, SRC2 + * ADD DST, TMP.xxxx, TMP.yyyy + */ +static boolean emit_dp2(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + const struct src_register src0 = translate_src_register( + emit, &insn->FullSrcRegisters[0] ); + const struct src_register src1 = translate_src_register( + emit, &insn->FullSrcRegisters[1] ); + SVGA3dShaderDestToken temp = get_temp( emit ); + struct src_register temp_src0, temp_src1; + + /* MUL TMP, SRC1, SRC2 */ + if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 )) + return FALSE; + + temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X); + temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y); + + /* ADD DST, TMP.xxxx, TMP.yyyy */ + if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, + temp_src0, temp_src1 )) + return FALSE; + + return TRUE; +} + + +/* Translate the following TGSI DPH instruction. + * DPH DST, SRC1, SRC2 + * To the following SVGA3D instruction sequence. + * DP3 TMP, SRC1, SRC2 + * ADD DST, TMP, SRC2.wwww + */ +static boolean emit_dph(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + const struct src_register src0 = translate_src_register( + emit, &insn->FullSrcRegisters[0] ); + struct src_register src1 = translate_src_register( + emit, &insn->FullSrcRegisters[1] ); + SVGA3dShaderDestToken temp = get_temp( emit ); + + /* DP3 TMP, SRC1, SRC2 */ + if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 )) + return FALSE; + + src1 = scalar(src1, TGSI_SWIZZLE_W); + + /* ADD DST, TMP, SRC2.wwww */ + if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, + src( temp ), src1 )) + return FALSE; + + return TRUE; +} + +/* Translate the following TGSI DST instruction. + * NRM DST, SRC + * To the following SVGA3D instruction sequence. + * DP3 TMP, SRC, SRC + * RSQ TMP, TMP + * MUL DST, SRC, TMP + */ +static boolean emit_nrm(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + const struct src_register src0 = translate_src_register( + emit, &insn->FullSrcRegisters[0] ); + SVGA3dShaderDestToken temp = get_temp( emit ); + + /* DP3 TMP, SRC, SRC */ + if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src0 )) + return FALSE; + + /* RSQ TMP, TMP */ + if (!submit_op1( emit, inst_token( SVGA3DOP_RSQ ), temp, src( temp ))) + return FALSE; + + /* MUL DST, SRC, TMP */ + if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, + src0, src( temp ))) + return FALSE; + + return TRUE; + +} + +static boolean do_emit_sincos(struct svga_shader_emitter *emit, + SVGA3dShaderDestToken dst, + struct src_register src0) +{ + src0 = scalar(src0, TGSI_SWIZZLE_X); + + if (emit->use_sm30) { + return submit_op1( emit, inst_token( SVGA3DOP_SINCOS ), + dst, src0 ); + } else { + struct src_register const1 = get_sincos_const( emit, 0 ); + struct src_register const2 = get_sincos_const( emit, 1 ); + + return submit_op3( emit, inst_token( SVGA3DOP_SINCOS ), + dst, src0, const1, const2 ); + } +} + +static boolean emit_sincos(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + struct src_register src0 = translate_src_register( + emit, &insn->FullSrcRegisters[0] ); + SVGA3dShaderDestToken temp = get_temp( emit ); + + /* SCS TMP SRC */ + if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 )) + return FALSE; + + /* MOV DST TMP */ + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) )) + return FALSE; + + return TRUE; +} + +/* + * SCS TMP SRC + * MOV DST TMP.yyyy + */ +static boolean emit_sin(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + struct src_register src0 = translate_src_register( + emit, &insn->FullSrcRegisters[0] ); + SVGA3dShaderDestToken temp = get_temp( emit ); + + /* SCS TMP SRC */ + if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0)) + return FALSE; + + src0 = scalar(src( temp ), TGSI_SWIZZLE_Y); + + /* MOV DST TMP.yyyy */ + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 )) + return FALSE; + + return TRUE; +} + +/* + * SCS TMP SRC + * MOV DST TMP.xxxx + */ +static boolean emit_cos(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + struct src_register src0 = translate_src_register( + emit, &insn->FullSrcRegisters[0] ); + SVGA3dShaderDestToken temp = get_temp( emit ); + + /* SCS TMP SRC */ + if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 )) + return FALSE; + + src0 = scalar(src( temp ), TGSI_SWIZZLE_X); + + /* MOV DST TMP.xxxx */ + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 )) + return FALSE; + + return TRUE; +} + + +/* + * ADD DST SRC0, negate(SRC0) + */ +static boolean emit_sub(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + struct src_register src0 = translate_src_register( + emit, &insn->FullSrcRegisters[0] ); + struct src_register src1 = translate_src_register( + emit, &insn->FullSrcRegisters[1] ); + + src1 = negate(src1); + + if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, + src0, src1 )) + return FALSE; + + return TRUE; +} + + +static boolean emit_kil(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderInstToken inst; + const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[0]; + struct src_register src0; + + inst = inst_token( SVGA3DOP_TEXKILL ); + src0 = translate_src_register( emit, reg ); + + if (reg->SrcRegisterExtMod.Absolute || + reg->SrcRegister.Negate != reg->SrcRegisterExtMod.Negate || + reg->SrcRegister.Indirect || + reg->SrcRegister.SwizzleX != 0 || + reg->SrcRegister.SwizzleY != 1 || + reg->SrcRegister.SwizzleZ != 2 || + reg->SrcRegister.File != TGSI_FILE_TEMPORARY) + { + SVGA3dShaderDestToken temp = get_temp( emit ); + + submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 ); + src0 = src( temp ); + } + + return submit_op0( emit, inst, dst(src0) ); +} + + +/* mesa state tracker always emits kilp as an unconditional + * kil */ +static boolean emit_kilp(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderInstToken inst; + SVGA3dShaderDestToken temp; + struct src_register one = get_zero_immediate( emit ); + + inst = inst_token( SVGA3DOP_TEXKILL ); + one = scalar( one, TGSI_SWIZZLE_W ); + + /* texkill doesn't allow negation on the operand so lets move + * negation of {1} to a temp register */ + temp = get_temp( emit ); + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, + negate( one ) )) + return FALSE; + + return submit_op0( emit, inst, temp ); +} + +/* Implement conditionals by initializing destination reg to 'fail', + * then set predicate reg with UFOP_SETP, then move 'pass' to dest + * based on predicate reg. + * + * SETP src0, cmp, src1 -- do this first to avoid aliasing problems. + * MOV dst, fail + * MOV dst, pass, p0 + */ +static boolean +emit_conditional(struct svga_shader_emitter *emit, + unsigned compare_func, + SVGA3dShaderDestToken dst, + struct src_register src0, + struct src_register src1, + struct src_register pass, + struct src_register fail) +{ + SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 ); + SVGA3dShaderInstToken setp_token, mov_token; + setp_token = inst_token( SVGA3DOP_SETP ); + + switch (compare_func) { + case PIPE_FUNC_NEVER: + return submit_op1( emit, inst_token( SVGA3DOP_MOV ), + dst, fail ); + break; + case PIPE_FUNC_LESS: + setp_token.control = SVGA3DOPCOMP_LT; + break; + case PIPE_FUNC_EQUAL: + setp_token.control = SVGA3DOPCOMP_EQ; + break; + case PIPE_FUNC_LEQUAL: + setp_token.control = SVGA3DOPCOMP_LE; + break; + case PIPE_FUNC_GREATER: + setp_token.control = SVGA3DOPCOMP_GT; + break; + case PIPE_FUNC_NOTEQUAL: + setp_token.control = SVGA3DOPCOMPC_NE; + break; + case PIPE_FUNC_GEQUAL: + setp_token.control = SVGA3DOPCOMP_GE; + break; + case PIPE_FUNC_ALWAYS: + return submit_op1( emit, inst_token( SVGA3DOP_MOV ), + dst, pass ); + break; + } + + /* SETP src0, COMPOP, src1 */ + if (!submit_op2( emit, setp_token, pred_reg, + src0, src1 )) + return FALSE; + + mov_token = inst_token( SVGA3DOP_MOV ); + + /* MOV dst, fail */ + if (!submit_op1( emit, mov_token, dst, + fail )) + return FALSE; + + /* MOV dst, pass (predicated) + * + * Note that the predicate reg (and possible modifiers) is passed + * as the first source argument. + */ + mov_token.predicated = 1; + if (!submit_op2( emit, mov_token, dst, + src( pred_reg ), pass )) + return FALSE; + + return TRUE; +} + + +static boolean +emit_select(struct svga_shader_emitter *emit, + unsigned compare_func, + SVGA3dShaderDestToken dst, + struct src_register src0, + struct src_register src1 ) +{ + /* There are some SVGA instructions which implement some selects + * directly, but they are only available in the vertex shader. + */ + if (emit->unit == PIPE_SHADER_VERTEX) { + switch (compare_func) { + case PIPE_FUNC_GEQUAL: + return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 ); + case PIPE_FUNC_LEQUAL: + return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 ); + case PIPE_FUNC_GREATER: + return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 ); + case PIPE_FUNC_LESS: + return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 ); + default: + break; + } + } + + + /* Otherwise, need to use the setp approach: + */ + { + struct src_register one, zero; + /* zero immediate is 0,0,0,1 */ + zero = get_zero_immediate( emit ); + one = scalar( zero, TGSI_SWIZZLE_W ); + zero = scalar( zero, TGSI_SWIZZLE_X ); + + return emit_conditional( + emit, + compare_func, + dst, + src0, + src1, + one, zero); + } +} + + +static boolean emit_select_op(struct svga_shader_emitter *emit, + unsigned compare, + const struct tgsi_full_instruction *insn) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + struct src_register src0 = translate_src_register( + emit, &insn->FullSrcRegisters[0] ); + struct src_register src1 = translate_src_register( + emit, &insn->FullSrcRegisters[1] ); + + return emit_select( emit, compare, dst, src0, src1 ); +} + + +/* Translate texture instructions to SVGA3D representation. + */ +static boolean emit_tex2(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn, + SVGA3dShaderDestToken dst ) +{ + SVGA3dShaderInstToken inst; + struct src_register src0; + struct src_register src1; + + inst.value = 0; + inst.op = SVGA3DOP_TEX; + + switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_TEX: + break; + case TGSI_OPCODE_TXP: + inst.control = SVGA3DOPCONT_PROJECT; + break; + case TGSI_OPCODE_TXB: + inst.control = SVGA3DOPCONT_BIAS; + break; + default: + assert(0); + return FALSE; + } + + src0 = translate_src_register( emit, &insn->FullSrcRegisters[0] ); + src1 = translate_src_register( emit, &insn->FullSrcRegisters[1] ); + + if (emit->key.fkey.tex[src1.base.num].unnormalized) { + struct src_register wh = get_tex_dimensions( emit, src1.base.num ); + SVGA3dShaderDestToken tmp = get_temp( emit ); + + /* MUL tmp, SRC0, WH */ + if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), + tmp, src0, wh )) + return FALSE; + src0 = src( tmp ); + } + + return submit_op2( emit, inst, dst, src0, src1 ); +} + + + + +/* Translate texture instructions to SVGA3D representation. + */ +static boolean emit_tex3(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn, + SVGA3dShaderDestToken dst ) +{ + SVGA3dShaderInstToken inst; + struct src_register src0; + struct src_register src1; + struct src_register src2; + + inst.value = 0; + + switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_TXD: + inst.op = SVGA3DOP_TEXLDD; + break; + case TGSI_OPCODE_TXL: + inst.op = SVGA3DOP_TEXLDL; + break; + } + + src0 = translate_src_register( emit, &insn->FullSrcRegisters[0] ); + src1 = translate_src_register( emit, &insn->FullSrcRegisters[1] ); + src2 = translate_src_register( emit, &insn->FullSrcRegisters[2] ); + + return submit_op3( emit, inst, dst, src0, src1, src2 ); +} + + +static boolean emit_tex(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderDestToken dst = + translate_dst_register( emit, insn, 0 ); + struct src_register src0 = + translate_src_register( emit, &insn->FullSrcRegisters[0] ); + struct src_register src1 = + translate_src_register( emit, &insn->FullSrcRegisters[1] ); + + SVGA3dShaderDestToken tex_result; + + /* check for shadow samplers */ + boolean compare = (emit->key.fkey.tex[src1.base.num].compare_mode == + PIPE_TEX_COMPARE_R_TO_TEXTURE); + + + /* If doing compare processing, need to put this value into a + * temporary so it can be used as a source later on. + */ + if (compare || + (!emit->use_sm30 && dst.mask != TGSI_WRITEMASK_XYZW) ) { + tex_result = get_temp( emit ); + } + else { + tex_result = dst; + } + + switch(insn->Instruction.Opcode) { + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXP: + if (!emit_tex2( emit, insn, tex_result )) + return FALSE; + break; + case TGSI_OPCODE_TXL: + case TGSI_OPCODE_TXD: + if (!emit_tex3( emit, insn, tex_result )) + return FALSE; + break; + default: + assert(0); + } + + + if (compare) { + SVGA3dShaderDestToken src0_zdivw = get_temp( emit ); + struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y); + struct src_register one = + scalar( get_zero_immediate( emit ), TGSI_SWIZZLE_W ); + + /* Divide texcoord R by Q */ + if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ), + src0_zdivw, + scalar(src0, TGSI_SWIZZLE_W) )) + return FALSE; + + if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), + src0_zdivw, + scalar(src0, TGSI_SWIZZLE_Z), + src(src0_zdivw) )) + return FALSE; + + if (!emit_select( + emit, + emit->key.fkey.tex[src1.base.num].compare_func, + dst, + src(src0_zdivw), + tex_src_x)) + return FALSE; + + return submit_op1( emit, inst_token( SVGA3DOP_MOV ), + writemask( dst, TGSI_WRITEMASK_W), + one ); + } + else if (!emit->use_sm30 && dst.mask != TGSI_WRITEMASK_XYZW) + { + if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) )) + return FALSE; + } + + return TRUE; +} + +static boolean emit_bgnloop2( struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP ); + struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 ); + struct src_register const_int = get_loop_const( emit ); + + return (emit_instruction( emit, inst ) && + emit_src( emit, loop_reg ) && + emit_src( emit, const_int ) ); +} + +static boolean emit_endloop2( struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP ); + return emit_instruction( emit, inst ); +} + +static boolean emit_brk( struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK ); + return emit_instruction( emit, inst ); +} + +static boolean emit_scalar_op1( struct svga_shader_emitter *emit, + unsigned opcode, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderInstToken inst; + SVGA3dShaderDestToken dst; + struct src_register src; + + inst = inst_token( opcode ); + dst = translate_dst_register( emit, insn, 0 ); + src = translate_src_register( emit, &insn->FullSrcRegisters[0] ); + src = scalar( src, TGSI_SWIZZLE_X ); + + return submit_op1( emit, inst, dst, src ); +} + + +static boolean emit_simple_instruction(struct svga_shader_emitter *emit, + unsigned opcode, + const struct tgsi_full_instruction *insn ) +{ + const struct tgsi_full_src_register *src = insn->FullSrcRegisters; + SVGA3dShaderInstToken inst; + SVGA3dShaderDestToken dst; + + inst = inst_token( opcode ); + dst = translate_dst_register( emit, insn, 0 ); + + switch (insn->Instruction.NumSrcRegs) { + case 0: + return submit_op0( emit, inst, dst ); + case 1: + return submit_op1( emit, inst, dst, + translate_src_register( emit, &src[0] )); + case 2: + return submit_op2( emit, inst, dst, + translate_src_register( emit, &src[0] ), + translate_src_register( emit, &src[1] ) ); + case 3: + return submit_op3( emit, inst, dst, + translate_src_register( emit, &src[0] ), + translate_src_register( emit, &src[1] ), + translate_src_register( emit, &src[2] ) ); + default: + assert(0); + return FALSE; + } +} + +static boolean emit_arl(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + ++emit->current_arl; + if (svga_arl_needs_adjustment( emit )) { + return emit_fake_arl( emit, insn ); + } else { + /* no need to adjust, just emit straight arl */ + return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn); + } +} + +static boolean alias_src_dst( struct src_register src, + SVGA3dShaderDestToken dst ) +{ + if (src.base.num != dst.num) + return FALSE; + + if (SVGA3dShaderGetRegType(dst.value) != + SVGA3dShaderGetRegType(src.base.value)) + return FALSE; + + return TRUE; +} + +static boolean emit_pow(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + struct src_register src0 = translate_src_register( + emit, &insn->FullSrcRegisters[0] ); + struct src_register src1 = translate_src_register( + emit, &insn->FullSrcRegisters[1] ); + boolean need_tmp = FALSE; + + /* POW can only output to a temporary */ + if (insn->FullDstRegisters[0].DstRegister.File != TGSI_FILE_TEMPORARY) + need_tmp = TRUE; + + /* POW src1 must not be the same register as dst */ + if (alias_src_dst( src1, dst )) + need_tmp = TRUE; + + /* it's a scalar op */ + src0 = scalar( src0, TGSI_SWIZZLE_X ); + src1 = scalar( src1, TGSI_SWIZZLE_X ); + + if (need_tmp) { + SVGA3dShaderDestToken tmp = writemask(get_temp( emit ), TGSI_WRITEMASK_X ); + + if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1)) + return FALSE; + + return submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, scalar(src(tmp), 0) ); + } + else { + return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1); + } +} + +static boolean emit_xpd(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + const struct src_register src0 = translate_src_register( + emit, &insn->FullSrcRegisters[0] ); + const struct src_register src1 = translate_src_register( + emit, &insn->FullSrcRegisters[1] ); + boolean need_dst_tmp = FALSE; + + /* XPD can only output to a temporary */ + if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP) + need_dst_tmp = TRUE; + + /* The dst reg must not be the same as src0 or src1*/ + if (alias_src_dst(src0, dst) || + alias_src_dst(src1, dst)) + need_dst_tmp = TRUE; + + if (need_dst_tmp) { + SVGA3dShaderDestToken tmp = get_temp( emit ); + + /* Obey DX9 restrictions on mask: + */ + tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ; + + if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1)) + return FALSE; + + if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp ))) + return FALSE; + } + else { + if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1)) + return FALSE; + } + + /* Need to emit 1.0 to dst.w? + */ + if (dst.mask & TGSI_WRITEMASK_W) { + struct src_register zero = get_zero_immediate( emit ); + + if (!submit_op1(emit, + inst_token( SVGA3DOP_MOV ), + writemask(dst, TGSI_WRITEMASK_W), + zero)) + return FALSE; + } + + return TRUE; +} + + +static boolean emit_lrp(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + SVGA3dShaderDestToken tmp; + const struct src_register src0 = translate_src_register( + emit, &insn->FullSrcRegisters[0] ); + const struct src_register src1 = translate_src_register( + emit, &insn->FullSrcRegisters[1] ); + const struct src_register src2 = translate_src_register( + emit, &insn->FullSrcRegisters[2] ); + boolean need_dst_tmp = FALSE; + + /* The dst reg must not be the same as src0 or src2 */ + if (alias_src_dst(src0, dst) || + alias_src_dst(src2, dst)) + need_dst_tmp = TRUE; + + if (need_dst_tmp) { + tmp = get_temp( emit ); + tmp.mask = dst.mask; + } + else { + tmp = dst; + } + + if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2)) + return FALSE; + + if (need_dst_tmp) { + if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp ))) + return FALSE; + } + + return TRUE; +} + + +static boolean emit_dst_insn(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + if (emit->unit == PIPE_SHADER_VERTEX) { + /* SVGA/DX9 has a DST instruction, but only for vertex shaders: + */ + return emit_simple_instruction(emit, SVGA3DOP_DST, insn); + } + else { + + /* result[0] = 1 * 1; + * result[1] = a[1] * b[1]; + * result[2] = a[2] * 1; + * result[3] = 1 * b[3]; + */ + + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + SVGA3dShaderDestToken tmp; + const struct src_register src0 = translate_src_register( + emit, &insn->FullSrcRegisters[0] ); + const struct src_register src1 = translate_src_register( + emit, &insn->FullSrcRegisters[1] ); + struct src_register zero = get_zero_immediate( emit ); + boolean need_tmp = FALSE; + + if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP || + alias_src_dst(src0, dst) || + alias_src_dst(src1, dst)) + need_tmp = TRUE; + + if (need_tmp) { + tmp = get_temp( emit ); + } + else { + tmp = dst; + } + + /* tmp.xw = 1.0 + */ + if (tmp.mask & TGSI_WRITEMASK_XW) { + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), + writemask(tmp, TGSI_WRITEMASK_XW ), + scalar( zero, 3 ))) + return FALSE; + } + + /* tmp.yz = src0 + */ + if (tmp.mask & TGSI_WRITEMASK_YZ) { + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), + writemask(tmp, TGSI_WRITEMASK_YZ ), + src0)) + return FALSE; + } + + /* tmp.yw = tmp * src1 + */ + if (tmp.mask & TGSI_WRITEMASK_YW) { + if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), + writemask(tmp, TGSI_WRITEMASK_YW ), + src(tmp), + src1)) + return FALSE; + } + + /* dst = tmp + */ + if (need_tmp) { + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), + dst, + src(tmp))) + return FALSE; + } + } + + return TRUE; +} + + +static boolean emit_exp(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + struct src_register src0 = + translate_src_register( emit, &insn->FullSrcRegisters[0] ); + struct src_register zero = get_zero_immediate( emit ); + SVGA3dShaderDestToken fraction; + + if (dst.mask & TGSI_WRITEMASK_Y) + fraction = dst; + else if (dst.mask & TGSI_WRITEMASK_X) + fraction = get_temp( emit ); + + /* If y is being written, fill it with src0 - floor(src0). + */ + if (dst.mask & TGSI_WRITEMASK_XY) { + if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), + writemask( fraction, TGSI_WRITEMASK_Y ), + src0 )) + return FALSE; + } + + /* If x is being written, fill it with 2 ^ floor(src0). + */ + if (dst.mask & TGSI_WRITEMASK_X) { + if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), + writemask( dst, dst.mask & TGSI_WRITEMASK_X ), + src0, + scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) ) + return FALSE; + + if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ), + writemask( dst, dst.mask & TGSI_WRITEMASK_X ), + scalar( src( dst ), TGSI_SWIZZLE_X ) ) ) + return FALSE; + + if (!(dst.mask & TGSI_WRITEMASK_Y)) + release_temp( emit, fraction ); + } + + /* If z is being written, fill it with 2 ^ src0 (partial precision). + */ + if (dst.mask & TGSI_WRITEMASK_Z) { + if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ), + writemask( dst, dst.mask & TGSI_WRITEMASK_Z ), + src0 ) ) + return FALSE; + } + + /* If w is being written, fill it with one. + */ + if (dst.mask & TGSI_WRITEMASK_W) { + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), + writemask(dst, TGSI_WRITEMASK_W), + scalar( zero, TGSI_SWIZZLE_W ) )) + return FALSE; + } + + return TRUE; +} + +static boolean emit_lit(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + if (emit->unit == PIPE_SHADER_VERTEX) { + /* SVGA/DX9 has a LIT instruction, but only for vertex shaders: + */ + return emit_simple_instruction(emit, SVGA3DOP_LIT, insn); + } + else { + + /* D3D vs. GL semantics can be fairly easily accomodated by + * variations on this sequence. + * + * GL: + * tmp.y = src.x + * tmp.z = pow(src.y,src.w) + * p0 = src0.xxxx > 0 + * result = zero.wxxw + * (p0) result.yz = tmp + * + * D3D: + * tmp.y = src.x + * tmp.z = pow(src.y,src.w) + * p0 = src0.xxyy > 0 + * result = zero.wxxw + * (p0) result.yz = tmp + * + * Will implement the GL version for now. + */ + + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + SVGA3dShaderDestToken tmp = get_temp( emit ); + const struct src_register src0 = translate_src_register( + emit, &insn->FullSrcRegisters[0] ); + struct src_register zero = get_zero_immediate( emit ); + + /* tmp = pow(src.y, src.w) + */ + if (dst.mask & TGSI_WRITEMASK_Z) { + if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), + tmp, + scalar(src0, 1), + scalar(src0, 3))) + return FALSE; + } + + /* tmp.y = src.x + */ + if (dst.mask & TGSI_WRITEMASK_Y) { + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), + writemask(tmp, TGSI_WRITEMASK_Y ), + scalar(src0, 0))) + return FALSE; + } + + /* Can't quite do this with emit conditional due to the extra + * writemask on the predicated mov: + */ + { + SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 ); + SVGA3dShaderInstToken setp_token, mov_token; + struct src_register predsrc; + + setp_token = inst_token( SVGA3DOP_SETP ); + mov_token = inst_token( SVGA3DOP_MOV ); + + setp_token.control = SVGA3DOPCOMP_GT; + + /* D3D vs GL semantics: + */ + if (0) + predsrc = swizzle(src0, 0, 0, 1, 1); /* D3D */ + else + predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */ + + /* SETP src0.xxyy, GT, {0}.x */ + if (!submit_op2( emit, setp_token, pred_reg, + predsrc, + swizzle(zero, 0, 0, 0, 0) )) + return FALSE; + + /* MOV dst, fail */ + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, + swizzle(zero, 3, 0, 0, 3 ))) + return FALSE; + + /* MOV dst.yz, tmp (predicated) + * + * Note that the predicate reg (and possible modifiers) is passed + * as the first source argument. + */ + if (dst.mask & TGSI_WRITEMASK_YZ) { + mov_token.predicated = 1; + if (!submit_op2( emit, mov_token, + writemask(dst, TGSI_WRITEMASK_YZ), + src( pred_reg ), src( tmp ) )) + return FALSE; + } + } + } + + return TRUE; +} + + + + +static boolean emit_ex2( struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + SVGA3dShaderInstToken inst; + SVGA3dShaderDestToken dst; + struct src_register src0; + + inst = inst_token( SVGA3DOP_EXP ); + dst = translate_dst_register( emit, insn, 0 ); + src0 = translate_src_register( emit, &insn->FullSrcRegisters[0] ); + src0 = scalar( src0, TGSI_SWIZZLE_X ); + + if (dst.mask != TGSI_WRITEMASK_XYZW) { + SVGA3dShaderDestToken tmp = get_temp( emit ); + + if (!submit_op1( emit, inst, tmp, src0 )) + return FALSE; + + return submit_op1( emit, inst_token( SVGA3DOP_MOV ), + dst, + scalar( src( tmp ), TGSI_SWIZZLE_X ) ); + } + + return submit_op1( emit, inst, dst, src0 ); +} + + +static boolean emit_log(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + struct src_register src0 = + translate_src_register( emit, &insn->FullSrcRegisters[0] ); + struct src_register zero = get_zero_immediate( emit ); + SVGA3dShaderDestToken abs_tmp; + struct src_register abs_src0; + SVGA3dShaderDestToken log2_abs; + + if (dst.mask & TGSI_WRITEMASK_Z) + log2_abs = dst; + else if (dst.mask & TGSI_WRITEMASK_XY) + log2_abs = get_temp( emit ); + + /* If z is being written, fill it with log2( abs( src0 ) ). + */ + if (dst.mask & TGSI_WRITEMASK_XYZ) { + if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS) + abs_src0 = src0; + else { + abs_tmp = get_temp( emit ); + + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), + abs_tmp, + src0 ) ) + return FALSE; + + abs_src0 = src( abs_tmp ); + } + + abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) ); + + if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ), + writemask( log2_abs, TGSI_WRITEMASK_Z ), + abs_src0 ) ) + return FALSE; + } + + if (dst.mask & TGSI_WRITEMASK_XY) { + SVGA3dShaderDestToken floor_log2; + + if (dst.mask & TGSI_WRITEMASK_X) + floor_log2 = dst; + else + floor_log2 = get_temp( emit ); + + /* If x is being written, fill it with floor( log2( abs( src0 ) ) ). + */ + if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), + writemask( floor_log2, TGSI_WRITEMASK_X ), + scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) ) + return FALSE; + + if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), + writemask( floor_log2, TGSI_WRITEMASK_X ), + scalar( src( log2_abs ), TGSI_SWIZZLE_Z ), + negate( src( floor_log2 ) ) ) ) + return FALSE; + + /* If y is being written, fill it with + * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ). + */ + if (dst.mask & TGSI_WRITEMASK_Y) { + if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ), + writemask( dst, TGSI_WRITEMASK_Y ), + negate( scalar( src( floor_log2 ), + TGSI_SWIZZLE_X ) ) ) ) + return FALSE; + + if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), + writemask( dst, TGSI_WRITEMASK_Y ), + src( dst ), + abs_src0 ) ) + return FALSE; + } + + if (!(dst.mask & TGSI_WRITEMASK_X)) + release_temp( emit, floor_log2 ); + + if (!(dst.mask & TGSI_WRITEMASK_Z)) + release_temp( emit, log2_abs ); + } + + if (dst.mask & TGSI_WRITEMASK_XYZ && src0.base.srcMod && + src0.base.srcMod != SVGA3DSRCMOD_ABS) + release_temp( emit, abs_tmp ); + + /* If w is being written, fill it with one. + */ + if (dst.mask & TGSI_WRITEMASK_W) { + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), + writemask(dst, TGSI_WRITEMASK_W), + scalar( zero, TGSI_SWIZZLE_W ) )) + return FALSE; + } + + return TRUE; +} + + +static boolean emit_bgnsub( struct svga_shader_emitter *emit, + unsigned position, + const struct tgsi_full_instruction *insn ) +{ + unsigned i; + + /* Note that we've finished the main function and are now emitting + * subroutines. This affects how we terminate the generated + * shader. + */ + emit->in_main_func = FALSE; + + for (i = 0; i < emit->nr_labels; i++) { + if (emit->label[i] == position) { + return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) && + emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) && + emit_src( emit, src_register( SVGA3DREG_LABEL, i ))); + } + } + + assert(0); + return TRUE; +} + +static boolean emit_call( struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + unsigned position = insn->InstructionExtLabel.Label; + unsigned i; + + for (i = 0; i < emit->nr_labels; i++) { + if (emit->label[i] == position) + break; + } + + if (emit->nr_labels == Elements(emit->label)) + return FALSE; + + if (i == emit->nr_labels) { + emit->label[i] = position; + emit->nr_labels++; + } + + return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) && + emit_src( emit, src_register( SVGA3DREG_LABEL, i ))); +} + + +static boolean emit_end( struct svga_shader_emitter *emit ) +{ + if (emit->unit == PIPE_SHADER_VERTEX) { + return emit_vs_postamble( emit ); + } + else { + return emit_ps_postamble( emit ); + } +} + + + +static boolean svga_emit_instruction( struct svga_shader_emitter *emit, + unsigned position, + const struct tgsi_full_instruction *insn ) +{ + switch (insn->Instruction.Opcode) { + + case TGSI_OPCODE_ARL: + return emit_arl( emit, insn ); + + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXP: + case TGSI_OPCODE_TXL: + case TGSI_OPCODE_TXD: + return emit_tex( emit, insn ); + + case TGSI_OPCODE_BGNSUB: + return emit_bgnsub( emit, position, insn ); + + case TGSI_OPCODE_ENDSUB: + return TRUE; + + case TGSI_OPCODE_CAL: + return emit_call( emit, insn ); + + case TGSI_OPCODE_FLR: + case TGSI_OPCODE_TRUNC: /* should be TRUNC, not FLR */ + return emit_floor( emit, insn ); + + case TGSI_OPCODE_CMP: + return emit_cmp( emit, insn ); + + case TGSI_OPCODE_DIV: + return emit_div( emit, insn ); + + case TGSI_OPCODE_DP2: + return emit_dp2( emit, insn ); + + case TGSI_OPCODE_DPH: + return emit_dph( emit, insn ); + + case TGSI_OPCODE_NRM: + return emit_nrm( emit, insn ); + + case TGSI_OPCODE_COS: + return emit_cos( emit, insn ); + + case TGSI_OPCODE_SIN: + return emit_sin( emit, insn ); + + case TGSI_OPCODE_SCS: + return emit_sincos( emit, insn ); + + case TGSI_OPCODE_END: + /* TGSI always finishes the main func with an END */ + return emit_end( emit ); + + case TGSI_OPCODE_KIL: + return emit_kil( emit, insn ); + + /* Selection opcodes. The underlying language is fairly + * non-orthogonal about these. + */ + case TGSI_OPCODE_SEQ: + return emit_select_op( emit, PIPE_FUNC_EQUAL, insn ); + + case TGSI_OPCODE_SNE: + return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn ); + + case TGSI_OPCODE_SGT: + return emit_select_op( emit, PIPE_FUNC_GREATER, insn ); + + case TGSI_OPCODE_SGE: + return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn ); + + case TGSI_OPCODE_SLT: + return emit_select_op( emit, PIPE_FUNC_LESS, insn ); + + case TGSI_OPCODE_SLE: + return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn ); + + case TGSI_OPCODE_SUB: + return emit_sub( emit, insn ); + + case TGSI_OPCODE_POW: + return emit_pow( emit, insn ); + + case TGSI_OPCODE_EX2: + return emit_ex2( emit, insn ); + + case TGSI_OPCODE_EXP: + return emit_exp( emit, insn ); + + case TGSI_OPCODE_LOG: + return emit_log( emit, insn ); + + case TGSI_OPCODE_LG2: + return emit_scalar_op1( emit, SVGA3DOP_LOG, insn ); + + case TGSI_OPCODE_RSQ: + return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn ); + + case TGSI_OPCODE_RCP: + return emit_scalar_op1( emit, SVGA3DOP_RCP, insn ); + + case TGSI_OPCODE_CONT: + case TGSI_OPCODE_RET: + /* This is a noop -- we tell mesa that we can't support RET + * within a function (early return), so this will always be + * followed by an ENDSUB. + */ + return TRUE; + + /* These aren't actually used by any of the frontends we care + * about: + */ + case TGSI_OPCODE_CLAMP: + case TGSI_OPCODE_ROUND: + case TGSI_OPCODE_AND: + case TGSI_OPCODE_OR: + case TGSI_OPCODE_I2F: + case TGSI_OPCODE_NOT: + case TGSI_OPCODE_SHL: + case TGSI_OPCODE_SHR: + case TGSI_OPCODE_XOR: + return FALSE; + + case TGSI_OPCODE_IF: + return emit_if( emit, insn ); + case TGSI_OPCODE_ELSE: + return emit_else( emit, insn ); + case TGSI_OPCODE_ENDIF: + return emit_endif( emit, insn ); + + case TGSI_OPCODE_BGNLOOP: + return emit_bgnloop2( emit, insn ); + case TGSI_OPCODE_ENDLOOP: + return emit_endloop2( emit, insn ); + case TGSI_OPCODE_BRK: + return emit_brk( emit, insn ); + + case TGSI_OPCODE_XPD: + return emit_xpd( emit, insn ); + + case TGSI_OPCODE_KILP: + return emit_kilp( emit, insn ); + + case TGSI_OPCODE_DST: + return emit_dst_insn( emit, insn ); + + case TGSI_OPCODE_LIT: + return emit_lit( emit, insn ); + + case TGSI_OPCODE_LRP: + return emit_lrp( emit, insn ); + + default: { + unsigned opcode = translate_opcode(insn->Instruction.Opcode); + + if (opcode == SVGA3DOP_LAST_INST) + return FALSE; + + if (!emit_simple_instruction( emit, opcode, insn )) + return FALSE; + } + } + + return TRUE; +} + + +static boolean svga_emit_immediate( struct svga_shader_emitter *emit, + struct tgsi_full_immediate *imm) +{ + static const float id[4] = {0,0,0,1}; + float value[4]; + unsigned i; + + assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5); + for (i = 0; i < imm->Immediate.NrTokens - 1; i++) + value[i] = imm->u[i].Float; + + for ( ; i < 4; i++ ) + value[i] = id[i]; + + return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, + emit->imm_start + emit->internal_imm_count++, + value[0], value[1], value[2], value[3]); +} + +static boolean make_immediate( struct svga_shader_emitter *emit, + float a, + float b, + float c, + float d, + struct src_register *out ) +{ + unsigned idx = emit->nr_hw_const++; + + if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, + idx, a, b, c, d )) + return FALSE; + + *out = src_register( SVGA3DREG_CONST, idx ); + + return TRUE; +} + +static boolean emit_vs_preamble( struct svga_shader_emitter *emit ) +{ + if (!emit->key.vkey.need_prescale) { + if (!make_immediate( emit, 0, 0, .5, .5, + &emit->imm_0055)) + return FALSE; + } + + return TRUE; +} + +static boolean emit_ps_preamble( struct svga_shader_emitter *emit ) +{ + unsigned i; + + /* For SM20, need to initialize the temporaries we're using to hold + * color outputs to some value. Shaders which don't set all of + * these values are likely to be rejected by the DX9 runtime. + */ + if (!emit->use_sm30) { + struct src_register zero = get_zero_immediate( emit ); + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) { + + if (!submit_op1( emit, + inst_token(SVGA3DOP_MOV), + emit->temp_col[i], + zero )) + return FALSE; + } + } + } + + return TRUE; +} + +static boolean emit_ps_postamble( struct svga_shader_emitter *emit ) +{ + unsigned i; + + /* PS oDepth is incredibly fragile and it's very hard to catch the + * types of usage that break it during shader emit. Easier just to + * redirect the main program to a temporary and then only touch + * oDepth with a hand-crafted MOV below. + */ + if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) { + + if (!submit_op1( emit, + inst_token(SVGA3DOP_MOV), + emit->true_pos, + scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) )) + return FALSE; + } + + /* Similarly for SM20 color outputs... Luckily SM30 isn't so + * fragile. + */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) { + + if (!submit_op1( emit, + inst_token(SVGA3DOP_MOV), + emit->true_col[i], + src(emit->temp_col[i]) )) + return FALSE; + } + } + + return TRUE; +} + +static boolean emit_vs_postamble( struct svga_shader_emitter *emit ) +{ + /* PSIZ output is incredibly fragile and it's very hard to catch + * the types of usage that break it during shader emit. Easier + * just to redirect the main program to a temporary and then only + * touch PSIZ with a hand-crafted MOV below. + */ + if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) { + + if (!submit_op1( emit, + inst_token(SVGA3DOP_MOV), + emit->true_psiz, + scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) )) + return FALSE; + } + + /* Need to perform various manipulations on vertex position to cope + * with the different GL and D3D clip spaces. + */ + if (emit->key.vkey.need_prescale) { + SVGA3dShaderDestToken temp_pos = emit->temp_pos; + SVGA3dShaderDestToken pos = emit->true_pos; + unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1; + struct src_register prescale_scale = src_register( SVGA3DREG_CONST, + offset + 0 ); + struct src_register prescale_trans = src_register( SVGA3DREG_CONST, + offset + 1 ); + + /* MUL temp_pos.xyz, temp_pos, prescale.scale + * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos + * --> Note that prescale.trans.w == 0 + */ + if (!submit_op2( emit, + inst_token(SVGA3DOP_MUL), + writemask(temp_pos, TGSI_WRITEMASK_XYZ), + src(temp_pos), + prescale_scale )) + return FALSE; + + if (!submit_op3( emit, + inst_token(SVGA3DOP_MAD), + pos, + swizzle(src(temp_pos), 3, 3, 3, 3), + prescale_trans, + src(temp_pos))) + return FALSE; + } + else { + SVGA3dShaderDestToken temp_pos = emit->temp_pos; + SVGA3dShaderDestToken pos = emit->true_pos; + struct src_register imm_0055 = emit->imm_0055; + + /* Adjust GL clipping coordinate space to hardware (D3D-style): + * + * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos + * MOV result.position, temp_pos + */ + if (!submit_op2( emit, + inst_token(SVGA3DOP_DP4), + writemask(temp_pos, TGSI_WRITEMASK_Z), + imm_0055, + src(temp_pos) )) + return FALSE; + + if (!submit_op1( emit, + inst_token(SVGA3DOP_MOV), + pos, + src(temp_pos) )) + return FALSE; + } + + return TRUE; +} + +/* + 0: IF VFACE :4 + 1: COLOR = FrontColor; + 2: ELSE + 3: COLOR = BackColor; + 4: ENDIF + */ +static boolean emit_light_twoside( struct svga_shader_emitter *emit ) +{ + struct src_register vface, zero; + struct src_register front[2]; + struct src_register back[2]; + SVGA3dShaderDestToken color[2]; + int count = emit->internal_color_count; + int i; + SVGA3dShaderInstToken if_token; + + if (count == 0) + return TRUE; + + vface = get_vface( emit ); + zero = get_zero_immediate( emit ); + + /* Can't use get_temp() to allocate the color reg as such + * temporaries will be reclaimed after each instruction by the call + * to reset_temp_regs(). + */ + for (i = 0; i < count; i++) { + color[i] = dst_register( SVGA3DREG_TEMP, + emit->nr_hw_temp++ ); + + front[i] = emit->input_map[emit->internal_color_idx[i]]; + + /* Back is always the next input: + */ + back[i] = front[i]; + back[i].base.num = front[i].base.num + 1; + + /* Reassign the input_map to the actual front-face color: + */ + emit->input_map[emit->internal_color_idx[i]] = src(color[i]); + } + + if_token = inst_token( SVGA3DOP_IFC ); + + if (emit->key.fkey.front_cw) + if_token.control = SVGA3DOPCOMP_GT; + else + if_token.control = SVGA3DOPCOMP_LT; + + zero = scalar(zero, TGSI_SWIZZLE_X); + + if (!(emit_instruction( emit, if_token ) && + emit_src( emit, vface ) && + emit_src( emit, zero ) )) + return FALSE; + + for (i = 0; i < count; i++) { + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] )) + return FALSE; + } + + if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE)))) + return FALSE; + + for (i = 0; i < count; i++) { + if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] )) + return FALSE; + } + + if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) )) + return FALSE; + + return TRUE; +} + +/* + 0: SETP_GT TEMP, VFACE, 0 + where TEMP is a fake frontface register + */ +static boolean emit_frontface( struct svga_shader_emitter *emit ) +{ + struct src_register vface, zero; + SVGA3dShaderDestToken temp; + struct src_register pass, fail; + + vface = get_vface( emit ); + zero = get_zero_immediate( emit ); + + /* Can't use get_temp() to allocate the fake frontface reg as such + * temporaries will be reclaimed after each instruction by the call + * to reset_temp_regs(). + */ + temp = dst_register( SVGA3DREG_TEMP, + emit->nr_hw_temp++ ); + + if (emit->key.fkey.front_cw) { + pass = scalar( zero, TGSI_SWIZZLE_W ); + fail = scalar( zero, TGSI_SWIZZLE_X ); + } else { + pass = scalar( zero, TGSI_SWIZZLE_X ); + fail = scalar( zero, TGSI_SWIZZLE_W ); + } + + if (!emit_conditional(emit, PIPE_FUNC_GREATER, + temp, vface, scalar( zero, TGSI_SWIZZLE_X ), + pass, fail)) + return FALSE; + + /* Reassign the input_map to the actual front-face color: + */ + emit->input_map[emit->internal_frontface_idx] = src(temp); + + return TRUE; +} + +static INLINE boolean +needs_to_create_zero( struct svga_shader_emitter *emit ) +{ + int i; + + if (emit->unit == PIPE_SHADER_FRAGMENT) { + if (!emit->use_sm30) + return TRUE; + + if (emit->key.fkey.light_twoside) + return TRUE; + + if (emit->emit_frontface) + return TRUE; + + if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1) + return TRUE; + } + + if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_KILP] >= 1) + return TRUE; + + for (i = 0; i < emit->key.fkey.num_textures; i++) { + if (emit->key.fkey.tex[i].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) + return TRUE; + } + + return FALSE; +} + +static INLINE boolean +needs_to_create_loop_const( struct svga_shader_emitter *emit ) +{ + return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1); +} + +static INLINE boolean +needs_to_create_sincos_consts( struct svga_shader_emitter *emit ) +{ + return !emit->use_sm30 && (emit->info.opcode_count[TGSI_OPCODE_SIN] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_COS] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_SCS] >= 1); +} + +static INLINE boolean +needs_to_create_arl_consts( struct svga_shader_emitter *emit ) +{ + return (emit->num_arl_consts > 0); +} + +static INLINE boolean +pre_parse_add_indirect( struct svga_shader_emitter *emit, + int num, int current_arl) +{ + int i; + assert(num < 0); + + for (i = 0; i < emit->num_arl_consts; ++i) { + if (emit->arl_consts[i].arl_num == current_arl) + break; + } + /* new entry */ + if (emit->num_arl_consts == i) { + ++emit->num_arl_consts; + } + emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ? + num : + emit->arl_consts[i].number; + emit->arl_consts[i].arl_num = current_arl; + return TRUE; +} + +static boolean +pre_parse_instruction( struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn, + int current_arl) +{ + if (insn->FullSrcRegisters[0].SrcRegister.Indirect && + insn->FullSrcRegisters[0].SrcRegisterInd.File == TGSI_FILE_ADDRESS) { + const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[0]; + if (reg->SrcRegister.Index < 0) { + pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl); + } + } + + if (insn->FullSrcRegisters[1].SrcRegister.Indirect && + insn->FullSrcRegisters[1].SrcRegisterInd.File == TGSI_FILE_ADDRESS) { + const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[1]; + if (reg->SrcRegister.Index < 0) { + pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl); + } + } + + if (insn->FullSrcRegisters[2].SrcRegister.Indirect && + insn->FullSrcRegisters[2].SrcRegisterInd.File == TGSI_FILE_ADDRESS) { + const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[2]; + if (reg->SrcRegister.Index < 0) { + pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl); + } + } + + return TRUE; +} + +static boolean +pre_parse_tokens( struct svga_shader_emitter *emit, + const struct tgsi_token *tokens ) +{ + struct tgsi_parse_context parse; + int current_arl = 0; + + tgsi_parse_init( &parse, tokens ); + + while (!tgsi_parse_end_of_tokens( &parse )) { + tgsi_parse_token( &parse ); + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + case TGSI_TOKEN_TYPE_DECLARATION: + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (parse.FullToken.FullInstruction.Instruction.Opcode == + TGSI_OPCODE_ARL) { + ++current_arl; + } + if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction, + current_arl )) + return FALSE; + break; + default: + break; + } + + } + return TRUE; +} + +static boolean svga_shader_emit_helpers( struct svga_shader_emitter *emit ) + +{ + if (needs_to_create_zero( emit )) { + create_zero_immediate( emit ); + } + if (needs_to_create_loop_const( emit )) { + create_loop_const( emit ); + } + if (needs_to_create_sincos_consts( emit )) { + create_sincos_consts( emit ); + } + if (needs_to_create_arl_consts( emit )) { + create_arl_consts( emit ); + } + + if (emit->unit == PIPE_SHADER_FRAGMENT) { + if (!emit_ps_preamble( emit )) + return FALSE; + + if (emit->key.fkey.light_twoside) { + if (!emit_light_twoside( emit )) + return FALSE; + } + if (emit->emit_frontface) { + if (!emit_frontface( emit )) + return FALSE; + } + } + + return TRUE; +} + +boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit, + const struct tgsi_token *tokens ) +{ + struct tgsi_parse_context parse; + boolean ret = TRUE; + boolean helpers_emitted = FALSE; + unsigned line_nr = 0; + + tgsi_parse_init( &parse, tokens ); + emit->internal_imm_count = 0; + + if (emit->unit == PIPE_SHADER_VERTEX) { + ret = emit_vs_preamble( emit ); + if (!ret) + goto done; + } + + pre_parse_tokens(emit, tokens); + + while (!tgsi_parse_end_of_tokens( &parse )) { + tgsi_parse_token( &parse ); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate ); + if (!ret) + goto done; + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + if (emit->use_sm30) + ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration ); + else + ret = svga_translate_decl_sm20( emit, &parse.FullToken.FullDeclaration ); + if (!ret) + goto done; + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (!helpers_emitted) { + if (!svga_shader_emit_helpers( emit )) + goto done; + helpers_emitted = TRUE; + } + ret = svga_emit_instruction( emit, + line_nr++, + &parse.FullToken.FullInstruction ); + if (!ret) + goto done; + break; + default: + break; + } + + reset_temp_regs( emit ); + } + + /* Need to terminate the current subroutine. Note that the + * hardware doesn't tolerate shaders without sub-routines + * terminating with RET+END. + */ + if (!emit->in_main_func) { + ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) ); + if (!ret) + goto done; + } + + /* Need to terminate the whole shader: + */ + ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) ); + if (!ret) + goto done; + +done: + assert(ret); + tgsi_parse_free( &parse ); + return ret; +} + diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h new file mode 100644 index 0000000000..59f299c185 --- /dev/null +++ b/src/gallium/drivers/svga/svga_winsys.h @@ -0,0 +1,299 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * VMware SVGA specific winsys interface. + * + * @author Jose Fonseca + * + * Documentation taken from the VMware SVGA DDK. + */ + +#ifndef SVGA_WINSYS_H_ +#define SVGA_WINSYS_H_ + + +#include "svga_types.h" +#include "svga_reg.h" +#include "svga3d_reg.h" + +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" + + +struct svga_winsys_screen; +struct svga_winsys_buffer; +struct pipe_screen; +struct pipe_context; +struct pipe_fence_handle; +struct pipe_texture; +struct svga_region; + + +#define SVGA_BUFFER_USAGE_PINNED (PIPE_BUFFER_USAGE_CUSTOM << 0) +#define SVGA_BUFFER_USAGE_WRAPPED (PIPE_BUFFER_USAGE_CUSTOM << 1) + + +/** Opaque surface handle */ +struct svga_winsys_surface; + +/** Opaque buffer handle */ +struct svga_winsys_handle; + + +/** + * SVGA per-context winsys interface. + */ +struct svga_winsys_context +{ + void + (*destroy)(struct svga_winsys_context *swc); + + void * + (*reserve)(struct svga_winsys_context *swc, + uint32_t nr_bytes, uint32_t nr_relocs ); + + /** + * Emit a relocation for a host surface. + * + * @param flags PIPE_BUFFER_USAGE_GPU_READ/WRITE + * + * NOTE: Order of this call does matter. It should be the same order + * as relocations appear in the command buffer. + */ + void + (*surface_relocation)(struct svga_winsys_context *swc, + uint32 *sid, + struct svga_winsys_surface *surface, + unsigned flags); + + /** + * Emit a relocation for a guest memory region. + * + * @param flags PIPE_BUFFER_USAGE_GPU_READ/WRITE + * + * NOTE: Order of this call does matter. It should be the same order + * as relocations appear in the command buffer. + */ + void + (*region_relocation)(struct svga_winsys_context *swc, + struct SVGAGuestPtr *ptr, + struct svga_winsys_buffer *buffer, + uint32 offset, + unsigned flags); + + void + (*commit)(struct svga_winsys_context *swc); + + enum pipe_error + (*flush)(struct svga_winsys_context *swc, + struct pipe_fence_handle **pfence); + + /** + * Context ID used to fill in the commands + * + * Context IDs are arbitrary small non-negative integers, + * global to the entire SVGA device. + */ + uint32 cid; +}; + + +/** + * SVGA per-screen winsys interface. + */ +struct svga_winsys_screen +{ + void + (*destroy)(struct svga_winsys_screen *sws); + + boolean + (*get_cap)(struct svga_winsys_screen *sws, + SVGA3dDevCapIndex index, + SVGA3dDevCapResult *result); + + /** + * Create a new context. + * + * Context objects encapsulate all render state, and shader + * objects are per-context. + * + * Surfaces are not per-context. The same surface can be shared + * between multiple contexts, and surface operations can occur + * without a context. + */ + struct svga_winsys_context * + (*context_create)(struct svga_winsys_screen *sws); + + + /** + * This creates a "surface" object in the SVGA3D device, + * and returns the surface ID (sid). Surfaces are generic + * containers for host VRAM objects like textures, vertex + * buffers, and depth/stencil buffers. + * + * Surfaces are hierarchial: + * + * - Surface may have multiple faces (for cube maps) + * + * - Each face has a list of mipmap levels + * + * - Each mipmap image may have multiple volume + * slices, if the image is three dimensional. + * + * - Each slice is a 2D array of 'blocks' + * + * - Each block may be one or more pixels. + * (Usually 1, more for DXT or YUV formats.) + * + * Surfaces are generic host VRAM objects. The SVGA3D device + * may optimize surfaces according to the format they were + * created with, but this format does not limit the ways in + * which the surface may be used. For example, a depth surface + * can be used as a texture, or a floating point image may + * be used as a vertex buffer. Some surface usages may be + * lower performance, due to software emulation, but any + * usage should work with any surface. + */ + struct svga_winsys_surface * + (*surface_create)(struct svga_winsys_screen *sws, + SVGA3dSurfaceFlags flags, + SVGA3dSurfaceFormat format, + SVGA3dSize size, + uint32 numFaces, + uint32 numMipLevels); + + /** + * Whether this surface is sitting in a validate list + */ + boolean + (*surface_is_flushed)(struct svga_winsys_screen *sws, + struct svga_winsys_surface *surface); + + /** + * Reference a SVGA3D surface object. This allows sharing of a + * surface between different objects. + */ + void + (*surface_reference)(struct svga_winsys_screen *sws, + struct svga_winsys_surface **pdst, + struct svga_winsys_surface *src); + + /** + * Buffer management. Buffer attributes are mostly fixed over its lifetime. + * + * Remember that gallium gets to choose the interface it needs, and the + * window systems must then implement that interface (rather than the + * other way around...). + * + * usage is a bitmask of PIPE_BUFFER_USAGE_PIXEL/VERTEX/INDEX/CONSTANT. This + * usage argument is only an optimization hint, not a guarantee, therefore + * proper behavior must be observed in all circumstances. + * + * alignment indicates the client's alignment requirements, eg for + * SSE instructions. + */ + struct svga_winsys_buffer * + (*buffer_create)( struct svga_winsys_screen *sws, + unsigned alignment, + unsigned usage, + unsigned size ); + + /** + * Map the entire data store of a buffer object into the client's address. + * flags is a bitmask of: + * - PIPE_BUFFER_USAGE_CPU_READ/WRITE + * - PIPE_BUFFER_USAGE_DONTBLOCK + * - PIPE_BUFFER_USAGE_UNSYNCHRONIZED + */ + void * + (*buffer_map)( struct svga_winsys_screen *sws, + struct svga_winsys_buffer *buf, + unsigned usage ); + + void + (*buffer_unmap)( struct svga_winsys_screen *sws, + struct svga_winsys_buffer *buf ); + + void + (*buffer_destroy)( struct svga_winsys_screen *sws, + struct svga_winsys_buffer *buf ); + + + /** + * Reference a fence object. + */ + void + (*fence_reference)( struct svga_winsys_screen *sws, + struct pipe_fence_handle **pdst, + struct pipe_fence_handle *src ); + + /** + * Checks whether the fence has been signalled. + * \param flags driver-specific meaning + * \return zero on success. + */ + int (*fence_signalled)( struct svga_winsys_screen *sws, + struct pipe_fence_handle *fence, + unsigned flag ); + + /** + * Wait for the fence to finish. + * \param flags driver-specific meaning + * \return zero on success. + */ + int (*fence_finish)( struct svga_winsys_screen *sws, + struct pipe_fence_handle *fence, + unsigned flag ); + +}; + + +struct pipe_context * +svga_context_create(struct pipe_screen *screen); + +struct pipe_screen * +svga_screen_create(struct svga_winsys_screen *sws); + +struct svga_winsys_screen * +svga_winsys_screen(struct pipe_screen *screen); + +struct pipe_buffer * +svga_screen_buffer_wrap_surface(struct pipe_screen *screen, + enum SVGA3dSurfaceFormat format, + struct svga_winsys_surface *srf); + +struct svga_winsys_surface * +svga_screen_texture_get_winsys_surface(struct pipe_texture *texture); +struct svga_winsys_surface * +svga_screen_buffer_get_winsys_surface(struct pipe_buffer *buffer); + +boolean +svga_screen_buffer_from_texture(struct pipe_texture *texture, + struct pipe_buffer **buffer, + unsigned *stride); + +#endif /* SVGA_WINSYS_H_ */ diff --git a/src/gallium/drivers/svga/svgadump/st_shader.h b/src/gallium/drivers/svga/svgadump/st_shader.h new file mode 100644 index 0000000000..2fc1796a90 --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/st_shader.h @@ -0,0 +1,214 @@ +/********************************************************** + * Copyright 2007-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * SVGA Shader Token Definitions + * + * @author Michal Krol + */ + +#ifndef ST_SHADER_SVGA_H +#define ST_SHADER_SVGA_H + +#include "pipe/p_compiler.h" + +struct sh_op +{ + unsigned opcode:16; + unsigned control:8; + unsigned length:4; + unsigned predicated:1; + unsigned unused:1; + unsigned coissue:1; + unsigned is_reg:1; +}; + +struct sh_reg +{ + unsigned number:11; + unsigned type_hi:2; + unsigned relative:1; + unsigned unused:14; + unsigned type_lo:3; + unsigned is_reg:1; +}; + +static INLINE unsigned +sh_reg_type( struct sh_reg reg ) +{ + return reg.type_lo | (reg.type_hi << 3); +} + +struct sh_cdata +{ + float xyzw[4]; +}; + +struct sh_def +{ + struct sh_op op; + struct sh_reg reg; + struct sh_cdata cdata; +}; + +struct sh_defb +{ + struct sh_op op; + struct sh_reg reg; + uint data; +}; + +struct sh_idata +{ + int xyzw[4]; +}; + +struct sh_defi +{ + struct sh_op op; + struct sh_reg reg; + struct sh_idata idata; +}; + +#define PS_TEXTURETYPE_UNKNOWN SVGA3DSAMP_UNKNOWN +#define PS_TEXTURETYPE_2D SVGA3DSAMP_2D +#define PS_TEXTURETYPE_CUBE SVGA3DSAMP_CUBE +#define PS_TEXTURETYPE_VOLUME SVGA3DSAMP_VOLUME + +struct ps_sampleinfo +{ + unsigned unused:27; + unsigned texture_type:4; + unsigned is_reg:1; +}; + +struct vs_semantic +{ + unsigned usage:5; + unsigned unused1:11; + unsigned usage_index:4; + unsigned unused2:12; +}; + +struct sh_dstreg +{ + unsigned number:11; + unsigned type_hi:2; + unsigned relative:1; + unsigned unused:2; + unsigned write_mask:4; + unsigned modifier:4; + unsigned shift_scale:4; + unsigned type_lo:3; + unsigned is_reg:1; +}; + +static INLINE unsigned +sh_dstreg_type( struct sh_dstreg reg ) +{ + return reg.type_lo | (reg.type_hi << 3); +} + +struct sh_dcl +{ + struct sh_op op; + union { + struct { + struct ps_sampleinfo sampleinfo; + } ps; + struct { + struct vs_semantic semantic; + } vs; + } u; + struct sh_dstreg reg; +}; + + +struct sh_srcreg +{ + unsigned number:11; + unsigned type_hi:2; + unsigned relative:1; + unsigned unused:2; + unsigned swizzle_x:2; + unsigned swizzle_y:2; + unsigned swizzle_z:2; + unsigned swizzle_w:2; + unsigned modifier:4; + unsigned type_lo:3; + unsigned is_reg:1; +}; + +static INLINE unsigned +sh_srcreg_type( struct sh_srcreg reg ) +{ + return reg.type_lo | (reg.type_hi << 3); +} + +struct sh_dstop +{ + struct sh_op op; + struct sh_dstreg dst; +}; + +struct sh_srcop +{ + struct sh_op op; + struct sh_srcreg src; +}; + +struct sh_src2op +{ + struct sh_op op; + struct sh_srcreg src0; + struct sh_srcreg src1; +}; + +struct sh_unaryop +{ + struct sh_op op; + struct sh_dstreg dst; + struct sh_srcreg src; +}; + +struct sh_binaryop +{ + struct sh_op op; + struct sh_dstreg dst; + struct sh_srcreg src0; + struct sh_srcreg src1; +}; + +struct sh_trinaryop +{ + struct sh_op op; + struct sh_dstreg dst; + struct sh_srcreg src0; + struct sh_srcreg src1; + struct sh_srcreg src2; +}; + +#endif /* ST_SHADER_SVGA_H */ diff --git a/src/gallium/drivers/svga/svgadump/st_shader_dump.c b/src/gallium/drivers/svga/svgadump/st_shader_dump.c new file mode 100644 index 0000000000..d65cc93bfd --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/st_shader_dump.c @@ -0,0 +1,649 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * SVGA Shader Dump Facilities + * + * @author Michal Krol + */ + +#include "st_shader.h" +#include "st_shader_dump.h" +#include "st_shader_op.h" +#include "util/u_debug.h" + +#include "../svga_hw_reg.h" +#include "svga3d_shaderdefs.h" + +struct dump_info +{ + SVGA3dShaderVersion version; + boolean is_ps; +}; + +static void dump_op( struct sh_op op, const char *mnemonic ) +{ + assert( op.predicated == 0 ); + assert( op.is_reg == 0 ); + + if (op.coissue) + debug_printf( "+" ); + debug_printf( "%s", mnemonic ); + switch (op.control) { + case 0: + break; + case SVGA3DOPCONT_PROJECT: + debug_printf( "p" ); + break; + case SVGA3DOPCONT_BIAS: + debug_printf( "b" ); + break; + default: + assert( 0 ); + } +} + + +static void dump_comp_op( struct sh_op op, const char *mnemonic ) +{ + assert( op.is_reg == 0 ); + + if (op.coissue) + debug_printf( "+" ); + debug_printf( "%s", mnemonic ); + switch (op.control) { + case SVGA3DOPCOMP_RESERVED0: + break; + case SVGA3DOPCOMP_GT: + debug_printf("_gt"); + break; + case SVGA3DOPCOMP_EQ: + debug_printf("_eq"); + break; + case SVGA3DOPCOMP_GE: + debug_printf("_ge"); + break; + case SVGA3DOPCOMP_LT: + debug_printf("_lt"); + break; + case SVGA3DOPCOMPC_NE: + debug_printf("_ne"); + break; + case SVGA3DOPCOMP_LE: + debug_printf("_le"); + break; + case SVGA3DOPCOMP_RESERVED1: + default: + assert( 0 ); + } +} + + +static void dump_reg( struct sh_reg reg, struct sh_srcreg *indreg, const struct dump_info *di ) +{ + assert( sh_reg_type( reg ) == SVGA3DREG_CONST || reg.relative == 0 ); + assert( reg.is_reg == 1 ); + + switch (sh_reg_type( reg )) { + case SVGA3DREG_TEMP: + debug_printf( "r%u", reg.number ); + break; + + case SVGA3DREG_INPUT: + debug_printf( "v%u", reg.number ); + break; + + case SVGA3DREG_CONST: + if (reg.relative) { + if (sh_srcreg_type( *indreg ) == SVGA3DREG_LOOP) + debug_printf( "c[aL+%u]", reg.number ); + else + debug_printf( "c[a%u.x+%u]", indreg->number, reg.number ); + } + else + debug_printf( "c%u", reg.number ); + break; + + case SVGA3DREG_ADDR: /* VS */ + /* SVGA3DREG_TEXTURE */ /* PS */ + if (di->is_ps) + debug_printf( "t%u", reg.number ); + else + debug_printf( "a%u", reg.number ); + break; + + case SVGA3DREG_RASTOUT: + switch (reg.number) { + case 0 /*POSITION*/: + debug_printf( "oPos" ); + break; + case 1 /*FOG*/: + debug_printf( "oFog" ); + break; + case 2 /*POINT_SIZE*/: + debug_printf( "oPts" ); + break; + default: + assert( 0 ); + debug_printf( "???" ); + } + break; + + case SVGA3DREG_ATTROUT: + assert( reg.number < 2 ); + debug_printf( "oD%u", reg.number ); + break; + + case SVGA3DREG_TEXCRDOUT: + /* SVGA3DREG_OUTPUT */ + debug_printf( "oT%u", reg.number ); + break; + + case SVGA3DREG_COLOROUT: + debug_printf( "oC%u", reg.number ); + break; + + case SVGA3DREG_DEPTHOUT: + debug_printf( "oD%u", reg.number ); + break; + + case SVGA3DREG_SAMPLER: + debug_printf( "s%u", reg.number ); + break; + + case SVGA3DREG_CONSTBOOL: + assert( !reg.relative ); + debug_printf( "b%u", reg.number ); + break; + + case SVGA3DREG_CONSTINT: + assert( !reg.relative ); + debug_printf( "i%u", reg.number ); + break; + + case SVGA3DREG_LOOP: + assert( reg.number == 0 ); + debug_printf( "aL" ); + break; + + case SVGA3DREG_MISCTYPE: + switch (reg.number) { + case SVGA3DMISCREG_POSITION: + debug_printf( "vPos" ); + break; + case SVGA3DMISCREG_FACE: + debug_printf( "vFace" ); + break; + default: + assert(0); + break; + } + break; + + case SVGA3DREG_LABEL: + debug_printf( "l%u", reg.number ); + break; + + case SVGA3DREG_PREDICATE: + debug_printf( "p%u", reg.number ); + break; + + + default: + assert( 0 ); + debug_printf( "???" ); + } +} + +static void dump_cdata( struct sh_cdata cdata ) +{ + debug_printf( "%f, %f, %f, %f", cdata.xyzw[0], cdata.xyzw[1], cdata.xyzw[2], cdata.xyzw[3] ); +} + +static void dump_idata( struct sh_idata idata ) +{ + debug_printf( "%d, %d, %d, %d", idata.xyzw[0], idata.xyzw[1], idata.xyzw[2], idata.xyzw[3] ); +} + +static void dump_bdata( boolean bdata ) +{ + debug_printf( bdata ? "TRUE" : "FALSE" ); +} + +static void dump_sampleinfo( struct ps_sampleinfo sampleinfo ) +{ + switch (sampleinfo.texture_type) { + case SVGA3DSAMP_2D: + debug_printf( "_2d" ); + break; + case SVGA3DSAMP_CUBE: + debug_printf( "_cube" ); + break; + case SVGA3DSAMP_VOLUME: + debug_printf( "_volume" ); + break; + default: + assert( 0 ); + } +} + + +static void dump_usageinfo( struct vs_semantic semantic ) +{ + switch (semantic.usage) { + case SVGA3D_DECLUSAGE_POSITION: + debug_printf("_position" ); + break; + case SVGA3D_DECLUSAGE_BLENDWEIGHT: + debug_printf("_blendweight" ); + break; + case SVGA3D_DECLUSAGE_BLENDINDICES: + debug_printf("_blendindices" ); + break; + case SVGA3D_DECLUSAGE_NORMAL: + debug_printf("_normal" ); + break; + case SVGA3D_DECLUSAGE_PSIZE: + debug_printf("_psize" ); + break; + case SVGA3D_DECLUSAGE_TEXCOORD: + debug_printf("_texcoord"); + break; + case SVGA3D_DECLUSAGE_TANGENT: + debug_printf("_tangent" ); + break; + case SVGA3D_DECLUSAGE_BINORMAL: + debug_printf("_binormal" ); + break; + case SVGA3D_DECLUSAGE_TESSFACTOR: + debug_printf("_tessfactor" ); + break; + case SVGA3D_DECLUSAGE_POSITIONT: + debug_printf("_positiont" ); + break; + case SVGA3D_DECLUSAGE_COLOR: + debug_printf("_color" ); + break; + case SVGA3D_DECLUSAGE_FOG: + debug_printf("_fog" ); + break; + case SVGA3D_DECLUSAGE_DEPTH: + debug_printf("_depth" ); + break; + case SVGA3D_DECLUSAGE_SAMPLE: + debug_printf("_sample"); + break; + default: + assert( 0 ); + return; + } + + if (semantic.usage_index != 0) { + debug_printf("%d", semantic.usage_index ); + } +} + +static void dump_dstreg( struct sh_dstreg dstreg, const struct dump_info *di ) +{ + union { + struct sh_reg reg; + struct sh_dstreg dstreg; + } u; + + assert( (dstreg.modifier & (SVGA3DDSTMOD_SATURATE | SVGA3DDSTMOD_PARTIALPRECISION)) == dstreg.modifier ); + + if (dstreg.modifier & SVGA3DDSTMOD_SATURATE) + debug_printf( "_sat" ); + if (dstreg.modifier & SVGA3DDSTMOD_PARTIALPRECISION) + debug_printf( "_pp" ); + switch (dstreg.shift_scale) { + case 0: + break; + case 1: + debug_printf( "_x2" ); + break; + case 2: + debug_printf( "_x4" ); + break; + case 3: + debug_printf( "_x8" ); + break; + case 13: + debug_printf( "_d8" ); + break; + case 14: + debug_printf( "_d4" ); + break; + case 15: + debug_printf( "_d2" ); + break; + default: + assert( 0 ); + } + debug_printf( " " ); + + u.dstreg = dstreg; + dump_reg( u.reg, NULL, di ); + if (dstreg.write_mask != SVGA3DWRITEMASK_ALL) { + debug_printf( "." ); + if (dstreg.write_mask & SVGA3DWRITEMASK_0) + debug_printf( "x" ); + if (dstreg.write_mask & SVGA3DWRITEMASK_1) + debug_printf( "y" ); + if (dstreg.write_mask & SVGA3DWRITEMASK_2) + debug_printf( "z" ); + if (dstreg.write_mask & SVGA3DWRITEMASK_3) + debug_printf( "w" ); + } +} + +static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, const struct dump_info *di ) +{ + union { + struct sh_reg reg; + struct sh_srcreg srcreg; + } u; + + switch (srcreg.modifier) { + case SVGA3DSRCMOD_NEG: + case SVGA3DSRCMOD_BIASNEG: + case SVGA3DSRCMOD_SIGNNEG: + case SVGA3DSRCMOD_X2NEG: + debug_printf( "-" ); + break; + case SVGA3DSRCMOD_ABS: + debug_printf( "|" ); + break; + case SVGA3DSRCMOD_ABSNEG: + debug_printf( "-|" ); + break; + case SVGA3DSRCMOD_COMP: + debug_printf( "1-" ); + break; + case SVGA3DSRCMOD_NOT: + debug_printf( "!" ); + } + + u.srcreg = srcreg; + dump_reg( u.reg, indreg, di ); + switch (srcreg.modifier) { + case SVGA3DSRCMOD_NONE: + case SVGA3DSRCMOD_NEG: + case SVGA3DSRCMOD_COMP: + case SVGA3DSRCMOD_NOT: + break; + case SVGA3DSRCMOD_ABS: + case SVGA3DSRCMOD_ABSNEG: + debug_printf( "|" ); + break; + case SVGA3DSRCMOD_BIAS: + case SVGA3DSRCMOD_BIASNEG: + debug_printf( "_bias" ); + break; + case SVGA3DSRCMOD_SIGN: + case SVGA3DSRCMOD_SIGNNEG: + debug_printf( "_bx2" ); + break; + case SVGA3DSRCMOD_X2: + case SVGA3DSRCMOD_X2NEG: + debug_printf( "_x2" ); + break; + case SVGA3DSRCMOD_DZ: + debug_printf( "_dz" ); + break; + case SVGA3DSRCMOD_DW: + debug_printf( "_dw" ); + break; + default: + assert( 0 ); + } + if (srcreg.swizzle_x != 0 || srcreg.swizzle_y != 1 || srcreg.swizzle_z != 2 || srcreg.swizzle_w != 3) { + debug_printf( "." ); + if (srcreg.swizzle_x == srcreg.swizzle_y && srcreg.swizzle_y == srcreg.swizzle_z && srcreg.swizzle_z == srcreg.swizzle_w) { + debug_printf( "%c", "xyzw"[srcreg.swizzle_x] ); + } + else { + debug_printf( "%c", "xyzw"[srcreg.swizzle_x] ); + debug_printf( "%c", "xyzw"[srcreg.swizzle_y] ); + debug_printf( "%c", "xyzw"[srcreg.swizzle_z] ); + debug_printf( "%c", "xyzw"[srcreg.swizzle_w] ); + } + } +} + +void +sh_svga_dump( + const unsigned *assem, + unsigned dwords, + unsigned do_binary ) +{ + const unsigned *start = assem; + boolean finished = FALSE; + struct dump_info di; + unsigned i; + + if (do_binary) { + for (i = 0; i < dwords; i++) + debug_printf(" 0x%08x,\n", assem[i]); + + debug_printf("\n\n"); + } + + di.version.value = *assem++; + di.is_ps = (di.version.type == SVGA3D_PS_TYPE); + + debug_printf( + "%s_%u_%u\n", + di.is_ps ? "ps" : "vs", + di.version.major, + di.version.minor ); + + while (!finished) { + struct sh_op op = *(struct sh_op *) assem; + + if (assem - start >= dwords) { + debug_printf("... ran off end of buffer\n"); + assert(0); + return; + } + + switch (op.opcode) { + case SVGA3DOP_DCL: + { + struct sh_dcl dcl = *(struct sh_dcl *) assem; + + debug_printf( "dcl" ); + if (sh_dstreg_type( dcl.reg ) == SVGA3DREG_SAMPLER) + dump_sampleinfo( dcl.u.ps.sampleinfo ); + else if (di.is_ps) { + if (di.version.major == 3 && + sh_dstreg_type( dcl.reg ) != SVGA3DREG_MISCTYPE) + dump_usageinfo( dcl.u.vs.semantic ); + } + else + dump_usageinfo( dcl.u.vs.semantic ); + dump_dstreg( dcl.reg, &di ); + debug_printf( "\n" ); + assem += sizeof( struct sh_dcl ) / sizeof( unsigned ); + } + break; + + case SVGA3DOP_DEFB: + { + struct sh_defb defb = *(struct sh_defb *) assem; + + debug_printf( "defb " ); + dump_reg( defb.reg, NULL, &di ); + debug_printf( ", " ); + dump_bdata( defb.data ); + debug_printf( "\n" ); + assem += sizeof( struct sh_defb ) / sizeof( unsigned ); + } + break; + + case SVGA3DOP_DEFI: + { + struct sh_defi defi = *(struct sh_defi *) assem; + + debug_printf( "defi " ); + dump_reg( defi.reg, NULL, &di ); + debug_printf( ", " ); + dump_idata( defi.idata ); + debug_printf( "\n" ); + assem += sizeof( struct sh_defi ) / sizeof( unsigned ); + } + break; + + case SVGA3DOP_TEXCOORD: + assert( di.is_ps ); + dump_op( op, "texcoord" ); + if (0) { + struct sh_dstop dstop = *(struct sh_dstop *) assem; + dump_dstreg( dstop.dst, &di ); + assem += sizeof( struct sh_dstop ) / sizeof( unsigned ); + } + else { + struct sh_unaryop unaryop = *(struct sh_unaryop *) assem; + dump_dstreg( unaryop.dst, &di ); + debug_printf( ", " ); + dump_srcreg( unaryop.src, NULL, &di ); + assem += sizeof( struct sh_unaryop ) / sizeof( unsigned ); + } + debug_printf( "\n" ); + break; + + case SVGA3DOP_TEX: + assert( di.is_ps ); + if (0) { + dump_op( op, "tex" ); + if (0) { + struct sh_dstop dstop = *(struct sh_dstop *) assem; + + dump_dstreg( dstop.dst, &di ); + assem += sizeof( struct sh_dstop ) / sizeof( unsigned ); + } + else { + struct sh_unaryop unaryop = *(struct sh_unaryop *) assem; + + dump_dstreg( unaryop.dst, &di ); + debug_printf( ", " ); + dump_srcreg( unaryop.src, NULL, &di ); + assem += sizeof( struct sh_unaryop ) / sizeof( unsigned ); + } + } + else { + struct sh_binaryop binaryop = *(struct sh_binaryop *) assem; + + dump_op( op, "texld" ); + dump_dstreg( binaryop.dst, &di ); + debug_printf( ", " ); + dump_srcreg( binaryop.src0, NULL, &di ); + debug_printf( ", " ); + dump_srcreg( binaryop.src1, NULL, &di ); + assem += sizeof( struct sh_binaryop ) / sizeof( unsigned ); + } + debug_printf( "\n" ); + break; + + case SVGA3DOP_DEF: + { + struct sh_def def = *(struct sh_def *) assem; + + debug_printf( "def " ); + dump_reg( def.reg, NULL, &di ); + debug_printf( ", " ); + dump_cdata( def.cdata ); + debug_printf( "\n" ); + assem += sizeof( struct sh_def ) / sizeof( unsigned ); + } + break; + + case SVGA3DOP_PHASE: + debug_printf( "phase\n" ); + assem += sizeof( struct sh_op ) / sizeof( unsigned ); + break; + + case SVGA3DOP_COMMENT: + assert( 0 ); + break; + + case SVGA3DOP_RET: + debug_printf( "ret\n" ); + assem += sizeof( struct sh_op ) / sizeof( unsigned ); + break; + + case SVGA3DOP_END: + debug_printf( "end\n" ); + finished = TRUE; + break; + + default: + { + const struct sh_opcode_info *info = sh_svga_opcode_info( op.opcode ); + uint i; + uint num_src = info->num_src + op.predicated; + boolean not_first_arg = FALSE; + + assert( info->num_dst <= 1 ); + + if (op.opcode == SVGA3DOP_SINCOS && di.version.major < 3) + num_src += 2; + + dump_comp_op( op, info->mnemonic ); + assem += sizeof( struct sh_op ) / sizeof( unsigned ); + + if (info->num_dst > 0) { + struct sh_dstreg dstreg = *(struct sh_dstreg *) assem; + + dump_dstreg( dstreg, &di ); + assem += sizeof( struct sh_dstreg ) / sizeof( unsigned ); + not_first_arg = TRUE; + } + + for (i = 0; i < num_src; i++) { + struct sh_srcreg srcreg; + struct sh_srcreg indreg; + + srcreg = *(struct sh_srcreg *) assem; + assem += sizeof( struct sh_srcreg ) / sizeof( unsigned ); + if (srcreg.relative && !di.is_ps && di.version.major >= 2) { + indreg = *(struct sh_srcreg *) assem; + assem += sizeof( struct sh_srcreg ) / sizeof( unsigned ); + } + + if (not_first_arg) + debug_printf( ", " ); + else + debug_printf( " " ); + dump_srcreg( srcreg, &indreg, &di ); + not_first_arg = TRUE; + } + + debug_printf( "\n" ); + } + } + } +} diff --git a/src/gallium/drivers/svga/svgadump/st_shader_dump.h b/src/gallium/drivers/svga/svgadump/st_shader_dump.h new file mode 100644 index 0000000000..af5549cdba --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/st_shader_dump.h @@ -0,0 +1,42 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * SVGA Shader Dump Facilities + * + * @author Michal Krol + */ + +#ifndef ST_SHADER_SVGA_DUMP_H +#define ST_SHADER_SVGA_DUMP_H + +void +sh_svga_dump( + const unsigned *assem, + unsigned dwords, + unsigned do_binary ); + +#endif /* ST_SHADER_SVGA_DUMP_H */ diff --git a/src/gallium/drivers/svga/svgadump/st_shader_op.c b/src/gallium/drivers/svga/svgadump/st_shader_op.c new file mode 100644 index 0000000000..2c05382ab9 --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/st_shader_op.c @@ -0,0 +1,168 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * SVGA Shader Token Opcode Info + * + * @author Michal Krol + */ + +#include "util/u_debug.h" +#include "st_shader_op.h" + +#include "../svga_hw_reg.h" +#include "svga3d_shaderdefs.h" + +#define SVGA3DOP_INVALID SVGA3DOP_END +#define TGSI_OPCODE_INVALID TGSI_OPCODE_LAST + +static struct sh_opcode_info opcode_info[] = +{ + { "nop", 0, 0, SVGA3DOP_NOP }, + { "mov", 1, 1, SVGA3DOP_MOV, }, + { "add", 1, 2, SVGA3DOP_ADD, }, + { "sub", 1, 2, SVGA3DOP_SUB, }, + { "mad", 1, 3, SVGA3DOP_MAD, }, + { "mul", 1, 2, SVGA3DOP_MUL, }, + { "rcp", 1, 1, SVGA3DOP_RCP, }, + { "rsq", 1, 1, SVGA3DOP_RSQ, }, + { "dp3", 1, 2, SVGA3DOP_DP3, }, + { "dp4", 1, 2, SVGA3DOP_DP4, }, + { "min", 1, 2, SVGA3DOP_MIN, }, + { "max", 1, 2, SVGA3DOP_MAX, }, + { "slt", 1, 2, SVGA3DOP_SLT, }, + { "sge", 1, 2, SVGA3DOP_SGE, }, + { "exp", 1, 1, SVGA3DOP_EXP, }, + { "log", 1, 1, SVGA3DOP_LOG, }, + { "lit", 1, 1, SVGA3DOP_LIT, }, + { "dst", 1, 2, SVGA3DOP_DST, }, + { "lrp", 1, 3, SVGA3DOP_LRP, }, + { "frc", 1, 1, SVGA3DOP_FRC, }, + { "m4x4", 1, 2, SVGA3DOP_M4x4, }, + { "m4x3", 1, 2, SVGA3DOP_M4x3, }, + { "m3x4", 1, 2, SVGA3DOP_M3x4, }, + { "m3x3", 1, 2, SVGA3DOP_M3x3, }, + { "m3x2", 1, 2, SVGA3DOP_M3x2, }, + { "call", 0, 1, SVGA3DOP_CALL, }, + { "callnz", 0, 2, SVGA3DOP_CALLNZ, }, + { "loop", 0, 2, SVGA3DOP_LOOP, }, + { "ret", 0, 0, SVGA3DOP_RET, }, + { "endloop", 0, 0, SVGA3DOP_ENDLOOP, }, + { "label", 0, 1, SVGA3DOP_LABEL, }, + { "dcl", 0, 0, SVGA3DOP_DCL, }, + { "pow", 1, 2, SVGA3DOP_POW, }, + { "crs", 1, 2, SVGA3DOP_CRS, }, + { "sgn", 1, 3, SVGA3DOP_SGN, }, + { "abs", 1, 1, SVGA3DOP_ABS, }, + { "nrm", 1, 1, SVGA3DOP_NRM, }, /* 3-componenet normalization */ + { "sincos", 1, 1, SVGA3DOP_SINCOS, }, + { "rep", 0, 1, SVGA3DOP_REP, }, + { "endrep", 0, 0, SVGA3DOP_ENDREP, }, + { "if", 0, 1, SVGA3DOP_IF, }, + { "ifc", 0, 2, SVGA3DOP_IFC, }, + { "else", 0, 0, SVGA3DOP_ELSE, }, + { "endif", 0, 0, SVGA3DOP_ENDIF, }, + { "break", 0, 0, SVGA3DOP_BREAK, }, + { "breakc", 0, 0, SVGA3DOP_BREAKC, }, + { "mova", 1, 1, SVGA3DOP_MOVA, }, + { "defb", 0, 0, SVGA3DOP_DEFB, }, + { "defi", 0, 0, SVGA3DOP_DEFI, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "texcoord", 0, 0, SVGA3DOP_TEXCOORD, }, + { "texkill", 1, 0, SVGA3DOP_TEXKILL, }, + { "tex", 0, 0, SVGA3DOP_TEX, }, + { "texbem", 1, 1, SVGA3DOP_TEXBEM, }, + { "texbeml", 1, 1, SVGA3DOP_TEXBEML, }, + { "texreg2ar", 1, 1, SVGA3DOP_TEXREG2AR, }, + { "texreg2gb", 1, 1, SVGA3DOP_TEXREG2GB, }, + { "texm3x2pad", 1, 1, SVGA3DOP_TEXM3x2PAD, }, + { "texm3x2tex", 1, 1, SVGA3DOP_TEXM3x2TEX, }, + { "texm3x3pad", 1, 1, SVGA3DOP_TEXM3x3PAD, }, + { "texm3x3tex", 1, 1, SVGA3DOP_TEXM3x3TEX, }, + { "reserved0", 0, 0, SVGA3DOP_RESERVED0, }, + { "texm3x3spec", 1, 2, SVGA3DOP_TEXM3x3SPEC, }, + { "texm3x3vspec", 1, 1, SVGA3DOP_TEXM3x3VSPEC,}, + { "expp", 1, 1, SVGA3DOP_EXPP, }, + { "logp", 1, 1, SVGA3DOP_LOGP, }, + { "cnd", 1, 3, SVGA3DOP_CND, }, + { "def", 0, 0, SVGA3DOP_DEF, }, + { "texreg2rgb", 1, 1, SVGA3DOP_TEXREG2RGB, }, + { "texdp3tex", 1, 1, SVGA3DOP_TEXDP3TEX, }, + { "texm3x2depth", 1, 1, SVGA3DOP_TEXM3x2DEPTH,}, + { "texdp3", 1, 1, SVGA3DOP_TEXDP3, }, + { "texm3x3", 1, 1, SVGA3DOP_TEXM3x3, }, + { "texdepth", 1, 0, SVGA3DOP_TEXDEPTH, }, + { "cmp", 1, 3, SVGA3DOP_CMP, }, + { "bem", 1, 2, SVGA3DOP_BEM, }, + { "dp2add", 1, 3, SVGA3DOP_DP2ADD, }, + { "dsx", 1, 1, SVGA3DOP_INVALID, }, + { "dsy", 1, 1, SVGA3DOP_INVALID, }, + { "texldd", 1, 1, SVGA3DOP_INVALID, }, + { "setp", 1, 2, SVGA3DOP_SETP, }, + { "texldl", 1, 1, SVGA3DOP_INVALID, }, + { "breakp", 1, 1, SVGA3DOP_INVALID, }, +}; + +const struct sh_opcode_info *sh_svga_opcode_info( uint op ) +{ + struct sh_opcode_info *info; + + if (op >= sizeof( opcode_info ) / sizeof( opcode_info[0] )) { + /* The opcode is either PHASE, COMMENT, END or out of range. + */ + assert( 0 ); + return NULL; + } + + info = &opcode_info[op]; + + if (info->svga_opcode == SVGA3DOP_INVALID) { + /* No valid information. Please provide number of dst/src registers. + */ + assert( 0 ); + return NULL; + } + + /* Sanity check. + */ + assert( op == info->svga_opcode ); + + return info; +} diff --git a/src/gallium/drivers/svga/svgadump/st_shader_op.h b/src/gallium/drivers/svga/svgadump/st_shader_op.h new file mode 100644 index 0000000000..01d39dca84 --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/st_shader_op.h @@ -0,0 +1,46 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * SVGA Shader Token Opcode Info + * + * @author Michal Krol + */ + +#ifndef ST_SHADER_SVGA_OP_H +#define ST_SHADER_SVGA_OP_H + +struct sh_opcode_info +{ + const char *mnemonic; + unsigned num_dst:8; + unsigned num_src:8; + unsigned svga_opcode:16; +}; + +const struct sh_opcode_info *sh_svga_opcode_info( unsigned op ); + +#endif /* ST_SHADER_SVGA_OP_H */ diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c new file mode 100644 index 0000000000..180dde8dc1 --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/svga_dump.c @@ -0,0 +1,1736 @@ +/********************************************************** + * Copyright 2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * Dump SVGA commands. + * + * Generated automatically from svga3d_reg.h by svga_dump.py. + */ + +#include "svga_types.h" +#include "st_shader_dump.h" +#include "svga3d_reg.h" + +#include "util/u_debug.h" +#include "svga_dump.h" + +static void +dump_SVGA3dVertexDecl(const SVGA3dVertexDecl *cmd) +{ + switch((*cmd).identity.type) { + case SVGA3D_DECLTYPE_FLOAT1: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT1\n"); + break; + case SVGA3D_DECLTYPE_FLOAT2: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT2\n"); + break; + case SVGA3D_DECLTYPE_FLOAT3: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT3\n"); + break; + case SVGA3D_DECLTYPE_FLOAT4: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT4\n"); + break; + case SVGA3D_DECLTYPE_D3DCOLOR: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_D3DCOLOR\n"); + break; + case SVGA3D_DECLTYPE_UBYTE4: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4\n"); + break; + case SVGA3D_DECLTYPE_SHORT2: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2\n"); + break; + case SVGA3D_DECLTYPE_SHORT4: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4\n"); + break; + case SVGA3D_DECLTYPE_UBYTE4N: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4N\n"); + break; + case SVGA3D_DECLTYPE_SHORT2N: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2N\n"); + break; + case SVGA3D_DECLTYPE_SHORT4N: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4N\n"); + break; + case SVGA3D_DECLTYPE_USHORT2N: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT2N\n"); + break; + case SVGA3D_DECLTYPE_USHORT4N: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT4N\n"); + break; + case SVGA3D_DECLTYPE_UDEC3: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UDEC3\n"); + break; + case SVGA3D_DECLTYPE_DEC3N: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_DEC3N\n"); + break; + case SVGA3D_DECLTYPE_FLOAT16_2: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_2\n"); + break; + case SVGA3D_DECLTYPE_FLOAT16_4: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_4\n"); + break; + case SVGA3D_DECLTYPE_MAX: + debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_MAX\n"); + break; + default: + debug_printf("\t\t.identity.type = %i\n", (*cmd).identity.type); + break; + } + switch((*cmd).identity.method) { + case SVGA3D_DECLMETHOD_DEFAULT: + debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_DEFAULT\n"); + break; + case SVGA3D_DECLMETHOD_PARTIALU: + debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALU\n"); + break; + case SVGA3D_DECLMETHOD_PARTIALV: + debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALV\n"); + break; + case SVGA3D_DECLMETHOD_CROSSUV: + debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_CROSSUV\n"); + break; + case SVGA3D_DECLMETHOD_UV: + debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_UV\n"); + break; + case SVGA3D_DECLMETHOD_LOOKUP: + debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUP\n"); + break; + case SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED: + debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED\n"); + break; + default: + debug_printf("\t\t.identity.method = %i\n", (*cmd).identity.method); + break; + } + switch((*cmd).identity.usage) { + case SVGA3D_DECLUSAGE_POSITION: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITION\n"); + break; + case SVGA3D_DECLUSAGE_BLENDWEIGHT: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDWEIGHT\n"); + break; + case SVGA3D_DECLUSAGE_BLENDINDICES: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDINDICES\n"); + break; + case SVGA3D_DECLUSAGE_NORMAL: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_NORMAL\n"); + break; + case SVGA3D_DECLUSAGE_PSIZE: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_PSIZE\n"); + break; + case SVGA3D_DECLUSAGE_TEXCOORD: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TEXCOORD\n"); + break; + case SVGA3D_DECLUSAGE_TANGENT: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TANGENT\n"); + break; + case SVGA3D_DECLUSAGE_BINORMAL: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BINORMAL\n"); + break; + case SVGA3D_DECLUSAGE_TESSFACTOR: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TESSFACTOR\n"); + break; + case SVGA3D_DECLUSAGE_POSITIONT: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITIONT\n"); + break; + case SVGA3D_DECLUSAGE_COLOR: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_COLOR\n"); + break; + case SVGA3D_DECLUSAGE_FOG: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_FOG\n"); + break; + case SVGA3D_DECLUSAGE_DEPTH: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_DEPTH\n"); + break; + case SVGA3D_DECLUSAGE_SAMPLE: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_SAMPLE\n"); + break; + case SVGA3D_DECLUSAGE_MAX: + debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_MAX\n"); + break; + default: + debug_printf("\t\t.identity.usage = %i\n", (*cmd).identity.usage); + break; + } + debug_printf("\t\t.identity.usageIndex = %u\n", (*cmd).identity.usageIndex); + debug_printf("\t\t.array.surfaceId = %u\n", (*cmd).array.surfaceId); + debug_printf("\t\t.array.offset = %u\n", (*cmd).array.offset); + debug_printf("\t\t.array.stride = %u\n", (*cmd).array.stride); + debug_printf("\t\t.rangeHint.first = %u\n", (*cmd).rangeHint.first); + debug_printf("\t\t.rangeHint.last = %u\n", (*cmd).rangeHint.last); +} + +static void +dump_SVGA3dTextureState(const SVGA3dTextureState *cmd) +{ + debug_printf("\t\t.stage = %u\n", (*cmd).stage); + switch((*cmd).name) { + case SVGA3D_TS_INVALID: + debug_printf("\t\t.name = SVGA3D_TS_INVALID\n"); + break; + case SVGA3D_TS_BIND_TEXTURE: + debug_printf("\t\t.name = SVGA3D_TS_BIND_TEXTURE\n"); + break; + case SVGA3D_TS_COLOROP: + debug_printf("\t\t.name = SVGA3D_TS_COLOROP\n"); + break; + case SVGA3D_TS_COLORARG1: + debug_printf("\t\t.name = SVGA3D_TS_COLORARG1\n"); + break; + case SVGA3D_TS_COLORARG2: + debug_printf("\t\t.name = SVGA3D_TS_COLORARG2\n"); + break; + case SVGA3D_TS_ALPHAOP: + debug_printf("\t\t.name = SVGA3D_TS_ALPHAOP\n"); + break; + case SVGA3D_TS_ALPHAARG1: + debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG1\n"); + break; + case SVGA3D_TS_ALPHAARG2: + debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG2\n"); + break; + case SVGA3D_TS_ADDRESSU: + debug_printf("\t\t.name = SVGA3D_TS_ADDRESSU\n"); + break; + case SVGA3D_TS_ADDRESSV: + debug_printf("\t\t.name = SVGA3D_TS_ADDRESSV\n"); + break; + case SVGA3D_TS_MIPFILTER: + debug_printf("\t\t.name = SVGA3D_TS_MIPFILTER\n"); + break; + case SVGA3D_TS_MAGFILTER: + debug_printf("\t\t.name = SVGA3D_TS_MAGFILTER\n"); + break; + case SVGA3D_TS_MINFILTER: + debug_printf("\t\t.name = SVGA3D_TS_MINFILTER\n"); + break; + case SVGA3D_TS_BORDERCOLOR: + debug_printf("\t\t.name = SVGA3D_TS_BORDERCOLOR\n"); + break; + case SVGA3D_TS_TEXCOORDINDEX: + debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDINDEX\n"); + break; + case SVGA3D_TS_TEXTURETRANSFORMFLAGS: + debug_printf("\t\t.name = SVGA3D_TS_TEXTURETRANSFORMFLAGS\n"); + break; + case SVGA3D_TS_TEXCOORDGEN: + debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDGEN\n"); + break; + case SVGA3D_TS_BUMPENVMAT00: + debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT00\n"); + break; + case SVGA3D_TS_BUMPENVMAT01: + debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT01\n"); + break; + case SVGA3D_TS_BUMPENVMAT10: + debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT10\n"); + break; + case SVGA3D_TS_BUMPENVMAT11: + debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT11\n"); + break; + case SVGA3D_TS_TEXTURE_MIPMAP_LEVEL: + debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_MIPMAP_LEVEL\n"); + break; + case SVGA3D_TS_TEXTURE_LOD_BIAS: + debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_LOD_BIAS\n"); + break; + case SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL: + debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL\n"); + break; + case SVGA3D_TS_ADDRESSW: + debug_printf("\t\t.name = SVGA3D_TS_ADDRESSW\n"); + break; + case SVGA3D_TS_GAMMA: + debug_printf("\t\t.name = SVGA3D_TS_GAMMA\n"); + break; + case SVGA3D_TS_BUMPENVLSCALE: + debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLSCALE\n"); + break; + case SVGA3D_TS_BUMPENVLOFFSET: + debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLOFFSET\n"); + break; + case SVGA3D_TS_COLORARG0: + debug_printf("\t\t.name = SVGA3D_TS_COLORARG0\n"); + break; + case SVGA3D_TS_ALPHAARG0: + debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG0\n"); + break; + case SVGA3D_TS_MAX: + debug_printf("\t\t.name = SVGA3D_TS_MAX\n"); + break; + default: + debug_printf("\t\t.name = %i\n", (*cmd).name); + break; + } + debug_printf("\t\t.value = %u\n", (*cmd).value); + debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue); +} + +static void +dump_SVGA3dCopyBox(const SVGA3dCopyBox *cmd) +{ + debug_printf("\t\t.x = %u\n", (*cmd).x); + debug_printf("\t\t.y = %u\n", (*cmd).y); + debug_printf("\t\t.z = %u\n", (*cmd).z); + debug_printf("\t\t.w = %u\n", (*cmd).w); + debug_printf("\t\t.h = %u\n", (*cmd).h); + debug_printf("\t\t.d = %u\n", (*cmd).d); + debug_printf("\t\t.srcx = %u\n", (*cmd).srcx); + debug_printf("\t\t.srcy = %u\n", (*cmd).srcy); + debug_printf("\t\t.srcz = %u\n", (*cmd).srcz); +} + +static void +dump_SVGA3dCmdSetClipPlane(const SVGA3dCmdSetClipPlane *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + debug_printf("\t\t.index = %u\n", (*cmd).index); + debug_printf("\t\t.plane[0] = %f\n", (*cmd).plane[0]); + debug_printf("\t\t.plane[1] = %f\n", (*cmd).plane[1]); + debug_printf("\t\t.plane[2] = %f\n", (*cmd).plane[2]); + debug_printf("\t\t.plane[3] = %f\n", (*cmd).plane[3]); +} + +static void +dump_SVGA3dCmdWaitForQuery(const SVGA3dCmdWaitForQuery *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + switch((*cmd).type) { + case SVGA3D_QUERYTYPE_OCCLUSION: + debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n"); + break; + case SVGA3D_QUERYTYPE_MAX: + debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n"); + break; + default: + debug_printf("\t\t.type = %i\n", (*cmd).type); + break; + } + debug_printf("\t\t.guestResult.gmrId = %u\n", (*cmd).guestResult.gmrId); + debug_printf("\t\t.guestResult.offset = %u\n", (*cmd).guestResult.offset); +} + +static void +dump_SVGA3dCmdSetRenderTarget(const SVGA3dCmdSetRenderTarget *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + switch((*cmd).type) { + case SVGA3D_RT_DEPTH: + debug_printf("\t\t.type = SVGA3D_RT_DEPTH\n"); + break; + case SVGA3D_RT_STENCIL: + debug_printf("\t\t.type = SVGA3D_RT_STENCIL\n"); + break; + default: + debug_printf("\t\t.type = SVGA3D_RT_COLOR%u\n", (*cmd).type - SVGA3D_RT_COLOR0); + break; + } + debug_printf("\t\t.target.sid = %u\n", (*cmd).target.sid); + debug_printf("\t\t.target.face = %u\n", (*cmd).target.face); + debug_printf("\t\t.target.mipmap = %u\n", (*cmd).target.mipmap); +} + +static void +dump_SVGA3dCmdSetTextureState(const SVGA3dCmdSetTextureState *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); +} + +static void +dump_SVGA3dCmdSurfaceCopy(const SVGA3dCmdSurfaceCopy *cmd) +{ + debug_printf("\t\t.src.sid = %u\n", (*cmd).src.sid); + debug_printf("\t\t.src.face = %u\n", (*cmd).src.face); + debug_printf("\t\t.src.mipmap = %u\n", (*cmd).src.mipmap); + debug_printf("\t\t.dest.sid = %u\n", (*cmd).dest.sid); + debug_printf("\t\t.dest.face = %u\n", (*cmd).dest.face); + debug_printf("\t\t.dest.mipmap = %u\n", (*cmd).dest.mipmap); +} + +static void +dump_SVGA3dCmdSetMaterial(const SVGA3dCmdSetMaterial *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + switch((*cmd).face) { + case SVGA3D_FACE_INVALID: + debug_printf("\t\t.face = SVGA3D_FACE_INVALID\n"); + break; + case SVGA3D_FACE_NONE: + debug_printf("\t\t.face = SVGA3D_FACE_NONE\n"); + break; + case SVGA3D_FACE_FRONT: + debug_printf("\t\t.face = SVGA3D_FACE_FRONT\n"); + break; + case SVGA3D_FACE_BACK: + debug_printf("\t\t.face = SVGA3D_FACE_BACK\n"); + break; + case SVGA3D_FACE_FRONT_BACK: + debug_printf("\t\t.face = SVGA3D_FACE_FRONT_BACK\n"); + break; + case SVGA3D_FACE_MAX: + debug_printf("\t\t.face = SVGA3D_FACE_MAX\n"); + break; + default: + debug_printf("\t\t.face = %i\n", (*cmd).face); + break; + } + debug_printf("\t\t.material.diffuse[0] = %f\n", (*cmd).material.diffuse[0]); + debug_printf("\t\t.material.diffuse[1] = %f\n", (*cmd).material.diffuse[1]); + debug_printf("\t\t.material.diffuse[2] = %f\n", (*cmd).material.diffuse[2]); + debug_printf("\t\t.material.diffuse[3] = %f\n", (*cmd).material.diffuse[3]); + debug_printf("\t\t.material.ambient[0] = %f\n", (*cmd).material.ambient[0]); + debug_printf("\t\t.material.ambient[1] = %f\n", (*cmd).material.ambient[1]); + debug_printf("\t\t.material.ambient[2] = %f\n", (*cmd).material.ambient[2]); + debug_printf("\t\t.material.ambient[3] = %f\n", (*cmd).material.ambient[3]); + debug_printf("\t\t.material.specular[0] = %f\n", (*cmd).material.specular[0]); + debug_printf("\t\t.material.specular[1] = %f\n", (*cmd).material.specular[1]); + debug_printf("\t\t.material.specular[2] = %f\n", (*cmd).material.specular[2]); + debug_printf("\t\t.material.specular[3] = %f\n", (*cmd).material.specular[3]); + debug_printf("\t\t.material.emissive[0] = %f\n", (*cmd).material.emissive[0]); + debug_printf("\t\t.material.emissive[1] = %f\n", (*cmd).material.emissive[1]); + debug_printf("\t\t.material.emissive[2] = %f\n", (*cmd).material.emissive[2]); + debug_printf("\t\t.material.emissive[3] = %f\n", (*cmd).material.emissive[3]); + debug_printf("\t\t.material.shininess = %f\n", (*cmd).material.shininess); +} + +static void +dump_SVGA3dCmdSetLightData(const SVGA3dCmdSetLightData *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + debug_printf("\t\t.index = %u\n", (*cmd).index); + switch((*cmd).data.type) { + case SVGA3D_LIGHTTYPE_INVALID: + debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_INVALID\n"); + break; + case SVGA3D_LIGHTTYPE_POINT: + debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_POINT\n"); + break; + case SVGA3D_LIGHTTYPE_SPOT1: + debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT1\n"); + break; + case SVGA3D_LIGHTTYPE_SPOT2: + debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT2\n"); + break; + case SVGA3D_LIGHTTYPE_DIRECTIONAL: + debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_DIRECTIONAL\n"); + break; + case SVGA3D_LIGHTTYPE_MAX: + debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_MAX\n"); + break; + default: + debug_printf("\t\t.data.type = %i\n", (*cmd).data.type); + break; + } + debug_printf("\t\t.data.inWorldSpace = %u\n", (*cmd).data.inWorldSpace); + debug_printf("\t\t.data.diffuse[0] = %f\n", (*cmd).data.diffuse[0]); + debug_printf("\t\t.data.diffuse[1] = %f\n", (*cmd).data.diffuse[1]); + debug_printf("\t\t.data.diffuse[2] = %f\n", (*cmd).data.diffuse[2]); + debug_printf("\t\t.data.diffuse[3] = %f\n", (*cmd).data.diffuse[3]); + debug_printf("\t\t.data.specular[0] = %f\n", (*cmd).data.specular[0]); + debug_printf("\t\t.data.specular[1] = %f\n", (*cmd).data.specular[1]); + debug_printf("\t\t.data.specular[2] = %f\n", (*cmd).data.specular[2]); + debug_printf("\t\t.data.specular[3] = %f\n", (*cmd).data.specular[3]); + debug_printf("\t\t.data.ambient[0] = %f\n", (*cmd).data.ambient[0]); + debug_printf("\t\t.data.ambient[1] = %f\n", (*cmd).data.ambient[1]); + debug_printf("\t\t.data.ambient[2] = %f\n", (*cmd).data.ambient[2]); + debug_printf("\t\t.data.ambient[3] = %f\n", (*cmd).data.ambient[3]); + debug_printf("\t\t.data.position[0] = %f\n", (*cmd).data.position[0]); + debug_printf("\t\t.data.position[1] = %f\n", (*cmd).data.position[1]); + debug_printf("\t\t.data.position[2] = %f\n", (*cmd).data.position[2]); + debug_printf("\t\t.data.position[3] = %f\n", (*cmd).data.position[3]); + debug_printf("\t\t.data.direction[0] = %f\n", (*cmd).data.direction[0]); + debug_printf("\t\t.data.direction[1] = %f\n", (*cmd).data.direction[1]); + debug_printf("\t\t.data.direction[2] = %f\n", (*cmd).data.direction[2]); + debug_printf("\t\t.data.direction[3] = %f\n", (*cmd).data.direction[3]); + debug_printf("\t\t.data.range = %f\n", (*cmd).data.range); + debug_printf("\t\t.data.falloff = %f\n", (*cmd).data.falloff); + debug_printf("\t\t.data.attenuation0 = %f\n", (*cmd).data.attenuation0); + debug_printf("\t\t.data.attenuation1 = %f\n", (*cmd).data.attenuation1); + debug_printf("\t\t.data.attenuation2 = %f\n", (*cmd).data.attenuation2); + debug_printf("\t\t.data.theta = %f\n", (*cmd).data.theta); + debug_printf("\t\t.data.phi = %f\n", (*cmd).data.phi); +} + +static void +dump_SVGA3dCmdSetViewport(const SVGA3dCmdSetViewport *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + debug_printf("\t\t.rect.x = %u\n", (*cmd).rect.x); + debug_printf("\t\t.rect.y = %u\n", (*cmd).rect.y); + debug_printf("\t\t.rect.w = %u\n", (*cmd).rect.w); + debug_printf("\t\t.rect.h = %u\n", (*cmd).rect.h); +} + +static void +dump_SVGA3dCmdSetScissorRect(const SVGA3dCmdSetScissorRect *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + debug_printf("\t\t.rect.x = %u\n", (*cmd).rect.x); + debug_printf("\t\t.rect.y = %u\n", (*cmd).rect.y); + debug_printf("\t\t.rect.w = %u\n", (*cmd).rect.w); + debug_printf("\t\t.rect.h = %u\n", (*cmd).rect.h); +} + +static void +dump_SVGA3dCopyRect(const SVGA3dCopyRect *cmd) +{ + debug_printf("\t\t.x = %u\n", (*cmd).x); + debug_printf("\t\t.y = %u\n", (*cmd).y); + debug_printf("\t\t.w = %u\n", (*cmd).w); + debug_printf("\t\t.h = %u\n", (*cmd).h); + debug_printf("\t\t.srcx = %u\n", (*cmd).srcx); + debug_printf("\t\t.srcy = %u\n", (*cmd).srcy); +} + +static void +dump_SVGA3dCmdSetShader(const SVGA3dCmdSetShader *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + switch((*cmd).type) { + case SVGA3D_SHADERTYPE_COMPILED_DX8: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); + break; + case SVGA3D_SHADERTYPE_VS: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); + break; + case SVGA3D_SHADERTYPE_PS: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); + break; + case SVGA3D_SHADERTYPE_MAX: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); + break; + default: + debug_printf("\t\t.type = %i\n", (*cmd).type); + break; + } + debug_printf("\t\t.shid = %u\n", (*cmd).shid); +} + +static void +dump_SVGA3dCmdEndQuery(const SVGA3dCmdEndQuery *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + switch((*cmd).type) { + case SVGA3D_QUERYTYPE_OCCLUSION: + debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n"); + break; + case SVGA3D_QUERYTYPE_MAX: + debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n"); + break; + default: + debug_printf("\t\t.type = %i\n", (*cmd).type); + break; + } + debug_printf("\t\t.guestResult.gmrId = %u\n", (*cmd).guestResult.gmrId); + debug_printf("\t\t.guestResult.offset = %u\n", (*cmd).guestResult.offset); +} + +static void +dump_SVGA3dSize(const SVGA3dSize *cmd) +{ + debug_printf("\t\t.width = %u\n", (*cmd).width); + debug_printf("\t\t.height = %u\n", (*cmd).height); + debug_printf("\t\t.depth = %u\n", (*cmd).depth); +} + +static void +dump_SVGA3dCmdDestroySurface(const SVGA3dCmdDestroySurface *cmd) +{ + debug_printf("\t\t.sid = %u\n", (*cmd).sid); +} + +static void +dump_SVGA3dCmdDefineContext(const SVGA3dCmdDefineContext *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); +} + +static void +dump_SVGA3dRect(const SVGA3dRect *cmd) +{ + debug_printf("\t\t.x = %u\n", (*cmd).x); + debug_printf("\t\t.y = %u\n", (*cmd).y); + debug_printf("\t\t.w = %u\n", (*cmd).w); + debug_printf("\t\t.h = %u\n", (*cmd).h); +} + +static void +dump_SVGA3dCmdBeginQuery(const SVGA3dCmdBeginQuery *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + switch((*cmd).type) { + case SVGA3D_QUERYTYPE_OCCLUSION: + debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n"); + break; + case SVGA3D_QUERYTYPE_MAX: + debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n"); + break; + default: + debug_printf("\t\t.type = %i\n", (*cmd).type); + break; + } +} + +static void +dump_SVGA3dRenderState(const SVGA3dRenderState *cmd) +{ + switch((*cmd).state) { + case SVGA3D_RS_INVALID: + debug_printf("\t\t.state = SVGA3D_RS_INVALID\n"); + break; + case SVGA3D_RS_ZENABLE: + debug_printf("\t\t.state = SVGA3D_RS_ZENABLE\n"); + break; + case SVGA3D_RS_ZWRITEENABLE: + debug_printf("\t\t.state = SVGA3D_RS_ZWRITEENABLE\n"); + break; + case SVGA3D_RS_ALPHATESTENABLE: + debug_printf("\t\t.state = SVGA3D_RS_ALPHATESTENABLE\n"); + break; + case SVGA3D_RS_DITHERENABLE: + debug_printf("\t\t.state = SVGA3D_RS_DITHERENABLE\n"); + break; + case SVGA3D_RS_BLENDENABLE: + debug_printf("\t\t.state = SVGA3D_RS_BLENDENABLE\n"); + break; + case SVGA3D_RS_FOGENABLE: + debug_printf("\t\t.state = SVGA3D_RS_FOGENABLE\n"); + break; + case SVGA3D_RS_SPECULARENABLE: + debug_printf("\t\t.state = SVGA3D_RS_SPECULARENABLE\n"); + break; + case SVGA3D_RS_STENCILENABLE: + debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE\n"); + break; + case SVGA3D_RS_LIGHTINGENABLE: + debug_printf("\t\t.state = SVGA3D_RS_LIGHTINGENABLE\n"); + break; + case SVGA3D_RS_NORMALIZENORMALS: + debug_printf("\t\t.state = SVGA3D_RS_NORMALIZENORMALS\n"); + break; + case SVGA3D_RS_POINTSPRITEENABLE: + debug_printf("\t\t.state = SVGA3D_RS_POINTSPRITEENABLE\n"); + break; + case SVGA3D_RS_POINTSCALEENABLE: + debug_printf("\t\t.state = SVGA3D_RS_POINTSCALEENABLE\n"); + break; + case SVGA3D_RS_STENCILREF: + debug_printf("\t\t.state = SVGA3D_RS_STENCILREF\n"); + break; + case SVGA3D_RS_STENCILMASK: + debug_printf("\t\t.state = SVGA3D_RS_STENCILMASK\n"); + break; + case SVGA3D_RS_STENCILWRITEMASK: + debug_printf("\t\t.state = SVGA3D_RS_STENCILWRITEMASK\n"); + break; + case SVGA3D_RS_FOGSTART: + debug_printf("\t\t.state = SVGA3D_RS_FOGSTART\n"); + break; + case SVGA3D_RS_FOGEND: + debug_printf("\t\t.state = SVGA3D_RS_FOGEND\n"); + break; + case SVGA3D_RS_FOGDENSITY: + debug_printf("\t\t.state = SVGA3D_RS_FOGDENSITY\n"); + break; + case SVGA3D_RS_POINTSIZE: + debug_printf("\t\t.state = SVGA3D_RS_POINTSIZE\n"); + break; + case SVGA3D_RS_POINTSIZEMIN: + debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMIN\n"); + break; + case SVGA3D_RS_POINTSIZEMAX: + debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMAX\n"); + break; + case SVGA3D_RS_POINTSCALE_A: + debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_A\n"); + break; + case SVGA3D_RS_POINTSCALE_B: + debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_B\n"); + break; + case SVGA3D_RS_POINTSCALE_C: + debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_C\n"); + break; + case SVGA3D_RS_FOGCOLOR: + debug_printf("\t\t.state = SVGA3D_RS_FOGCOLOR\n"); + break; + case SVGA3D_RS_AMBIENT: + debug_printf("\t\t.state = SVGA3D_RS_AMBIENT\n"); + break; + case SVGA3D_RS_CLIPPLANEENABLE: + debug_printf("\t\t.state = SVGA3D_RS_CLIPPLANEENABLE\n"); + break; + case SVGA3D_RS_FOGMODE: + debug_printf("\t\t.state = SVGA3D_RS_FOGMODE\n"); + break; + case SVGA3D_RS_FILLMODE: + debug_printf("\t\t.state = SVGA3D_RS_FILLMODE\n"); + break; + case SVGA3D_RS_SHADEMODE: + debug_printf("\t\t.state = SVGA3D_RS_SHADEMODE\n"); + break; + case SVGA3D_RS_LINEPATTERN: + debug_printf("\t\t.state = SVGA3D_RS_LINEPATTERN\n"); + break; + case SVGA3D_RS_SRCBLEND: + debug_printf("\t\t.state = SVGA3D_RS_SRCBLEND\n"); + break; + case SVGA3D_RS_DSTBLEND: + debug_printf("\t\t.state = SVGA3D_RS_DSTBLEND\n"); + break; + case SVGA3D_RS_BLENDEQUATION: + debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATION\n"); + break; + case SVGA3D_RS_CULLMODE: + debug_printf("\t\t.state = SVGA3D_RS_CULLMODE\n"); + break; + case SVGA3D_RS_ZFUNC: + debug_printf("\t\t.state = SVGA3D_RS_ZFUNC\n"); + break; + case SVGA3D_RS_ALPHAFUNC: + debug_printf("\t\t.state = SVGA3D_RS_ALPHAFUNC\n"); + break; + case SVGA3D_RS_STENCILFUNC: + debug_printf("\t\t.state = SVGA3D_RS_STENCILFUNC\n"); + break; + case SVGA3D_RS_STENCILFAIL: + debug_printf("\t\t.state = SVGA3D_RS_STENCILFAIL\n"); + break; + case SVGA3D_RS_STENCILZFAIL: + debug_printf("\t\t.state = SVGA3D_RS_STENCILZFAIL\n"); + break; + case SVGA3D_RS_STENCILPASS: + debug_printf("\t\t.state = SVGA3D_RS_STENCILPASS\n"); + break; + case SVGA3D_RS_ALPHAREF: + debug_printf("\t\t.state = SVGA3D_RS_ALPHAREF\n"); + break; + case SVGA3D_RS_FRONTWINDING: + debug_printf("\t\t.state = SVGA3D_RS_FRONTWINDING\n"); + break; + case SVGA3D_RS_COORDINATETYPE: + debug_printf("\t\t.state = SVGA3D_RS_COORDINATETYPE\n"); + break; + case SVGA3D_RS_ZBIAS: + debug_printf("\t\t.state = SVGA3D_RS_ZBIAS\n"); + break; + case SVGA3D_RS_RANGEFOGENABLE: + debug_printf("\t\t.state = SVGA3D_RS_RANGEFOGENABLE\n"); + break; + case SVGA3D_RS_COLORWRITEENABLE: + debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE\n"); + break; + case SVGA3D_RS_VERTEXMATERIALENABLE: + debug_printf("\t\t.state = SVGA3D_RS_VERTEXMATERIALENABLE\n"); + break; + case SVGA3D_RS_DIFFUSEMATERIALSOURCE: + debug_printf("\t\t.state = SVGA3D_RS_DIFFUSEMATERIALSOURCE\n"); + break; + case SVGA3D_RS_SPECULARMATERIALSOURCE: + debug_printf("\t\t.state = SVGA3D_RS_SPECULARMATERIALSOURCE\n"); + break; + case SVGA3D_RS_AMBIENTMATERIALSOURCE: + debug_printf("\t\t.state = SVGA3D_RS_AMBIENTMATERIALSOURCE\n"); + break; + case SVGA3D_RS_EMISSIVEMATERIALSOURCE: + debug_printf("\t\t.state = SVGA3D_RS_EMISSIVEMATERIALSOURCE\n"); + break; + case SVGA3D_RS_TEXTUREFACTOR: + debug_printf("\t\t.state = SVGA3D_RS_TEXTUREFACTOR\n"); + break; + case SVGA3D_RS_LOCALVIEWER: + debug_printf("\t\t.state = SVGA3D_RS_LOCALVIEWER\n"); + break; + case SVGA3D_RS_SCISSORTESTENABLE: + debug_printf("\t\t.state = SVGA3D_RS_SCISSORTESTENABLE\n"); + break; + case SVGA3D_RS_BLENDCOLOR: + debug_printf("\t\t.state = SVGA3D_RS_BLENDCOLOR\n"); + break; + case SVGA3D_RS_STENCILENABLE2SIDED: + debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE2SIDED\n"); + break; + case SVGA3D_RS_CCWSTENCILFUNC: + debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFUNC\n"); + break; + case SVGA3D_RS_CCWSTENCILFAIL: + debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFAIL\n"); + break; + case SVGA3D_RS_CCWSTENCILZFAIL: + debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILZFAIL\n"); + break; + case SVGA3D_RS_CCWSTENCILPASS: + debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILPASS\n"); + break; + case SVGA3D_RS_VERTEXBLEND: + debug_printf("\t\t.state = SVGA3D_RS_VERTEXBLEND\n"); + break; + case SVGA3D_RS_SLOPESCALEDEPTHBIAS: + debug_printf("\t\t.state = SVGA3D_RS_SLOPESCALEDEPTHBIAS\n"); + break; + case SVGA3D_RS_DEPTHBIAS: + debug_printf("\t\t.state = SVGA3D_RS_DEPTHBIAS\n"); + break; + case SVGA3D_RS_OUTPUTGAMMA: + debug_printf("\t\t.state = SVGA3D_RS_OUTPUTGAMMA\n"); + break; + case SVGA3D_RS_ZVISIBLE: + debug_printf("\t\t.state = SVGA3D_RS_ZVISIBLE\n"); + break; + case SVGA3D_RS_LASTPIXEL: + debug_printf("\t\t.state = SVGA3D_RS_LASTPIXEL\n"); + break; + case SVGA3D_RS_CLIPPING: + debug_printf("\t\t.state = SVGA3D_RS_CLIPPING\n"); + break; + case SVGA3D_RS_WRAP0: + debug_printf("\t\t.state = SVGA3D_RS_WRAP0\n"); + break; + case SVGA3D_RS_WRAP1: + debug_printf("\t\t.state = SVGA3D_RS_WRAP1\n"); + break; + case SVGA3D_RS_WRAP2: + debug_printf("\t\t.state = SVGA3D_RS_WRAP2\n"); + break; + case SVGA3D_RS_WRAP3: + debug_printf("\t\t.state = SVGA3D_RS_WRAP3\n"); + break; + case SVGA3D_RS_WRAP4: + debug_printf("\t\t.state = SVGA3D_RS_WRAP4\n"); + break; + case SVGA3D_RS_WRAP5: + debug_printf("\t\t.state = SVGA3D_RS_WRAP5\n"); + break; + case SVGA3D_RS_WRAP6: + debug_printf("\t\t.state = SVGA3D_RS_WRAP6\n"); + break; + case SVGA3D_RS_WRAP7: + debug_printf("\t\t.state = SVGA3D_RS_WRAP7\n"); + break; + case SVGA3D_RS_WRAP8: + debug_printf("\t\t.state = SVGA3D_RS_WRAP8\n"); + break; + case SVGA3D_RS_WRAP9: + debug_printf("\t\t.state = SVGA3D_RS_WRAP9\n"); + break; + case SVGA3D_RS_WRAP10: + debug_printf("\t\t.state = SVGA3D_RS_WRAP10\n"); + break; + case SVGA3D_RS_WRAP11: + debug_printf("\t\t.state = SVGA3D_RS_WRAP11\n"); + break; + case SVGA3D_RS_WRAP12: + debug_printf("\t\t.state = SVGA3D_RS_WRAP12\n"); + break; + case SVGA3D_RS_WRAP13: + debug_printf("\t\t.state = SVGA3D_RS_WRAP13\n"); + break; + case SVGA3D_RS_WRAP14: + debug_printf("\t\t.state = SVGA3D_RS_WRAP14\n"); + break; + case SVGA3D_RS_WRAP15: + debug_printf("\t\t.state = SVGA3D_RS_WRAP15\n"); + break; + case SVGA3D_RS_MULTISAMPLEANTIALIAS: + debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEANTIALIAS\n"); + break; + case SVGA3D_RS_MULTISAMPLEMASK: + debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEMASK\n"); + break; + case SVGA3D_RS_INDEXEDVERTEXBLENDENABLE: + debug_printf("\t\t.state = SVGA3D_RS_INDEXEDVERTEXBLENDENABLE\n"); + break; + case SVGA3D_RS_TWEENFACTOR: + debug_printf("\t\t.state = SVGA3D_RS_TWEENFACTOR\n"); + break; + case SVGA3D_RS_ANTIALIASEDLINEENABLE: + debug_printf("\t\t.state = SVGA3D_RS_ANTIALIASEDLINEENABLE\n"); + break; + case SVGA3D_RS_COLORWRITEENABLE1: + debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE1\n"); + break; + case SVGA3D_RS_COLORWRITEENABLE2: + debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE2\n"); + break; + case SVGA3D_RS_COLORWRITEENABLE3: + debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE3\n"); + break; + case SVGA3D_RS_SEPARATEALPHABLENDENABLE: + debug_printf("\t\t.state = SVGA3D_RS_SEPARATEALPHABLENDENABLE\n"); + break; + case SVGA3D_RS_SRCBLENDALPHA: + debug_printf("\t\t.state = SVGA3D_RS_SRCBLENDALPHA\n"); + break; + case SVGA3D_RS_DSTBLENDALPHA: + debug_printf("\t\t.state = SVGA3D_RS_DSTBLENDALPHA\n"); + break; + case SVGA3D_RS_BLENDEQUATIONALPHA: + debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATIONALPHA\n"); + break; + case SVGA3D_RS_MAX: + debug_printf("\t\t.state = SVGA3D_RS_MAX\n"); + break; + default: + debug_printf("\t\t.state = %i\n", (*cmd).state); + break; + } + debug_printf("\t\t.uintValue = %u\n", (*cmd).uintValue); + debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue); +} + +static void +dump_SVGA3dVertexDivisor(const SVGA3dVertexDivisor *cmd) +{ + debug_printf("\t\t.value = %u\n", (*cmd).value); + debug_printf("\t\t.count = %u\n", (*cmd).count); + debug_printf("\t\t.indexedData = %u\n", (*cmd).indexedData); + debug_printf("\t\t.instanceData = %u\n", (*cmd).instanceData); +} + +static void +dump_SVGA3dCmdDefineShader(const SVGA3dCmdDefineShader *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + debug_printf("\t\t.shid = %u\n", (*cmd).shid); + switch((*cmd).type) { + case SVGA3D_SHADERTYPE_COMPILED_DX8: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); + break; + case SVGA3D_SHADERTYPE_VS: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); + break; + case SVGA3D_SHADERTYPE_PS: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); + break; + case SVGA3D_SHADERTYPE_MAX: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); + break; + default: + debug_printf("\t\t.type = %i\n", (*cmd).type); + break; + } +} + +static void +dump_SVGA3dCmdSetShaderConst(const SVGA3dCmdSetShaderConst *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + debug_printf("\t\t.reg = %u\n", (*cmd).reg); + switch((*cmd).type) { + case SVGA3D_SHADERTYPE_COMPILED_DX8: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); + break; + case SVGA3D_SHADERTYPE_VS: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); + break; + case SVGA3D_SHADERTYPE_PS: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); + break; + case SVGA3D_SHADERTYPE_MAX: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); + break; + default: + debug_printf("\t\t.type = %i\n", (*cmd).type); + break; + } + switch((*cmd).ctype) { + case SVGA3D_CONST_TYPE_FLOAT: + debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_FLOAT\n"); + debug_printf("\t\t.values[0] = %f\n", *(const float *)&(*cmd).values[0]); + debug_printf("\t\t.values[1] = %f\n", *(const float *)&(*cmd).values[1]); + debug_printf("\t\t.values[2] = %f\n", *(const float *)&(*cmd).values[2]); + debug_printf("\t\t.values[3] = %f\n", *(const float *)&(*cmd).values[3]); + break; + case SVGA3D_CONST_TYPE_INT: + debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_INT\n"); + debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]); + debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]); + debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]); + debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]); + break; + case SVGA3D_CONST_TYPE_BOOL: + debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_BOOL\n"); + debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]); + debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]); + debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]); + debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]); + break; + default: + debug_printf("\t\t.ctype = %i\n", (*cmd).ctype); + debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]); + debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]); + debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]); + debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]); + break; + } +} + +static void +dump_SVGA3dCmdSetZRange(const SVGA3dCmdSetZRange *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + debug_printf("\t\t.zRange.min = %f\n", (*cmd).zRange.min); + debug_printf("\t\t.zRange.max = %f\n", (*cmd).zRange.max); +} + +static void +dump_SVGA3dCmdDrawPrimitives(const SVGA3dCmdDrawPrimitives *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + debug_printf("\t\t.numVertexDecls = %u\n", (*cmd).numVertexDecls); + debug_printf("\t\t.numRanges = %u\n", (*cmd).numRanges); +} + +static void +dump_SVGA3dCmdSetLightEnabled(const SVGA3dCmdSetLightEnabled *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + debug_printf("\t\t.index = %u\n", (*cmd).index); + debug_printf("\t\t.enabled = %u\n", (*cmd).enabled); +} + +static void +dump_SVGA3dPrimitiveRange(const SVGA3dPrimitiveRange *cmd) +{ + switch((*cmd).primType) { + case SVGA3D_PRIMITIVE_INVALID: + debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_INVALID\n"); + break; + case SVGA3D_PRIMITIVE_TRIANGLELIST: + debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLELIST\n"); + break; + case SVGA3D_PRIMITIVE_POINTLIST: + debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_POINTLIST\n"); + break; + case SVGA3D_PRIMITIVE_LINELIST: + debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINELIST\n"); + break; + case SVGA3D_PRIMITIVE_LINESTRIP: + debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINESTRIP\n"); + break; + case SVGA3D_PRIMITIVE_TRIANGLESTRIP: + debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLESTRIP\n"); + break; + case SVGA3D_PRIMITIVE_TRIANGLEFAN: + debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLEFAN\n"); + break; + case SVGA3D_PRIMITIVE_MAX: + debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_MAX\n"); + break; + default: + debug_printf("\t\t.primType = %i\n", (*cmd).primType); + break; + } + debug_printf("\t\t.primitiveCount = %u\n", (*cmd).primitiveCount); + debug_printf("\t\t.indexArray.surfaceId = %u\n", (*cmd).indexArray.surfaceId); + debug_printf("\t\t.indexArray.offset = %u\n", (*cmd).indexArray.offset); + debug_printf("\t\t.indexArray.stride = %u\n", (*cmd).indexArray.stride); + debug_printf("\t\t.indexWidth = %u\n", (*cmd).indexWidth); + debug_printf("\t\t.indexBias = %i\n", (*cmd).indexBias); +} + +static void +dump_SVGA3dCmdPresent(const SVGA3dCmdPresent *cmd) +{ + debug_printf("\t\t.sid = %u\n", (*cmd).sid); +} + +static void +dump_SVGA3dCmdSetRenderState(const SVGA3dCmdSetRenderState *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); +} + +static void +dump_SVGA3dCmdSurfaceStretchBlt(const SVGA3dCmdSurfaceStretchBlt *cmd) +{ + debug_printf("\t\t.src.sid = %u\n", (*cmd).src.sid); + debug_printf("\t\t.src.face = %u\n", (*cmd).src.face); + debug_printf("\t\t.src.mipmap = %u\n", (*cmd).src.mipmap); + debug_printf("\t\t.dest.sid = %u\n", (*cmd).dest.sid); + debug_printf("\t\t.dest.face = %u\n", (*cmd).dest.face); + debug_printf("\t\t.dest.mipmap = %u\n", (*cmd).dest.mipmap); + debug_printf("\t\t.boxSrc.x = %u\n", (*cmd).boxSrc.x); + debug_printf("\t\t.boxSrc.y = %u\n", (*cmd).boxSrc.y); + debug_printf("\t\t.boxSrc.z = %u\n", (*cmd).boxSrc.z); + debug_printf("\t\t.boxSrc.w = %u\n", (*cmd).boxSrc.w); + debug_printf("\t\t.boxSrc.h = %u\n", (*cmd).boxSrc.h); + debug_printf("\t\t.boxSrc.d = %u\n", (*cmd).boxSrc.d); + debug_printf("\t\t.boxDest.x = %u\n", (*cmd).boxDest.x); + debug_printf("\t\t.boxDest.y = %u\n", (*cmd).boxDest.y); + debug_printf("\t\t.boxDest.z = %u\n", (*cmd).boxDest.z); + debug_printf("\t\t.boxDest.w = %u\n", (*cmd).boxDest.w); + debug_printf("\t\t.boxDest.h = %u\n", (*cmd).boxDest.h); + debug_printf("\t\t.boxDest.d = %u\n", (*cmd).boxDest.d); + switch((*cmd).mode) { + case SVGA3D_STRETCH_BLT_POINT: + debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_POINT\n"); + break; + case SVGA3D_STRETCH_BLT_LINEAR: + debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_LINEAR\n"); + break; + case SVGA3D_STRETCH_BLT_MAX: + debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_MAX\n"); + break; + default: + debug_printf("\t\t.mode = %i\n", (*cmd).mode); + break; + } +} + +static void +dump_SVGA3dCmdSurfaceDMA(const SVGA3dCmdSurfaceDMA *cmd) +{ + debug_printf("\t\t.guest.ptr.gmrId = %u\n", (*cmd).guest.ptr.gmrId); + debug_printf("\t\t.guest.ptr.offset = %u\n", (*cmd).guest.ptr.offset); + debug_printf("\t\t.guest.pitch = %u\n", (*cmd).guest.pitch); + debug_printf("\t\t.host.sid = %u\n", (*cmd).host.sid); + debug_printf("\t\t.host.face = %u\n", (*cmd).host.face); + debug_printf("\t\t.host.mipmap = %u\n", (*cmd).host.mipmap); + switch((*cmd).transfer) { + case SVGA3D_WRITE_HOST_VRAM: + debug_printf("\t\t.transfer = SVGA3D_WRITE_HOST_VRAM\n"); + break; + case SVGA3D_READ_HOST_VRAM: + debug_printf("\t\t.transfer = SVGA3D_READ_HOST_VRAM\n"); + break; + default: + debug_printf("\t\t.transfer = %i\n", (*cmd).transfer); + break; + } +} + +static void +dump_SVGA3dCmdSurfaceDMASuffix(const SVGA3dCmdSurfaceDMASuffix *cmd) +{ + debug_printf("\t\t.suffixSize = %u\n", (*cmd).suffixSize); + debug_printf("\t\t.maximumOffset = %u\n", (*cmd).maximumOffset); + debug_printf("\t\t.flags.discard = %u\n", (*cmd).flags.discard); + debug_printf("\t\t.flags.unsynchronized = %u\n", (*cmd).flags.unsynchronized); +} + +static void +dump_SVGA3dCmdSetTransform(const SVGA3dCmdSetTransform *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + switch((*cmd).type) { + case SVGA3D_TRANSFORM_INVALID: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_INVALID\n"); + break; + case SVGA3D_TRANSFORM_WORLD: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD\n"); + break; + case SVGA3D_TRANSFORM_VIEW: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_VIEW\n"); + break; + case SVGA3D_TRANSFORM_PROJECTION: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_PROJECTION\n"); + break; + case SVGA3D_TRANSFORM_TEXTURE0: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE0\n"); + break; + case SVGA3D_TRANSFORM_TEXTURE1: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE1\n"); + break; + case SVGA3D_TRANSFORM_TEXTURE2: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE2\n"); + break; + case SVGA3D_TRANSFORM_TEXTURE3: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE3\n"); + break; + case SVGA3D_TRANSFORM_TEXTURE4: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE4\n"); + break; + case SVGA3D_TRANSFORM_TEXTURE5: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE5\n"); + break; + case SVGA3D_TRANSFORM_TEXTURE6: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE6\n"); + break; + case SVGA3D_TRANSFORM_TEXTURE7: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE7\n"); + break; + case SVGA3D_TRANSFORM_WORLD1: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD1\n"); + break; + case SVGA3D_TRANSFORM_WORLD2: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD2\n"); + break; + case SVGA3D_TRANSFORM_WORLD3: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD3\n"); + break; + case SVGA3D_TRANSFORM_MAX: + debug_printf("\t\t.type = SVGA3D_TRANSFORM_MAX\n"); + break; + default: + debug_printf("\t\t.type = %i\n", (*cmd).type); + break; + } + debug_printf("\t\t.matrix[0] = %f\n", (*cmd).matrix[0]); + debug_printf("\t\t.matrix[1] = %f\n", (*cmd).matrix[1]); + debug_printf("\t\t.matrix[2] = %f\n", (*cmd).matrix[2]); + debug_printf("\t\t.matrix[3] = %f\n", (*cmd).matrix[3]); + debug_printf("\t\t.matrix[4] = %f\n", (*cmd).matrix[4]); + debug_printf("\t\t.matrix[5] = %f\n", (*cmd).matrix[5]); + debug_printf("\t\t.matrix[6] = %f\n", (*cmd).matrix[6]); + debug_printf("\t\t.matrix[7] = %f\n", (*cmd).matrix[7]); + debug_printf("\t\t.matrix[8] = %f\n", (*cmd).matrix[8]); + debug_printf("\t\t.matrix[9] = %f\n", (*cmd).matrix[9]); + debug_printf("\t\t.matrix[10] = %f\n", (*cmd).matrix[10]); + debug_printf("\t\t.matrix[11] = %f\n", (*cmd).matrix[11]); + debug_printf("\t\t.matrix[12] = %f\n", (*cmd).matrix[12]); + debug_printf("\t\t.matrix[13] = %f\n", (*cmd).matrix[13]); + debug_printf("\t\t.matrix[14] = %f\n", (*cmd).matrix[14]); + debug_printf("\t\t.matrix[15] = %f\n", (*cmd).matrix[15]); +} + +static void +dump_SVGA3dCmdDestroyShader(const SVGA3dCmdDestroyShader *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + debug_printf("\t\t.shid = %u\n", (*cmd).shid); + switch((*cmd).type) { + case SVGA3D_SHADERTYPE_COMPILED_DX8: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); + break; + case SVGA3D_SHADERTYPE_VS: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); + break; + case SVGA3D_SHADERTYPE_PS: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); + break; + case SVGA3D_SHADERTYPE_MAX: + debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); + break; + default: + debug_printf("\t\t.type = %i\n", (*cmd).type); + break; + } +} + +static void +dump_SVGA3dCmdDestroyContext(const SVGA3dCmdDestroyContext *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); +} + +static void +dump_SVGA3dCmdClear(const SVGA3dCmdClear *cmd) +{ + debug_printf("\t\t.cid = %u\n", (*cmd).cid); + switch((*cmd).clearFlag) { + case SVGA3D_CLEAR_COLOR: + debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_COLOR\n"); + break; + case SVGA3D_CLEAR_DEPTH: + debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_DEPTH\n"); + break; + case SVGA3D_CLEAR_STENCIL: + debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_STENCIL\n"); + break; + default: + debug_printf("\t\t.clearFlag = %i\n", (*cmd).clearFlag); + break; + } + debug_printf("\t\t.color = %u\n", (*cmd).color); + debug_printf("\t\t.depth = %f\n", (*cmd).depth); + debug_printf("\t\t.stencil = %u\n", (*cmd).stencil); +} + +static void +dump_SVGA3dCmdDefineSurface(const SVGA3dCmdDefineSurface *cmd) +{ + debug_printf("\t\t.sid = %u\n", (*cmd).sid); + switch((*cmd).surfaceFlags) { + case SVGA3D_SURFACE_CUBEMAP: + debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_CUBEMAP\n"); + break; + case SVGA3D_SURFACE_HINT_STATIC: + debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_STATIC\n"); + break; + case SVGA3D_SURFACE_HINT_DYNAMIC: + debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_DYNAMIC\n"); + break; + case SVGA3D_SURFACE_HINT_INDEXBUFFER: + debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_INDEXBUFFER\n"); + break; + case SVGA3D_SURFACE_HINT_VERTEXBUFFER: + debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_VERTEXBUFFER\n"); + break; + default: + debug_printf("\t\t.surfaceFlags = %i\n", (*cmd).surfaceFlags); + break; + } + switch((*cmd).format) { + case SVGA3D_FORMAT_INVALID: + debug_printf("\t\t.format = SVGA3D_FORMAT_INVALID\n"); + break; + case SVGA3D_X8R8G8B8: + debug_printf("\t\t.format = SVGA3D_X8R8G8B8\n"); + break; + case SVGA3D_A8R8G8B8: + debug_printf("\t\t.format = SVGA3D_A8R8G8B8\n"); + break; + case SVGA3D_R5G6B5: + debug_printf("\t\t.format = SVGA3D_R5G6B5\n"); + break; + case SVGA3D_X1R5G5B5: + debug_printf("\t\t.format = SVGA3D_X1R5G5B5\n"); + break; + case SVGA3D_A1R5G5B5: + debug_printf("\t\t.format = SVGA3D_A1R5G5B5\n"); + break; + case SVGA3D_A4R4G4B4: + debug_printf("\t\t.format = SVGA3D_A4R4G4B4\n"); + break; + case SVGA3D_Z_D32: + debug_printf("\t\t.format = SVGA3D_Z_D32\n"); + break; + case SVGA3D_Z_D16: + debug_printf("\t\t.format = SVGA3D_Z_D16\n"); + break; + case SVGA3D_Z_D24S8: + debug_printf("\t\t.format = SVGA3D_Z_D24S8\n"); + break; + case SVGA3D_Z_D15S1: + debug_printf("\t\t.format = SVGA3D_Z_D15S1\n"); + break; + case SVGA3D_LUMINANCE8: + debug_printf("\t\t.format = SVGA3D_LUMINANCE8\n"); + break; + case SVGA3D_LUMINANCE4_ALPHA4: + debug_printf("\t\t.format = SVGA3D_LUMINANCE4_ALPHA4\n"); + break; + case SVGA3D_LUMINANCE16: + debug_printf("\t\t.format = SVGA3D_LUMINANCE16\n"); + break; + case SVGA3D_LUMINANCE8_ALPHA8: + debug_printf("\t\t.format = SVGA3D_LUMINANCE8_ALPHA8\n"); + break; + case SVGA3D_DXT1: + debug_printf("\t\t.format = SVGA3D_DXT1\n"); + break; + case SVGA3D_DXT2: + debug_printf("\t\t.format = SVGA3D_DXT2\n"); + break; + case SVGA3D_DXT3: + debug_printf("\t\t.format = SVGA3D_DXT3\n"); + break; + case SVGA3D_DXT4: + debug_printf("\t\t.format = SVGA3D_DXT4\n"); + break; + case SVGA3D_DXT5: + debug_printf("\t\t.format = SVGA3D_DXT5\n"); + break; + case SVGA3D_BUMPU8V8: + debug_printf("\t\t.format = SVGA3D_BUMPU8V8\n"); + break; + case SVGA3D_BUMPL6V5U5: + debug_printf("\t\t.format = SVGA3D_BUMPL6V5U5\n"); + break; + case SVGA3D_BUMPX8L8V8U8: + debug_printf("\t\t.format = SVGA3D_BUMPX8L8V8U8\n"); + break; + case SVGA3D_BUMPL8V8U8: + debug_printf("\t\t.format = SVGA3D_BUMPL8V8U8\n"); + break; + case SVGA3D_ARGB_S10E5: + debug_printf("\t\t.format = SVGA3D_ARGB_S10E5\n"); + break; + case SVGA3D_ARGB_S23E8: + debug_printf("\t\t.format = SVGA3D_ARGB_S23E8\n"); + break; + case SVGA3D_A2R10G10B10: + debug_printf("\t\t.format = SVGA3D_A2R10G10B10\n"); + break; + case SVGA3D_V8U8: + debug_printf("\t\t.format = SVGA3D_V8U8\n"); + break; + case SVGA3D_Q8W8V8U8: + debug_printf("\t\t.format = SVGA3D_Q8W8V8U8\n"); + break; + case SVGA3D_CxV8U8: + debug_printf("\t\t.format = SVGA3D_CxV8U8\n"); + break; + case SVGA3D_X8L8V8U8: + debug_printf("\t\t.format = SVGA3D_X8L8V8U8\n"); + break; + case SVGA3D_A2W10V10U10: + debug_printf("\t\t.format = SVGA3D_A2W10V10U10\n"); + break; + case SVGA3D_ALPHA8: + debug_printf("\t\t.format = SVGA3D_ALPHA8\n"); + break; + case SVGA3D_R_S10E5: + debug_printf("\t\t.format = SVGA3D_R_S10E5\n"); + break; + case SVGA3D_R_S23E8: + debug_printf("\t\t.format = SVGA3D_R_S23E8\n"); + break; + case SVGA3D_RG_S10E5: + debug_printf("\t\t.format = SVGA3D_RG_S10E5\n"); + break; + case SVGA3D_RG_S23E8: + debug_printf("\t\t.format = SVGA3D_RG_S23E8\n"); + break; + case SVGA3D_BUFFER: + debug_printf("\t\t.format = SVGA3D_BUFFER\n"); + break; + case SVGA3D_Z_D24X8: + debug_printf("\t\t.format = SVGA3D_Z_D24X8\n"); + break; + case SVGA3D_FORMAT_MAX: + debug_printf("\t\t.format = SVGA3D_FORMAT_MAX\n"); + break; + default: + debug_printf("\t\t.format = %i\n", (*cmd).format); + break; + } + debug_printf("\t\t.face[0].numMipLevels = %u\n", (*cmd).face[0].numMipLevels); + debug_printf("\t\t.face[1].numMipLevels = %u\n", (*cmd).face[1].numMipLevels); + debug_printf("\t\t.face[2].numMipLevels = %u\n", (*cmd).face[2].numMipLevels); + debug_printf("\t\t.face[3].numMipLevels = %u\n", (*cmd).face[3].numMipLevels); + debug_printf("\t\t.face[4].numMipLevels = %u\n", (*cmd).face[4].numMipLevels); + debug_printf("\t\t.face[5].numMipLevels = %u\n", (*cmd).face[5].numMipLevels); +} + + +void +svga_dump_commands(const void *commands, uint32_t size) +{ + const uint8_t *next = commands; + const uint8_t *last = next + size; + + assert(size % sizeof(uint32_t) == 0); + + while(next < last) { + const uint32_t cmd_id = *(const uint32_t *)next; + + if(SVGA_3D_CMD_BASE <= cmd_id && cmd_id < SVGA_3D_CMD_MAX) { + const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next; + const uint8_t *body = (const uint8_t *)&header[1]; + + next = (const uint8_t *)body + header->size; + if(next > last) + break; + + switch(cmd_id) { + case SVGA_3D_CMD_SURFACE_DEFINE: + debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n"); + { + const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body; + dump_SVGA3dCmdDefineSurface(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dSize) <= next) { + dump_SVGA3dSize((const SVGA3dSize *)body); + body += sizeof(SVGA3dSize); + } + } + break; + case SVGA_3D_CMD_SURFACE_DESTROY: + debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n"); + { + const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body; + dump_SVGA3dCmdDestroySurface(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SURFACE_COPY: + debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n"); + { + const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body; + dump_SVGA3dCmdSurfaceCopy(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dCopyBox) <= next) { + dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); + body += sizeof(SVGA3dCopyBox); + } + } + break; + case SVGA_3D_CMD_SURFACE_STRETCHBLT: + debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n"); + { + const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body; + dump_SVGA3dCmdSurfaceStretchBlt(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SURFACE_DMA: + debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n"); + { + const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body; + dump_SVGA3dCmdSurfaceDMA(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dCopyBox) <= next) { + dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); + body += sizeof(SVGA3dCopyBox); + } + while(body + sizeof(SVGA3dCmdSurfaceDMASuffix) <= next) { + dump_SVGA3dCmdSurfaceDMASuffix((const SVGA3dCmdSurfaceDMASuffix *)body); + body += sizeof(SVGA3dCmdSurfaceDMASuffix); + } + } + break; + case SVGA_3D_CMD_CONTEXT_DEFINE: + debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n"); + { + const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body; + dump_SVGA3dCmdDefineContext(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_CONTEXT_DESTROY: + debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n"); + { + const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body; + dump_SVGA3dCmdDestroyContext(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETTRANSFORM: + debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n"); + { + const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body; + dump_SVGA3dCmdSetTransform(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETZRANGE: + debug_printf("\tSVGA_3D_CMD_SETZRANGE\n"); + { + const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body; + dump_SVGA3dCmdSetZRange(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETRENDERSTATE: + debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n"); + { + const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body; + dump_SVGA3dCmdSetRenderState(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dRenderState) <= next) { + dump_SVGA3dRenderState((const SVGA3dRenderState *)body); + body += sizeof(SVGA3dRenderState); + } + } + break; + case SVGA_3D_CMD_SETRENDERTARGET: + debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n"); + { + const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body; + dump_SVGA3dCmdSetRenderTarget(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETTEXTURESTATE: + debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n"); + { + const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body; + dump_SVGA3dCmdSetTextureState(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dTextureState) <= next) { + dump_SVGA3dTextureState((const SVGA3dTextureState *)body); + body += sizeof(SVGA3dTextureState); + } + } + break; + case SVGA_3D_CMD_SETMATERIAL: + debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n"); + { + const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body; + dump_SVGA3dCmdSetMaterial(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETLIGHTDATA: + debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n"); + { + const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body; + dump_SVGA3dCmdSetLightData(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETLIGHTENABLED: + debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n"); + { + const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body; + dump_SVGA3dCmdSetLightEnabled(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETVIEWPORT: + debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n"); + { + const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body; + dump_SVGA3dCmdSetViewport(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETCLIPPLANE: + debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n"); + { + const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body; + dump_SVGA3dCmdSetClipPlane(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_CLEAR: + debug_printf("\tSVGA_3D_CMD_CLEAR\n"); + { + const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body; + dump_SVGA3dCmdClear(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dRect) <= next) { + dump_SVGA3dRect((const SVGA3dRect *)body); + body += sizeof(SVGA3dRect); + } + } + break; + case SVGA_3D_CMD_PRESENT: + debug_printf("\tSVGA_3D_CMD_PRESENT\n"); + { + const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body; + dump_SVGA3dCmdPresent(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dCopyRect) <= next) { + dump_SVGA3dCopyRect((const SVGA3dCopyRect *)body); + body += sizeof(SVGA3dCopyRect); + } + } + break; + case SVGA_3D_CMD_SHADER_DEFINE: + debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n"); + { + const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body; + dump_SVGA3dCmdDefineShader(cmd); + body = (const uint8_t *)&cmd[1]; + sh_svga_dump((const uint32_t *)body, + (unsigned)(next - body)/sizeof(uint32_t), + FALSE ); + body = next; + } + break; + case SVGA_3D_CMD_SHADER_DESTROY: + debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n"); + { + const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body; + dump_SVGA3dCmdDestroyShader(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SET_SHADER: + debug_printf("\tSVGA_3D_CMD_SET_SHADER\n"); + { + const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body; + dump_SVGA3dCmdSetShader(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SET_SHADER_CONST: + debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n"); + { + const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body; + dump_SVGA3dCmdSetShaderConst(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_DRAW_PRIMITIVES: + debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n"); + { + const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body; + unsigned i, j; + dump_SVGA3dCmdDrawPrimitives(cmd); + body = (const uint8_t *)&cmd[1]; + for(i = 0; i < cmd->numVertexDecls; ++i) { + dump_SVGA3dVertexDecl((const SVGA3dVertexDecl *)body); + body += sizeof(SVGA3dVertexDecl); + } + for(j = 0; j < cmd->numRanges; ++j) { + dump_SVGA3dPrimitiveRange((const SVGA3dPrimitiveRange *)body); + body += sizeof(SVGA3dPrimitiveRange); + } + while(body + sizeof(SVGA3dVertexDivisor) <= next) { + dump_SVGA3dVertexDivisor((const SVGA3dVertexDivisor *)body); + body += sizeof(SVGA3dVertexDivisor); + } + } + break; + case SVGA_3D_CMD_SETSCISSORRECT: + debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n"); + { + const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body; + dump_SVGA3dCmdSetScissorRect(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_BEGIN_QUERY: + debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n"); + { + const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body; + dump_SVGA3dCmdBeginQuery(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_END_QUERY: + debug_printf("\tSVGA_3D_CMD_END_QUERY\n"); + { + const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body; + dump_SVGA3dCmdEndQuery(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_WAIT_FOR_QUERY: + debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n"); + { + const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body; + dump_SVGA3dCmdWaitForQuery(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + default: + debug_printf("\t0x%08x\n", cmd_id); + break; + } + + while(body + sizeof(uint32_t) <= next) { + debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); + body += sizeof(uint32_t); + } + while(body + sizeof(uint32_t) <= next) + debug_printf("\t\t0x%02x\n", *body++); + } + else if(cmd_id == SVGA_CMD_FENCE) { + debug_printf("\tSVGA_CMD_FENCE\n"); + debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]); + next += 2*sizeof(uint32_t); + } + else { + debug_printf("\t0x%08x\n", cmd_id); + next += sizeof(uint32_t); + } + } +} + diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.h b/src/gallium/drivers/svga/svgadump/svga_dump.h new file mode 100644 index 0000000000..69a8702087 --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/svga_dump.h @@ -0,0 +1,34 @@ +/********************************************************** + * Copyright 2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef SVGA_DUMP_H_ +#define SVGA_DUMP_H_ + +#include "pipe/p_compiler.h" + +void +svga_dump_commands(const void *commands, uint32_t size); + +#endif /* SVGA_DUMP_H_ */ diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.py b/src/gallium/drivers/svga/svgadump/svga_dump.py new file mode 100755 index 0000000000..3cb29c395b --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/svga_dump.py @@ -0,0 +1,329 @@ +#!/usr/bin/env python +''' +Generates dumper for the SVGA 3D command stream using pygccxml. + +Jose Fonseca +''' + +copyright = ''' +/********************************************************** + * Copyright 2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + ''' + +import os +import sys + +from pygccxml import parser +from pygccxml import declarations + +from pygccxml.declarations import algorithm +from pygccxml.declarations import decl_visitor +from pygccxml.declarations import type_traits +from pygccxml.declarations import type_visitor + + +enums = True + + +class decl_dumper_t(decl_visitor.decl_visitor_t): + + def __init__(self, instance = '', decl = None): + decl_visitor.decl_visitor_t.__init__(self) + self._instance = instance + self.decl = decl + + def clone(self): + return decl_dumper_t(self._instance, self.decl) + + def visit_class(self): + class_ = self.decl + assert self.decl.class_type in ('struct', 'union') + + for variable in class_.variables(): + if variable.name != '': + #print 'variable = %r' % variable.name + dump_type(self._instance + '.' + variable.name, variable.type) + + def visit_enumeration(self): + if enums: + print ' switch(%s) {' % ("(*cmd)" + self._instance,) + for name, value in self.decl.values: + print ' case %s:' % (name,) + print ' debug_printf("\\t\\t%s = %s\\n");' % (self._instance, name) + print ' break;' + print ' default:' + print ' debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance) + print ' break;' + print ' }' + else: + print ' debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance) + + +def dump_decl(instance, decl): + dumper = decl_dumper_t(instance, decl) + algorithm.apply_visitor(dumper, decl) + + +class type_dumper_t(type_visitor.type_visitor_t): + + def __init__(self, instance, type_): + type_visitor.type_visitor_t.__init__(self) + self.instance = instance + self.type = type_ + + def clone(self): + return type_dumper_t(self.instance, self.type) + + def visit_char(self): + self.print_instance('%i') + + def visit_unsigned_char(self): + self.print_instance('%u') + + def visit_signed_char(self): + self.print_instance('%i') + + def visit_wchar(self): + self.print_instance('%i') + + def visit_short_int(self): + self.print_instance('%i') + + def visit_short_unsigned_int(self): + self.print_instance('%u') + + def visit_bool(self): + self.print_instance('%i') + + def visit_int(self): + self.print_instance('%i') + + def visit_unsigned_int(self): + self.print_instance('%u') + + def visit_long_int(self): + self.print_instance('%li') + + def visit_long_unsigned_int(self): + self.print_instance('%lu') + + def visit_long_long_int(self): + self.print_instance('%lli') + + def visit_long_long_unsigned_int(self): + self.print_instance('%llu') + + def visit_float(self): + self.print_instance('%f') + + def visit_double(self): + self.print_instance('%f') + + def visit_array(self): + for i in range(type_traits.array_size(self.type)): + dump_type(self.instance + '[%i]' % i, type_traits.base_type(self.type)) + + def visit_pointer(self): + self.print_instance('%p') + + def visit_declarated(self): + #print 'decl = %r' % self.type.decl_string + decl = type_traits.remove_declarated(self.type) + dump_decl(self.instance, decl) + + def print_instance(self, format): + print ' debug_printf("\\t\\t%s = %s\\n", %s);' % (self.instance, format, "(*cmd)" + self.instance) + + +def dump_type(instance, type_): + type_ = type_traits.remove_alias(type_) + visitor = type_dumper_t(instance, type_) + algorithm.apply_visitor(visitor, type_) + + +def dump_struct(decls, class_): + print 'static void' + print 'dump_%s(const %s *cmd)' % (class_.name, class_.name) + print '{' + dump_decl('', class_) + print '}' + print '' + + +cmds = [ + ('SVGA_3D_CMD_SURFACE_DEFINE', 'SVGA3dCmdDefineSurface', (), 'SVGA3dSize'), + ('SVGA_3D_CMD_SURFACE_DESTROY', 'SVGA3dCmdDestroySurface', (), None), + ('SVGA_3D_CMD_SURFACE_COPY', 'SVGA3dCmdSurfaceCopy', (), 'SVGA3dCopyBox'), + ('SVGA_3D_CMD_SURFACE_STRETCHBLT', 'SVGA3dCmdSurfaceStretchBlt', (), None), + ('SVGA_3D_CMD_SURFACE_DMA', 'SVGA3dCmdSurfaceDMA', (), 'SVGA3dCopyBox'), + ('SVGA_3D_CMD_CONTEXT_DEFINE', 'SVGA3dCmdDefineContext', (), None), + ('SVGA_3D_CMD_CONTEXT_DESTROY', 'SVGA3dCmdDestroyContext', (), None), + ('SVGA_3D_CMD_SETTRANSFORM', 'SVGA3dCmdSetTransform', (), None), + ('SVGA_3D_CMD_SETZRANGE', 'SVGA3dCmdSetZRange', (), None), + ('SVGA_3D_CMD_SETRENDERSTATE', 'SVGA3dCmdSetRenderState', (), 'SVGA3dRenderState'), + ('SVGA_3D_CMD_SETRENDERTARGET', 'SVGA3dCmdSetRenderTarget', (), None), + ('SVGA_3D_CMD_SETTEXTURESTATE', 'SVGA3dCmdSetTextureState', (), 'SVGA3dTextureState'), + ('SVGA_3D_CMD_SETMATERIAL', 'SVGA3dCmdSetMaterial', (), None), + ('SVGA_3D_CMD_SETLIGHTDATA', 'SVGA3dCmdSetLightData', (), None), + ('SVGA_3D_CMD_SETLIGHTENABLED', 'SVGA3dCmdSetLightEnabled', (), None), + ('SVGA_3D_CMD_SETVIEWPORT', 'SVGA3dCmdSetViewport', (), None), + ('SVGA_3D_CMD_SETCLIPPLANE', 'SVGA3dCmdSetClipPlane', (), None), + ('SVGA_3D_CMD_CLEAR', 'SVGA3dCmdClear', (), 'SVGA3dRect'), + ('SVGA_3D_CMD_PRESENT', 'SVGA3dCmdPresent', (), 'SVGA3dCopyRect'), + ('SVGA_3D_CMD_SHADER_DEFINE', 'SVGA3dCmdDefineShader', (), None), + ('SVGA_3D_CMD_SHADER_DESTROY', 'SVGA3dCmdDestroyShader', (), None), + ('SVGA_3D_CMD_SET_SHADER', 'SVGA3dCmdSetShader', (), None), + ('SVGA_3D_CMD_SET_SHADER_CONST', 'SVGA3dCmdSetShaderConst', (), None), + ('SVGA_3D_CMD_DRAW_PRIMITIVES', 'SVGA3dCmdDrawPrimitives', (('SVGA3dVertexDecl', 'numVertexDecls'), ('SVGA3dPrimitiveRange', 'numRanges')), 'SVGA3dVertexDivisor'), + ('SVGA_3D_CMD_SETSCISSORRECT', 'SVGA3dCmdSetScissorRect', (), None), + ('SVGA_3D_CMD_BEGIN_QUERY', 'SVGA3dCmdBeginQuery', (), None), + ('SVGA_3D_CMD_END_QUERY', 'SVGA3dCmdEndQuery', (), None), + ('SVGA_3D_CMD_WAIT_FOR_QUERY', 'SVGA3dCmdWaitForQuery', (), None), + #('SVGA_3D_CMD_PRESENT_READBACK', None, (), None), +] + +def dump_cmds(): + print r''' +void +svga_dump_commands(const void *commands, uint32_t size) +{ + const uint8_t *next = commands; + const uint8_t *last = next + size; + + assert(size % sizeof(uint32_t) == 0); + + while(next < last) { + const uint32_t cmd_id = *(const uint32_t *)next; + + if(SVGA_3D_CMD_BASE <= cmd_id && cmd_id < SVGA_3D_CMD_MAX) { + const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next; + const uint8_t *body = (const uint8_t *)&header[1]; + + next = (const uint8_t *)body + header->size; + if(next > last) + break; +''' + + print ' switch(cmd_id) {' + indexes = 'ijklmn' + for id, header, body, footer in cmds: + print ' case %s:' % id + print ' debug_printf("\\t%s\\n");' % id + print ' {' + print ' const %s *cmd = (const %s *)body;' % (header, header) + if len(body): + print ' unsigned ' + ', '.join(indexes[:len(body)]) + ';' + print ' dump_%s(cmd);' % header + print ' body = (const uint8_t *)&cmd[1];' + for i in range(len(body)): + struct, count = body[i] + idx = indexes[i] + print ' for(%s = 0; %s < cmd->%s; ++%s) {' % (idx, idx, count, idx) + print ' dump_%s((const %s *)body);' % (struct, struct) + print ' body += sizeof(%s);' % struct + print ' }' + if footer is not None: + print ' while(body + sizeof(%s) <= next) {' % footer + print ' dump_%s((const %s *)body);' % (footer, footer) + print ' body += sizeof(%s);' % footer + print ' }' + if id == 'SVGA_3D_CMD_SHADER_DEFINE': + print ' sh_svga_dump((const uint32_t *)body, (unsigned)(next - body)/sizeof(uint32_t));' + print ' body = next;' + print ' }' + print ' break;' + print ' default:' + print ' debug_printf("\\t0x%08x\\n", cmd_id);' + print ' break;' + print ' }' + + print r''' + while(body + sizeof(uint32_t) <= next) { + debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); + body += sizeof(uint32_t); + } + while(body + sizeof(uint32_t) <= next) + debug_printf("\t\t0x%02x\n", *body++); + } + else if(cmd_id == SVGA_CMD_FENCE) { + debug_printf("\tSVGA_CMD_FENCE\n"); + debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]); + next += 2*sizeof(uint32_t); + } + else { + debug_printf("\t0x%08x\n", cmd_id); + next += sizeof(uint32_t); + } + } +} +''' + +def main(): + print copyright.strip() + print + print '/**' + print ' * @file' + print ' * Dump SVGA commands.' + print ' *' + print ' * Generated automatically from svga3d_reg.h by svga_dump.py.' + print ' */' + print + print '#include "svga_types.h"' + print '#include "shader_dump/st_shader_dump.h"' + print '#include "svga3d_reg.h"' + print + print '#include "pipe/p_debug.h"' + print '#include "svga_dump.h"' + print + + config = parser.config_t( + include_paths = ['include'], + compiler = 'gcc', + ) + + headers = [ + 'include/svga_types.h', + 'include/svga3d_reg.h', + ] + + decls = parser.parse(headers, config, parser.COMPILATION_MODE.ALL_AT_ONCE) + global_ns = declarations.get_global_namespace(decls) + + names = set() + for id, header, body, footer in cmds: + names.add(header) + for struct, count in body: + names.add(struct) + if footer is not None: + names.add(footer) + + for class_ in global_ns.classes(lambda decl: decl.name in names): + dump_struct(decls, class_) + + dump_cmds() + + +if __name__ == '__main__': + main() diff --git a/src/gallium/winsys/drm/vmware/Makefile b/src/gallium/winsys/drm/vmware/Makefile new file mode 100644 index 0000000000..2ae6dead5c --- /dev/null +++ b/src/gallium/winsys/drm/vmware/Makefile @@ -0,0 +1,12 @@ +# src/gallium/winsys/drm/vmware/Makefile +TOP = ../../../../.. +include $(TOP)/configs/current + +SUBDIRS = core $(GALLIUM_STATE_TRACKERS_DIRS) + +default install clean: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE) $@) || exit 1; \ + fi \ + done diff --git a/src/gallium/winsys/drm/vmware/SConscript b/src/gallium/winsys/drm/vmware/SConscript new file mode 100644 index 0000000000..06e6d5be9c --- /dev/null +++ b/src/gallium/winsys/drm/vmware/SConscript @@ -0,0 +1,11 @@ +Import('*') + +SConscript(['core/SConscript',]) + +if 'mesa' in env['statetrackers']: + + SConscript(['dri/SConscript']) + +if 'xorg' in env['statetrackers']: + + SConscript(['xorg/SConscript']) diff --git a/src/gallium/winsys/drm/vmware/core/Makefile b/src/gallium/winsys/drm/vmware/core/Makefile new file mode 100644 index 0000000000..755dc45935 --- /dev/null +++ b/src/gallium/winsys/drm/vmware/core/Makefile @@ -0,0 +1,47 @@ +TOP = ../../../../../.. +include $(TOP)/configs/current + +LIBNAME = svgadrm + +C_SOURCES = \ + vmw_buffer.c \ + vmw_context.c \ + vmw_fence.c \ + vmw_screen.c \ + vmw_screen_dri.c \ + vmw_screen_ioctl.c \ + vmw_screen_pools.c \ + vmw_screen_svga.c \ + vmw_surface.c + +LIBRARY_INCLUDES = \ + -I$(TOP)/src/gallium/drivers/svga \ + -I$(TOP)/src/gallium/drivers/svga/include \ + -I$(GALLIUM)/src/mesa/drivers/dri/common \ + -I$(GALLIUM)/include \ + -I$(GALLIUM)/include/GL/internal \ + -I$(GALLIUM)/src/mesa \ + -I$(GALLIUM)/src/mesa/main \ + -I$(GALLIUM)/src/mesa/glapi \ + -I$(GALLIUM)/src/egl/main \ + -I$(GALLIUM)/src/egl/drivers/dri \ + $(shell pkg-config libdrm --cflags-only-I) + +LIBRARY_DEFINES = \ + -DHAVE_STDINT_H -D_FILE_OFFSET_BITS=64 \ + $(shell pkg-config libdrm --cflags-only-other) + +CC = gcc -fvisibility=hidden -msse -msse2 + +# Set the gnu99 standard to enable anonymous structs in vmware headers. +# +CFLAGS = -Wall -Werror -Wmissing-prototypes -std=gnu99 -ffast-math \ + $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) $(ASM_FLAGS) + +include ../../../../Makefile.template + + +symlinks: + + +include depend diff --git a/src/gallium/winsys/drm/vmware/core/SConscript b/src/gallium/winsys/drm/vmware/core/SConscript new file mode 100644 index 0000000000..1875b659ac --- /dev/null +++ b/src/gallium/winsys/drm/vmware/core/SConscript @@ -0,0 +1,39 @@ +Import('*') + +env = env.Clone() + +if env['gcc']: + env.Append(CCFLAGS = ['-fvisibility=hidden', '-Werror']) + env.Append(CPPDEFINES = [ + 'HAVE_STDINT_H', + 'HAVE_SYS_TYPES_H', + '-D_FILE_OFFSET_BITS=64', + ]) + +env.Prepend(CPPPATH = [ + 'include', + '#/src/gallium/drivers/svga', + '#/src/gallium/drivers/svga/include', +]) + +env.Append(CPPDEFINES = [ +]) + +sources = [ + 'vmw_buffer.c', + 'vmw_context.c', + 'vmw_fence.c', + 'vmw_screen.c', + 'vmw_screen_dri.c', + 'vmw_screen_ioctl.c', + 'vmw_screen_pools.c', + 'vmw_screen_svga.c', + 'vmw_surface.c', +] + +svgadrm = env.ConvenienceLibrary( + target = 'svgadrm', + source = sources, +) + +Export('svgadrm') diff --git a/src/gallium/winsys/drm/vmware/core/vmw_buffer.c b/src/gallium/winsys/drm/vmware/core/vmw_buffer.c new file mode 100644 index 0000000000..b812fb59d3 --- /dev/null +++ b/src/gallium/winsys/drm/vmware/core/vmw_buffer.c @@ -0,0 +1,274 @@ +/********************************************************** + * Copyright 2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * SVGA buffer manager for Guest Memory Regions (GMRs). + * + * GMRs are used for pixel and vertex data upload/download to/from the virtual + * SVGA hardware. There is a limited number of GMRs available, and + * creating/destroying them is also a slow operation so we must suballocate + * them. + * + * This file implements a pipebuffer library's buffer manager, so that we can + * use pipepbuffer's suballocation, fencing, and debugging facilities with GMRs. + * + * @author Jose Fonseca + */ + + +#include "svga_cmd.h" + +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "pipebuffer/pb_buffer.h" +#include "pipebuffer/pb_bufmgr.h" + +#include "svga_winsys.h" + +#include "vmw_screen.h" +#include "vmw_buffer.h" + + +struct vmw_gmr_bufmgr; + + +struct vmw_gmr_buffer +{ + struct pb_buffer base; + + struct vmw_gmr_bufmgr *mgr; + + struct vmw_region *region; + void *map; + +#ifdef DEBUG + struct pipe_fence_handle *last_fence; +#endif +}; + + +extern const struct pb_vtbl vmw_gmr_buffer_vtbl; + + +static INLINE struct vmw_gmr_buffer * +vmw_gmr_buffer(struct pb_buffer *buf) +{ + assert(buf); + assert(buf->vtbl == &vmw_gmr_buffer_vtbl); + return (struct vmw_gmr_buffer *)buf; +} + + +struct vmw_gmr_bufmgr +{ + struct pb_manager base; + + struct vmw_winsys_screen *vws; +}; + + +static INLINE struct vmw_gmr_bufmgr * +vmw_gmr_bufmgr(struct pb_manager *mgr) +{ + assert(mgr); + return (struct vmw_gmr_bufmgr *)mgr; +} + + +static void +vmw_gmr_buffer_destroy(struct pb_buffer *_buf) +{ + struct vmw_gmr_buffer *buf = vmw_gmr_buffer(_buf); + +#ifdef DEBUG + if(buf->last_fence) { + struct svga_winsys_screen *sws = &buf->mgr->vws->base; + assert(sws->fence_signalled(sws, buf->last_fence, 0) == 0); + } +#endif + + vmw_ioctl_region_unmap(buf->region); + + vmw_ioctl_region_destroy(buf->region); + + FREE(buf); +} + + +static void * +vmw_gmr_buffer_map(struct pb_buffer *_buf, + unsigned flags) +{ + struct vmw_gmr_buffer *buf = vmw_gmr_buffer(_buf); + return buf->map; +} + + +static void +vmw_gmr_buffer_unmap(struct pb_buffer *_buf) +{ + /* Do nothing */ + (void)_buf; +} + + +static void +vmw_gmr_buffer_get_base_buffer(struct pb_buffer *buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + *base_buf = buf; + *offset = 0; +} + + +static enum pipe_error +vmw_gmr_buffer_validate( struct pb_buffer *_buf, + struct pb_validate *vl, + unsigned flags ) +{ + /* Always pinned */ + return PIPE_OK; +} + + +static void +vmw_gmr_buffer_fence( struct pb_buffer *_buf, + struct pipe_fence_handle *fence ) +{ + /* We don't need to do anything, as the pipebuffer library + * will take care of delaying the destruction of fenced buffers */ +#ifdef DEBUG + struct vmw_gmr_buffer *buf = vmw_gmr_buffer(_buf); + if(fence) + buf->last_fence = fence; +#endif +} + + +const struct pb_vtbl vmw_gmr_buffer_vtbl = { + vmw_gmr_buffer_destroy, + vmw_gmr_buffer_map, + vmw_gmr_buffer_unmap, + vmw_gmr_buffer_validate, + vmw_gmr_buffer_fence, + vmw_gmr_buffer_get_base_buffer +}; + + +static struct pb_buffer * +vmw_gmr_bufmgr_create_buffer(struct pb_manager *_mgr, + pb_size size, + const struct pb_desc *desc) +{ + struct vmw_gmr_bufmgr *mgr = vmw_gmr_bufmgr(_mgr); + struct vmw_winsys_screen *vws = mgr->vws; + struct vmw_gmr_buffer *buf; + + buf = CALLOC_STRUCT(vmw_gmr_buffer); + if(!buf) + goto error1; + + pipe_reference_init(&buf->base.base.reference, 1); + buf->base.base.alignment = desc->alignment; + buf->base.base.usage = desc->usage; + buf->base.base.size = size; + buf->base.vtbl = &vmw_gmr_buffer_vtbl; + buf->mgr = mgr; + + buf->region = vmw_ioctl_region_create(vws, size); + if(!buf->region) + goto error2; + + buf->map = vmw_ioctl_region_map(buf->region); + if(!buf->map) + goto error3; + + return &buf->base; + +error3: + vmw_ioctl_region_destroy(buf->region); +error2: + FREE(buf); +error1: + return NULL; +} + + +static void +vmw_gmr_bufmgr_flush(struct pb_manager *mgr) +{ + /* No-op */ +} + + +static void +vmw_gmr_bufmgr_destroy(struct pb_manager *_mgr) +{ + struct vmw_gmr_bufmgr *mgr = vmw_gmr_bufmgr(_mgr); + FREE(mgr); +} + + +struct pb_manager * +vmw_gmr_bufmgr_create(struct vmw_winsys_screen *vws) +{ + struct vmw_gmr_bufmgr *mgr; + + mgr = CALLOC_STRUCT(vmw_gmr_bufmgr); + if(!mgr) + return NULL; + + mgr->base.destroy = vmw_gmr_bufmgr_destroy; + mgr->base.create_buffer = vmw_gmr_bufmgr_create_buffer; + mgr->base.flush = vmw_gmr_bufmgr_flush; + + mgr->vws = vws; + + return &mgr->base; +} + + +boolean +vmw_gmr_bufmgr_region_ptr(struct pb_buffer *buf, + struct SVGAGuestPtr *ptr) +{ + struct pb_buffer *base_buf; + unsigned offset = 0; + struct vmw_gmr_buffer *gmr_buf; + + pb_get_base_buffer( buf, &base_buf, &offset ); + + gmr_buf = vmw_gmr_buffer(base_buf); + if(!gmr_buf) + return FALSE; + + *ptr = vmw_ioctl_region_ptr(gmr_buf->region); + + ptr->offset += offset; + + return TRUE; +} diff --git a/src/gallium/winsys/drm/vmware/core/vmw_buffer.h b/src/gallium/winsys/drm/vmware/core/vmw_buffer.h new file mode 100644 index 0000000000..634bdcabd2 --- /dev/null +++ b/src/gallium/winsys/drm/vmware/core/vmw_buffer.h @@ -0,0 +1,65 @@ +/********************************************************** + * Copyright 2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + + +#ifndef VMW_BUFFER_H_ +#define VMW_BUFFER_H_ + + +#include "pipe/p_compiler.h" + +struct SVGAGuestPtr; +struct pb_buffer; +struct pb_manager; +struct svga_winsys_buffer; +struct svga_winsys_surface; +struct vmw_winsys_screen; + + +static INLINE struct pb_buffer * +vmw_pb_buffer(struct svga_winsys_buffer *buffer) +{ + assert(buffer); + return (struct pb_buffer *)buffer; +} + + +static INLINE struct svga_winsys_buffer * +vmw_svga_winsys_buffer(struct pb_buffer *buffer) +{ + assert(buffer); + return (struct svga_winsys_buffer *)buffer; +} + + +struct pb_manager * +vmw_gmr_bufmgr_create(struct vmw_winsys_screen *vws); + +boolean +vmw_gmr_bufmgr_region_ptr(struct pb_buffer *buf, + struct SVGAGuestPtr *ptr); + + +#endif /* VMW_BUFFER_H_ */ diff --git a/src/gallium/winsys/drm/vmware/core/vmw_context.c b/src/gallium/winsys/drm/vmware/core/vmw_context.c new file mode 100644 index 0000000000..b6997588de --- /dev/null +++ b/src/gallium/winsys/drm/vmware/core/vmw_context.c @@ -0,0 +1,297 @@ +/********************************************************** + * Copyright 2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + + +#include "svga_cmd.h" + +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_debug_stack.h" +#include "pipebuffer/pb_buffer.h" +#include "pipebuffer/pb_validate.h" + +#include "svga_winsys.h" +#include "vmw_context.h" +#include "vmw_screen.h" +#include "vmw_buffer.h" +#include "vmw_surface.h" +#include "vmw_fence.h" + +#define VMW_COMMAND_SIZE (64*1024) +#define VMW_SURFACE_RELOCS (1024) + +#define VMW_MUST_FLUSH_STACK 8 + +struct vmw_svga_winsys_context +{ + struct svga_winsys_context base; + + struct vmw_winsys_screen *vws; + +#ifdef DEBUG + boolean must_flush; + struct debug_stack_frame must_flush_stack[VMW_MUST_FLUSH_STACK]; +#endif + + struct { + uint8_t buffer[VMW_COMMAND_SIZE]; + uint32_t size; + uint32_t used; + uint32_t reserved; + } command; + + struct { + struct vmw_svga_winsys_surface *handles[VMW_SURFACE_RELOCS]; + uint32_t size; + uint32_t used; + uint32_t staged; + uint32_t reserved; + } surface; + + struct pb_validate *validate; + + uint32_t last_fence; +}; + + +static INLINE struct vmw_svga_winsys_context * +vmw_svga_winsys_context(struct svga_winsys_context *swc) +{ + assert(swc); + return (struct vmw_svga_winsys_context *)swc; +} + + +static enum pipe_error +vmw_swc_flush(struct svga_winsys_context *swc, + struct pipe_fence_handle **pfence) +{ + struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc); + struct pipe_fence_handle *fence = NULL; + unsigned i; + enum pipe_error ret; + + ret = pb_validate_validate(vswc->validate); + assert(ret == PIPE_OK); + if(ret == PIPE_OK) { + + if (vswc->command.used) + vmw_ioctl_command(vswc->vws, + vswc->command.buffer, + vswc->command.used, + &vswc->last_fence); + + fence = vmw_pipe_fence(vswc->last_fence); + + pb_validate_fence(vswc->validate, fence); + } + + vswc->command.used = 0; + vswc->command.reserved = 0; + + for(i = 0; i < vswc->surface.used + vswc->surface.staged; ++i) { + struct vmw_svga_winsys_surface *vsurf = + vswc->surface.handles[i]; + p_atomic_dec(&vsurf->validated); + vmw_svga_winsys_surface_reference(&vswc->surface.handles[i], NULL); + } + + vswc->surface.used = 0; + vswc->surface.reserved = 0; + +#ifdef DEBUG + vswc->must_flush = FALSE; +#endif + + if(pfence) + *pfence = fence; + + return ret; +} + + +static void * +vmw_swc_reserve(struct svga_winsys_context *swc, + uint32_t nr_bytes, uint32_t nr_relocs ) +{ + struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc); + +#ifdef DEBUG + /* Check if somebody forgot to check the previous failure */ + if(vswc->must_flush) { + debug_printf("Forgot to flush:\n"); + debug_backtrace_dump(vswc->must_flush_stack, VMW_MUST_FLUSH_STACK); + assert(!vswc->must_flush); + } +#endif + + assert(nr_bytes <= vswc->command.size); + if(nr_bytes > vswc->command.size) + return NULL; + + if(vswc->command.used + nr_bytes > vswc->command.size || + vswc->surface.used + nr_relocs > vswc->surface.size) { +#ifdef DEBUG + vswc->must_flush = TRUE; + debug_backtrace_capture(vswc->must_flush_stack, 1, + VMW_MUST_FLUSH_STACK); +#endif + return NULL; + } + + assert(vswc->command.used + nr_bytes <= vswc->command.size); + assert(vswc->surface.used + nr_relocs <= vswc->surface.size); + + vswc->command.reserved = nr_bytes; + vswc->surface.reserved = nr_relocs; + vswc->surface.staged = 0; + + return vswc->command.buffer + vswc->command.used; +} + + +static void +vmw_swc_surface_relocation(struct svga_winsys_context *swc, + uint32 *where, + struct svga_winsys_surface *surface, + unsigned flags) +{ + struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc); + struct vmw_svga_winsys_surface *vsurf; + + if(!surface) { + *where = SVGA3D_INVALID_ID; + return; + } + + assert(vswc->surface.staged < vswc->surface.reserved); + + vsurf = vmw_svga_winsys_surface(surface); + + *where = vsurf->sid; + + vmw_svga_winsys_surface_reference(&vswc->surface.handles[vswc->surface.used + vswc->surface.staged], vsurf); + p_atomic_inc(&vsurf->validated); + ++vswc->surface.staged; +} + + +static void +vmw_swc_region_relocation(struct svga_winsys_context *swc, + struct SVGAGuestPtr *where, + struct svga_winsys_buffer *buffer, + uint32 offset, + unsigned flags) +{ + struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc); + struct SVGAGuestPtr ptr; + struct pb_buffer *buf = vmw_pb_buffer(buffer); + enum pipe_error ret; + + if(!vmw_gmr_bufmgr_region_ptr(buf, &ptr)) + assert(0); + + ptr.offset += offset; + + *where = ptr; + + ret = pb_validate_add_buffer(vswc->validate, buf, flags); + /* TODO: Update pipebuffer to reserve buffers and not fail here */ + assert(ret == PIPE_OK); +} + + +static void +vmw_swc_commit(struct svga_winsys_context *swc) +{ + struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc); + + assert(vswc->command.reserved); + assert(vswc->command.used + vswc->command.reserved <= vswc->command.size); + vswc->command.used += vswc->command.reserved; + vswc->command.reserved = 0; + + assert(vswc->surface.staged <= vswc->surface.reserved); + assert(vswc->surface.used + vswc->surface.staged <= vswc->surface.size); + vswc->surface.used += vswc->surface.staged; + vswc->surface.staged = 0; + vswc->surface.reserved = 0; +} + + +static void +vmw_swc_destroy(struct svga_winsys_context *swc) +{ + struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc); + unsigned i; + for(i = 0; i < vswc->surface.used; ++i) { + p_atomic_dec(&vswc->surface.handles[i]->validated); + vmw_svga_winsys_surface_reference(&vswc->surface.handles[i], NULL); + } + pb_validate_destroy(vswc->validate); + vmw_ioctl_context_destroy(vswc->vws, swc->cid); + FREE(vswc); +} + + +struct svga_winsys_context * +vmw_svga_winsys_context_create(struct svga_winsys_screen *sws) +{ + struct vmw_winsys_screen *vws = vmw_winsys_screen(sws); + struct vmw_svga_winsys_context *vswc; + + vswc = CALLOC_STRUCT(vmw_svga_winsys_context); + if(!vswc) + return NULL; + + vswc->base.destroy = vmw_swc_destroy; + vswc->base.reserve = vmw_swc_reserve; + vswc->base.surface_relocation = vmw_swc_surface_relocation; + vswc->base.region_relocation = vmw_swc_region_relocation; + vswc->base.commit = vmw_swc_commit; + vswc->base.flush = vmw_swc_flush; + + vswc->base.cid = vmw_ioctl_context_create(vws); + + vswc->vws = vws; + + vswc->command.size = VMW_COMMAND_SIZE; + vswc->surface.size = VMW_SURFACE_RELOCS; + + vswc->validate = pb_validate_create(); + if(!vswc->validate) { + FREE(vswc); + return NULL; + } + + return &vswc->base; +} + + +struct pipe_context * +vmw_svga_context_create(struct pipe_screen *screen) +{ + return svga_context_create(screen); +} diff --git a/src/gallium/winsys/drm/vmware/core/vmw_context.h b/src/gallium/winsys/drm/vmware/core/vmw_context.h new file mode 100644 index 0000000000..305ce9b5be --- /dev/null +++ b/src/gallium/winsys/drm/vmware/core/vmw_context.h @@ -0,0 +1,59 @@ +/********************************************************** + * Copyright 2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @author Jose Fonseca + */ + + +#ifndef VMW_CONTEXT_H_ +#define VMW_CONTEXT_H_ + +#include "pipe/p_compiler.h" + +struct svga_winsys_screen; +struct svga_winsys_context; +struct pipe_context; +struct pipe_screen; + +#define VMW_DEBUG 0 + +#if VMW_DEBUG +#define vmw_printf debug_printf +#define VMW_FUNC debug_printf("%s\n", __FUNCTION__) +#else +#define VMW_FUNC +#define vmw_printf(...) +#endif + + +struct svga_winsys_context * +vmw_svga_winsys_context_create(struct svga_winsys_screen *sws); + +struct pipe_context * +vmw_svga_context_create(struct pipe_screen *screen); + + +#endif /* VMW_CONTEXT_H_ */ diff --git a/src/gallium/winsys/drm/vmware/core/vmw_fence.c b/src/gallium/winsys/drm/vmware/core/vmw_fence.c new file mode 100644 index 0000000000..873dd51166 --- /dev/null +++ b/src/gallium/winsys/drm/vmware/core/vmw_fence.c @@ -0,0 +1,108 @@ +/********************************************************** + * Copyright 2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + + +#include "util/u_memory.h" +#include "pipebuffer/pb_buffer_fenced.h" + +#include "vmw_screen.h" +#include "vmw_fence.h" + + + +struct vmw_fence_ops +{ + struct pb_fence_ops base; + + struct vmw_winsys_screen *vws; +}; + + +static INLINE struct vmw_fence_ops * +vmw_fence_ops(struct pb_fence_ops *ops) +{ + assert(ops); + return (struct vmw_fence_ops *)ops; +} + + +static void +vmw_fence_ops_fence_reference(struct pb_fence_ops *ops, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + *ptr = fence; +} + + +static int +vmw_fence_ops_fence_signalled(struct pb_fence_ops *ops, + struct pipe_fence_handle *fence, + unsigned flag) +{ + struct vmw_winsys_screen *vws = vmw_fence_ops(ops)->vws; + (void)flag; + return vmw_ioctl_fence_signalled(vws, vmw_fence(fence)); +} + + +static int +vmw_fence_ops_fence_finish(struct pb_fence_ops *ops, + struct pipe_fence_handle *fence, + unsigned flag) +{ + struct vmw_winsys_screen *vws = vmw_fence_ops(ops)->vws; + (void)flag; + return vmw_ioctl_fence_finish(vws, vmw_fence(fence)); +} + + +static void +vmw_fence_ops_destroy(struct pb_fence_ops *ops) +{ + FREE(ops); +} + + +struct pb_fence_ops * +vmw_fence_ops_create(struct vmw_winsys_screen *vws) +{ + struct vmw_fence_ops *ops; + + ops = CALLOC_STRUCT(vmw_fence_ops); + if(!ops) + return NULL; + + ops->base.destroy = &vmw_fence_ops_destroy; + ops->base.fence_reference = &vmw_fence_ops_fence_reference; + ops->base.fence_signalled = &vmw_fence_ops_fence_signalled; + ops->base.fence_finish = &vmw_fence_ops_fence_finish; + + ops->vws = vws; + + return &ops->base; +} + + diff --git a/src/gallium/winsys/drm/vmware/core/vmw_fence.h b/src/gallium/winsys/drm/vmware/core/vmw_fence.h new file mode 100644 index 0000000000..5357b4f61d --- /dev/null +++ b/src/gallium/winsys/drm/vmware/core/vmw_fence.h @@ -0,0 +1,59 @@ +/********************************************************** + * Copyright 2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + + +#ifndef VMW_FENCE_H_ +#define VMW_FENCE_H_ + + +#include "pipe/p_compiler.h" + + +struct pipe_fence_handle; +struct pb_fence_ops; +struct vmw_winsys_screen; + + +/** Cast from a pipe_fence_handle pointer into a SVGA fence */ +static INLINE uint32_t +vmw_fence( struct pipe_fence_handle *fence ) +{ + return (uint32_t)(uintptr_t)fence; +} + + +/** Cast from a SVGA fence number to pipe_fence_handle pointer */ +static INLINE struct pipe_fence_handle * +vmw_pipe_fence( uint32_t fence ) +{ + return (struct pipe_fence_handle *)(uintptr_t)fence; +} + + +struct pb_fence_ops * +vmw_fence_ops_create(struct vmw_winsys_screen *vws); + + +#endif /* VMW_FENCE_H_ */ diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen.c b/src/gallium/winsys/drm/vmware/core/vmw_screen.c new file mode 100644 index 0000000000..911eec5e25 --- /dev/null +++ b/src/gallium/winsys/drm/vmware/core/vmw_screen.c @@ -0,0 +1,74 @@ +/********************************************************** + * Copyright 2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + + +#include "vmw_screen.h" + +#include "vmw_context.h" + +#include "util/u_memory.h" +#include "pipe/p_compiler.h" + + +/* Called from vmw_drm_create_screen(), creates and initializes the + * vmw_winsys_screen structure, which is the main entity in this + * module. + */ +struct vmw_winsys_screen * +vmw_winsys_create( int fd ) +{ + struct vmw_winsys_screen *vws = CALLOC_STRUCT(vmw_winsys_screen); + if (!vws) + goto out_no_vws; + + vws->ioctl.drm_fd = fd; + + if (!vmw_ioctl_init(vws)) + goto out_no_ioctl; + + if(!vmw_pools_init(vws)) + goto out_no_pools; + + if (!vmw_winsys_screen_init_svga(vws)) + goto out_no_svga; + + return vws; +out_no_svga: + vmw_pools_cleanup(vws); +out_no_pools: + vmw_ioctl_cleanup(vws); +out_no_ioctl: + FREE(vws); +out_no_vws: + return NULL; +} + +void +vmw_winsys_destroy(struct vmw_winsys_screen *vws) +{ + vmw_pools_cleanup(vws); + vmw_ioctl_cleanup(vws); + FREE(vws); +} diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen.h b/src/gallium/winsys/drm/vmware/core/vmw_screen.h new file mode 100644 index 0000000000..a875107370 --- /dev/null +++ b/src/gallium/winsys/drm/vmware/core/vmw_screen.h @@ -0,0 +1,134 @@ +/********************************************************** + * Copyright 2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * Common definitions for the VMware SVGA winsys. + * + * @author Jose Fonseca + */ + + +#ifndef VMW_SCREEN_H_ +#define VMW_SCREEN_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" + +#include "svga_winsys.h" + +struct pb_manager; +struct vmw_region; + + +struct vmw_winsys_screen +{ + struct svga_winsys_screen base; + + struct { + volatile uint32_t *fifo_map; + uint64_t last_fence; + int drm_fd; + } ioctl; + + struct { + struct pb_manager *gmr; + struct pb_manager *gmr_mm; + struct pb_manager *gmr_fenced; + } pools; +}; + + +static INLINE struct vmw_winsys_screen * +vmw_winsys_screen(struct svga_winsys_screen *base) +{ + return (struct vmw_winsys_screen *)base; +} + +/* */ +uint32 +vmw_ioctl_context_create(struct vmw_winsys_screen *vws); + +void +vmw_ioctl_context_destroy(struct vmw_winsys_screen *vws, + uint32 cid); + +uint32 +vmw_ioctl_surface_create(struct vmw_winsys_screen *vws, + SVGA3dSurfaceFlags flags, + SVGA3dSurfaceFormat format, + SVGA3dSize size, + uint32 numFaces, + uint32 numMipLevels); + +void +vmw_ioctl_surface_destroy(struct vmw_winsys_screen *vws, + uint32 sid); + +void +vmw_ioctl_command(struct vmw_winsys_screen *vws, + void *commands, + uint32_t size, + uint32_t *fence); + +struct vmw_region * +vmw_ioctl_region_create(struct vmw_winsys_screen *vws, uint32_t size); + +void +vmw_ioctl_region_destroy(struct vmw_region *region); + +struct SVGAGuestPtr +vmw_ioctl_region_ptr(struct vmw_region *region); + +void * +vmw_ioctl_region_map(struct vmw_region *region); +void +vmw_ioctl_region_unmap(struct vmw_region *region); + + +int +vmw_ioctl_fence_finish(struct vmw_winsys_screen *vws, + uint32_t fence); + +int +vmw_ioctl_fence_signalled(struct vmw_winsys_screen *vws, + uint32_t fence); + + +/* Initialize parts of vmw_winsys_screen at startup: + */ +boolean vmw_ioctl_init(struct vmw_winsys_screen *vws); +boolean vmw_pools_init(struct vmw_winsys_screen *vws); +boolean vmw_winsys_screen_init_svga(struct vmw_winsys_screen *vws); + +void vmw_ioctl_cleanup(struct vmw_winsys_screen *vws); +void vmw_pools_cleanup(struct vmw_winsys_screen *vws); + +struct vmw_winsys_screen *vmw_winsys_create(int fd); +void vmw_winsys_destroy(struct vmw_winsys_screen *sws); + + +#endif /* VMW_SCREEN_H_ */ diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c new file mode 100644 index 0000000000..5995eee34b --- /dev/null +++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c @@ -0,0 +1,371 @@ +/********************************************************** + * Copyright 2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + + +#include "pipe/p_compiler.h" +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "vmw_screen.h" + +#include "trace/tr_drm.h" + +#include "vmw_screen.h" +#include "vmw_surface.h" +#include "vmw_fence.h" +#include "vmw_context.h" + +#include +#include +#include +#include + +#include + +static struct dri1_api dri1_api_hooks; +static struct dri1_api_version ddx_required = { 0, 1, 0 }; +static struct dri1_api_version ddx_compat = { 0, 0, 0 }; +static struct dri1_api_version dri_required = { 4, 0, 0 }; +static struct dri1_api_version dri_compat = { 4, 0, 0 }; +static struct dri1_api_version drm_required = { 0, 1, 0 }; +static struct dri1_api_version drm_compat = { 0, 0, 0 }; + +static boolean +vmw_dri1_check_version(const struct dri1_api_version *cur, + const struct dri1_api_version *required, + const struct dri1_api_version *compat, + const char component[]) +{ + if (cur->major > required->major && cur->major <= compat->major) + return TRUE; + if (cur->major == required->major && cur->minor >= required->minor) + return TRUE; + + fprintf(stderr, "%s version failure.\n", component); + fprintf(stderr, "%s version is %d.%d.%d and this driver can only work\n" + "with versions %d.%d.x through %d.x.x.\n", + component, + cur->major, + cur->minor, + cur->patch_level, required->major, required->minor, compat->major); + return FALSE; +} + +/* This is actually the entrypoint to the entire driver, called by the + * libGL (or EGL, or ...) code via the drm_api_hooks table at the + * bottom of the file. + */ +static struct pipe_screen * +vmw_drm_create_screen(struct drm_api *drm_api, + int fd, + struct drm_create_screen_arg *arg) +{ + struct vmw_winsys_screen *vws; + struct pipe_screen *screen; + struct dri1_create_screen_arg *dri1; + + if (arg != NULL) { + switch (arg->mode) { + case DRM_CREATE_NORMAL: + break; + case DRM_CREATE_DRI1: + dri1 = (struct dri1_create_screen_arg *)arg; + if (!vmw_dri1_check_version(&dri1->ddx_version, &ddx_required, + &ddx_compat, "ddx - driver api")) + return NULL; + if (!vmw_dri1_check_version(&dri1->dri_version, &dri_required, + &dri_compat, "dri info")) + return NULL; + if (!vmw_dri1_check_version(&dri1->drm_version, &drm_required, + &drm_compat, "vmwgfx drm driver")) + return NULL; + dri1->api = &dri1_api_hooks; + break; + default: + return NULL; + } + } + + vws = vmw_winsys_create( fd ); + if (!vws) + goto out_no_vws; + + screen = svga_screen_create( &vws->base ); + if (!screen) + goto out_no_screen; + + return screen; + + /* Failure cases: + */ +out_no_screen: + vmw_winsys_destroy( vws ); + +out_no_vws: + return NULL; +} + +static INLINE boolean +vmw_dri1_intersect_src_bbox(struct drm_clip_rect *dst, + int dst_x, + int dst_y, + const struct drm_clip_rect *src, + const struct drm_clip_rect *bbox) +{ + int xy1; + int xy2; + + xy1 = ((int)src->x1 > (int)bbox->x1 + dst_x) ? src->x1 : + (int)bbox->x1 + dst_x; + xy2 = ((int)src->x2 < (int)bbox->x2 + dst_x) ? src->x2 : + (int)bbox->x2 + dst_x; + if (xy1 >= xy2 || xy1 < 0) + return FALSE; + + dst->x1 = xy1; + dst->x2 = xy2; + + xy1 = ((int)src->y1 > (int)bbox->y1 + dst_y) ? src->y1 : + (int)bbox->y1 + dst_y; + xy2 = ((int)src->y2 < (int)bbox->y2 + dst_y) ? src->y2 : + (int)bbox->y2 + dst_y; + if (xy1 >= xy2 || xy1 < 0) + return FALSE; + + dst->y1 = xy1; + dst->y2 = xy2; + return TRUE; +} + +/** + * No fancy get-surface-from-sarea stuff here. + * Just use the present blit. + */ + +static void +vmw_dri1_present_locked(struct pipe_context *locked_pipe, + struct pipe_surface *surf, + const struct drm_clip_rect *rect, + unsigned int num_clip, + int x_draw, int y_draw, + const struct drm_clip_rect *bbox, + struct pipe_fence_handle **p_fence) +{ + struct svga_winsys_surface *srf = + svga_screen_texture_get_winsys_surface(surf->texture); + struct vmw_svga_winsys_surface *vsrf = vmw_svga_winsys_surface(srf); + struct vmw_winsys_screen *vws = + vmw_winsys_screen(svga_winsys_screen(locked_pipe->screen)); + struct drm_clip_rect clip; + int i; + struct + { + SVGA3dCmdHeader header; + SVGA3dCmdPresent body; + SVGA3dCopyRect rect; + } cmd; + boolean visible = FALSE; + uint32_t fence_seq = 0; + + VMW_FUNC; + cmd.header.id = SVGA_3D_CMD_PRESENT; + cmd.header.size = sizeof cmd.body + sizeof cmd.rect; + cmd.body.sid = vsrf->sid; + + for (i = 0; i < num_clip; ++i) { + if (!vmw_dri1_intersect_src_bbox(&clip, x_draw, y_draw, rect++, bbox)) + continue; + + cmd.rect.x = clip.x1; + cmd.rect.y = clip.y1; + cmd.rect.w = clip.x2 - clip.x1; + cmd.rect.h = clip.y2 - clip.y1; + cmd.rect.srcx = (int)clip.x1 - x_draw; + cmd.rect.srcy = (int)clip.y1 - y_draw; + + vmw_printf("%s: Clip %d x %d y %d w %d h %d srcx %d srcy %d\n", + __FUNCTION__, + i, + cmd.rect.x, + cmd.rect.y, + cmd.rect.w, cmd.rect.h, cmd.rect.srcx, cmd.rect.srcy); + + vmw_ioctl_command(vws, &cmd, sizeof cmd.header + cmd.header.size, + &fence_seq); + visible = TRUE; + } + + *p_fence = (visible) ? vmw_pipe_fence(fence_seq) : NULL; + vmw_svga_winsys_surface_reference(&vsrf, NULL); +} + +/** + * FIXME: We'd probably want to cache these buffers in the + * screen, based on handle. + */ + +static struct pipe_buffer * +vmw_drm_buffer_from_handle(struct drm_api *drm_api, + struct pipe_screen *screen, + const char *name, + unsigned handle) +{ + struct vmw_svga_winsys_surface *vsrf; + struct svga_winsys_surface *ssrf; + struct vmw_winsys_screen *vws = + vmw_winsys_screen(svga_winsys_screen(screen)); + struct pipe_buffer *buf; + union drm_vmw_surface_reference_arg arg; + struct drm_vmw_surface_arg *req = &arg.req; + struct drm_vmw_surface_create_req *rep = &arg.rep; + int ret; + int i; + + /** + * The vmware device specific handle is the hardware SID. + * FIXME: We probably want to move this to the ioctl implementations. + */ + + memset(&arg, 0, sizeof(arg)); + req->sid = handle; + + ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_REF_SURFACE, + &arg, sizeof(arg)); + + if (ret) { + fprintf(stderr, "Failed referencing shared surface. SID %d.\n" + "Error %d (%s).\n", + handle, ret, strerror(-ret)); + return NULL; + } + + if (rep->mip_levels[0] != 1) { + fprintf(stderr, "Incorrect number of mipmap levels on shared surface." + " SID %d, levels %d\n", + handle, rep->mip_levels[0]); + goto out_mip; + } + + for (i=1; i < DRM_VMW_MAX_SURFACE_FACES; ++i) { + if (rep->mip_levels[i] != 0) { + fprintf(stderr, "Incorrect number of faces levels on shared surface." + " SID %d, face %d present.\n", + handle, i); + goto out_mip; + } + } + + vsrf = CALLOC_STRUCT(vmw_svga_winsys_surface); + if (!vsrf) + goto out_mip; + + pipe_reference_init(&vsrf->refcnt, 1); + p_atomic_set(&vsrf->validated, 0); + vsrf->sid = handle; + ssrf = svga_winsys_surface(vsrf); + buf = svga_screen_buffer_wrap_surface(screen, rep->format, ssrf); + if (!buf) + vmw_svga_winsys_surface_reference(&vsrf, NULL); + + return buf; + out_mip: + vmw_ioctl_surface_destroy(vws, handle); + return NULL; +} + +static struct pipe_texture * +vmw_drm_texture_from_handle(struct drm_api *drm_api, + struct pipe_screen *screen, + struct pipe_texture *templat, + const char *name, + unsigned stride, + unsigned handle) +{ + struct pipe_buffer *buffer; + buffer = vmw_drm_buffer_from_handle(drm_api, screen, name, handle); + + if (!buffer) + return NULL; + + return screen->texture_blanket(screen, templat, &stride, buffer); +} + +static boolean +vmw_drm_handle_from_buffer(struct drm_api *drm_api, + struct pipe_screen *screen, + struct pipe_buffer *buffer, + unsigned *handle) +{ + struct svga_winsys_surface *surface = + svga_screen_buffer_get_winsys_surface(buffer); + struct vmw_svga_winsys_surface *vsrf; + + if (!surface) + return FALSE; + + vsrf = vmw_svga_winsys_surface(surface); + *handle = vsrf->sid; + vmw_svga_winsys_surface_reference(&vsrf, NULL); + return TRUE; +} + +static boolean +vmw_drm_handle_from_texture(struct drm_api *drm_api, + struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned *stride, + unsigned *handle) +{ + struct pipe_buffer *buffer; + + if (!svga_screen_buffer_from_texture(texture, &buffer, stride)) + return FALSE; + + return vmw_drm_handle_from_buffer(drm_api, screen, buffer, handle); +} + +static struct pipe_context* +vmw_drm_create_context(struct drm_api *drm_api, + struct pipe_screen *screen) +{ + return vmw_svga_context_create(screen); +} + +static struct dri1_api dri1_api_hooks = { + .front_srf_locked = NULL, + .present_locked = vmw_dri1_present_locked +}; + +static struct drm_api vmw_drm_api_hooks = { + .create_screen = vmw_drm_create_screen, + .create_context = vmw_drm_create_context, + .texture_from_shared_handle = vmw_drm_texture_from_handle, + .shared_handle_from_texture = vmw_drm_handle_from_texture, + .local_handle_from_texture = vmw_drm_handle_from_texture, +}; + +struct drm_api* drm_api_create() +{ + return trace_drm_create(&vmw_drm_api_hooks); +} diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_ioctl.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_ioctl.c new file mode 100644 index 0000000000..b3515732a2 --- /dev/null +++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_ioctl.c @@ -0,0 +1,503 @@ +/********************************************************** + * Copyright 2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * + * Wrappers for DRM ioctl functionlaity used by the rest of the vmw + * drm winsys. + * + * Based on svgaicd_escape.c + */ + + +#include "svga_cmd.h" +#include "util/u_memory.h" +#include "util/u_math.h" +#include "svgadump/svga_dump.h" +#include "vmw_screen.h" +#include "vmw_context.h" +#include "xf86drm.h" +#include "vmwgfx_drm.h" + +#include +#include +#include + +struct vmw_region +{ + SVGAGuestPtr ptr; + uint32_t handle; + uint64_t map_handle; + void *data; + uint32_t map_count; + int drm_fd; + uint32_t size; +}; + +static void +vmw_check_last_cmd(struct vmw_winsys_screen *vws) +{ + static uint32_t buffer[16384]; + struct drm_vmw_fifo_debug_arg arg; + int ret; + + return; + memset(&arg, 0, sizeof(arg)); + arg.debug_buffer = (unsigned long)buffer; + arg.debug_buffer_size = 65536; + + ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_FIFO_DEBUG, + &arg, sizeof(arg)); + + if (ret) { + debug_printf("%s Ioctl error: \"%s\".\n", __FUNCTION__, strerror(-ret)); + return; + } + + if (arg.did_not_fit) { + debug_printf("%s Command did not fit completely.\n", __FUNCTION__); + } + + svga_dump_commands(buffer, arg.used_size); +} + +static void +vmw_ioctl_fifo_unmap(struct vmw_winsys_screen *vws, void *mapping) +{ + VMW_FUNC; + (void)munmap(mapping, getpagesize()); +} + + +static void * +vmw_ioctl_fifo_map(struct vmw_winsys_screen *vws, + uint32_t fifo_offset ) +{ + void *map; + + VMW_FUNC; + + map = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, + vws->ioctl.drm_fd, fifo_offset); + + if (map == MAP_FAILED) { + debug_printf("Map failed %s\n", strerror(errno)); + return NULL; + } + + vmw_printf("Fifo (min) is 0x%08x\n", ((uint32_t *) map)[SVGA_FIFO_MIN]); + + return map; +} + +uint32 +vmw_ioctl_context_create(struct vmw_winsys_screen *vws) +{ + struct drm_vmw_context_arg c_arg; + int ret; + + VMW_FUNC; + + ret = drmCommandRead(vws->ioctl.drm_fd, DRM_VMW_CREATE_CONTEXT, + &c_arg, sizeof(c_arg)); + + if (ret) + return -1; + + vmw_check_last_cmd(vws); + vmw_printf("Context id is %d\n", c_arg.cid); + + return c_arg.cid; +} + +void +vmw_ioctl_context_destroy(struct vmw_winsys_screen *vws, uint32 cid) +{ + struct drm_vmw_context_arg c_arg; + + VMW_FUNC; + + memset(&c_arg, 0, sizeof(c_arg)); + c_arg.cid = cid; + + (void)drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_UNREF_CONTEXT, + &c_arg, sizeof(c_arg)); + + vmw_check_last_cmd(vws); +} + +uint32 +vmw_ioctl_surface_create(struct vmw_winsys_screen *vws, + SVGA3dSurfaceFlags flags, + SVGA3dSurfaceFormat format, + SVGA3dSize size, + uint32_t numFaces, uint32_t numMipLevels) +{ + union drm_vmw_surface_create_arg s_arg; + struct drm_vmw_surface_create_req *req = &s_arg.req; + struct drm_vmw_surface_arg *rep = &s_arg.rep; + struct drm_vmw_size sizes[DRM_VMW_MAX_SURFACE_FACES* + DRM_VMW_MAX_MIP_LEVELS]; + struct drm_vmw_size *cur_size; + uint32_t iFace; + uint32_t iMipLevel; + int ret; + + vmw_printf("%s flags %d format %d\n", __FUNCTION__, flags, format); + + memset(&s_arg, 0, sizeof(s_arg)); + req->flags = (uint32_t) flags; + req->format = (uint32_t) format; + req->shareable = 1; + + assert(numFaces * numMipLevels < DRM_VMW_MAX_SURFACE_FACES* + DRM_VMW_MAX_MIP_LEVELS); + cur_size = sizes; + for (iFace = 0; iFace < numFaces; ++iFace) { + SVGA3dSize mipSize = size; + + req->mip_levels[iFace] = numMipLevels; + for (iMipLevel = 0; iMipLevel < numMipLevels; ++iMipLevel) { + cur_size->width = mipSize.width; + cur_size->height = mipSize.height; + cur_size->depth = mipSize.depth; + mipSize.width = MAX2(mipSize.width >> 1, 1); + mipSize.height = MAX2(mipSize.height >> 1, 1); + mipSize.depth = MAX2(mipSize.depth >> 1, 1); + cur_size++; + } + } + for (iFace = numFaces; iFace < SVGA3D_MAX_SURFACE_FACES; ++iFace) { + req->mip_levels[iFace] = 0; + } + + req->size_addr = (unsigned long)&sizes; + + ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_CREATE_SURFACE, + &s_arg, sizeof(s_arg)); + + if (ret) + return -1; + + vmw_printf("Surface id is %d\n", rep->sid); + vmw_check_last_cmd(vws); + + return rep->sid; +} + +void +vmw_ioctl_surface_destroy(struct vmw_winsys_screen *vws, uint32 sid) +{ + struct drm_vmw_surface_arg s_arg; + + VMW_FUNC; + + memset(&s_arg, 0, sizeof(s_arg)); + s_arg.sid = sid; + + (void)drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_UNREF_SURFACE, + &s_arg, sizeof(s_arg)); + vmw_check_last_cmd(vws); + +} + +void +vmw_ioctl_command(struct vmw_winsys_screen *vws, void *commands, uint32_t size, + uint32_t * pfence) +{ + struct drm_vmw_execbuf_arg arg; + struct drm_vmw_fence_rep rep; + int ret; + +#ifdef DEBUG + { + static boolean firsttime = TRUE; + static boolean debug = FALSE; + static boolean skip = FALSE; + if (firsttime) { + debug = debug_get_bool_option("SVGA_DUMP_CMD", FALSE); + skip = debug_get_bool_option("SVGA_SKIP_CMD", FALSE); + } + if (debug) { + VMW_FUNC; + svga_dump_commands(commands, size); + } + firsttime = FALSE; + if (skip) { + size = 0; + } + } +#endif + + memset(&arg, 0, sizeof(arg)); + memset(&rep, 0, sizeof(rep)); + + rep.error = -EFAULT; + arg.fence_rep = (unsigned long)&rep; + arg.commands = (unsigned long)commands; + arg.command_size = size; + + do { + ret = drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_EXECBUF, &arg, sizeof(arg)); + } while(ret == -ERESTART); + if (ret) { + debug_printf("%s error %s.\n", __FUNCTION__, strerror(-ret)); + } + if (rep.error) { + + /* + * Kernel has synced and put the last fence sequence in the FIFO + * register. + */ + + if (rep.error == -EFAULT) + rep.fence_seq = vws->ioctl.fifo_map[SVGA_FIFO_FENCE]; + + debug_printf("%s Fence error %s.\n", __FUNCTION__, + strerror(-rep.error)); + } + + vws->ioctl.last_fence = rep.fence_seq; + + if (pfence) + *pfence = rep.fence_seq; + vmw_check_last_cmd(vws); + +} + + +struct vmw_region * +vmw_ioctl_region_create(struct vmw_winsys_screen *vws, uint32_t size) +{ + struct vmw_region *region; + union drm_vmw_alloc_dmabuf_arg arg; + struct drm_vmw_alloc_dmabuf_req *req = &arg.req; + struct drm_vmw_dmabuf_rep *rep = &arg.rep; + int ret; + + vmw_printf("%s: size = %u\n", __FUNCTION__, size); + + region = CALLOC_STRUCT(vmw_region); + if (!region) + goto out_err1; + + memset(&arg, 0, sizeof(arg)); + req->size = size; + do { + ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_ALLOC_DMABUF, &arg, + sizeof(arg)); + } while (ret == -ERESTART); + + if (ret) { + debug_printf("IOCTL failed %d: %s\n", ret, strerror(-ret)); + goto out_err1; + } + + region->ptr.gmrId = rep->cur_gmr_id; + region->ptr.offset = rep->cur_gmr_offset; + region->data = NULL; + region->handle = rep->handle; + region->map_handle = rep->map_handle; + region->map_count = 0; + region->size = size; + region->drm_fd = vws->ioctl.drm_fd; + + vmw_printf(" gmrId = %u, offset = %u\n", + region->ptr.gmrId, region->ptr.offset); + + return region; + + out_err1: + return NULL; +} + +void +vmw_ioctl_region_destroy(struct vmw_region *region) +{ + struct drm_vmw_unref_dmabuf_arg arg; + + vmw_printf("%s: gmrId = %u, offset = %u\n", __FUNCTION__, + region->ptr.gmrId, region->ptr.offset); + + if (region->data) { + munmap(region->data, region->size); + region->data = NULL; + } + + memset(&arg, 0, sizeof(arg)); + arg.handle = region->handle; + drmCommandWrite(region->drm_fd, DRM_VMW_UNREF_DMABUF, &arg, sizeof(arg)); + + FREE(region); +} + +SVGAGuestPtr +vmw_ioctl_region_ptr(struct vmw_region *region) +{ + return region->ptr; +} + +void * +vmw_ioctl_region_map(struct vmw_region *region) +{ + void *map; + + vmw_printf("%s: gmrId = %u, offset = %u\n", __FUNCTION__, + region->ptr.gmrId, region->ptr.offset); + + if (region->data == NULL) { + map = mmap(NULL, region->size, PROT_READ | PROT_WRITE, MAP_SHARED, + region->drm_fd, region->map_handle); + if (map == MAP_FAILED) { + debug_printf("%s: Map failed.\n", __FUNCTION__); + return NULL; + } + + region->data = map; + } + + ++region->map_count; + + return region->data; +} + +void +vmw_ioctl_region_unmap(struct vmw_region *region) +{ + vmw_printf("%s: gmrId = %u, offset = %u\n", __FUNCTION__, + region->ptr.gmrId, region->ptr.offset); + --region->map_count; +} + + +int +vmw_ioctl_fence_signalled(struct vmw_winsys_screen *vws, + uint32_t fence) +{ + uint32_t expected; + uint32_t current; + + assert(fence); + if(!fence) + return 0; + + expected = fence; + current = vws->ioctl.fifo_map[SVGA_FIFO_FENCE]; + + if ((int32)(current - expected) >= 0) + return 0; /* fence passed */ + else + return -1; +} + + +static void +vmw_ioctl_sync(struct vmw_winsys_screen *vws, + uint32_t fence) +{ + uint32_t cur_fence; + struct drm_vmw_fence_wait_arg arg; + int ret; + + vmw_printf("%s: fence = %lu\n", __FUNCTION__, + (unsigned long)fence); + + cur_fence = vws->ioctl.fifo_map[SVGA_FIFO_FENCE]; + vmw_printf("%s: Fence id read is 0x%08x\n", __FUNCTION__, + (unsigned int)cur_fence); + + if ((cur_fence - fence) < (1 << 24)) + return; + + memset(&arg, 0, sizeof(arg)); + arg.sequence = fence; + + do { + ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_FENCE_WAIT, &arg, + sizeof(arg)); + } while (ret == -ERESTART); +} + + +int +vmw_ioctl_fence_finish(struct vmw_winsys_screen *vws, + uint32_t fence) +{ + assert(fence); + + if(fence) { + if(vmw_ioctl_fence_signalled(vws, fence) != 0) { + vmw_ioctl_sync(vws, fence); + } + } + + return 0; +} + + +boolean +vmw_ioctl_init(struct vmw_winsys_screen *vws) +{ + struct drm_vmw_getparam_arg gp_arg; + int ret; + + VMW_FUNC; + + memset(&gp_arg, 0, sizeof(gp_arg)); + gp_arg.param = DRM_VMW_PARAM_FIFO_OFFSET; + ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM, + &gp_arg, sizeof(gp_arg)); + + if (ret) { + debug_printf("GET_PARAM on %d returned %d: %s\n", + vws->ioctl.drm_fd, ret, strerror(-ret)); + goto out_err1; + } + + vmw_printf("Offset to map is 0x%08llx\n", + (unsigned long long)gp_arg.value); + + vws->ioctl.fifo_map = vmw_ioctl_fifo_map(vws, gp_arg.value); + if (vws->ioctl.fifo_map == NULL) + goto out_err1; + + vmw_printf("%s OK\n", __FUNCTION__); + return TRUE; + + out_err1: + debug_printf("%s Failed\n", __FUNCTION__); + return FALSE; +} + + + +void +vmw_ioctl_cleanup(struct vmw_winsys_screen *vws) +{ + VMW_FUNC; + + vmw_ioctl_fifo_unmap(vws, (void *)vws->ioctl.fifo_map); +} diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c new file mode 100644 index 0000000000..b1c24b0cb6 --- /dev/null +++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c @@ -0,0 +1,79 @@ +/********************************************************** + * Copyright 2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + + +#include "vmw_screen.h" + +#include "vmw_buffer.h" +#include "vmw_fence.h" + +#include "pipebuffer/pb_buffer.h" +#include "pipebuffer/pb_bufmgr.h" + +void +vmw_pools_cleanup(struct vmw_winsys_screen *vws) +{ + if(vws->pools.gmr_fenced) + vws->pools.gmr_fenced->destroy(vws->pools.gmr_fenced); + + /* gmr_mm pool is already destroyed above */ + + if(vws->pools.gmr) + vws->pools.gmr->destroy(vws->pools.gmr); +} + + +boolean +vmw_pools_init(struct vmw_winsys_screen *vws) +{ + vws->pools.gmr = vmw_gmr_bufmgr_create(vws); + if(!vws->pools.gmr) + goto error; + + vws->pools.gmr_mm = mm_bufmgr_create(vws->pools.gmr, + 16*1024*1024, + 12 /* 4096 alignment */); + if(!vws->pools.gmr_mm) + goto error; + + vws->pools.gmr_fenced = fenced_bufmgr_create( + vws->pools.gmr_mm, + vmw_fence_ops_create(vws)); + +#ifdef DEBUG + vws->pools.gmr_fenced = pb_debug_manager_create(vws->pools.gmr_fenced, + 4096, + 4096); +#endif + if(!vws->pools.gmr_fenced) + goto error; + + return TRUE; + +error: + vmw_pools_cleanup(vws); + return FALSE; +} + diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_svga.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_svga.c new file mode 100644 index 0000000000..d7d008859b --- /dev/null +++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_svga.c @@ -0,0 +1,295 @@ +/********************************************************** + * Copyright 2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * This file implements the SVGA interface into this winsys, defined + * in drivers/svga/svga_winsys.h. + * + * @author Keith Whitwell + * @author Jose Fonseca + */ + + +#include "svga_cmd.h" +#include "svga3d_caps.h" + +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipebuffer/pb_buffer.h" +#include "pipebuffer/pb_bufmgr.h" +#include "svga_winsys.h" +#include "vmw_context.h" +#include "vmw_screen.h" +#include "vmw_surface.h" +#include "vmw_buffer.h" +#include "vmw_fence.h" + + +static struct svga_winsys_buffer * +vmw_svga_winsys_buffer_create(struct svga_winsys_screen *sws, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct vmw_winsys_screen *vws = vmw_winsys_screen(sws); + struct pb_desc desc; + struct pb_manager *provider; + struct pb_buffer *buffer; + + memset(&desc, 0, sizeof desc); + desc.alignment = alignment; + desc.usage = usage; + + provider = vws->pools.gmr_fenced; + + assert(provider); + buffer = provider->create_buffer(provider, size, &desc); + if(!buffer) + return NULL; + + return vmw_svga_winsys_buffer(buffer); +} + + +static void * +vmw_svga_winsys_buffer_map(struct svga_winsys_screen *sws, + struct svga_winsys_buffer *buf, + unsigned flags) +{ + (void)sws; + return pb_map(vmw_pb_buffer(buf), flags); +} + + +static void +vmw_svga_winsys_buffer_unmap(struct svga_winsys_screen *sws, + struct svga_winsys_buffer *buf) +{ + (void)sws; + pb_unmap(vmw_pb_buffer(buf)); +} + + +static void +vmw_svga_winsys_buffer_destroy(struct svga_winsys_screen *sws, + struct svga_winsys_buffer *buf) +{ + struct pb_buffer *pbuf = vmw_pb_buffer(buf); + (void)sws; + pb_reference(&pbuf, NULL); +} + + +static void +vmw_svga_winsys_fence_reference(struct svga_winsys_screen *sws, + struct pipe_fence_handle **pdst, + struct pipe_fence_handle *src) +{ + (void)sws; + *pdst = src; +} + + +static int +vmw_svga_winsys_fence_signalled(struct svga_winsys_screen *sws, + struct pipe_fence_handle *fence, + unsigned flag) +{ + struct vmw_winsys_screen *vws = vmw_winsys_screen(sws); + (void)flag; + return vmw_ioctl_fence_signalled(vws, vmw_fence(fence)); +} + + +static int +vmw_svga_winsys_fence_finish(struct svga_winsys_screen *sws, + struct pipe_fence_handle *fence, + unsigned flag) +{ + struct vmw_winsys_screen *vws = vmw_winsys_screen(sws); + (void)flag; + return vmw_ioctl_fence_finish(vws, vmw_fence(fence)); +} + + + +static struct svga_winsys_surface * +vmw_svga_winsys_surface_create(struct svga_winsys_screen *sws, + SVGA3dSurfaceFlags flags, + SVGA3dSurfaceFormat format, + SVGA3dSize size, + uint32 numFaces, + uint32 numMipLevels) +{ + struct vmw_winsys_screen *vws = vmw_winsys_screen(sws); + struct vmw_svga_winsys_surface *surface; + + surface = CALLOC_STRUCT(vmw_svga_winsys_surface); + if(!surface) + goto no_surface; + + pipe_reference_init(&surface->refcnt, 1); + p_atomic_set(&surface->validated, 0); + surface->screen = vws; + surface->sid = vmw_ioctl_surface_create(vws, + flags, format, size, + numFaces, numMipLevels); + if(surface->sid == SVGA3D_INVALID_ID) + goto no_sid; + + return svga_winsys_surface(surface); + +no_sid: + FREE(surface); +no_surface: + return NULL; +} + + +static boolean +vmw_svga_winsys_surface_is_flushed(struct svga_winsys_screen *sws, + struct svga_winsys_surface *surface) +{ + struct vmw_svga_winsys_surface *vsurf = vmw_svga_winsys_surface(surface); + return (p_atomic_read(&vsurf->validated) == 0); +} + + +static void +vmw_svga_winsys_surface_ref(struct svga_winsys_screen *sws, + struct svga_winsys_surface **pDst, + struct svga_winsys_surface *src) +{ + struct vmw_svga_winsys_surface *d_vsurf = vmw_svga_winsys_surface(*pDst); + struct vmw_svga_winsys_surface *s_vsurf = vmw_svga_winsys_surface(src); + + vmw_svga_winsys_surface_reference(&d_vsurf, s_vsurf); + *pDst = svga_winsys_surface(d_vsurf); +} + + +static void +vmw_svga_winsys_destroy(struct svga_winsys_screen *sws) +{ + struct vmw_winsys_screen *vws = vmw_winsys_screen(sws); + + vmw_winsys_destroy(vws); +} + + +static boolean +vmw_svga_winsys_get_cap(struct svga_winsys_screen *sws, + SVGA3dDevCapIndex index, + SVGA3dDevCapResult *result) +{ + struct vmw_winsys_screen *vws = vmw_winsys_screen(sws); + const uint32 *capsBlock; + const SVGA3dCapsRecord *capsRecord = NULL; + uint32 offset; + const SVGA3dCapPair *capArray; + int numCaps, first, last; + + if(!vws->ioctl.fifo_map) + return FALSE; + + if(vws->ioctl.fifo_map[SVGA_FIFO_3D_HWVERSION] < SVGA3D_HWVERSION_WS6_B1) + return FALSE; + + /* + * Search linearly through the caps block records for the specified type. + */ + capsBlock = (const uint32 *)&vws->ioctl.fifo_map[SVGA_FIFO_3D_CAPS]; + for (offset = 0; capsBlock[offset] != 0; offset += capsBlock[offset]) { + const SVGA3dCapsRecord *record; + assert(offset < SVGA_FIFO_3D_CAPS_SIZE); + record = (const SVGA3dCapsRecord *) (capsBlock + offset); + if ((record->header.type >= SVGA3DCAPS_RECORD_DEVCAPS_MIN) && + (record->header.type <= SVGA3DCAPS_RECORD_DEVCAPS_MAX) && + (!capsRecord || (record->header.type > capsRecord->header.type))) { + capsRecord = record; + } + } + + if(!capsRecord) + return FALSE; + + /* + * Calculate the number of caps from the size of the record. + */ + capArray = (const SVGA3dCapPair *) capsRecord->data; + numCaps = (int) ((capsRecord->header.length * sizeof(uint32) - + sizeof capsRecord->header) / (2 * sizeof(uint32))); + + /* + * Binary-search for the cap with the specified index. + */ + for (first = 0, last = numCaps - 1; first <= last; ) { + int mid = (first + last) / 2; + + if ((SVGA3dDevCapIndex) capArray[mid][0] == index) { + /* + * Found it. + */ + result->u = capArray[mid][1]; + return TRUE; + } + + /* + * Divide and conquer. + */ + if ((SVGA3dDevCapIndex) capArray[mid][0] > index) { + last = mid - 1; + } else { + first = mid + 1; + } + } + + return FALSE; +} + + +boolean +vmw_winsys_screen_init_svga(struct vmw_winsys_screen *vws) +{ + vws->base.destroy = vmw_svga_winsys_destroy; + vws->base.get_cap = vmw_svga_winsys_get_cap; + vws->base.context_create = vmw_svga_winsys_context_create; + vws->base.surface_create = vmw_svga_winsys_surface_create; + vws->base.surface_is_flushed = vmw_svga_winsys_surface_is_flushed; + vws->base.surface_reference = vmw_svga_winsys_surface_ref; + vws->base.buffer_create = vmw_svga_winsys_buffer_create; + vws->base.buffer_map = vmw_svga_winsys_buffer_map; + vws->base.buffer_unmap = vmw_svga_winsys_buffer_unmap; + vws->base.buffer_destroy = vmw_svga_winsys_buffer_destroy; + vws->base.fence_reference = vmw_svga_winsys_fence_reference; + vws->base.fence_signalled = vmw_svga_winsys_fence_signalled; + vws->base.fence_finish = vmw_svga_winsys_fence_finish; + + return TRUE; +} + + diff --git a/src/gallium/winsys/drm/vmware/core/vmw_surface.c b/src/gallium/winsys/drm/vmware/core/vmw_surface.c new file mode 100644 index 0000000000..9ec4bf9272 --- /dev/null +++ b/src/gallium/winsys/drm/vmware/core/vmw_surface.c @@ -0,0 +1,59 @@ +/********************************************************** + * Copyright 2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + + +#include "svga_cmd.h" +#include "util/u_debug.h" +#include "util/u_memory.h" + +#include "vmw_surface.h" +#include "vmw_screen.h" + +void +vmw_svga_winsys_surface_reference(struct vmw_svga_winsys_surface **pdst, + struct vmw_svga_winsys_surface *src) +{ + struct pipe_reference *src_ref; + struct pipe_reference *dst_ref; + struct vmw_svga_winsys_surface *dst = *pdst; + + if(*pdst == src || pdst == NULL) + return; + + src_ref = src ? &src->refcnt : NULL; + dst_ref = dst ? &dst->refcnt : NULL; + + if (pipe_reference(&dst_ref, src_ref)) { + vmw_ioctl_surface_destroy(dst->screen, dst->sid); +#ifdef DEBUG + /* to detect dangling pointers */ + assert(p_atomic_read(&dst->validated) == 0); + dst->sid = SVGA3D_INVALID_ID; +#endif + FREE(dst); + } + + *pdst = src; +} diff --git a/src/gallium/winsys/drm/vmware/core/vmw_surface.h b/src/gallium/winsys/drm/vmware/core/vmw_surface.h new file mode 100644 index 0000000000..340cc1532e --- /dev/null +++ b/src/gallium/winsys/drm/vmware/core/vmw_surface.h @@ -0,0 +1,79 @@ +/********************************************************** + * Copyright 2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * Surfaces for VMware SVGA winsys. + * + * @author Jose Fonseca + */ + + +#ifndef VMW_SURFACE_H_ +#define VMW_SURFACE_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_atomic.h" +#include "pipe/p_refcnt.h" + +#define VMW_MAX_PRESENTS 3 + + + +struct vmw_svga_winsys_surface +{ + struct pipe_atomic validated; + struct pipe_reference refcnt; + + struct vmw_winsys_screen *screen; + uint32_t sid; + + /* FIXME: make this thread safe */ + unsigned next_present_no; + uint32_t present_fences[VMW_MAX_PRESENTS]; +}; + + +static INLINE struct svga_winsys_surface * +svga_winsys_surface(struct vmw_svga_winsys_surface *surf) +{ + assert(!surf || surf->sid != SVGA3D_INVALID_ID); + return (struct svga_winsys_surface *)surf; +} + + +static INLINE struct vmw_svga_winsys_surface * +vmw_svga_winsys_surface(struct svga_winsys_surface *surf) +{ + return (struct vmw_svga_winsys_surface *)surf; +} + + +void +vmw_svga_winsys_surface_reference(struct vmw_svga_winsys_surface **pdst, + struct vmw_svga_winsys_surface *src); + +#endif /* VMW_SURFACE_H_ */ diff --git a/src/gallium/winsys/drm/vmware/dri/Makefile b/src/gallium/winsys/drm/vmware/dri/Makefile new file mode 100644 index 0000000000..8a39e23da6 --- /dev/null +++ b/src/gallium/winsys/drm/vmware/dri/Makefile @@ -0,0 +1,18 @@ + +TOP = ../../../../../.. +include $(TOP)/configs/current + +LIBNAME = vmwgfx_dri.so + +PIPE_DRIVERS = \ + $(TOP)/src/gallium/state_trackers/dri/libdridrm.a \ + $(TOP)/src/gallium/winsys/drm/vmware/core/libsvgadrm.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/svga/libsvga.a + +C_SOURCES = \ + $(COMMON_GALLIUM_SOURCES) + +include ../../Makefile.template + +symlinks: diff --git a/src/gallium/winsys/drm/vmware/dri/SConscript b/src/gallium/winsys/drm/vmware/dri/SConscript new file mode 100644 index 0000000000..adf2bf16d1 --- /dev/null +++ b/src/gallium/winsys/drm/vmware/dri/SConscript @@ -0,0 +1,63 @@ +import os +import os.path + +Import('*') + +if env['platform'] == 'linux': + + if env['dri']: + env = env.Clone() + + sources = [ + '#/src/mesa/drivers/dri/common/utils.c', + '#/src/mesa/drivers/dri/common/vblank.c', + '#/src/mesa/drivers/dri/common/dri_util.c', + '#/src/mesa/drivers/dri/common/xmlconfig.c', + ] + + + env.ParseConfig('pkg-config --cflags --libs libdrm') + + env.Prepend(CPPPATH = [ + '#/src/mesa/state_tracker', + '#/src/mesa/drivers/dri/common', + '#/src/mesa/main', + '#/src/mesa/glapi', + '#/src/mesa', + '#/include', + '#/src/gallium/drivers/svga', + '#/src/gallium/drivers/svga/include', + ]) + + env.Append(CPPDEFINES = [ + 'HAVE_STDINT_H', + 'HAVE_SYS_TYPES_H', + ]) + + env.Append(CFLAGS = [ + '-Werror', + '-std=gnu99', + '-D_FILE_OFFSET_BITS=64', + ]) + + env.Prepend(LIBPATH = [ + ]) + + env.Prepend(LIBS = [ + trace, + st_dri, + svgadrm, + svga, + mesa, + auxiliaries, + ]) + + # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions + env.LoadableModule( + target ='vmwgfx_dri.so', + source = sources, + LIBS = env['LIBS'], + SHLIBPREFIX = '', + ) + + diff --git a/src/gallium/winsys/drm/vmware/egl/Makefile b/src/gallium/winsys/drm/vmware/egl/Makefile new file mode 100644 index 0000000000..8e2980c318 --- /dev/null +++ b/src/gallium/winsys/drm/vmware/egl/Makefile @@ -0,0 +1,18 @@ + +TOP = ../../../../../.. +include $(TOP)/configs/current + +LIBNAME = EGL_svga.so + +PIPE_DRIVERS = \ + $(TOP)/src/gallium/state_trackers/egl/libegldrm.a \ + $(TOP)/src/gallium/winsys/drm/vmware/core/libsvgadrm.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/svga/libsvga.a + +C_SOURCES = \ + $(COMMON_GALLIUM_SOURCES) + +include ../../Makefile.template + +symlinks: diff --git a/src/gallium/winsys/drm/vmware/xorg/Makefile b/src/gallium/winsys/drm/vmware/xorg/Makefile new file mode 100644 index 0000000000..e152263256 --- /dev/null +++ b/src/gallium/winsys/drm/vmware/xorg/Makefile @@ -0,0 +1,54 @@ +TARGET = vmwgfx_drv.so +CFILES = $(wildcard ./*.c) +OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES)) +TOP = ../../../../../.. + +include $(TOP)/configs/current + +INCLUDES = \ + $(shell pkg-config --cflags-only-I pixman-1 xorg-server libdrm xproto) \ + -I../gem \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/gallium/auxiliary \ + -I$(TOP)/src/gallium + +LIBS = \ + $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ + $(TOP)/src/gallium/winsys/drm/vmware/core/libsvgadrm.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/svga/libsvga.a \ + $(GALLIUM_AUXILIARIES) + +DRIVER_DEFINES = \ + -DHAVE_CONFIG_H + + +############################################# + + + +all default: $(TARGET) + +$(TARGET): $(OBJECTS) Makefile $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a $(LIBS) + $(TOP)/bin/mklib -noprefix -o $@ \ + $(OBJECTS) $(LIBS) $(shell pkg-config --libs libdrm) -ldrm_intel + +clean: + rm -rf $(OBJECTS) $(TARGET) + +install: + $(INSTALL) -d $(DESTDIR)/$(XORG_DRIVER_INSTALL_DIR) + $(MINSTALL) -m 755 $(TARGET) $(DESTDIR)/$(XORG_DRIVER_INSTALL_DIR) + + +############################################## + + +.c.o: + $(CC) -c $(CFLAGS) $(INCLUDES) $(DRIVER_DEFINES) $< -o $@ + + +############################################## + +.PHONY = all clean install diff --git a/src/gallium/winsys/drm/vmware/xorg/SConscript b/src/gallium/winsys/drm/vmware/xorg/SConscript new file mode 100644 index 0000000000..41a489774c --- /dev/null +++ b/src/gallium/winsys/drm/vmware/xorg/SConscript @@ -0,0 +1,55 @@ +import os.path + +Import('*') + +if env['platform'] == 'linux': + + env = env.Clone() + + env.ParseConfig('pkg-config --cflags --libs libdrm xorg-server') + + env.Prepend(CPPPATH = [ + '#/include', + '#/src/gallium', + '#/src/mesa', + '#/src/gallium/drivers/svga', + '#/src/gallium/drivers/svga/include', + ]) + + env.Append(CPPDEFINES = [ + ]) + + if env['gcc']: + env.Append(CPPDEFINES = [ + 'HAVE_STDINT_H', + 'HAVE_SYS_TYPES_H', + ]) + env.Append(CFLAGS = ['-Werror']) + + env.Append(CFLAGS = [ + '-std=gnu99', + '-D_FILE_OFFSET_BITS=64', + ]) + + env.Prepend(LIBPATH = [ + ]) + + env.Prepend(LIBS = [ + trace, + st_xorg, + svgadrm, + svga, + auxiliaries, + ]) + + sources = [ + 'vmw_xorg.c', + ] + + # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions + env.LoadableModule( + target ='vmwgfx_drv.so', + source = sources, + LIBS = env['LIBS'], + SHLIBPREFIX = '', + ) diff --git a/src/gallium/winsys/drm/vmware/xorg/vmw_xorg.c b/src/gallium/winsys/drm/vmware/xorg/vmw_xorg.c new file mode 100644 index 0000000000..3acc110ae7 --- /dev/null +++ b/src/gallium/winsys/drm/vmware/xorg/vmw_xorg.c @@ -0,0 +1,150 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * Glue file for Xorg State Tracker. + * + * @author Alan Hourihane + * @author Jakob Bornecrantz + */ + +#include "state_trackers/xorg/xorg_winsys.h" + +static void vmw_xorg_identify(int flags); +static Bool vmw_xorg_pci_probe(DriverPtr driver, + int entity_num, + struct pci_device *device, + intptr_t match_data); + +static const struct pci_id_match vmw_xorg_device_match[] = { + {0x15ad, PCI_MATCH_ANY, PCI_MATCH_ANY, PCI_MATCH_ANY, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0}, +}; + +static SymTabRec vmw_xorg_chipsets[] = { + {PCI_MATCH_ANY, "VMware SVGA Device"}, + {-1, NULL} +}; + +static PciChipsets vmw_xorg_pci_devices[] = { + {PCI_MATCH_ANY, PCI_MATCH_ANY, NULL}, + {-1, -1, NULL} +}; + +static XF86ModuleVersionInfo vmw_xorg_version = { + "vmwgfx", + MODULEVENDORSTRING, + MODINFOSTRING1, + MODINFOSTRING2, + XORG_VERSION_CURRENT, + 0, 1, 0, /* major, minor, patch */ + ABI_CLASS_VIDEODRV, + ABI_VIDEODRV_VERSION, + MOD_CLASS_VIDEODRV, + {0, 0, 0, 0} +}; + +/* + * Xorg driver exported structures + */ + +_X_EXPORT DriverRec vmwgfx = { + 1, + "vmwgfx", + vmw_xorg_identify, + NULL, + xorg_tracker_available_options, + NULL, + 0, + NULL, + vmw_xorg_device_match, + vmw_xorg_pci_probe +}; + +static MODULESETUPPROTO(vmw_xorg_setup); + +_X_EXPORT XF86ModuleData vmwgfxModuleData = { + &vmw_xorg_version, + vmw_xorg_setup, + NULL +}; + +/* + * Xorg driver functions + */ + +static pointer +vmw_xorg_setup(pointer module, pointer opts, int *errmaj, int *errmin) +{ + static Bool setupDone = 0; + + /* This module should be loaded only once, but check to be sure. + */ + if (!setupDone) { + setupDone = 1; + xf86AddDriver(&vmwgfx, module, HaveDriverFuncs); + + /* + * The return value must be non-NULL on success even though there + * is no TearDownProc. + */ + return (pointer) 1; + } else { + if (errmaj) + *errmaj = LDR_ONCEONLY; + return NULL; + } +} + +static void +vmw_xorg_identify(int flags) +{ + xf86PrintChipsets("vmwgfx", "Driver for VMware SVGA device", + vmw_xorg_chipsets); +} + +static Bool +vmw_xorg_pci_probe(DriverPtr driver, + int entity_num, struct pci_device *device, intptr_t match_data) +{ + ScrnInfoPtr scrn = NULL; + EntityInfoPtr entity; + + scrn = xf86ConfigPciEntity(scrn, 0, entity_num, vmw_xorg_pci_devices, + NULL, NULL, NULL, NULL, NULL); + if (scrn != NULL) { + scrn->driverVersion = 1; + scrn->driverName = "vmwgfx"; + scrn->name = "vmwgfx"; + scrn->Probe = NULL; + + entity = xf86GetEntityInfo(entity_num); + + /* Use all the functions from the xorg tracker */ + xorg_tracker_set_functions(scrn); + } + return scrn != NULL; +} -- cgit v1.2.3 From 49289f1d25d42a6b3eb5da5f85b2dd6a14cda8e7 Mon Sep 17 00:00:00 2001 From: Patrice Mandin Date: Tue, 17 Nov 2009 19:49:56 +0100 Subject: nouveau: nv30: Add missing include to fix warning --- src/gallium/drivers/nv30/nv30_fragprog.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index cc0385426c..0ce702d6f8 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -4,6 +4,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" @@ -131,7 +132,7 @@ emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src) sizeof(uint32_t) * 4); } - sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT); + sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT); break; case NV30SR_NONE: sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); @@ -666,7 +667,7 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc) { struct tgsi_full_immediate *imm; float vals[4]; - + imm = &p.FullToken.FullImmediate; assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); assert(fpc->nr_imm < MAX_IMM); @@ -754,7 +755,7 @@ nv30_fragprog_translate(struct nv30_context *nv30, fp->insn[fpc->inst_offset + 1] = 0x00000000; fp->insn[fpc->inst_offset + 2] = 0x00000000; fp->insn[fpc->inst_offset + 3] = 0x00000000; - + fp->translated = TRUE; fp->on_hw = FALSE; out_err: @@ -838,7 +839,7 @@ nv30_fragprog_validate(struct nv30_context *nv30) update_constants: if (fp->nr_consts) { float *map; - + map = pipe_buffer_map(pscreen, constbuf, PIPE_BUFFER_USAGE_CPU_READ); for (i = 0; i < fp->nr_consts; i++) { -- cgit v1.2.3 From b353106467d386b48877d6ae1048cca3feaf99ff Mon Sep 17 00:00:00 2001 From: Patrice Mandin Date: Tue, 17 Nov 2009 19:50:37 +0100 Subject: nouveau: nv30: Check for NULL front (happens with DRI2) --- src/gallium/drivers/nv30/nv30_screen.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 221ae1b5f8..7cd36902eb 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -116,7 +116,10 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen, case PIPE_FORMAT_Z24X8_UNORM: return TRUE; case PIPE_FORMAT_Z16_UNORM: - return (front->format == PIPE_FORMAT_R5G6B5_UNORM); + if (front) { + return (front->format == PIPE_FORMAT_R5G6B5_UNORM); + } + return TRUE; default: break; } -- cgit v1.2.3 From 8c5a108dc321c4760e6d70b1104493b5bd54e6de Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Tue, 17 Nov 2009 09:07:15 +0100 Subject: svga: Remove -Werror for now as GCC 4.4.x raises a bunch of warnings --- src/gallium/drivers/svga/Makefile | 2 +- src/gallium/drivers/svga/SConscript | 3 --- src/gallium/winsys/drm/vmware/core/SConscript | 2 +- 3 files changed, 2 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/Makefile b/src/gallium/drivers/svga/Makefile index 05ab4ab9b3..fe1d6d7384 100644 --- a/src/gallium/drivers/svga/Makefile +++ b/src/gallium/drivers/svga/Makefile @@ -57,7 +57,7 @@ CC = gcc -fvisibility=hidden -msse -msse2 # Set the gnu99 standard to enable anonymous structs in vmware headers. # -CFLAGS = -Wall -Werror -Wmissing-prototypes -std=gnu99 -ffast-math \ +CFLAGS = -Wall -Wmissing-prototypes -std=gnu99 -ffast-math \ $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) $(ASM_FLAGS) include ../../Makefile.template diff --git a/src/gallium/drivers/svga/SConscript b/src/gallium/drivers/svga/SConscript index 0fa745c9b8..ff9645fc03 100644 --- a/src/gallium/drivers/svga/SConscript +++ b/src/gallium/drivers/svga/SConscript @@ -10,9 +10,6 @@ if env['gcc']: 'HAVE_STDINT_H', 'HAVE_SYS_TYPES_H', ]) - if env['platform'] not in ['windows']: - # The Windows headers cause many gcc warnings - env.Append(CCFLAGS = ['-Werror']) env.Prepend(CPPPATH = [ 'include', diff --git a/src/gallium/winsys/drm/vmware/core/SConscript b/src/gallium/winsys/drm/vmware/core/SConscript index 1875b659ac..edaf9458be 100644 --- a/src/gallium/winsys/drm/vmware/core/SConscript +++ b/src/gallium/winsys/drm/vmware/core/SConscript @@ -3,7 +3,7 @@ Import('*') env = env.Clone() if env['gcc']: - env.Append(CCFLAGS = ['-fvisibility=hidden', '-Werror']) + env.Append(CCFLAGS = ['-fvisibility=hidden']) env.Append(CPPDEFINES = [ 'HAVE_STDINT_H', 'HAVE_SYS_TYPES_H', -- cgit v1.2.3 From 845ddbc9aa62d1c9142822608370d96b2d68cec0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 17 Nov 2009 16:14:54 -0700 Subject: i915g: remove trailing commas in enum lists to silence warnings --- src/gallium/drivers/i915/intel_winsys.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/intel_winsys.h b/src/gallium/drivers/i915/intel_winsys.h index 2c8dc63f3f..c6bf6e6f7f 100644 --- a/src/gallium/drivers/i915/intel_winsys.h +++ b/src/gallium/drivers/i915/intel_winsys.h @@ -42,21 +42,21 @@ enum intel_buffer_usage INTEL_USAGE_2D_TARGET = 0x04, INTEL_USAGE_2D_SOURCE = 0x08, /* use on vertex */ - INTEL_USAGE_VERTEX = 0x10, + INTEL_USAGE_VERTEX = 0x10 }; enum intel_buffer_type { INTEL_NEW_TEXTURE, INTEL_NEW_SCANOUT, /**< a texture used for scanning out from */ - INTEL_NEW_VERTEX, + INTEL_NEW_VERTEX }; enum intel_buffer_tile { INTEL_TILE_NONE, INTEL_TILE_X, - INTEL_TILE_Y, + INTEL_TILE_Y }; struct intel_batchbuffer { -- cgit v1.2.3 From 7e3955d8e80c364d9b4c9eee1ec9758ff3ab8a1d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 17 Nov 2009 16:15:29 -0700 Subject: i915g: replace //-style comments --- src/gallium/drivers/i915/i915_state.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index 7d48e6e84d..71f00bc346 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -58,8 +58,10 @@ translate_wrap_mode(unsigned wrap) return TEXCOORDMODE_CLAMP_EDGE; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return TEXCOORDMODE_CLAMP_BORDER; -// case PIPE_TEX_WRAP_MIRRORED_REPEAT: -// return TEXCOORDMODE_MIRROR; +/* + case PIPE_TEX_WRAP_MIRRORED_REPEAT: + return TEXCOORDMODE_MIRROR; +*/ default: return TEXCOORDMODE_WRAP; } -- cgit v1.2.3 From 1dbf3642b9c1c37f72e2212ce78056cf8959a957 Mon Sep 17 00:00:00 2001 From: Alan Hourihane Date: Fri, 20 Nov 2009 18:08:29 +0000 Subject: Fix memory leak. --- src/gallium/drivers/softpipe/sp_state_fs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 256faa94b8..b41f7e8ab7 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -143,6 +143,7 @@ softpipe_delete_vs_state(struct pipe_context *pipe, void *vs) struct sp_vertex_shader *state = (struct sp_vertex_shader *) vs; draw_delete_vertex_shader(softpipe->draw, state->draw_data); + FREE( (void *)state->shader.tokens ); FREE( state ); } -- cgit v1.2.3 From f56b95e40796ea3859b1cb83341730bf74a6f85f Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Thu, 19 Nov 2009 08:18:58 +0100 Subject: identity: Add missing screen methods. --- src/gallium/drivers/identity/id_objects.c | 39 +++++++++++++++++++++++++++++++ src/gallium/drivers/identity/id_objects.h | 25 ++++++++++++++++++++ src/gallium/drivers/identity/id_public.h | 2 +- src/gallium/drivers/identity/id_screen.c | 33 ++++++++++++++++++++++++++ 4 files changed, 98 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/identity/id_objects.c b/src/gallium/drivers/identity/id_objects.c index e893e59940..bc9bc7121d 100644 --- a/src/gallium/drivers/identity/id_objects.c +++ b/src/gallium/drivers/identity/id_objects.c @@ -180,3 +180,42 @@ identity_transfer_destroy(struct identity_transfer *id_transfer) screen->tex_transfer_destroy(id_transfer->transfer); FREE(id_transfer); } + +struct pipe_video_surface * +identity_video_surface_create(struct identity_screen *id_screen, + struct pipe_video_surface *video_surface) +{ + struct identity_video_surface *id_video_surface; + + if (!video_surface) { + goto error; + } + + assert(video_surface->screen == id_screen->screen); + + id_video_surface = CALLOC_STRUCT(identity_video_surface); + if (!id_video_surface) { + goto error; + } + + memcpy(&id_video_surface->base, + video_surface, + sizeof(struct pipe_video_surface)); + + pipe_reference_init(&id_video_surface->base.reference, 1); + id_video_surface->base.screen = &id_screen->base; + id_video_surface->video_surface = video_surface; + + return &id_video_surface->base; + +error: + pipe_video_surface_reference(&video_surface, NULL); + return NULL; +} + +void +identity_video_surface_destroy(struct identity_video_surface *id_video_surface) +{ + pipe_video_surface_reference(&id_video_surface->video_surface, NULL); + FREE(id_video_surface); +} diff --git a/src/gallium/drivers/identity/id_objects.h b/src/gallium/drivers/identity/id_objects.h index ce58faa3c7..77cc719079 100644 --- a/src/gallium/drivers/identity/id_objects.h +++ b/src/gallium/drivers/identity/id_objects.h @@ -31,6 +31,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_state.h" +#include "pipe/p_video_state.h" #include "id_screen.h" @@ -67,6 +68,14 @@ struct identity_transfer }; +struct identity_video_surface +{ + struct pipe_video_surface base; + + struct pipe_video_surface *video_surface; +}; + + static INLINE struct identity_buffer * identity_buffer(struct pipe_buffer *_buffer) { @@ -103,6 +112,15 @@ identity_transfer(struct pipe_transfer *_transfer) return (struct identity_transfer *)_transfer; } +static INLINE struct identity_video_surface * +identity_video_surface(struct pipe_video_surface *_video_surface) +{ + if (!_video_surface) { + return NULL; + } + (void)identity_screen(_video_surface->screen); + return (struct identity_video_surface *)_video_surface; +} static INLINE struct pipe_buffer * identity_buffer_unwrap(struct pipe_buffer *_buffer) @@ -165,5 +183,12 @@ identity_transfer_create(struct identity_texture *id_texture, void identity_transfer_destroy(struct identity_transfer *id_transfer); +struct pipe_video_surface * +identity_video_surface_create(struct identity_screen *id_screen, + struct pipe_video_surface *video_surface); + +void +identity_video_surface_destroy(struct identity_video_surface *id_video_surface); + #endif /* ID_OBJECTS_H */ diff --git a/src/gallium/drivers/identity/id_public.h b/src/gallium/drivers/identity/id_public.h index cac14cfd60..3d2862eaa0 100644 --- a/src/gallium/drivers/identity/id_public.h +++ b/src/gallium/drivers/identity/id_public.h @@ -37,4 +37,4 @@ identity_screen_create(struct pipe_screen *screen); struct pipe_context * identity_context_create(struct pipe_screen *screen, struct pipe_context *pipe); -#endif /* PT_PUBLIC_H */ +#endif /* ID_PUBLIC_H */ diff --git a/src/gallium/drivers/identity/id_screen.c b/src/gallium/drivers/identity/id_screen.c index 26439637d0..53eae3ef54 100644 --- a/src/gallium/drivers/identity/id_screen.c +++ b/src/gallium/drivers/identity/id_screen.c @@ -379,6 +379,33 @@ identity_screen_buffer_destroy(struct pipe_buffer *_buffer) identity_buffer_destroy(identity_buffer(_buffer)); } +static struct pipe_video_surface * +identity_screen_video_surface_create(struct pipe_screen *_screen, + enum pipe_video_chroma_format chroma_format, + unsigned width, + unsigned height) +{ + struct identity_screen *id_screen = identity_screen(_screen); + struct pipe_screen *screen = id_screen->screen; + struct pipe_video_surface *result; + + result = screen->video_surface_create(screen, + chroma_format, + width, + height); + + if (result) { + return identity_video_surface_create(id_screen, result); + } + return NULL; +} + +static void +identity_screen_video_surface_destroy(struct pipe_video_surface *_vsfc) +{ + identity_video_surface_destroy(identity_video_surface(_vsfc)); +} + static void identity_screen_flush_frontbuffer(struct pipe_screen *_screen, struct pipe_surface *_surface, @@ -472,6 +499,12 @@ identity_screen_create(struct pipe_screen *screen) if (screen->buffer_unmap) id_screen->base.buffer_unmap = identity_screen_buffer_unmap; id_screen->base.buffer_destroy = identity_screen_buffer_destroy; + if (screen->video_surface_create) { + id_screen->base.video_surface_create = identity_screen_video_surface_create; + } + if (screen->video_surface_destroy) { + id_screen->base.video_surface_destroy = identity_screen_video_surface_destroy; + } id_screen->base.flush_frontbuffer = identity_screen_flush_frontbuffer; id_screen->base.fence_reference = identity_screen_fence_reference; id_screen->base.fence_signalled = identity_screen_fence_signalled; -- cgit v1.2.3 From 367cfca808e74101689dd0acb247f3ec38fc4c7f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 19 Nov 2009 11:37:50 -0700 Subject: softpipe: add missing check in softpipe_is_texture_referenced() Check if the named texture is referenced by the texture cache. --- src/gallium/drivers/softpipe/sp_context.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 94d000a5ac..d325499bf8 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -120,7 +120,7 @@ softpipe_destroy( struct pipe_context *pipe ) * if (the texture is being used as a framebuffer surface) * return PIPE_REFERENCED_FOR_WRITE * else if (the texture is a bound texture source) - * return PIPE_REFERENCED_FOR_READ XXX not done yet + * return PIPE_REFERENCED_FOR_READ * else * return PIPE_UNREFERENCED */ @@ -132,6 +132,7 @@ softpipe_is_texture_referenced( struct pipe_context *pipe, struct softpipe_context *softpipe = softpipe_context( pipe ); unsigned i; + /* check if any of the bound drawing surfaces are this texture */ if (softpipe->dirty_render_cache) { for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) { if (softpipe->framebuffer.cbufs[i] && @@ -145,7 +146,12 @@ softpipe_is_texture_referenced( struct pipe_context *pipe, } } - /* FIXME: we also need to do the same for the texture cache */ + /* check if any of the tex_cache textures are this texture */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + if (softpipe->tex_cache[i] && + softpipe->tex_cache[i]->texture == texture) + return PIPE_REFERENCED_FOR_READ; + } return PIPE_UNREFERENCED; } -- cgit v1.2.3 From 3f4016650099642f900fc169c078b1d78128899a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 19 Nov 2009 14:02:06 -0700 Subject: softpipe: whitespace/indentation fixes --- src/gallium/drivers/softpipe/sp_context.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index d325499bf8..5f60139968 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -249,9 +249,9 @@ softpipe_create( struct pipe_screen *screen ) /* setup quad rendering stages */ - softpipe->quad.shade = sp_quad_shade_stage(softpipe); - softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe); - softpipe->quad.blend = sp_quad_blend_stage(softpipe); + softpipe->quad.shade = sp_quad_shade_stage(softpipe); + softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe); + softpipe->quad.blend = sp_quad_blend_stage(softpipe); /* @@ -281,7 +281,6 @@ softpipe_create( struct pipe_screen *screen ) draw_set_render(softpipe->draw, softpipe->vbuf_backend); - /* plug in AA line/point stages */ draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe); @@ -297,4 +296,3 @@ softpipe_create( struct pipe_screen *screen ) softpipe_destroy(&softpipe->pipe); return NULL; } - -- cgit v1.2.3 From 683e35f726a182ed9fc6b6d5cb07146eebe14dea Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 19 Nov 2009 14:39:34 -0800 Subject: gallium: don't use arrays for texture width,height,depth --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 10 +-- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 6 +- src/gallium/auxiliary/util/u_blit.c | 26 +++---- src/gallium/auxiliary/util/u_gen_mipmap.c | 25 +++--- src/gallium/auxiliary/util/u_math.h | 4 +- src/gallium/auxiliary/util/u_surface.c | 6 +- src/gallium/auxiliary/vl/vl_compositor.c | 12 +-- src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 22 +++--- src/gallium/drivers/i915/i915_state_sampler.c | 2 +- src/gallium/drivers/i915/i915_texture.c | 96 +++++++++++------------- src/gallium/drivers/softpipe/sp_tex_sample.c | 58 +++++++------- src/gallium/drivers/softpipe/sp_tex_tile_cache.c | 7 +- src/gallium/drivers/softpipe/sp_texture.c | 49 ++++++------ src/gallium/drivers/trace/tr_dump_state.c | 6 +- src/gallium/drivers/trace/tr_rbug.c | 6 +- src/gallium/include/pipe/p_state.h | 6 +- src/mesa/state_tracker/st_atom_framebuffer.c | 5 +- src/mesa/state_tracker/st_atom_pixeltransfer.c | 2 +- src/mesa/state_tracker/st_cb_drawpixels.c | 4 +- src/mesa/state_tracker/st_cb_fbo.c | 6 +- src/mesa/state_tracker/st_cb_readpixels.c | 2 +- src/mesa/state_tracker/st_cb_texture.c | 12 +-- src/mesa/state_tracker/st_gen_mipmap.c | 39 +++++----- src/mesa/state_tracker/st_texture.c | 36 ++++----- 24 files changed, 226 insertions(+), 221 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 9f956715a2..31de84b272 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -398,9 +398,9 @@ aaline_create_texture(struct aaline_stage *aaline) texTemp.target = PIPE_TEXTURE_2D; texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */ texTemp.last_level = MAX_TEXTURE_LEVEL; - texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL; - texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL; - texTemp.depth[0] = 1; + texTemp.width0 = 1 << MAX_TEXTURE_LEVEL; + texTemp.height0 = 1 << MAX_TEXTURE_LEVEL; + texTemp.depth0 = 1; pf_get_block(texTemp.format, &texTemp.block); aaline->texture = screen->texture_create(screen, &texTemp); @@ -413,11 +413,11 @@ aaline_create_texture(struct aaline_stage *aaline) */ for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) { struct pipe_transfer *transfer; - const uint size = aaline->texture->width[level]; + const uint size = u_minify(aaline->texture->width0, level); ubyte *data; uint i, j; - assert(aaline->texture->width[level] == aaline->texture->height[level]); + assert(aaline->texture->width0 == aaline->texture->height0); /* This texture is new, no need to flush. */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 283502cdf3..27d89721b1 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -427,9 +427,9 @@ pstip_create_texture(struct pstip_stage *pstip) texTemp.target = PIPE_TEXTURE_2D; texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */ texTemp.last_level = 0; - texTemp.width[0] = 32; - texTemp.height[0] = 32; - texTemp.depth[0] = 1; + texTemp.width0 = 32; + texTemp.height0 = 32; + texTemp.depth0 = 1; pf_get_block(texTemp.format, &texTemp.block); pstip->texture = screen->texture_create(screen, &texTemp); diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 5038642599..5372df5735 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -354,9 +354,9 @@ util_blit_pixels_writemask(struct blit_state *ctx, texTemp.target = PIPE_TEXTURE_2D; texTemp.format = src->format; texTemp.last_level = 0; - texTemp.width[0] = srcW; - texTemp.height[0] = srcH; - texTemp.depth[0] = 1; + texTemp.width0 = srcW; + texTemp.height0 = srcH; + texTemp.depth0 = 1; pf_get_block(src->format, &texTemp.block); tex = screen->texture_create(screen, &texTemp); @@ -389,10 +389,10 @@ util_blit_pixels_writemask(struct blit_state *ctx, } else { pipe_texture_reference(&tex, src->texture); - s0 = srcX0 / (float)tex->width[0]; - s1 = srcX1 / (float)tex->width[0]; - t0 = srcY0 / (float)tex->height[0]; - t1 = srcY1 / (float)tex->height[0]; + s0 = srcX0 / (float)tex->width0; + s1 = srcX1 / (float)tex->width0; + t0 = srcY0 / (float)tex->height0; + t1 = srcY1 / (float)tex->height0; } @@ -518,13 +518,13 @@ util_blit_pixels_tex(struct blit_state *ctx, assert(filter == PIPE_TEX_MIPFILTER_NEAREST || filter == PIPE_TEX_MIPFILTER_LINEAR); - assert(tex->width[0] != 0); - assert(tex->height[0] != 0); + assert(tex->width0 != 0); + assert(tex->height0 != 0); - s0 = srcX0 / (float)tex->width[0]; - s1 = srcX1 / (float)tex->width[0]; - t0 = srcY0 / (float)tex->height[0]; - t1 = srcY1 / (float)tex->height[0]; + s0 = srcX0 / (float)tex->width0; + s1 = srcX1 / (float)tex->width0; + t0 = srcY0 / (float)tex->height0; + t1 = srcY1 / (float)tex->height0; assert(ctx->pipe->screen->is_format_supported(ctx->pipe->screen, dst->format, PIPE_TEXTURE_2D, diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index aa823aa218..84db14576e 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -45,6 +45,7 @@ #include "util/u_draw_quad.h" #include "util/u_gen_mipmap.h" #include "util/u_simple_shaders.h" +#include "util/u_math.h" #include "cso_cache/cso_context.h" @@ -1125,12 +1126,12 @@ make_1d_mipmap(struct gen_mipmap_state *ctx, srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, PIPE_TRANSFER_READ, 0, 0, - pt->width[srcLevel], - pt->height[srcLevel]); + u_minify(pt->width0, srcLevel), + u_minify(pt->height0, srcLevel)); dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, PIPE_TRANSFER_WRITE, 0, 0, - pt->width[dstLevel], - pt->height[dstLevel]); + u_minify(pt->width0, dstLevel), + u_minify(pt->height0, dstLevel)); srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); @@ -1168,12 +1169,12 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, PIPE_TRANSFER_READ, 0, 0, - pt->width[srcLevel], - pt->height[srcLevel]); + u_minify(pt->width0, srcLevel), + u_minify(pt->height0, srcLevel)); dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, PIPE_TRANSFER_WRITE, 0, 0, - pt->width[dstLevel], - pt->height[dstLevel]); + u_minify(pt->width0, dstLevel), + u_minify(pt->height0, dstLevel)); srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); @@ -1575,8 +1576,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, * Setup framebuffer / dest surface */ fb.cbufs[0] = surf; - fb.width = pt->width[dstLevel]; - fb.height = pt->height[dstLevel]; + fb.width = u_minify(pt->width0, dstLevel); + fb.height = u_minify(pt->height0, dstLevel); cso_set_framebuffer(ctx->cso, &fb); /* @@ -1597,8 +1598,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, offset = set_vertex_data(ctx, pt->target, face, - (float) pt->width[dstLevel], - (float) pt->height[dstLevel]); + (float) u_minify(pt->width0, dstLevel), + (float) u_minify(pt->height0, dstLevel)); util_draw_vertex_buffer(ctx->pipe, ctx->vbuf, diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 75b075f160..7a598efc3a 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -507,9 +507,9 @@ align(int value, int alignment) } static INLINE unsigned -minify(unsigned value) +u_minify(unsigned value, unsigned levels) { - return MAX2(1, value >> 1); + return MAX2(1, value >> levels); } #ifndef COPY_4V diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index 85e443204e..de8c266db8 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -79,9 +79,9 @@ util_create_rgba_surface(struct pipe_screen *screen, templ.target = target; templ.format = format; templ.last_level = 0; - templ.width[0] = width; - templ.height[0] = height; - templ.depth[0] = 1; + templ.width0 = width; + templ.height0 = height; + templ.depth0 = 1; pf_get_block(format, &templ.block); templ.tex_usage = usage; diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index cda6dc134a..592dd17421 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -455,8 +455,8 @@ void vl_compositor_render(struct vl_compositor *compositor, assert(dst_area); assert(picture_type == PIPE_MPEG12_PICTURE_TYPE_FRAME); - compositor->fb_state.width = dst_surface->width[0]; - compositor->fb_state.height = dst_surface->height[0]; + compositor->fb_state.width = dst_surface->width0; + compositor->fb_state.height = dst_surface->height0; compositor->fb_state.cbufs[0] = compositor->pipe->screen->get_tex_surface ( compositor->pipe->screen, @@ -504,12 +504,12 @@ void vl_compositor_render(struct vl_compositor *compositor, vs_consts->dst_trans.z = 0; vs_consts->dst_trans.w = 0; - vs_consts->src_scale.x = src_area->w / (float)src_surface->width[0]; - vs_consts->src_scale.y = src_area->h / (float)src_surface->height[0]; + vs_consts->src_scale.x = src_area->w / (float)src_surface->width0; + vs_consts->src_scale.y = src_area->h / (float)src_surface->height0; vs_consts->src_scale.z = 1; vs_consts->src_scale.w = 1; - vs_consts->src_trans.x = src_area->x / (float)src_surface->width[0]; - vs_consts->src_trans.y = src_area->y / (float)src_surface->height[0]; + vs_consts->src_trans.x = src_area->x / (float)src_surface->width0; + vs_consts->src_trans.y = src_area->y / (float)src_surface->height0; vs_consts->src_trans.z = 0; vs_consts->src_trans.w = 0; diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index c4ba69817f..1934965995 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -681,7 +681,7 @@ xfer_buffers_map(struct vl_mpeg12_mc_renderer *r) ( r->pipe->screen, r->textures.all[i], 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0, - r->textures.all[i]->width[0], r->textures.all[i]->height[0] + r->textures.all[i]->width0, r->textures.all[i]->height0 ); r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]); @@ -835,26 +835,26 @@ init_buffers(struct vl_mpeg12_mc_renderer *r) /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */ template.format = PIPE_FORMAT_R16_SNORM; template.last_level = 0; - template.width[0] = r->pot_buffers ? + template.width0 = r->pot_buffers ? util_next_power_of_two(r->picture_width) : r->picture_width; - template.height[0] = r->pot_buffers ? + template.height0 = r->pot_buffers ? util_next_power_of_two(r->picture_height) : r->picture_height; - template.depth[0] = 1; + template.depth0 = 1; pf_get_block(template.format, &template.block); template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC; r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template); if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) { - template.width[0] = r->pot_buffers ? + template.width0 = r->pot_buffers ? util_next_power_of_two(r->picture_width / 2) : r->picture_width / 2; - template.height[0] = r->pot_buffers ? + template.height0 = r->pot_buffers ? util_next_power_of_two(r->picture_height / 2) : r->picture_height / 2; } else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) - template.height[0] = r->pot_buffers ? + template.height0 = r->pot_buffers ? util_next_power_of_two(r->picture_height / 2) : r->picture_height / 2; @@ -1283,8 +1283,8 @@ flush(struct vl_mpeg12_mc_renderer *r) PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD ); - vs_consts->denorm.x = r->surface->width[0]; - vs_consts->denorm.y = r->surface->height[0]; + vs_consts->denorm.x = r->surface->width0; + vs_consts->denorm.y = r->surface->height0; pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer); @@ -1633,8 +1633,8 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer renderer->past = past; renderer->future = future; renderer->fence = fence; - renderer->surface_tex_inv_size.x = 1.0f / surface->width[0]; - renderer->surface_tex_inv_size.y = 1.0f / surface->height[0]; + renderer->surface_tex_inv_size.x = 1.0f / surface->width0; + renderer->surface_tex_inv_size.y = 1.0f / surface->height0; } while (num_macroblocks) { diff --git a/src/gallium/drivers/i915/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c index c5e9084d12..cbac4175c8 100644 --- a/src/gallium/drivers/i915/i915_state_sampler.c +++ b/src/gallium/drivers/i915/i915_state_sampler.c @@ -231,7 +231,7 @@ i915_update_texture(struct i915_context *i915, { const struct pipe_texture *pt = &tex->base; uint format, pitch; - const uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + const uint width = pt->width0, height = pt->height0, depth = pt->depth0; const uint num_levels = pt->last_level; unsigned max_lod = num_levels * 4; unsigned tiled = MS3_USE_FENCE_REGS; diff --git a/src/gallium/drivers/i915/i915_texture.c b/src/gallium/drivers/i915/i915_texture.c index 286c9ace8e..c7b86dd4c5 100644 --- a/src/gallium/drivers/i915/i915_texture.c +++ b/src/gallium/drivers/i915/i915_texture.c @@ -105,10 +105,6 @@ i915_miptree_set_level_info(struct i915_texture *tex, assert(level < PIPE_MAX_TEXTURE_LEVELS); - pt->width[level] = w; - pt->height[level] = h; - pt->depth[level] = d; - pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w); pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h); @@ -168,16 +164,16 @@ i915_scanout_layout(struct i915_texture *tex) return FALSE; i915_miptree_set_level_info(tex, 0, 1, - tex->base.width[0], - tex->base.height[0], + tex->base.width0, + tex->base.height0, 1); i915_miptree_set_image_offset(tex, 0, 0, 0, 0); - if (tex->base.width[0] >= 240) { + if (tex->base.width0 >= 240) { tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); tex->hw_tiled = INTEL_TILE_X; - } else if (tex->base.width[0] == 64 && tex->base.height[0] == 64) { + } else if (tex->base.width0 == 64 && tex->base.height0 == 64) { tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); } else { @@ -185,7 +181,7 @@ i915_scanout_layout(struct i915_texture *tex) } debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, - tex->base.width[0], tex->base.height[0], pt->block.size, + tex->base.width0, tex->base.height0, pt->block.size, tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy); return TRUE; @@ -203,12 +199,12 @@ i915_display_target_layout(struct i915_texture *tex) return FALSE; /* fallback to normal textures for small textures */ - if (tex->base.width[0] < 240) + if (tex->base.width0 < 240) return FALSE; i915_miptree_set_level_info(tex, 0, 1, - tex->base.width[0], - tex->base.height[0], + tex->base.width0, + tex->base.height0, 1); i915_miptree_set_image_offset(tex, 0, 0, 0, 0); @@ -217,7 +213,7 @@ i915_display_target_layout(struct i915_texture *tex) tex->hw_tiled = INTEL_TILE_X; debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, - tex->base.width[0], tex->base.height[0], pt->block.size, + tex->base.width0, tex->base.height0, pt->block.size, tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy); return TRUE; @@ -228,8 +224,8 @@ i915_miptree_layout_2d(struct i915_texture *tex) { struct pipe_texture *pt = &tex->base; unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; unsigned nblocksx = pt->nblocksx[0]; unsigned nblocksy = pt->nblocksy[0]; @@ -254,8 +250,8 @@ i915_miptree_layout_2d(struct i915_texture *tex) tex->total_nblocksy += nblocksy; - width = minify(width); - height = minify(height); + width = u_minify(width, 1); + height = u_minify(height, 1); nblocksx = pf_get_nblocksx(&pt->block, width); nblocksy = pf_get_nblocksy(&pt->block, height); } @@ -267,9 +263,9 @@ i915_miptree_layout_3d(struct i915_texture *tex) struct pipe_texture *pt = &tex->base; unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; unsigned nblocksx = pt->nblocksx[0]; unsigned nblocksy = pt->nblocksy[0]; unsigned stack_nblocksy = 0; @@ -285,36 +281,34 @@ i915_miptree_layout_3d(struct i915_texture *tex) stack_nblocksy += MAX2(2, nblocksy); - width = minify(width); - height = minify(height); - depth = minify(depth); + width = u_minify(width, 1); + height = u_minify(height, 1); nblocksx = pf_get_nblocksx(&pt->block, width); nblocksy = pf_get_nblocksy(&pt->block, height); } /* Fixup depth image_offsets: */ - depth = pt->depth[0]; for (level = 0; level <= pt->last_level; level++) { unsigned i; for (i = 0; i < depth; i++) i915_miptree_set_image_offset(tex, level, i, 0, i * stack_nblocksy); - depth = minify(depth); + depth = u_minify(depth, 1); } /* Multiply slice size by texture depth for total size. It's * remarkable how wasteful of memory the i915 texture layouts * are. They are largely fixed in the i945. */ - tex->total_nblocksy = stack_nblocksy * pt->depth[0]; + tex->total_nblocksy = stack_nblocksy * pt->depth0; } static void i915_miptree_layout_cube(struct i915_texture *tex) { struct pipe_texture *pt = &tex->base; - unsigned width = pt->width[0], height = pt->height[0]; + unsigned width = pt->width0, height = pt->height0; const unsigned nblocks = pt->nblocksx[0]; unsigned level; unsigned face; @@ -383,8 +377,8 @@ i945_miptree_layout_2d(struct i915_texture *tex) unsigned level; unsigned x = 0; unsigned y = 0; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; unsigned nblocksx = pt->nblocksx[0]; unsigned nblocksy = pt->nblocksy[0]; @@ -407,8 +401,8 @@ i945_miptree_layout_2d(struct i915_texture *tex) */ if (pt->last_level > 0) { unsigned mip1_nblocksx - = align(pf_get_nblocksx(&pt->block, minify(width)), align_x) - + pf_get_nblocksx(&pt->block, minify(minify(width))); + = align(pf_get_nblocksx(&pt->block, u_minify(width, 1)), align_x) + + pf_get_nblocksx(&pt->block, u_minify(width, 2)); if (mip1_nblocksx > nblocksx) tex->stride = mip1_nblocksx * pt->block.size; @@ -439,8 +433,8 @@ i945_miptree_layout_2d(struct i915_texture *tex) y += nblocksy; } - width = minify(width); - height = minify(height); + width = u_minify(width, 1); + height = u_minify(height, 1); nblocksx = pf_get_nblocksx(&pt->block, width); nblocksy = pf_get_nblocksy(&pt->block, height); } @@ -450,9 +444,9 @@ static void i945_miptree_layout_3d(struct i915_texture *tex) { struct pipe_texture *pt = &tex->base; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; unsigned nblocksx = pt->nblocksx[0]; unsigned nblocksy = pt->nblocksy[0]; unsigned pack_x_pitch, pack_x_nr; @@ -495,9 +489,9 @@ i945_miptree_layout_3d(struct i915_texture *tex) pack_y_pitch >>= 1; } - width = minify(width); - height = minify(height); - depth = minify(depth); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); nblocksx = pf_get_nblocksx(&pt->block, width); nblocksy = pf_get_nblocksy(&pt->block, height); } @@ -511,11 +505,11 @@ i945_miptree_layout_cube(struct i915_texture *tex) const unsigned nblocks = pt->nblocksx[0]; unsigned face; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; /* - printf("%s %i, %i\n", __FUNCTION__, pt->width[0], pt->height[0]); + printf("%s %i, %i\n", __FUNCTION__, pt->width0, pt->height0); */ assert(width == height); /* cubemap images are square */ @@ -651,8 +645,8 @@ i915_texture_create(struct pipe_screen *screen, pipe_reference_init(&tex->base.reference, 1); tex->base.screen = screen; - tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width[0]); - tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]); + tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width0); + tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height0); if (is->is_i945) { if (!i945_miptree_layout(tex)) @@ -667,7 +661,7 @@ i915_texture_create(struct pipe_screen *screen, /* for scanouts and cursors, cursors arn't scanouts */ - if (templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY && templat->width[0] != 64) + if (templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY && templat->width0 != 64) buf_usage = INTEL_NEW_SCANOUT; else buf_usage = INTEL_NEW_TEXTURE; @@ -710,7 +704,7 @@ i915_texture_blanket(struct pipe_screen * screen, /* Only supports one type */ if (base->target != PIPE_TEXTURE_2D || base->last_level != 0 || - base->depth[0] != 1) { + base->depth0 != 1) { return NULL; } @@ -724,7 +718,7 @@ i915_texture_blanket(struct pipe_screen * screen, tex->stride = stride[0]; - i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1); + i915_miptree_set_level_info(tex, 0, 1, base->width0, base->height0, 1); i915_miptree_set_image_offset(tex, 0, 0, 0, 0); pipe_buffer_reference(&tex->buffer, buffer); @@ -788,8 +782,8 @@ i915_get_tex_surface(struct pipe_screen *screen, pipe_reference_init(&ps->reference, 1); pipe_texture_reference(&ps->texture, pt); ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; + ps->width = u_minify(pt->width0, level); + ps->height = u_minify(pt->height0, level); ps->offset = offset; ps->usage = flags; } @@ -919,7 +913,7 @@ i915_texture_blanket_intel(struct pipe_screen *screen, /* Only supports one type */ if (base->target != PIPE_TEXTURE_2D || base->last_level != 0 || - base->depth[0] != 1) { + base->depth0 != 1) { return NULL; } @@ -933,7 +927,7 @@ i915_texture_blanket_intel(struct pipe_screen *screen, tex->stride = stride; - i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1); + i915_miptree_set_level_info(tex, 0, 1, base->width0, base->height0, 1); i915_miptree_set_image_offset(tex, 0, 0, 0, 0); tex->buffer = buffer; diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index c22ee86b66..e26153b1d9 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -521,7 +521,7 @@ compute_lambda_1d(const struct sp_sampler_varient *samp, const struct pipe_sampler_state *sampler = samp->sampler; float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]); float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); - float rho = MAX2(dsdx, dsdy) * texture->width[0]; + float rho = MAX2(dsdx, dsdy) * texture->width0; float lambda; lambda = util_fast_log2(rho); @@ -545,8 +545,8 @@ compute_lambda_2d(const struct sp_sampler_varient *samp, float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]); float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]); - float maxx = MAX2(dsdx, dsdy) * texture->width[0]; - float maxy = MAX2(dtdx, dtdy) * texture->height[0]; + float maxx = MAX2(dsdx, dsdy) * texture->width0; + float maxy = MAX2(dtdx, dtdy) * texture->height0; float rho = MAX2(maxx, maxy); float lambda; @@ -573,9 +573,9 @@ compute_lambda_3d(const struct sp_sampler_varient *samp, float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]); float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]); float dpdy = fabsf(p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]); - float maxx = MAX2(dsdx, dsdy) * texture->width[0]; - float maxy = MAX2(dtdx, dtdy) * texture->height[0]; - float maxz = MAX2(dpdx, dpdy) * texture->depth[0]; + float maxx = MAX2(dsdx, dsdy) * texture->width0; + float maxy = MAX2(dtdx, dtdy) * texture->height0; + float maxz = MAX2(dpdx, dpdy) * texture->depth0; float rho, lambda; rho = MAX2(maxx, maxy); @@ -644,8 +644,8 @@ get_texel_2d(const struct sp_sampler_varient *samp, const struct pipe_texture *texture = samp->texture; unsigned level = addr.bits.level; - if (x < 0 || x >= (int) texture->width[level] || - y < 0 || y >= (int) texture->height[level]) { + if (x < 0 || x >= (int) u_minify(texture->width0, level) || + y < 0 || y >= (int) u_minify(texture->height0, level)) { return samp->sampler->border_color; } else { @@ -737,9 +737,9 @@ get_texel_3d(const struct sp_sampler_varient *samp, const struct pipe_texture *texture = samp->texture; unsigned level = addr.bits.level; - if (x < 0 || x >= (int) texture->width[level] || - y < 0 || y >= (int) texture->height[level] || - z < 0 || z >= (int) texture->depth[level]) { + if (x < 0 || x >= (int) u_minify(texture->width0, level) || + y < 0 || y >= (int) u_minify(texture->height0, level) || + z < 0 || z >= (int) u_minify(texture->depth0, level)) { return samp->sampler->border_color; } else { @@ -925,7 +925,7 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler, union tex_tile_address addr; level0 = samp->level; - width = texture->width[level0]; + width = u_minify(texture->width0, level0); assert(width > 0); @@ -961,8 +961,8 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, level0 = samp->level; - width = texture->width[level0]; - height = texture->height[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); assert(width > 0); assert(height > 0); @@ -1008,8 +1008,8 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler, union tex_tile_address addr; level0 = samp->level; - width = texture->width[level0]; - height = texture->height[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); assert(width > 0); assert(height > 0); @@ -1046,9 +1046,9 @@ img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler, union tex_tile_address addr; level0 = samp->level; - width = texture->width[level0]; - height = texture->height[level0]; - depth = texture->depth[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); + depth = u_minify(texture->depth0, level0); assert(width > 0); assert(height > 0); @@ -1088,7 +1088,7 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler, union tex_tile_address addr; level0 = samp->level; - width = texture->width[level0]; + width = u_minify(texture->width0, level0); assert(width > 0); @@ -1127,8 +1127,8 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, union tex_tile_address addr; level0 = samp->level; - width = texture->width[level0]; - height = texture->height[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); assert(width > 0); assert(height > 0); @@ -1174,8 +1174,8 @@ img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler, union tex_tile_address addr; level0 = samp->level; - width = texture->width[level0]; - height = texture->height[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); assert(width > 0); assert(height > 0); @@ -1221,9 +1221,9 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler, union tex_tile_address addr; level0 = samp->level; - width = texture->width[level0]; - height = texture->height[level0]; - depth = texture->depth[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); + depth = u_minify(texture->depth0, level0); addr.value = 0; addr.bits.level = level0; @@ -1778,8 +1778,8 @@ sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp, samp->texture = texture; samp->cache = tex_cache; - samp->xpot = util_unsigned_logbase2( texture->width[0] ); - samp->ypot = util_unsigned_logbase2( texture->height[0] ); + samp->xpot = util_unsigned_logbase2( texture->width0 ); + samp->ypot = util_unsigned_logbase2( texture->height0 ); samp->level = CLAMP((int) sampler->min_lod, 0, (int) texture->last_level); } diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c index 407a22a9f4..e50a76a73b 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c @@ -35,6 +35,7 @@ #include "pipe/p_inlines.h" #include "util/u_memory.h" #include "util/u_tile.h" +#include "util/u_math.h" #include "sp_context.h" #include "sp_surface.h" #include "sp_texture.h" @@ -246,9 +247,9 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, addr.bits.level, addr.bits.z, PIPE_TRANSFER_READ, 0, 0, - tc->texture->width[addr.bits.level], - tc->texture->height[addr.bits.level]); - + u_minify(tc->texture->width0, addr.bits.level), + u_minify(tc->texture->height0, addr.bits.level)); + tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans); tc->tex_face = addr.bits.face; diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 7caf2928b4..ac5f61e46f 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -52,16 +52,17 @@ softpipe_texture_layout(struct pipe_screen *screen, { struct pipe_texture *pt = &spt->base; unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; unsigned buffer_size = 0; + pt->width0 = width; + pt->height0 = height; + pt->depth0 = depth; + for (level = 0; level <= pt->last_level; level++) { - pt->width[level] = width; - pt->height[level] = height; - pt->depth[level] = depth; pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); spt->stride[level] = pt->nblocksx[level]*pt->block.size; @@ -72,9 +73,9 @@ softpipe_texture_layout(struct pipe_screen *screen, ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * spt->stride[level]); - width = minify(width); - height = minify(height); - depth = minify(depth); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); } spt->buffer = screen->buffer_create(screen, 32, @@ -96,12 +97,12 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, PIPE_BUFFER_USAGE_GPU_READ_WRITE); unsigned tex_usage = spt->base.tex_usage; - spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]); - spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]); + spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width0); + spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height0); spt->buffer = screen->surface_buffer_create( screen, - spt->base.width[0], - spt->base.height[0], + spt->base.width0, + spt->base.height0, spt->base.format, usage, tex_usage, @@ -126,9 +127,9 @@ softpipe_texture_create(struct pipe_screen *screen, pipe_reference_init(&spt->base.reference, 1); spt->base.screen = screen; - spt->pot = (util_is_power_of_two(template->width[0]) && - util_is_power_of_two(template->height[0]) && - util_is_power_of_two(template->depth[0])); + spt->pot = (util_is_power_of_two(template->width0) && + util_is_power_of_two(template->height0) && + util_is_power_of_two(template->depth0)); if (spt->base.tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | PIPE_TEXTURE_USAGE_PRIMARY)) { @@ -163,7 +164,7 @@ softpipe_texture_blanket(struct pipe_screen * screen, /* Only supports one type */ if (base->target != PIPE_TEXTURE_2D || base->last_level != 0 || - base->depth[0] != 1) { + base->depth0 != 1) { return NULL; } @@ -174,8 +175,8 @@ softpipe_texture_blanket(struct pipe_screen * screen, spt->base = *base; pipe_reference_init(&spt->base.reference, 1); spt->base.screen = screen; - spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]); - spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]); + spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width0); + spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height0); spt->stride[0] = stride[0]; pipe_buffer_reference(&spt->buffer, buffer); @@ -213,8 +214,8 @@ softpipe_get_tex_surface(struct pipe_screen *screen, pipe_reference_init(&ps->reference, 1); pipe_texture_reference(&ps->texture, pt); ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; + ps->width = u_minify(pt->width0, level); + ps->height = u_minify(pt->height0, level); ps->offset = spt->level_offset[level]; ps->usage = usage; @@ -434,9 +435,9 @@ softpipe_video_surface_create(struct pipe_screen *screen, template.format = PIPE_FORMAT_X8R8G8B8_UNORM; template.last_level = 0; /* vl_mpeg12_mc_renderer expects this when it's initialized with pot_buffers=true */ - template.width[0] = util_next_power_of_two(width); - template.height[0] = util_next_power_of_two(height); - template.depth[0] = 1; + template.width0 = util_next_power_of_two(width); + template.height0 = util_next_power_of_two(height); + template.depth0 = 1; pf_get_block(template.format, &template.block); template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET; diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index bcf6751af4..6d58209294 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -83,15 +83,15 @@ void trace_dump_template(const struct pipe_texture *templat) trace_dump_member(format, templat, format); trace_dump_member_begin("width"); - trace_dump_array(uint, templat->width, 1); + trace_dump_uint(templat->width0); trace_dump_member_end(); trace_dump_member_begin("height"); - trace_dump_array(uint, templat->height, 1); + trace_dump_uint(templat->height0); trace_dump_member_end(); trace_dump_member_begin("depth"); - trace_dump_array(uint, templat->depth, 1); + trace_dump_uint(templat->depth0); trace_dump_member_end(); trace_dump_member_begin("block"); diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c index 81e0a6f3b0..b59458c0e3 100644 --- a/src/gallium/drivers/trace/tr_rbug.c +++ b/src/gallium/drivers/trace/tr_rbug.c @@ -200,9 +200,9 @@ trace_rbug_texture_info(struct trace_rbug *tr_rbug, struct rbug_header *header, t = tr_tex->texture; rbug_send_texture_info_reply(tr_rbug->con, serial, t->target, t->format, - t->width, t->last_level + 1, - t->height, t->last_level + 1, - t->depth, t->last_level + 1, + &t->width0, 1, + &t->height0, 1, + &t->depth0, 1, t->block.width, t->block.height, t->block.size, t->last_level, t->nr_samples, diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 287b424e4a..9766e86620 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -343,9 +343,9 @@ struct pipe_texture enum pipe_texture_target target; /**< PIPE_TEXTURE_x */ enum pipe_format format; /**< PIPE_FORMAT_x */ - unsigned width[PIPE_MAX_TEXTURE_LEVELS]; - unsigned height[PIPE_MAX_TEXTURE_LEVELS]; - unsigned depth[PIPE_MAX_TEXTURE_LEVELS]; + unsigned width0; + unsigned height0; + unsigned depth0; struct pipe_format_block block; unsigned nblocksx[PIPE_MAX_TEXTURE_LEVELS]; /**< allocated width in blocks */ diff --git a/src/mesa/state_tracker/st_atom_framebuffer.c b/src/mesa/state_tracker/st_atom_framebuffer.c index e18c0f6e0a..8ca4335e33 100644 --- a/src/mesa/state_tracker/st_atom_framebuffer.c +++ b/src/mesa/state_tracker/st_atom_framebuffer.c @@ -40,6 +40,7 @@ #include "pipe/p_inlines.h" #include "cso_cache/cso_context.h" #include "util/u_rect.h" +#include "util/u_math.h" @@ -64,8 +65,8 @@ update_renderbuffer_surface(struct st_context *st, GLuint level; /* find matching mipmap level size */ for (level = 0; level <= texture->last_level; level++) { - if (texture->width[level] == rtt_width && - texture->height[level] == rtt_height) { + if (u_minify(texture->width0, level) == rtt_width && + u_minify(texture->height0, level) == rtt_height) { pipe_surface_reference(&strb->surface, NULL); diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c index babfcc87b4..4b35f59cc2 100644 --- a/src/mesa/state_tracker/st_atom_pixeltransfer.c +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c @@ -145,7 +145,7 @@ load_color_map_texture(GLcontext *ctx, struct pipe_texture *pt) const GLuint gSize = ctx->PixelMaps.GtoG.Size; const GLuint bSize = ctx->PixelMaps.BtoB.Size; const GLuint aSize = ctx->PixelMaps.AtoA.Size; - const uint texSize = pt->width[0]; + const uint texSize = pt->width0; uint *dest; uint i, j; diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 1d33e81c2c..7ec4599280 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -637,8 +637,8 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, y1 = y + height * ctx->Pixel.ZoomY; draw_quad(ctx, x0, y0, z, x1, y1, color, invertTex, - (GLfloat) width / pt->width[0], - (GLfloat) height / pt->height[0]); + (GLfloat) width / pt->width0, + (GLfloat) height / pt->height0); /* restore state */ cso_restore_rasterizer(cso); diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 65ce12ccd4..0469fb9978 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -128,9 +128,9 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, template.target = PIPE_TEXTURE_2D; template.format = format; pf_get_block(format, &template.block); - template.width[0] = width; - template.height[0] = height; - template.depth[0] = 1; + template.width0 = width; + template.height0 = height; + template.depth0 = 1; template.last_level = 0; template.nr_samples = rb->NumSamples; if (pf_is_depth_stencil(format)) { diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 772bb3bb69..103861d6f9 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -243,7 +243,7 @@ st_fast_readpixels(GLcontext *ctx, struct st_renderbuffer *strb, GLint row, col, dy, dstStride; if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) { - y = strb->texture->height[0] - y - height; + y = strb->texture->height0 - y - height; } trans = st_cond_flush_get_tex_transfer(st_context(ctx), strb->texture, diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 9186db76e1..72892b7c8c 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -451,9 +451,9 @@ compress_with_blit(GLcontext * ctx, templ.target = PIPE_TEXTURE_2D; templ.format = st_mesa_format_to_pipe_format(mesa_format); pf_get_block(templ.format, &templ.block); - templ.width[0] = width; - templ.height[0] = height; - templ.depth[0] = 1; + templ.width0 = width; + templ.height0 = height; + templ.depth0 = 1; templ.last_level = 0; templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER; src_tex = screen->texture_create(screen, &templ); @@ -1813,9 +1813,9 @@ st_finalize_texture(GLcontext *ctx, if (stObj->pt->target != gl_target_to_pipe(stObj->base.Target) || stObj->pt->format != fmt || stObj->pt->last_level < stObj->lastLevel || - stObj->pt->width[0] != firstImage->base.Width2 || - stObj->pt->height[0] != firstImage->base.Height2 || - stObj->pt->depth[0] != firstImage->base.Depth2 || + stObj->pt->width0 != firstImage->base.Width2 || + stObj->pt->height0 != firstImage->base.Height2 || + stObj->pt->depth0 != firstImage->base.Depth2 || /* Nominal bytes per pixel: */ stObj->pt->block.size / stObj->pt->block.width != cpp) { diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 16ca2771b0..f8068fa12b 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -38,6 +38,7 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "util/u_gen_mipmap.h" +#include "util/u_math.h" #include "cso_cache/cso_cache.h" #include "cso_cache/cso_context.h" @@ -133,14 +134,14 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, srcTrans = st_cond_flush_get_tex_transfer(st_context(ctx), pt, face, srcLevel, zslice, PIPE_TRANSFER_READ, 0, 0, - pt->width[srcLevel], - pt->height[srcLevel]); + u_minify(pt->width0, srcLevel), + u_minify(pt->height0, srcLevel)); dstTrans = st_cond_flush_get_tex_transfer(st_context(ctx), pt, face, dstLevel, zslice, PIPE_TRANSFER_WRITE, 0, 0, - pt->width[dstLevel], - pt->height[dstLevel]); + u_minify(pt->width0, dstLevel), + u_minify(pt->height0, dstLevel)); srcData = (ubyte *) screen->transfer_map(screen, srcTrans); dstData = (ubyte *) screen->transfer_map(screen, dstTrans); @@ -149,13 +150,17 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, dstStride = dstTrans->stride / dstTrans->block.size; _mesa_generate_mipmap_level(target, datatype, comps, - 0 /*border*/, - pt->width[srcLevel], pt->height[srcLevel], pt->depth[srcLevel], - srcData, - srcStride, /* stride in texels */ - pt->width[dstLevel], pt->height[dstLevel], pt->depth[dstLevel], - dstData, - dstStride); /* stride in texels */ + 0 /*border*/, + u_minify(pt->width0, srcLevel), + u_minify(pt->height0, srcLevel), + u_minify(pt->depth0, srcLevel), + srcData, + srcStride, /* stride in texels */ + u_minify(pt->width0, dstLevel), + u_minify(pt->height0, dstLevel), + u_minify(pt->depth0, dstLevel), + dstData, + dstStride); /* stride in texels */ screen->transfer_unmap(screen, srcTrans); screen->transfer_unmap(screen, dstTrans); @@ -232,9 +237,9 @@ st_generate_mipmap(GLcontext *ctx, GLenum target, oldTex->target, oldTex->format, lastLevel, - oldTex->width[0], - oldTex->height[0], - oldTex->depth[0], + oldTex->width0, + oldTex->height0, + oldTex->depth0, oldTex->tex_usage); /* The texture isn't in a "complete" state yet so set the expected @@ -269,9 +274,9 @@ st_generate_mipmap(GLcontext *ctx, GLenum target, = _mesa_get_tex_image(ctx, texObj, target, srcLevel); struct gl_texture_image *dstImage; struct st_texture_image *stImage; - uint dstWidth = pt->width[dstLevel]; - uint dstHeight = pt->height[dstLevel]; - uint dstDepth = pt->depth[dstLevel]; + uint dstWidth = u_minify(pt->width0, dstLevel); + uint dstHeight = u_minify(pt->height0, dstLevel); + uint dstDepth = u_minify(pt->depth0, dstLevel); uint border = srcImage->Border; dstImage = _mesa_get_tex_image(ctx, texObj, target, dstLevel); diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index 3945822f66..aa88fdcd78 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -44,6 +44,7 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "util/u_rect.h" +#include "util/u_math.h" #define DBG if(0) printf @@ -100,9 +101,9 @@ st_texture_create(struct st_context *st, pt.target = target; pt.format = format; pt.last_level = last_level; - pt.width[0] = width0; - pt.height[0] = height0; - pt.depth[0] = depth0; + pt.width0 = width0; + pt.height0 = height0; + pt.depth0 = depth0; pf_get_block(format, &pt.block); pt.tex_usage = usage; @@ -135,9 +136,9 @@ st_texture_match_image(const struct pipe_texture *pt, /* Test if this image's size matches what's expected in the * established texture. */ - if (image->Width != pt->width[level] || - image->Height != pt->height[level] || - image->Depth != pt->depth[level]) + if (image->Width != u_minify(pt->width0, level) || + image->Height != u_minify(pt->height0, level) || + image->Depth != u_minify(pt->depth0, level)) return GL_FALSE; return GL_TRUE; @@ -265,7 +266,7 @@ st_texture_image_data(struct st_context *st, { struct pipe_context *pipe = st->pipe; struct pipe_screen *screen = pipe->screen; - GLuint depth = dst->depth[level]; + GLuint depth = u_minify(dst->depth0, level); GLuint i; const GLubyte *srcUB = src; struct pipe_transfer *dst_transfer; @@ -275,15 +276,16 @@ st_texture_image_data(struct st_context *st, for (i = 0; i < depth; i++) { dst_transfer = st_no_flush_get_tex_transfer(st, dst, face, level, i, PIPE_TRANSFER_WRITE, 0, 0, - dst->width[level], - dst->height[level]); + u_minify(dst->width0, level), + u_minify(dst->height0, level)); st_surface_data(pipe, dst_transfer, 0, 0, /* dstx, dsty */ srcUB, src_row_stride, 0, 0, /* source x, y */ - dst->width[level], dst->height[level]); /* width, height */ + u_minify(dst->width0, level), + u_minify(dst->height0, level)); /* width, height */ screen->tex_transfer_destroy(dst_transfer); @@ -301,9 +303,9 @@ st_texture_image_copy(struct pipe_context *pipe, GLuint face) { struct pipe_screen *screen = pipe->screen; - GLuint width = dst->width[dstLevel]; - GLuint height = dst->height[dstLevel]; - GLuint depth = dst->depth[dstLevel]; + GLuint width = u_minify(dst->width0, dstLevel); + GLuint height = u_minify(dst->height0, dstLevel); + GLuint depth = u_minify(dst->depth0, dstLevel); struct pipe_surface *src_surface; struct pipe_surface *dst_surface; GLuint i; @@ -313,13 +315,13 @@ st_texture_image_copy(struct pipe_context *pipe, /* find src texture level of needed size */ for (srcLevel = 0; srcLevel <= src->last_level; srcLevel++) { - if (src->width[srcLevel] == width && - src->height[srcLevel] == height) { + if (u_minify(src->width0, srcLevel) == width && + u_minify(src->height0, srcLevel) == height) { break; } } - assert(src->width[srcLevel] == width); - assert(src->height[srcLevel] == height); + assert(u_minify(src->width0, srcLevel) == width); + assert(u_minify(src->height0, srcLevel) == height); #if 0 { -- cgit v1.2.3 From a24631bcd7ab2cbc6fff2a536502a07a13a9bc83 Mon Sep 17 00:00:00 2001 From: Alan Hourihane Date: Fri, 20 Nov 2009 18:08:29 +0000 Subject: Fix memory leak. --- src/gallium/drivers/softpipe/sp_state_fs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 256faa94b8..b41f7e8ab7 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -143,6 +143,7 @@ softpipe_delete_vs_state(struct pipe_context *pipe, void *vs) struct sp_vertex_shader *state = (struct sp_vertex_shader *) vs; draw_delete_vertex_shader(softpipe->draw, state->draw_data); + FREE( (void *)state->shader.tokens ); FREE( state ); } -- cgit v1.2.3 From 904469dcd2e50d950c5e061103907da659053ff2 Mon Sep 17 00:00:00 2001 From: Alan Hourihane Date: Fri, 20 Nov 2009 18:10:54 +0000 Subject: Fix indentation. --- src/gallium/drivers/softpipe/sp_context.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 5f60139968..bdbb7fa9b9 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -90,14 +90,15 @@ softpipe_destroy( struct pipe_context *pipe ) if (softpipe->draw) draw_destroy( softpipe->draw ); - softpipe->quad.shade->destroy( softpipe->quad.shade ); - softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); - softpipe->quad.blend->destroy( softpipe->quad.blend ); + softpipe->quad.shade->destroy( softpipe->quad.shade ); + softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); + softpipe->quad.blend->destroy( softpipe->quad.blend ); for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { sp_destroy_tile_cache(softpipe->cbuf_cache[i]); pipe_surface_reference(&softpipe->framebuffer.cbufs[i], NULL); } + sp_destroy_tile_cache(softpipe->zsbuf_cache); pipe_surface_reference(&softpipe->framebuffer.zsbuf, NULL); -- cgit v1.2.3 From beea241374a91b8aab81db175b28e98c2b4835d9 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 19 Nov 2009 01:35:08 +0100 Subject: r300g: set better values in the R300_VAP_CNTL register --- src/gallium/drivers/r300/r300_emit.c | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index eeb97a2d37..2a8d32242b 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -690,12 +690,35 @@ void r300_emit_vertex_format_state(struct r300_context* r300) END_CS; } +/* XXX This should probably go to util ... */ +/* Return the number of bits set in the given number. */ +static unsigned bitcount(unsigned n) +{ + unsigned bits; + for (bits = 0; n > 0; n = n >> 1) { + bits += n & 1; + } + return bits; +} + +/* XXX ... and this one too. */ +#define MIN3(x, y, z) MIN2(MIN2(x, y), z) + void r300_emit_vertex_program_code(struct r300_context* r300, struct r300_vertex_program_code* code) { int i; struct r300_screen* r300screen = r300_screen(r300->context.screen); unsigned instruction_count = code->length / 4; + + int vtx_mem_size = r300screen->caps->is_r500 ? 128 : 72; + int input_count = MAX2(bitcount(code->InputsRead), 1); + int output_count = MAX2(bitcount(code->OutputsWritten), 1); + int temp_count = MAX2(code->num_temporaries, 1); + int pvs_num_slots = MIN3(vtx_mem_size / input_count, + vtx_mem_size / output_count, 10); + int pvs_num_controllers = MIN2(6, vtx_mem_size / temp_count); + CS_LOCALS(r300); if (!r300screen->caps->has_tcl) { @@ -708,8 +731,7 @@ void r300_emit_vertex_program_code(struct r300_context* r300, /* R300_VAP_PVS_CODE_CNTL_0 * R300_VAP_PVS_CONST_CNTL * R300_VAP_PVS_CODE_CNTL_1 - * See the r5xx docs for instructions on how to use these. - * XXX these could be optimized to select better values... */ + * See the r5xx docs for instructions on how to use these. */ OUT_CS_REG_SEQ(R300_VAP_PVS_CODE_CNTL_0, 3); OUT_CS(R300_PVS_FIRST_INST(0) | R300_PVS_XYZW_VALID_INST(instruction_count - 1) | @@ -722,10 +744,11 @@ void r300_emit_vertex_program_code(struct r300_context* r300, for (i = 0; i < code->length; i++) OUT_CS(code->body.d[i]); - OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(10) | - R300_PVS_NUM_CNTLRS(5) | + OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(pvs_num_slots) | + R300_PVS_NUM_CNTLRS(pvs_num_controllers) | R300_PVS_NUM_FPUS(r300screen->caps->num_vert_fpus) | - R300_PVS_VF_MAX_VTX_NUM(12)); + R300_PVS_VF_MAX_VTX_NUM(12) | + (r300screen->caps->is_r500 ? R500_TCL_STATE_OPTIMIZATION : 0)); END_CS; } -- cgit v1.2.3 From 6a3eb1f91b4ccd4ee7ac6b91505e0dfa476922d4 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 20 Nov 2009 14:10:45 -0800 Subject: r300g: Use MAX3 and MIN3. --- src/gallium/drivers/r300/r300_emit.c | 3 --- src/gallium/drivers/r300/r300_state_derived.c | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 2a8d32242b..4cd5074379 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -701,9 +701,6 @@ static unsigned bitcount(unsigned n) return bits; } -/* XXX ... and this one too. */ -#define MIN3(x, y, z) MIN2(MIN2(x, y), z) - void r300_emit_vertex_program_code(struct r300_context* r300, struct r300_vertex_program_code* code) { diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 7166694edf..b4d0eeaf8c 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -458,7 +458,7 @@ static void r300_update_rs_block(struct r300_context* r300, rs->count = (rs_tex_comp) | (col_count << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; - rs->inst_count = MAX2(MAX2(col_count - 1, tex_count - 1), 0); + rs->inst_count = MAX3(col_count - 1, tex_count - 1, 0); } /* Update the vertex format. */ -- cgit v1.2.3 From 06ec216d191e160494dd0a922ab0395418a78402 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 20 Nov 2009 14:10:59 -0800 Subject: r300g: Clean up bitcounting function. I didn't see this in u_math; surely somebody else has this wheel reinvented elsewhere. --- src/gallium/drivers/r300/r300_emit.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 4cd5074379..c50c989f01 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -690,14 +690,19 @@ void r300_emit_vertex_format_state(struct r300_context* r300) END_CS; } -/* XXX This should probably go to util ... */ +/* XXX This should go to util ... */ /* Return the number of bits set in the given number. */ static unsigned bitcount(unsigned n) { - unsigned bits; - for (bits = 0; n > 0; n = n >> 1) { - bits += n & 1; + unsigned bits = 0; + + while (n) { + if (n & 1) { + bits++; + } + n >>= 1; } + return bits; } @@ -714,7 +719,7 @@ void r300_emit_vertex_program_code(struct r300_context* r300, int temp_count = MAX2(code->num_temporaries, 1); int pvs_num_slots = MIN3(vtx_mem_size / input_count, vtx_mem_size / output_count, 10); - int pvs_num_controllers = MIN2(6, vtx_mem_size / temp_count); + int pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 6); CS_LOCALS(r300); -- cgit v1.2.3 From f6541773c4661247879995637207dcc5803bbf00 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Fri, 20 Nov 2009 14:31:42 -0800 Subject: i915g: Add missing break statement in i915_debug.c. --- src/gallium/drivers/i915/i915_debug.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c index e6640e587b..c6e6d6fd31 100644 --- a/src/gallium/drivers/i915/i915_debug.c +++ b/src/gallium/drivers/i915/i915_debug.c @@ -851,6 +851,7 @@ static boolean i915_debug_packet( struct debug_stream *stream ) default: return debug(stream, "", 0); } + break; default: assert(0); return 0; -- cgit v1.2.3 From f4041b37e2d305cff0a97eb836250e9f8b1840a8 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 14 Nov 2009 22:14:42 +0100 Subject: r300g: fix rectangle textures on r3xx Adapted from Maciej Cencora's patch. --- src/gallium/drivers/r300/r300_emit.c | 22 ++++++++++++++++++++-- src/gallium/drivers/r300/r300_state.c | 8 ++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index c50c989f01..0bdf58202f 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -129,7 +129,9 @@ static const float * get_shader_constant( struct rc_constant * constant, struct r300_constant_buffer * externals) { - static const float zero[4] = { 0.0, 0.0, 0.0, 0.0 }; + static float vec[4] = { 0.0, 0.0, 0.0, 0.0 }; + struct pipe_texture *tex; + switch(constant->Type) { case RC_CONSTANT_EXTERNAL: return externals->constants[constant->u.External]; @@ -137,10 +139,26 @@ static const float * get_shader_constant( case RC_CONSTANT_IMMEDIATE: return constant->u.Immediate; + case RC_CONSTANT_STATE: + switch (constant->u.State[0]) + { + /* R3xx-specific */ + case RC_STATE_R300_TEXRECT_FACTOR: + tex = &r300->textures[constant->u.State[1]]->tex; + vec[0] = 1.0 / tex->width[0]; + vec[1] = 1.0 / tex->height[0]; + vec[2] = vec[3] = 1; + break; + + default: + assert(0); + } + return vec; + default: debug_printf("r300: Implementation error: Unhandled constant type %i\n", constant->Type); - return zero; + return vec; } } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index d1eced61db..00f10ffd73 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -571,6 +571,7 @@ static void r300_set_sampler_textures(struct pipe_context* pipe, struct pipe_texture** texture) { struct r300_context* r300 = r300_context(pipe); + boolean is_r500 = r300_screen(r300->context.screen)->caps->is_r500; int i; /* XXX magic num */ @@ -585,6 +586,13 @@ static void r300_set_sampler_textures(struct pipe_context* pipe, pipe_texture_reference((struct pipe_texture**)&r300->textures[i], texture[i]); r300->dirty_state |= (R300_NEW_TEXTURE << i); + + /* R300-specific - set the texrect factor in a fragment shader */ + if (!is_r500 && r300->textures[i]->is_npot) { + /* XXX It would be nice to re-emit just 1 constant, + * XXX not all of them */ + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; + } } } -- cgit v1.2.3 From 6a95996abb33a040f957ffedf3824afcc98a9e71 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 20 Nov 2009 14:55:22 -0800 Subject: r300g: Texrect factor cleanup. (0, 0, 0, 1) is a much saner default value, and texrect factors only need to be (1/s, 1/t, 0, 1). --- src/gallium/drivers/r300/r300_emit.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 0bdf58202f..37e75ba061 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -129,7 +129,7 @@ static const float * get_shader_constant( struct rc_constant * constant, struct r300_constant_buffer * externals) { - static float vec[4] = { 0.0, 0.0, 0.0, 0.0 }; + static float vec[4] = { 0.0, 0.0, 0.0, 1.0 }; struct pipe_texture *tex; switch(constant->Type) { @@ -140,26 +140,30 @@ static const float * get_shader_constant( return constant->u.Immediate; case RC_CONSTANT_STATE: - switch (constant->u.State[0]) - { - /* R3xx-specific */ + switch (constant->u.State[0]) { + /* Factor for converting rectangle coords to + * normalized coords. Should only show up on non-r500. */ case RC_STATE_R300_TEXRECT_FACTOR: tex = &r300->textures[constant->u.State[1]]->tex; vec[0] = 1.0 / tex->width[0]; vec[1] = 1.0 / tex->height[0]; - vec[2] = vec[3] = 1; break; default: - assert(0); + debug_printf("r300: Implementation error: " + "Unknown RC_CONSTANT type %d\n", constant->u.State[0]); } - return vec; + break; default: - debug_printf("r300: Implementation error: Unhandled constant type %i\n", - constant->Type); - return vec; + debug_printf("r300: Implementation error: " + "Unhandled constant type %d\n", constant->Type); } + + /* This should either be (0, 0, 0, 1), which should be a relatively safe + * RGBA or STRQ value, or it could be one of the RC_CONSTANT_STATE + * state factors. */ + return vec; } /* Convert a normal single-precision float into the 7.16 format -- cgit v1.2.3 From ae70cd1f027bdfc7f500d78b6c5333e6b35d3ee8 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 19 Nov 2009 21:07:20 +0100 Subject: r300g: remove variant states from emit_state_invariant --- src/gallium/drivers/r300/r300_state_invariant.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index c07e6ae676..46d1cb39b5 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -84,7 +84,7 @@ void r300_emit_invariant_state(struct r300_context* r300) END_CS; /* XXX unsorted stuff from surface_fill */ - BEGIN_CS(60 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0)); + BEGIN_CS(56 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0)); /* Flush PVS. */ OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); @@ -135,8 +135,6 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000); OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000); OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000); - OUT_CS_REG(R300_VAP_VTX_STATE_CNTL, 0x1); - OUT_CS_REG(R300_VAP_VSM_VTX_ASSM, 0x405); OUT_CS_REG(R300_SE_VTE_CNTL, 0x0000043F); /* XXX */ -- cgit v1.2.3 From 015e7e7724a64d3d9e02e57f6a8eb88a6441f596 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 20 Nov 2009 05:17:00 +0100 Subject: r300g: emit R300_TEX_ENABLE to indicate there are no textures bound Previously, this reg wasn't emitted at all if texture_count == 0. --- src/gallium/drivers/r300/r300_emit.c | 15 +++++++++++++-- src/gallium/drivers/r300/r300_emit.h | 2 ++ 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 37e75ba061..6d702c0027 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -837,13 +837,22 @@ void r300_emit_viewport_state(struct r300_context* r300, END_CS; } +void r300_emit_texture_count(struct r300_context* r300) +{ + CS_LOCALS(r300); + + BEGIN_CS(2); + OUT_CS_REG(R300_TX_ENABLE, (1 << r300->texture_count) - 1); + END_CS; + +} + void r300_flush_textures(struct r300_context* r300) { CS_LOCALS(r300); - BEGIN_CS(4); + BEGIN_CS(2); OUT_CS_REG(R300_TX_INVALTAGS, 0); - OUT_CS_REG(R300_TX_ENABLE, (1 << r300->texture_count) - 1); END_CS; } @@ -997,6 +1006,8 @@ validate: /* Samplers and textures are tracked separately but emitted together. */ if (r300->dirty_state & (R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) { + r300_emit_texture_count(r300); + for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) { if (r300->dirty_state & ((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i))) { diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 7c83c5166d..3797d3d332 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -92,6 +92,8 @@ void r300_emit_vertex_shader(struct r300_context* r300, void r300_emit_viewport_state(struct r300_context* r300, struct r300_viewport_state* viewport); +void r300_emit_texture_count(struct r300_context* r300); + void r300_flush_textures(struct r300_context* r300); /* Emit all dirty state. */ -- cgit v1.2.3 From 1c181a7eff96816b5d72ea5daab5818eef0ebc60 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 15 Nov 2009 05:25:15 +0100 Subject: r300g: Begin separating HW TCL and SW TCL state and setup. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch removes draw_context entirely from the HW TCL path and cleans up a few other things along the way. Hopefully, nothing got broken. Thanks to Marek Olšák for testing, review, and pointing out my bugs. :3 --- src/gallium/drivers/r300/r300_context.c | 31 ++--- src/gallium/drivers/r300/r300_render.c | 4 - src/gallium/drivers/r300/r300_state.c | 8 +- src/gallium/drivers/r300/r300_state_derived.c | 192 +++++++++++++------------- src/gallium/drivers/r300/r300_vbo.c | 46 ------ src/gallium/drivers/r300/r300_vs.h | 3 - 6 files changed, 110 insertions(+), 174 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index ae23329b83..26db536248 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -123,15 +123,24 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.clear = r300_clear; - if (r300screen->caps->has_tcl) - { + if (r300screen->caps->has_tcl) { r300->context.draw_arrays = r300_draw_arrays; r300->context.draw_elements = r300_draw_elements; r300->context.draw_range_elements = r300_draw_range_elements; - } - else - { - assert(0); + } else { + r300->context.draw_arrays = r300_swtcl_draw_arrays; + r300->context.draw_elements = r300_draw_elements; + r300->context.draw_range_elements = r300_swtcl_draw_range_elements; + + /* Create a Draw. This is used for SW TCL. */ + r300->draw = draw_create(); + /* Enable our renderer. */ + draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300)); + /* Enable Draw's clipping. */ + draw_set_driver_clipping(r300->draw, FALSE); + /* Force Draw to never do viewport transform, since we can do + * transform in hardware, always. */ + draw_set_viewport_state(r300->draw, &r300_viewport_identity); } r300->context.is_texture_referenced = r300_is_texture_referenced; @@ -145,16 +154,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->scissor_state = CALLOC_STRUCT(r300_scissor_state); r300->viewport_state = CALLOC_STRUCT(r300_viewport_state); - /* Create a Draw. This is used for vert collation and SW TCL. */ - r300->draw = draw_create(); - /* Enable our renderer. */ - draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300)); - /* Disable Draw's clipping if TCL is present. */ - draw_set_driver_clipping(r300->draw, r300_screen(screen)->caps->has_tcl); - /* Force Draw to never do viewport transform, since (again) we can do - * transform in hardware, always. */ - draw_set_viewport_state(r300->draw, &r300_viewport_identity); - /* Open up the OQ BO. */ r300->oqbo = screen->buffer_create(screen, 4096, PIPE_BUFFER_USAGE_VERTEX, 4096); diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 62e1456ed3..4c5fb405c6 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -183,8 +183,6 @@ boolean r300_draw_range_elements(struct pipe_context* pipe, return FALSE; } - setup_vertex_attributes(r300); - setup_index_buffer(r300, indexBuffer, indexSize); r300_emit_dirty_state(r300); @@ -226,8 +224,6 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, return FALSE; } - setup_vertex_attributes(r300); - r300_emit_dirty_state(r300); r300_emit_aos(r300, start); diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 00f10ffd73..5422a2cc9c 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -714,9 +714,6 @@ static void* r300_create_vs_state(struct pipe_context* pipe, tgsi_scan_shader(shader->tokens, &vs->info); - /* Appease Draw. */ - vs->draw = draw_create_vertex_shader(r300->draw, shader); - return (void*)vs; } else { return draw_create_vertex_shader(r300->draw, shader); @@ -727,8 +724,6 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) { struct r300_context* r300 = r300_context(pipe); - draw_flush(r300->draw); - if (r300_screen(pipe->screen)->caps->has_tcl) { struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; @@ -739,10 +734,10 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) r300_translate_vertex_shader(r300, vs); } - draw_bind_vertex_shader(r300->draw, vs->draw); r300->vs = vs; r300->dirty_state |= R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS; } else { + draw_flush(r300->draw); draw_bind_vertex_shader(r300->draw, (struct draw_vertex_shader*)shader); } @@ -756,7 +751,6 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader) struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; rc_constants_destroy(&vs->code.constants); - draw_delete_vertex_shader(r300->draw, vs->draw); FREE((void*)vs->state.tokens); FREE(shader); } else { diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index b4d0eeaf8c..5aa4166d93 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -65,84 +65,43 @@ int r300_shader_key_compare(void* key1, void* key2) { static void r300_vs_tab_routes(struct r300_context* r300, struct r300_vertex_info* vformat) { - struct r300_screen* r300screen = r300_screen(r300->context.screen); struct vertex_info* vinfo = &vformat->vinfo; int* tab = vformat->vs_tab; boolean pos = FALSE, psize = FALSE, fog = FALSE; int i, texs = 0, cols = 0; - struct tgsi_shader_info* info; - - if (r300screen->caps->has_tcl) { - /* Use vertex shader to determine required routes. */ - info = &r300->vs->info; - } else { - /* Use fragment shader to determine required routes. */ - info = &r300->fs->info; - } + struct tgsi_shader_info* info = &r300->fs->info; - assert(info->num_inputs <= 16); + /* XXX One day we should figure out how to handle a different number of + * VS outputs and FS inputs, as well as a different number of vertex streams + * and VS inputs. It's definitely one of the sources of hardlocks. */ - if (!r300screen->caps->has_tcl || !r300->rs_state->enable_vte) - { - for (i = 0; i < info->num_inputs; i++) { - switch (r300->vs->code.inputs[i]) { - case TGSI_SEMANTIC_POSITION: - pos = TRUE; - tab[i] = 0; - break; - case TGSI_SEMANTIC_COLOR: - tab[i] = 2 + cols; - cols++; - break; - case TGSI_SEMANTIC_PSIZE: - assert(psize == FALSE); - psize = TRUE; - tab[i] = 15; - break; - case TGSI_SEMANTIC_FOG: - assert(fog == FALSE); - fog = TRUE; - /* Fall through */ - case TGSI_SEMANTIC_GENERIC: - tab[i] = 6 + texs; - texs++; - break; - default: - debug_printf("r300: Unknown vertex input %d\n", - info->input_semantic_name[i]); - break; - } - } - } - else - { - /* Just copy vert attribs over as-is. */ - for (i = 0; i < info->num_inputs; i++) { - tab[i] = i; - } - - for (i = 0; i < info->num_outputs; i++) { - switch (info->output_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - pos = TRUE; - break; - case TGSI_SEMANTIC_COLOR: - cols++; - break; - case TGSI_SEMANTIC_PSIZE: - psize = TRUE; - break; - case TGSI_SEMANTIC_FOG: - fog = TRUE; - /* Fall through */ - case TGSI_SEMANTIC_GENERIC: - texs++; - break; - default: - debug_printf("r300: Unknown vertex output %d\n", - info->output_semantic_name[i]); - break; - } + for (i = 0; i < info->num_inputs; i++) { + switch (info->input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + pos = TRUE; + tab[i] = 0; + break; + case TGSI_SEMANTIC_COLOR: + tab[i] = 2 + cols; + cols++; + break; + case TGSI_SEMANTIC_PSIZE: + assert(psize == FALSE); + psize = TRUE; + tab[i] = 15; + break; + case TGSI_SEMANTIC_FOG: + assert(fog == FALSE); + fog = TRUE; + /* Fall through */ + case TGSI_SEMANTIC_GENERIC: + tab[i] = 6 + texs; + texs++; + break; + default: + debug_printf("r300: Unknown vertex input %d\n", + info->input_semantic_name[i]); + break; } } @@ -161,8 +120,7 @@ static void r300_vs_tab_routes(struct r300_context* r300, /* We need to add vertex position attribute only for SW TCL case, * for HW TCL case it could be generated by vertex shader */ - if (!pos && !r300screen->caps->has_tcl) { - debug_printf("r300: Forcing vertex position attribute emit...\n"); + if (!pos) { /* Make room for the position attribute * at the beginning of the tab. */ for (i = 15; i > 0; i--) { @@ -230,31 +188,66 @@ static void r300_vs_tab_routes(struct r300_context* r300, static void r300_vertex_psc(struct r300_context* r300, struct r300_vertex_info* vformat) { - struct r300_screen* r300screen = r300_screen(r300->context.screen); - struct vertex_info* vinfo = &vformat->vinfo; - int* tab = vformat->vs_tab; uint16_t type, swizzle; enum pipe_format format; - unsigned i, attrib_count; + unsigned i; /* Vertex shaders have no semantics on their inputs, - * so PSC should just route stuff based on their info, + * so PSC should just route stuff based on the vertex elements, * and not on attrib information. */ - if (r300screen->caps->has_tcl) { - attrib_count = r300->vs->info.num_inputs; - DBG(r300, DBG_DRAW, "r300: routing %d attribs in psc for vs\n", - attrib_count); - } else { - attrib_count = vinfo->num_attribs; - DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count); - for (i = 0; i < attrib_count; i++) { - DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d," - " tab %d\n", vinfo->attrib[i].src_index, - vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, - tab[i]); + DBG(r300, DBG_DRAW, "r300: vs expects %d attribs, routing %d elements" + " in psc\n", + r300->vs->info.num_inputs, + r300->vertex_element_count); + + for (i = 0; i < r300->vertex_element_count; i++) { + format = r300->vertex_element[i].src_format; + + type = r300_translate_vertex_data_type(format) | + (i << R300_DST_VEC_LOC_SHIFT); + swizzle = r300_translate_vertex_data_swizzle(format); + + if (i % 2) { + vformat->vap_prog_stream_cntl[i >> 1] |= type << 16; + vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; + } else { + vformat->vap_prog_stream_cntl[i >> 1] |= type; + vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle; } } + + assert(i <= 15); + + /* Set the last vector in the PSC. */ + if (i) { + i -= 1; + } + vformat->vap_prog_stream_cntl[i >> 1] |= + (R300_LAST_VEC << (i & 1 ? 16 : 0)); +} + +/* Update the PSC tables for SW TCL, using Draw. */ +static void r300_swtcl_vertex_psc(struct r300_context* r300, + struct r300_vertex_info* vformat) +{ + struct vertex_info* vinfo = &vformat->vinfo; + int* tab = vformat->vs_tab; + uint16_t type, swizzle; + enum pipe_format format; + unsigned i, attrib_count; + + /* For each Draw attribute, route it to the fragment shader according + * to the tab. */ + attrib_count = vinfo->num_attribs; + DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count); + for (i = 0; i < attrib_count; i++) { + DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d," + " tab %d\n", vinfo->attrib[i].src_index, + vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, + tab[i]); + } + for (i = 0; i < attrib_count; i++) { /* Make sure we have a proper destination for our attribute. */ assert(tab[i] != -1); @@ -272,12 +265,10 @@ static void r300_vertex_psc(struct r300_context* r300, /* Add the attribute to the PSC table. */ if (i & 1) { vformat->vap_prog_stream_cntl[i >> 1] |= type << 16; - vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; } else { - vformat->vap_prog_stream_cntl[i >> 1] |= type << 0; - - vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 0; + vformat->vap_prog_stream_cntl[i >> 1] |= type; + vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle; } } @@ -505,7 +496,13 @@ static void r300_update_derived_shader_state(struct r300_context* r300) } r300_vs_tab_routes(r300, vformat); - r300_vertex_psc(r300, vformat); + + if (r300screen->caps->has_tcl) { + r300_vertex_psc(r300, vformat); + } else { + r300_swtcl_vertex_psc(r300, vformat); + } + r300_update_fs_tab(r300, vformat); r300_update_rs_block(r300, rs_block); @@ -553,8 +550,7 @@ static void r300_update_ztop(struct r300_context* r300) void r300_update_derived_state(struct r300_context* r300) { - /* XXX */ - if (TRUE || r300->dirty_state & + if (r300->dirty_state & (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER)) { r300_update_derived_shader_state(r300); } diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c index a6a159667a..6ebaf715dc 100644 --- a/src/gallium/drivers/r300/r300_vbo.c +++ b/src/gallium/drivers/r300/r300_vbo.c @@ -34,52 +34,6 @@ #include "r300_reg.h" #include "r300_winsys.h" -static INLINE void setup_vertex_attribute(struct r300_vertex_info *vinfo, - struct pipe_vertex_element *vert_elem, - unsigned attr_num) -{ - uint16_t hw_fmt1, hw_fmt2; - - hw_fmt1 = r300_translate_vertex_data_type(vert_elem->src_format) | - (attr_num << R300_DST_VEC_LOC_SHIFT); - hw_fmt2 = r300_translate_vertex_data_swizzle(vert_elem->src_format); - - if (attr_num % 2 == 0) - { - vinfo->vap_prog_stream_cntl[attr_num >> 1] = hw_fmt1; - vinfo->vap_prog_stream_cntl_ext[attr_num >> 1] = hw_fmt2; - } - else - { - vinfo->vap_prog_stream_cntl[attr_num >> 1] |= hw_fmt1 << 16; - vinfo->vap_prog_stream_cntl_ext[attr_num >> 1] |= hw_fmt2 << 16; - } -} - -static void finish_vertex_attribs_setup(struct r300_vertex_info *vinfo, - unsigned attribs_num) -{ - uint32_t last_vec_bit = (attribs_num % 2 == 0) ? - (R300_LAST_VEC << 16) : R300_LAST_VEC; - - assert(attribs_num > 0 && attribs_num <= 16); - vinfo->vap_prog_stream_cntl[(attribs_num - 1) >> 1] |= last_vec_bit; -} - -void setup_vertex_attributes(struct r300_context *r300) -{ - struct pipe_vertex_element *vert_elem; - int i; - - for (i = 0; i < r300->vertex_element_count; i++) { - vert_elem = &r300->vertex_element[i]; - setup_vertex_attribute(r300->vertex_info, vert_elem, i); - } - - finish_vertex_attribs_setup(r300->vertex_info, - r300->vertex_element_count); -} - static INLINE int get_buffer_offset(struct r300_context *r300, unsigned int buf_nr, unsigned int elem_offset) diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 2a4ce315e3..00b02bf510 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -35,9 +35,6 @@ struct r300_vertex_shader { struct pipe_shader_state state; struct tgsi_shader_info info; - /* Fallback shader, because Draw has issues */ - struct draw_vertex_shader* draw; - /* Has this shader been translated yet? */ boolean translated; -- cgit v1.2.3 From b7078a88119e248b0196f7446abe029c22f1ee28 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 14 Nov 2009 23:27:20 +0100 Subject: r300g: add texture lod clamping These now work: piglit/lodclamp piglit/levelclamp --- src/gallium/drivers/r300/r300_context.h | 4 ++++ src/gallium/drivers/r300/r300_emit.c | 11 ++++++++++- src/gallium/drivers/r300/r300_reg.h | 5 +++-- src/gallium/drivers/r300/r300_state.c | 5 +++++ src/gallium/drivers/r300/r300_texture.c | 3 +-- 5 files changed, 23 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index f954ba7f9a..60ef415caa 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -92,6 +92,10 @@ struct r300_sampler_state { uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */ uint32_t filter1; /* R300_TX_FILTER1: 0x4440 */ uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */ + + /* Min/max LOD must be clamped to [0, last_level], thus + * it's dependent on a currently bound texture */ + unsigned min_lod, max_lod; }; struct r300_scissor_state { diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 6d702c0027..ad7dff36be 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -583,6 +583,8 @@ void r300_emit_texture(struct r300_context* r300, unsigned offset) { uint32_t filter0 = sampler->filter0; + uint32_t format0 = tex->state.format0; + unsigned min_level, max_level; CS_LOCALS(r300); /* to emulate 1D textures through 2D ones correctly */ @@ -591,13 +593,20 @@ void r300_emit_texture(struct r300_context* r300, filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); } + /* determine min/max levels */ + /* the MAX_MIP level is the largest (finest) one */ + max_level = MIN2(sampler->max_lod, tex->tex.last_level); + min_level = MIN2(sampler->min_lod, max_level); + format0 |= R300_TX_NUM_LEVELS(max_level); + filter0 |= R300_TX_MAX_MIP_LEVEL(min_level); + BEGIN_CS(16); OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), filter0 | (offset << 28)); OUT_CS_REG(R300_TX_FILTER1_0 + (offset * 4), sampler->filter1); OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (offset * 4), sampler->border_color); - OUT_CS_REG(R300_TX_FORMAT0_0 + (offset * 4), tex->state.format0); + OUT_CS_REG(R300_TX_FORMAT0_0 + (offset * 4), format0); OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1); OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2); OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1); diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 8ca785cb58..66fdada221 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -1463,6 +1463,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_MIN_FILTER_MIP_NEAREST (1 << 13) # define R300_TX_MIN_FILTER_MIP_LINEAR (2 << 13) # define R300_TX_MIN_FILTER_MIP_MASK (3 << 13) +# define R300_TX_MAX_MIP_LEVEL_SHIFT 17 +# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 17) # define R300_TX_MAX_ANISO_1_TO_1 (0 << 21) # define R300_TX_MAX_ANISO_2_TO_1 (1 << 21) # define R300_TX_MAX_ANISO_4_TO_1 (2 << 21) @@ -1471,6 +1473,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_MAX_ANISO_MASK (7 << 21) # define R300_TX_WRAP_S(x) ((x) << 0) # define R300_TX_WRAP_T(x) ((x) << 3) +# define R300_TX_MAX_MIP_LEVEL(x) ((x) << 17) #define R300_TX_FILTER1_0 0x4440 # define R300_CHROMA_KEY_MODE_DISABLE 0 @@ -1500,8 +1503,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_HEIGHTMASK_MASK (2047 << 11) # define R300_TX_DEPTHMASK_SHIFT 22 # define R300_TX_DEPTHMASK_MASK (0xf << 22) -# define R300_TX_MAX_MIP_LEVEL_SHIFT 26 -# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 26) # define R300_TX_SIZE_PROJECTED (1 << 30) # define R300_TX_PITCH_EN (1 << 31) # define R300_TX_WIDTH(x) ((x) << 0) diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 5422a2cc9c..f2867675f0 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -523,6 +523,11 @@ static void* state->mag_img_filter, state->min_mip_filter); + /* Unfortunately, r300-r500 don't support floating-point mipmap lods. */ + /* We must pass these to the emit function to clamp them properly. */ + sampler->min_lod = MAX2((unsigned)state->min_lod, 0); + sampler->max_lod = MAX2((unsigned)ceilf(state->max_lod), 0); + lod_bias = CLAMP((int)(state->lod_bias * 32), -(1 << 9), (1 << 9) - 1); sampler->filter1 |= lod_bias << R300_LOD_BIAS_SHIFT; diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index aea25cf71d..d13aa8f036 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -43,8 +43,7 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) state->format2 = (tex->pitch[0] - 1) & 0x1fff; } else { /* power of two textures (3D, mipmaps, and no pitch) */ - state->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) | - R300_TX_NUM_LEVELS(pt->last_level & 0xf); + state->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf); } state->format1 = r300_translate_texformat(pt->format); -- cgit v1.2.3 From 4e1236e60267d036a1a604412bd7efd7a249a588 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 15 Nov 2009 16:41:25 +0100 Subject: r300g: fix updating a vertex format We must update PSC when we change the vertex format, e.g. vertex colors from RGBA to BGRA. --- src/gallium/drivers/r300/r300_state.c | 2 ++ src/gallium/drivers/r300/r300_state_derived.c | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index f2867675f0..997d59ff6f 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -687,6 +687,8 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, draw_flush(r300->draw); draw_set_vertex_buffers(r300->draw, count, buffers); } + + r300->state |= R300_NEW_VERTEX_FORMAT; } static void r300_set_vertex_elements(struct pipe_context* pipe, diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 5aa4166d93..82f2be3101 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -512,7 +512,7 @@ static void r300_update_derived_shader_state(struct r300_context* r300) r300->vertex_info = vformat; r300->rs_block = rs_block; - r300->dirty_state |= (R300_NEW_VERTEX_FORMAT | R300_NEW_RS_BLOCK); + r300->dirty_state |= R300_NEW_RS_BLOCK; } static void r300_update_ztop(struct r300_context* r300) @@ -551,7 +551,8 @@ static void r300_update_ztop(struct r300_context* r300) void r300_update_derived_state(struct r300_context* r300) { if (r300->dirty_state & - (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER)) { + (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER | + R300_NEW_VERTEX_FORMAT)) { r300_update_derived_shader_state(r300); } -- cgit v1.2.3 From 624a0cd9c1bcc8d0952bb30e3336237fb99041b2 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 19 Nov 2009 20:41:19 +0100 Subject: r300g: fix typo in r300_reg.h to prevent the RS unit from doing random things And reorder fragment shader inputs so that the colors are before texcoords, as is allocated by the shader compiler. This commit makes VS->FS attribute routing work on R500. --- src/gallium/drivers/r300/r300_reg.h | 2 +- src/gallium/drivers/r300/r300_state_derived.c | 26 ++++++++++++-------------- 2 files changed, 13 insertions(+), 15 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 66fdada221..3a419b24b0 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -1293,7 +1293,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R500_RS_INST_TEX_ID(x) ((x) << 0) #define R500_RS_INST_TEX_CN_WRITE (1 << 4) #define R500_RS_INST_TEX_ADDR_SHIFT 5 -# define R500_RS_INST_TEX_ADDR(x) ((x) << 0) +# define R500_RS_INST_TEX_ADDR(x) ((x) << 5) #define R500_RS_INST_COL_ID_SHIFT 12 # define R500_RS_INST_COL_ID(x) ((x) << 12) #define R500_RS_INST_COL_CN_NO_WRITE (0 << 16) diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 82f2be3101..8faf78932d 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -381,17 +381,18 @@ static void r300_update_rs_block(struct r300_context* r300, col_count++; } + for (i = 0; i < col_count; i++) { + rs->inst[i] |= R500_RS_INST_COL_ID(i) | + R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_offset); + fp_offset++; + } + for (i = 0; i < tex_count; i++) { rs->inst[i] |= R500_RS_INST_TEX_ID(i) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_offset); fp_offset++; } - for (i = 0; i < col_count; i++) { - rs->inst[i] |= R500_RS_INST_COL_ID(i) | - R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_offset); - fp_offset++; - } } else { for (i = 0; i < info->num_inputs; i++) { switch (info->input_semantic_name[i]) { @@ -416,8 +417,10 @@ static void r300_update_rs_block(struct r300_context* r300, } } + /* Rasterize at least one color, or bad things happen. */ if (col_count == 0) { rs->ip[0] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001); + col_count++; } if (tex_count == 0) { @@ -428,9 +431,10 @@ static void r300_update_rs_block(struct r300_context* r300, R300_RS_SEL_Q(R300_RS_SEL_K1); } - /* Rasterize at least one color, or bad things happen. */ - if ((col_count == 0) && (tex_count == 0)) { - col_count++; + for (i = 0; i < col_count; i++) { + rs->inst[i] |= R300_RS_INST_COL_ID(i) | + R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_offset); + fp_offset++; } for (i = 0; i < tex_count; i++) { @@ -438,12 +442,6 @@ static void r300_update_rs_block(struct r300_context* r300, R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_offset); fp_offset++; } - - for (i = 0; i < col_count; i++) { - rs->inst[i] |= R300_RS_INST_COL_ID(i) | - R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_offset); - fp_offset++; - } } rs->count = (rs_tex_comp) | (col_count << R300_IC_COUNT_SHIFT) | -- cgit v1.2.3 From 435c495549d707432f9fb9868e665a42a6923058 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 19 Nov 2009 22:40:11 +0100 Subject: r300g: silence warnings --- src/gallium/drivers/r300/r300_screen.h | 2 ++ src/gallium/drivers/r300/r300_state_derived.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 41df31f670..1ce5ff3904 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -27,6 +27,8 @@ #include "r300_chipset.h" +struct r300_winsys; + struct r300_screen { /* Parent class */ struct pipe_screen screen; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 8faf78932d..962754f3b1 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -47,8 +47,8 @@ struct r300_shader_derived_value { unsigned r300_shader_key_hash(void* key) { struct r300_shader_key* shader_key = (struct r300_shader_key*)key; - unsigned vs = (unsigned)shader_key->vs; - unsigned fs = (unsigned)shader_key->fs; + unsigned vs = (intptr_t)shader_key->vs; + unsigned fs = (intptr_t)shader_key->fs; return (vs << 16) | (fs & 0xffff); } -- cgit v1.2.3 From 3a2cd66af8774af15eabef655ded9b48e67242d5 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 21 Nov 2009 05:51:13 +0100 Subject: r300g: clean up vs/fs tabs Instead of vs_tab, we use vs_output_tab and it's local now. fs_tab hasn't been used anywhere, so I removed it and r300_update_fs_tab too. --- src/gallium/drivers/r300/r300_context.h | 5 -- src/gallium/drivers/r300/r300_state_derived.c | 114 ++++++-------------------- 2 files changed, 25 insertions(+), 94 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 60ef415caa..39c0914cff 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -223,11 +223,6 @@ struct r300_texture { struct r300_vertex_info { /* Parent class */ struct vertex_info vinfo; - /* Map of vertex attributes into PVS memory for HW TCL, - * or GA memory for SW TCL. */ - int vs_tab[16]; - /* Map of rasterizer attributes from GB through RS to US. */ - int fs_tab[16]; /* R300_VAP_PROG_STREAK_CNTL_[0-7] */ uint32_t vap_prog_stream_cntl[8]; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 962754f3b1..45aeefb483 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -61,12 +61,12 @@ int r300_shader_key_compare(void* key1, void* key2) { (shader_key1->fs == shader_key2->fs); } -/* Set up the vs_tab and routes. */ -static void r300_vs_tab_routes(struct r300_context* r300, - struct r300_vertex_info* vformat) +/* Set up the vs_output_tab and routes. */ +static void r300_vs_output_tab_routes(struct r300_context* r300, + struct r300_vertex_info* vformat, + int* vs_output_tab) { struct vertex_info* vinfo = &vformat->vinfo; - int* tab = vformat->vs_tab; boolean pos = FALSE, psize = FALSE, fog = FALSE; int i, texs = 0, cols = 0; struct tgsi_shader_info* info = &r300->fs->info; @@ -79,23 +79,23 @@ static void r300_vs_tab_routes(struct r300_context* r300, switch (info->input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: pos = TRUE; - tab[i] = 0; + vs_output_tab[i] = 0; break; case TGSI_SEMANTIC_COLOR: - tab[i] = 2 + cols; + vs_output_tab[i] = 2 + cols; cols++; break; case TGSI_SEMANTIC_PSIZE: assert(psize == FALSE); psize = TRUE; - tab[i] = 15; + vs_output_tab[i] = 15; break; case TGSI_SEMANTIC_FOG: assert(fog == FALSE); fog = TRUE; /* Fall through */ case TGSI_SEMANTIC_GENERIC: - tab[i] = 6 + texs; + vs_output_tab[i] = 6 + texs; texs++; break; default: @@ -122,11 +122,11 @@ static void r300_vs_tab_routes(struct r300_context* r300, * for HW TCL case it could be generated by vertex shader */ if (!pos) { /* Make room for the position attribute - * at the beginning of the tab. */ + * at the beginning of the vs_output_tab. */ for (i = 15; i > 0; i--) { - tab[i] = tab[i-1]; + vs_output_tab[i] = vs_output_tab[i-1]; } - tab[0] = 0; + vs_output_tab[0] = 0; } /* Position. */ @@ -229,34 +229,34 @@ static void r300_vertex_psc(struct r300_context* r300, /* Update the PSC tables for SW TCL, using Draw. */ static void r300_swtcl_vertex_psc(struct r300_context* r300, - struct r300_vertex_info* vformat) + struct r300_vertex_info* vformat, + int* vs_output_tab) { struct vertex_info* vinfo = &vformat->vinfo; - int* tab = vformat->vs_tab; uint16_t type, swizzle; enum pipe_format format; unsigned i, attrib_count; /* For each Draw attribute, route it to the fragment shader according - * to the tab. */ + * to the vs_output_tab. */ attrib_count = vinfo->num_attribs; DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count); for (i = 0; i < attrib_count; i++) { DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d," - " tab %d\n", vinfo->attrib[i].src_index, + " vs_output_tab %d\n", vinfo->attrib[i].src_index, vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, - tab[i]); + vs_output_tab[i]); } for (i = 0; i < attrib_count; i++) { /* Make sure we have a proper destination for our attribute. */ - assert(tab[i] != -1); + assert(vs_output_tab[i] != -1); format = draw_translate_vinfo_format(vinfo->attrib[i].emit); /* Obtain the type of data in this attribute. */ type = r300_translate_vertex_data_type(format) | - tab[i] << R300_DST_VEC_LOC_SHIFT; + vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT; /* Obtain the swizzle for this attribute. Note that the default * swizzle in the hardware is not XYZW! */ @@ -280,68 +280,6 @@ static void r300_swtcl_vertex_psc(struct r300_context* r300, (R300_LAST_VEC << (i & 1 ? 16 : 0)); } -/* Set up the mappings from GB to US, for RS block. */ -static void r300_update_fs_tab(struct r300_context* r300, - struct r300_vertex_info* vformat) -{ - struct tgsi_shader_info* info = &r300->fs->info; - int i, cols = 0, texs = 0, cols_emitted = 0; - int* tab = vformat->fs_tab; - - for (i = 0; i < 16; i++) { - tab[i] = -1; - } - - assert(info->num_inputs <= 16); - for (i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { - case TGSI_SEMANTIC_COLOR: - tab[i] = INTERP_LINEAR; - cols++; - break; - case TGSI_SEMANTIC_POSITION: - case TGSI_SEMANTIC_PSIZE: - debug_printf("r300: Implementation error: Can't use " - "pos attribs in fragshader yet!\n"); - /* Pass through for now */ - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - tab[i] = INTERP_PERSPECTIVE; - break; - default: - debug_printf("r300: Unknown vertex input %d\n", - info->input_semantic_name[i]); - break; - } - } - - /* Now that we know where everything is... */ - DBG(r300, DBG_DRAW, "r300: fp input count: %d\n", info->num_inputs); - for (i = 0; i < info->num_inputs; i++) { - switch (tab[i]) { - case INTERP_LINEAR: - DBG(r300, DBG_DRAW, "r300: attrib: " - "stack offset %d, color, tab %d\n", - i, cols_emitted); - tab[i] = cols_emitted; - cols_emitted++; - break; - case INTERP_PERSPECTIVE: - DBG(r300, DBG_DRAW, "r300: attrib: " - "stack offset %d, texcoord, tab %d\n", - i, cols + texs); - tab[i] = cols + texs; - texs++; - break; - case -1: - debug_printf("r300: Implementation error: Bad fp interp!\n"); - default: - break; - } - } - -} - /* Set up the RS block. This is the part of the chipset that actually does * the rasterization of vertices into fragments. This is also the part of the * chipset that locks up if any part of it is even slightly wrong. */ @@ -456,8 +394,13 @@ static void r300_update_derived_shader_state(struct r300_context* r300) struct r300_screen* r300screen = r300_screen(r300->context.screen); struct r300_vertex_info* vformat; struct r300_rs_block* rs_block; + int vs_output_tab[16]; int i; + for (i = 0; i < 16; i++) { + vs_output_tab[i] = -1; + } + /* struct r300_shader_key* key; struct r300_shader_derived_value* value; @@ -488,21 +431,14 @@ static void r300_update_derived_shader_state(struct r300_context* r300) vformat = CALLOC_STRUCT(r300_vertex_info); rs_block = CALLOC_STRUCT(r300_rs_block); - for (i = 0; i < 16; i++) { - vformat->vs_tab[i] = -1; - vformat->fs_tab[i] = -1; - } - - r300_vs_tab_routes(r300, vformat); + r300_vs_output_tab_routes(r300, vformat, vs_output_tab); if (r300screen->caps->has_tcl) { r300_vertex_psc(r300, vformat); } else { - r300_swtcl_vertex_psc(r300, vformat); + r300_swtcl_vertex_psc(r300, vformat, vs_output_tab); } - r300_update_fs_tab(r300, vformat); - r300_update_rs_block(r300, rs_block); FREE(r300->vertex_info); -- cgit v1.2.3 From 44c0aaf990f46c6dcb46d58dda0c182f5d40cb42 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 20 Nov 2009 04:52:49 +0100 Subject: r300g: do not reallocate r300_vertex_info and r300_rs_block all the time --- src/gallium/drivers/r300/r300_context.c | 2 ++ src/gallium/drivers/r300/r300_state_derived.c | 42 ++++++++++++--------------- 2 files changed, 20 insertions(+), 24 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 26db536248..769733b6dd 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -69,6 +69,7 @@ static void r300_destroy_context(struct pipe_context* context) FREE(r300->blend_color_state); FREE(r300->rs_block); FREE(r300->scissor_state); + FREE(r300->vertex_info); FREE(r300->viewport_state); FREE(r300); } @@ -152,6 +153,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state); r300->rs_block = CALLOC_STRUCT(r300_rs_block); r300->scissor_state = CALLOC_STRUCT(r300_scissor_state); + r300->vertex_info = CALLOC_STRUCT(r300_vertex_info); r300->viewport_state = CALLOC_STRUCT(r300_viewport_state); /* Open up the OQ BO. */ diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 45aeefb483..6fb780cb29 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -63,10 +63,9 @@ int r300_shader_key_compare(void* key1, void* key2) { /* Set up the vs_output_tab and routes. */ static void r300_vs_output_tab_routes(struct r300_context* r300, - struct r300_vertex_info* vformat, int* vs_output_tab) { - struct vertex_info* vinfo = &vformat->vinfo; + struct vertex_info* vinfo = &r300->vertex_info->vinfo; boolean pos = FALSE, psize = FALSE, fog = FALSE; int i, texs = 0, cols = 0; struct tgsi_shader_info* info = &r300->fs->info; @@ -185,9 +184,9 @@ static void r300_vs_output_tab_routes(struct r300_context* r300, } /* Update the PSC tables. */ -static void r300_vertex_psc(struct r300_context* r300, - struct r300_vertex_info* vformat) +static void r300_vertex_psc(struct r300_context* r300) { + struct r300_vertex_info *vformat = r300->vertex_info; uint16_t type, swizzle; enum pipe_format format; unsigned i; @@ -229,9 +228,9 @@ static void r300_vertex_psc(struct r300_context* r300, /* Update the PSC tables for SW TCL, using Draw. */ static void r300_swtcl_vertex_psc(struct r300_context* r300, - struct r300_vertex_info* vformat, int* vs_output_tab) { + struct r300_vertex_info *vformat = r300->vertex_info; struct vertex_info* vinfo = &vformat->vinfo; uint16_t type, swizzle; enum pipe_format format; @@ -283,9 +282,9 @@ static void r300_swtcl_vertex_psc(struct r300_context* r300, /* Set up the RS block. This is the part of the chipset that actually does * the rasterization of vertices into fragments. This is also the part of the * chipset that locks up if any part of it is even slightly wrong. */ -static void r300_update_rs_block(struct r300_context* r300, - struct r300_rs_block* rs) +static void r300_update_rs_block(struct r300_context* r300) { + struct r300_rs_block* rs = r300->rs_block; struct tgsi_shader_info* info = &r300->fs->info; int col_count = 0, fp_offset = 0, i, tex_count = 0; int rs_tex_comp = 0; @@ -392,14 +391,9 @@ static void r300_update_rs_block(struct r300_context* r300, static void r300_update_derived_shader_state(struct r300_context* r300) { struct r300_screen* r300screen = r300_screen(r300->context.screen); - struct r300_vertex_info* vformat; - struct r300_rs_block* rs_block; int vs_output_tab[16]; int i; - for (i = 0; i < 16; i++) { - vs_output_tab[i] = -1; - } /* struct r300_shader_key* key; @@ -427,25 +421,25 @@ static void r300_update_derived_shader_state(struct r300_context* r300) (void*)key, (void*)value); } */ - /* XXX This will be refactored ASAP. */ - vformat = CALLOC_STRUCT(r300_vertex_info); - rs_block = CALLOC_STRUCT(r300_rs_block); + /* Reset structures */ + memset(r300->rs_block, 0, sizeof(struct r300_rs_block)); + memset(r300->vertex_info, 0, sizeof(struct r300_vertex_info)); - r300_vs_output_tab_routes(r300, vformat, vs_output_tab); + for (i = 0; i < 16; i++) { + vs_output_tab[i] = -1; + } + + /* Update states */ + r300_vs_output_tab_routes(r300, vs_output_tab); if (r300screen->caps->has_tcl) { - r300_vertex_psc(r300, vformat); + r300_vertex_psc(r300); } else { - r300_swtcl_vertex_psc(r300, vformat, vs_output_tab); + r300_swtcl_vertex_psc(r300, vs_output_tab); } - r300_update_rs_block(r300, rs_block); - - FREE(r300->vertex_info); - FREE(r300->rs_block); + r300_update_rs_block(r300); - r300->vertex_info = vformat; - r300->rs_block = rs_block; r300->dirty_state |= R300_NEW_RS_BLOCK; } -- cgit v1.2.3 From 2b07b640619ac68344276ba0557ea46b2cbc3f26 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 21 Nov 2009 19:13:26 -0800 Subject: r300g: Build fix. Oops. --- src/gallium/drivers/r300/r300_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 997d59ff6f..a88d66db24 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -688,7 +688,7 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, draw_set_vertex_buffers(r300->draw, count, buffers); } - r300->state |= R300_NEW_VERTEX_FORMAT; + r300->dirty_state |= R300_NEW_VERTEX_FORMAT; } static void r300_set_vertex_elements(struct pipe_context* pipe, -- cgit v1.2.3 From cc93fa3527e64963acd0e643d7d1061306d9e1df Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 23 Nov 2009 10:51:07 +0100 Subject: softpipe: Initialise TGSI machine's Face. --- src/gallium/drivers/softpipe/sp_fs_exec.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 4076114d39..a8999ed347 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -126,7 +126,13 @@ exec_run( const struct sp_fragment_shader *base, setup_pos_vector(quad->posCoef, (float)quad->input.x0, (float)quad->input.y0, &machine->QuadPos); - + + if (quad->input.facing) { + machine->Face = -1.0f; + } else { + machine->Face = 1.0f; + } + quad->inout.mask &= tgsi_exec_machine_run( machine ); if (quad->inout.mask == 0) return FALSE; -- cgit v1.2.3 From 86710c3334850eeaeffcac6d538e01fd5c203167 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Mon, 23 Nov 2009 19:59:02 +0100 Subject: svga: Scrub Makefiles a bit Remove x86 specific hacks. Not that they will ever be used on none x86 arches, but they are built by default. And the way the flags where added was a hack. --- src/gallium/drivers/svga/Makefile | 8 +------- src/gallium/winsys/drm/vmware/core/Makefile | 14 +------------- 2 files changed, 2 insertions(+), 20 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/Makefile b/src/gallium/drivers/svga/Makefile index fe1d6d7384..d1413319c9 100644 --- a/src/gallium/drivers/svga/Makefile +++ b/src/gallium/drivers/svga/Makefile @@ -51,13 +51,7 @@ LIBRARY_INCLUDES = \ -I$(TOP)/src/gallium/drivers/svga/include LIBRARY_DEFINES = \ + -std=gnu99 -fvisibility=hidden \ -DHAVE_STDINT_H -DHAVE_SYS_TYPES_H -CC = gcc -fvisibility=hidden -msse -msse2 - -# Set the gnu99 standard to enable anonymous structs in vmware headers. -# -CFLAGS = -Wall -Wmissing-prototypes -std=gnu99 -ffast-math \ - $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) $(ASM_FLAGS) - include ../../Makefile.template diff --git a/src/gallium/winsys/drm/vmware/core/Makefile b/src/gallium/winsys/drm/vmware/core/Makefile index ff8f01b322..a52957c1a5 100644 --- a/src/gallium/winsys/drm/vmware/core/Makefile +++ b/src/gallium/winsys/drm/vmware/core/Makefile @@ -28,20 +28,8 @@ LIBRARY_INCLUDES = \ $(shell pkg-config libdrm --cflags-only-I) LIBRARY_DEFINES = \ + -std=gnu99 -fvisibility=hidden \ -DHAVE_STDINT_H -D_FILE_OFFSET_BITS=64 \ $(shell pkg-config libdrm --cflags-only-other) -CC = gcc -fvisibility=hidden -msse -msse2 - -# Set the gnu99 standard to enable anonymous structs in vmware headers. -# -CFLAGS = -Wall -Wmissing-prototypes -std=gnu99 -ffast-math \ - $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) $(ASM_FLAGS) - include ../../../../Makefile.template - - -symlinks: - - -include depend -- cgit v1.2.3 From acc51ac0ace11bb375241467ba35e1014f5fb997 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Tue, 24 Nov 2009 01:14:03 +0100 Subject: svga: Filter out pendantic and ansi flags Rather have the driver compile without the flags then having to disable them. --- src/gallium/drivers/svga/Makefile | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/Makefile b/src/gallium/drivers/svga/Makefile index d1413319c9..8158364d25 100644 --- a/src/gallium/drivers/svga/Makefile +++ b/src/gallium/drivers/svga/Makefile @@ -50,6 +50,9 @@ C_SOURCES = \ LIBRARY_INCLUDES = \ -I$(TOP)/src/gallium/drivers/svga/include +# With linux-debug we get a lots of warnings, filter out the bad flags. +CFLAGS := $(filter-out -pedantic, $(filter-out -ansi, $(CFLAGS))) + LIBRARY_DEFINES = \ -std=gnu99 -fvisibility=hidden \ -DHAVE_STDINT_H -DHAVE_SYS_TYPES_H -- cgit v1.2.3 From 8d80b5400a1bbf4e959cd8257d11dfe0483e93db Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 23 Nov 2009 18:06:19 -0700 Subject: r300g: use util_bitcount() --- src/gallium/drivers/r300/r300_emit.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index ad7dff36be..e6ab8e4af1 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -721,21 +721,6 @@ void r300_emit_vertex_format_state(struct r300_context* r300) END_CS; } -/* XXX This should go to util ... */ -/* Return the number of bits set in the given number. */ -static unsigned bitcount(unsigned n) -{ - unsigned bits = 0; - - while (n) { - if (n & 1) { - bits++; - } - n >>= 1; - } - - return bits; -} void r300_emit_vertex_program_code(struct r300_context* r300, struct r300_vertex_program_code* code) @@ -745,8 +730,8 @@ void r300_emit_vertex_program_code(struct r300_context* r300, unsigned instruction_count = code->length / 4; int vtx_mem_size = r300screen->caps->is_r500 ? 128 : 72; - int input_count = MAX2(bitcount(code->InputsRead), 1); - int output_count = MAX2(bitcount(code->OutputsWritten), 1); + int input_count = MAX2(util_bitcount(code->InputsRead), 1); + int output_count = MAX2(util_bitcount(code->OutputsWritten), 1); int temp_count = MAX2(code->num_temporaries, 1); int pvs_num_slots = MIN3(vtx_mem_size / input_count, vtx_mem_size / output_count, 10); -- cgit v1.2.3 From f1ce37f74aff4854071fe5740b055718b2c0c789 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 24 Nov 2009 21:13:18 +0000 Subject: svga: cache textures as well as buffers --- src/gallium/drivers/svga/svga_screen_buffer.c | 12 ++- src/gallium/drivers/svga/svga_screen_cache.c | 93 +++++++++------- src/gallium/drivers/svga/svga_screen_cache.h | 21 ++-- src/gallium/drivers/svga/svga_screen_texture.c | 142 ++++++++++++++----------- src/gallium/drivers/svga/svga_screen_texture.h | 16 ++- 5 files changed, 167 insertions(+), 117 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_screen_buffer.c b/src/gallium/drivers/svga/svga_screen_buffer.c index 3b7811734e..101c7878bf 100644 --- a/src/gallium/drivers/svga/svga_screen_buffer.c +++ b/src/gallium/drivers/svga/svga_screen_buffer.c @@ -71,7 +71,10 @@ svga_buffer_create_host_surface(struct svga_screen *ss, sbuf->key.numFaces = 1; sbuf->key.numMipLevels = 1; + sbuf->key.cachable = 1; + SVGA_DBG(DEBUG_DMA, "surface_create for buffer sz %d\n", sbuf->base.size); + sbuf->handle = svga_screen_surface_create(ss, &sbuf->key); if(!sbuf->handle) return PIPE_ERROR_OUT_OF_MEMORY; @@ -82,7 +85,7 @@ svga_buffer_create_host_surface(struct svga_screen *ss, */ sbuf->hw.flags.discard = TRUE; - SVGA_DBG(DEBUG_DMA, " grab sid %p sz %d\n", sbuf->handle, sbuf->base.size); + SVGA_DBG(DEBUG_DMA, " --> got sid %p sz %d (buffer)\n", sbuf->handle, sbuf->base.size); } return PIPE_OK; @@ -776,12 +779,11 @@ svga_screen_buffer_wrap_surface(struct pipe_screen *screen, /* * We are not the creator of this surface and therefore we must not - * cache it for reuse. The caching code only caches SVGA3D_BUFFER surfaces - * so make sure this isn't one of those. + * cache it for reuse. Set the cacheable flag to zero in the key to + * prevent this. */ - - assert(format != SVGA3D_BUFFER); sbuf->key.format = format; + sbuf->key.cachable = 0; sws->surface_reference(sws, &sbuf->handle, srf); return buf; diff --git a/src/gallium/drivers/svga/svga_screen_cache.c b/src/gallium/drivers/svga/svga_screen_cache.c index 7360c1688b..65f5c07a72 100644 --- a/src/gallium/drivers/svga/svga_screen_cache.c +++ b/src/gallium/drivers/svga/svga_screen_cache.c @@ -24,6 +24,7 @@ **********************************************************/ #include "util/u_memory.h" +#include "util/u_hash.h" #include "svga_debug.h" #include "svga_winsys.h" @@ -36,24 +37,11 @@ /** * Compute the bucket for this key. - * - * We simply compute log2(width) for now, but */ static INLINE unsigned svga_screen_cache_bucket(const struct svga_host_surface_cache_key *key) { - unsigned bucket = 0; - unsigned size = key->size.width; - - while ((size >>= 1)) - ++bucket; - - if(key->flags & SVGA3D_SURFACE_HINT_INDEXBUFFER) - bucket += 32; - - assert(bucket < SVGA_HOST_SURFACE_CACHE_BUCKETS); - - return bucket; + return util_hash_crc32( key, sizeof key ) % SVGA_HOST_SURFACE_CACHE_BUCKETS; } @@ -69,6 +57,8 @@ svga_screen_cache_lookup(struct svga_screen *svgascreen, unsigned bucket; unsigned tries = 0; + assert(key->cachable); + bucket = svga_screen_cache_bucket(key); pipe_mutex_lock(cache->mutex); @@ -104,11 +94,9 @@ svga_screen_cache_lookup(struct svga_screen *svgascreen, pipe_mutex_unlock(cache->mutex); -#if 0 - _debug_printf("%s: cache %s after %u tries\n", __FUNCTION__, handle ? "hit" : "miss", tries); -#else - (void)tries; -#endif + if (SVGA_DEBUG & DEBUG_DMA) + debug_printf("%s: cache %s after %u tries\n", __FUNCTION__, + handle ? "hit" : "miss", tries); return handle; } @@ -128,6 +116,7 @@ svga_screen_cache_add(struct svga_screen *svgascreen, struct svga_host_surface_cache_entry *entry = NULL; struct svga_winsys_surface *handle = *p_handle; + assert(key->cachable); assert(handle); if(!handle) @@ -137,15 +126,15 @@ svga_screen_cache_add(struct svga_screen *svgascreen, pipe_mutex_lock(cache->mutex); if(!LIST_IS_EMPTY(&cache->empty)) { - /* use the first empty entry */ - entry = LIST_ENTRY(struct svga_host_surface_cache_entry, cache->empty.next, head); + /* use the first empty entry */ + entry = LIST_ENTRY(struct svga_host_surface_cache_entry, cache->empty.next, head); - LIST_DEL(&entry->head); - } + LIST_DEL(&entry->head); + } else if(!LIST_IS_EMPTY(&cache->unused)) { /* free the last used buffer and reuse its entry */ entry = LIST_ENTRY(struct svga_host_surface_cache_entry, cache->unused.prev, head); - SVGA_DBG(DEBUG_DMA, "unref sid %p\n", entry->handle); + SVGA_DBG(DEBUG_DMA, "unref sid %p (make space)\n", entry->handle); sws->surface_reference(sws, &entry->handle, NULL); LIST_DEL(&entry->bucket_head); @@ -161,7 +150,7 @@ svga_screen_cache_add(struct svga_screen *svgascreen, } else { /* Couldn't cache the buffer -- this really shouldn't happen */ - SVGA_DBG(DEBUG_DMA, "unref sid %p\n", handle); + SVGA_DBG(DEBUG_DMA, "unref sid %p (couldn't find space)\n", handle); sws->surface_reference(sws, &handle, NULL); } @@ -220,7 +209,7 @@ svga_screen_cache_cleanup(struct svga_screen *svgascreen) for(i = 0; i < SVGA_HOST_SURFACE_CACHE_SIZE; ++i) { if(cache->entries[i].handle) { - SVGA_DBG(DEBUG_DMA, "unref sid %p\n", cache->entries[i].handle); + SVGA_DBG(DEBUG_DMA, "unref sid %p (shutdown)\n", cache->entries[i].handle); sws->surface_reference(sws, &cache->entries[i].handle, NULL); } @@ -261,18 +250,42 @@ svga_screen_surface_create(struct svga_screen *svgascreen, { struct svga_winsys_screen *sws = svgascreen->sws; struct svga_winsys_surface *handle = NULL; + boolean cachable = SVGA_SURFACE_CACHE_ENABLED && key->cachable; + + SVGA_DBG(DEBUG_DMA, "%s sz %dx%dx%d mips %d faces %d cachable %d\n", + __FUNCTION__, + key->size.width, + key->size.height, + key->size.depth, + key->numMipLevels, + key->numFaces, + key->cachable); + + if (cachable) { + if (key->format == SVGA3D_BUFFER) { + /* For buffers, round the buffer size up to the nearest power + * of two to increase the probability of cache hits. Keep + * texture surface dimensions unchanged. + */ + uint32_t size = 1; + while(size < key->size.width) + size <<= 1; + key->size.width = size; + } - if (SVGA_SURFACE_CACHE_ENABLED && key->format == SVGA3D_BUFFER) { - /* round the buffer size up to the nearest power of two to increase the - * probability of cache hits */ - uint32_t size = 1; - while(size < key->size.width) - size <<= 1; - key->size.width = size; - handle = svga_screen_cache_lookup(svgascreen, key); - if (handle) - SVGA_DBG(DEBUG_DMA, " reuse sid %p sz %d\n", handle, size); + if (handle) { + if (key->format == SVGA3D_BUFFER) + SVGA_DBG(DEBUG_DMA, " reuse sid %p sz %d (buffer)\n", handle, + key->size.width); + else + SVGA_DBG(DEBUG_DMA, " reuse sid %p sz %dx%dx%d mips %d faces %d\n", handle, + key->size.width, + key->size.height, + key->size.depth, + key->numMipLevels, + key->numFaces); + } } if (!handle) { @@ -297,11 +310,15 @@ svga_screen_surface_destroy(struct svga_screen *svgascreen, { struct svga_winsys_screen *sws = svgascreen->sws; - if(SVGA_SURFACE_CACHE_ENABLED && key->format == SVGA3D_BUFFER) { + /* We only set the cachable flag for surfaces of which we are the + * exclusive owner. So just hold onto our existing reference in + * that case. + */ + if(SVGA_SURFACE_CACHE_ENABLED && key->cachable) { svga_screen_cache_add(svgascreen, key, p_handle); } else { - SVGA_DBG(DEBUG_DMA, "unref sid %p\n", *p_handle); + SVGA_DBG(DEBUG_DMA, "unref sid %p (uncachable)\n", *p_handle); sws->surface_reference(sws, p_handle, NULL); } } diff --git a/src/gallium/drivers/svga/svga_screen_cache.h b/src/gallium/drivers/svga/svga_screen_cache.h index 1bbe987768..b745769848 100644 --- a/src/gallium/drivers/svga/svga_screen_cache.h +++ b/src/gallium/drivers/svga/svga_screen_cache.h @@ -36,10 +36,18 @@ #include "util/u_double_list.h" -/* TODO: Reduce this once we don't allocate an index buffer per draw call */ +/* Guess the storage size of cached surfaces and try and keep it under + * this amount: + */ +#define SVGA_HOST_SURFACE_CACHE_BYTES 16*1024*1024 + +/* Maximum number of discrete surfaces in the cache: + */ #define SVGA_HOST_SURFACE_CACHE_SIZE 1024 -#define SVGA_HOST_SURFACE_CACHE_BUCKETS 64 +/* Number of hash buckets: + */ +#define SVGA_HOST_SURFACE_CACHE_BUCKETS 256 struct svga_winsys_surface; @@ -50,11 +58,12 @@ struct svga_screen; */ struct svga_host_surface_cache_key { - SVGA3dSurfaceFlags flags; - SVGA3dSurfaceFormat format; SVGA3dSize size; - uint32_t numFaces; - uint32_t numMipLevels; + uint32_t flags:8; + uint32_t format:8; + uint32_t numFaces:8; + uint32_t numMipLevels:7; + uint32_t cachable:1; /* False if this is a shared surface */ }; diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c index 8472dea04d..158a1e108d 100644 --- a/src/gallium/drivers/svga/svga_screen_texture.c +++ b/src/gallium/drivers/svga/svga_screen_texture.c @@ -266,14 +266,8 @@ svga_texture_create(struct pipe_screen *screen, const struct pipe_texture *templat) { struct svga_screen *svgascreen = svga_screen(screen); - struct svga_winsys_screen *sws = svgascreen->sws; struct svga_texture *tex = CALLOC_STRUCT(svga_texture); unsigned width, height, depth; - SVGA3dSurfaceFlags flags = 0; - SVGA3dSurfaceFormat format; - SVGA3dSize size; - uint32 numFaces; - uint32 numMipLevels; unsigned level; if (!tex) @@ -301,23 +295,24 @@ svga_texture_create(struct pipe_screen *screen, depth = minify(depth); } - size.width = templat->width[0]; - size.height = templat->height[0]; - size.depth = templat->depth[0]; + tex->key.flags = 0; + tex->key.size.width = templat->width[0]; + tex->key.size.height = templat->height[0]; + tex->key.size.depth = templat->depth[0]; if(templat->target == PIPE_TEXTURE_CUBE) { - flags |= SVGA3D_SURFACE_CUBEMAP; - numFaces = 6; + tex->key.flags |= SVGA3D_SURFACE_CUBEMAP; + tex->key.numFaces = 6; } else { - numFaces = 1; + tex->key.numFaces = 1; } if(templat->tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) - flags |= SVGA3D_SURFACE_HINT_TEXTURE; + tex->key.flags |= SVGA3D_SURFACE_HINT_TEXTURE; if(templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) - flags |= SVGA3D_SURFACE_HINT_SCANOUT; + tex->key.flags |= SVGA3D_SURFACE_HINT_SCANOUT; /* * XXX: Never pass the SVGA3D_SURFACE_HINT_RENDERTARGET hint. Mesa cannot @@ -328,21 +323,24 @@ svga_texture_create(struct pipe_screen *screen, #if 0 if((templat->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) && !pf_is_compressed(templat->format)) - flags |= SVGA3D_SURFACE_HINT_RENDERTARGET; + tex->key.flags |= SVGA3D_SURFACE_HINT_RENDERTARGET; #endif if(templat->tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) - flags |= SVGA3D_SURFACE_HINT_DEPTHSTENCIL; + tex->key.flags |= SVGA3D_SURFACE_HINT_DEPTHSTENCIL; - numMipLevels = templat->last_level + 1; + tex->key.numMipLevels = templat->last_level + 1; - format = svga_translate_format(templat->format); - if(format == SVGA3D_FORMAT_INVALID) + tex->key.format = svga_translate_format(templat->format); + if(tex->key.format == SVGA3D_FORMAT_INVALID) goto error2; + + tex->key.cachable = 1; - tex->handle = sws->surface_create(sws, flags, format, size, numFaces, numMipLevels); + SVGA_DBG(DEBUG_DMA, "surface_create for texture\n", tex->handle); + tex->handle = svga_screen_surface_create(svgascreen, &tex->key); if (tex->handle) - SVGA_DBG(DEBUG_DMA, "create sid %p (texture)\n", tex->handle); + SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture)\n", tex->handle); return &tex->base; @@ -398,6 +396,10 @@ svga_texture_blanket(struct pipe_screen * screen, return NULL; tex->base = *base; + + /* We don't own this storage, so don't try to cache it. + */ + tex->key.cachable = 0; if (sbuf->key.format == 1) tex->base.format = PIPE_FORMAT_X8R8G8B8_UNORM; @@ -407,6 +409,7 @@ svga_texture_blanket(struct pipe_screen * screen, pipe_reference_init(&tex->base.reference, 1); tex->base.screen = screen; + SVGA_DBG(DEBUG_DMA, "blanket sid %p\n", sbuf->handle); sws->surface_reference(sws, &tex->handle, sbuf->handle); return &tex->base; @@ -427,7 +430,7 @@ svga_texture_destroy(struct pipe_texture *pt) DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); */ SVGA_DBG(DEBUG_DMA, "unref sid %p (texture)\n", tex->handle); - ss->sws->surface_reference(ss->sws, &tex->handle, NULL); + svga_screen_surface_destroy(ss, &tex->key, &tex->handle); FREE(tex); } @@ -518,43 +521,43 @@ svga_texture_view_surface(struct pipe_context *pipe, unsigned start_mip, unsigned num_mip, int face_pick, - int zslice_pick) + int zslice_pick, + struct svga_host_surface_cache_key *key) /* OUT */ { struct svga_screen *ss = svga_screen(tex->base.screen); - struct svga_winsys_screen *sws = ss->sws; struct svga_winsys_surface *handle; int i, j; - SVGA3dSurfaceFlags flags = 0; - SVGA3dSize size; - uint32 numFaces; - uint32 numMipLevels = num_mip; unsigned z_offset = 0; SVGA_DBG(DEBUG_PERF, "svga: Create surface view: face %d zslice %d mips %d..%d\n", face_pick, zslice_pick, start_mip, start_mip+num_mip-1); - size.width = tex->base.width[start_mip]; - size.height = tex->base.height[start_mip]; - size.depth = zslice_pick < 0 ? tex->base.depth[start_mip] : 1; - assert(size.depth == 1); + key->flags = 0; + key->format = format; + key->numMipLevels = num_mip; + key->size.width = tex->base.width[start_mip]; + key->size.height = tex->base.height[start_mip]; + key->size.depth = zslice_pick < 0 ? tex->base.depth[start_mip] : 1; + key->cachable = 1; + assert(key->size.depth == 1); if(tex->base.target == PIPE_TEXTURE_CUBE && face_pick < 0) { - flags |= SVGA3D_SURFACE_CUBEMAP; - numFaces = 6; + key->flags |= SVGA3D_SURFACE_CUBEMAP; + key->numFaces = 6; } else { - numFaces = 1; + key->numFaces = 1; } - if(format == SVGA3D_FORMAT_INVALID) + if(key->format == SVGA3D_FORMAT_INVALID) return NULL; - handle = sws->surface_create(sws, flags, format, size, numFaces, numMipLevels); - + SVGA_DBG(DEBUG_DMA, "surface_create for texture view\n", handle); + handle = svga_screen_surface_create(ss, key); if (!handle) return NULL; - SVGA_DBG(DEBUG_DMA, "create sid %p (texture view)\n", handle); + SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture view)\n", handle); if (face_pick < 0) face_pick = 0; @@ -562,14 +565,20 @@ svga_texture_view_surface(struct pipe_context *pipe, if (zslice_pick >= 0) z_offset = zslice_pick; - for (i = 0; i < num_mip; i++) { - for (j = 0; j < numFaces; j++) { + for (i = 0; i < key->numMipLevels; i++) { + for (j = 0; j < key->numFaces; j++) { if(tex->defined[j + face_pick][i + start_mip]) { unsigned depth = zslice_pick < 0 ? tex->base.depth[i + start_mip] : 1; - svga_texture_copy_handle(svga_context(pipe), ss, - tex->handle, 0, 0, z_offset, i + start_mip, j + face_pick, + svga_texture_copy_handle(svga_context(pipe), + ss, + tex->handle, + 0, 0, z_offset, + i + start_mip, + j + face_pick, handle, 0, 0, 0, i, j, - tex->base.width[i + start_mip], tex->base.height[i + start_mip], depth); + tex->base.width[i + start_mip], + tex->base.height[i + start_mip], + depth); } } } @@ -586,25 +595,23 @@ svga_get_tex_surface(struct pipe_screen *screen, { struct svga_texture *tex = svga_texture(pt); struct svga_surface *s; - struct pipe_surface *ps; boolean render = flags & PIPE_BUFFER_USAGE_GPU_WRITE ? TRUE : FALSE; boolean view = FALSE; SVGA3dSurfaceFormat format; s = CALLOC_STRUCT(svga_surface); - ps = &s->base; - if (!ps) + if (!s) return NULL; - pipe_reference_init(&ps->reference, 1); - pipe_texture_reference(&ps->texture, pt); - ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; - ps->usage = flags; - ps->level = level; - ps->face = face; - ps->zslice = zslice; + pipe_reference_init(&s->base.reference, 1); + pipe_texture_reference(&s->base.texture, pt); + s->base.format = pt->format; + s->base.width = pt->width[level]; + s->base.height = pt->height[level]; + s->base.usage = flags; + s->base.level = level; + s->base.face = face; + s->base.zslice = zslice; if (!render) format = svga_translate_format(pt->format); @@ -619,11 +626,13 @@ svga_get_tex_surface(struct pipe_screen *screen, view = TRUE; /* Currently only used for compressed textures */ - if (render && (format != svga_translate_format(pt->format))) { + if (render && + format != svga_translate_format(pt->format)) { view = TRUE; } - if (level != 0 && svga_screen(screen)->debug.force_level_surface_view) + if (level != 0 && + svga_screen(screen)->debug.force_level_surface_view) view = TRUE; if (pt->target == PIPE_TEXTURE_3D) @@ -634,9 +643,10 @@ svga_get_tex_surface(struct pipe_screen *screen, if (view) { SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u face %u z %u, %p\n", - pt, level, face, zslice, ps); + pt, level, face, zslice, s); - s->handle = svga_texture_view_surface(NULL, tex, format, level, 1, face, zslice); + s->handle = svga_texture_view_surface(NULL, tex, format, level, 1, face, zslice, + &s->key); s->real_face = 0; s->real_level = 0; s->real_zslice = 0; @@ -644,15 +654,16 @@ svga_get_tex_surface(struct pipe_screen *screen, struct svga_winsys_screen *sws = svga_winsys_screen(screen); SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, face %u, z %u, %p\n", - pt, level, face, zslice, ps); + pt, level, face, zslice, s); sws->surface_reference(sws, &s->handle, tex->handle); s->real_face = face; s->real_level = level; s->real_zslice = zslice; + memset(&s->key, 0, sizeof s->key); } - return ps; + return &s->base; } @@ -663,7 +674,7 @@ svga_tex_surface_destroy(struct pipe_surface *surf) struct svga_screen *ss = svga_screen(surf->texture->screen); SVGA_DBG(DEBUG_DMA, "unref sid %p (tex surface)\n", s->handle); - ss->sws->surface_reference(ss->sws, &s->handle, NULL); + svga_screen_surface_destroy(ss, &s->key, &s->handle); pipe_texture_reference(&surf->texture, NULL); FREE(surf); } @@ -974,7 +985,8 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt, sv->handle = svga_texture_view_surface(pipe, tex, format, min_lod, max_lod - min_lod + 1, - -1, -1); + -1, -1, + &sv->key); if (!sv->handle) { assert(0); @@ -1030,7 +1042,7 @@ svga_destroy_sampler_view_priv(struct svga_sampler_view *v) struct svga_screen *ss = svga_screen(v->texture->base.screen); SVGA_DBG(DEBUG_DMA, "unref sid %p (sampler view)\n", v->handle); - ss->sws->surface_reference(ss->sws, &v->handle, NULL); + svga_screen_surface_destroy(ss, &v->key, &v->handle); FREE(v); } diff --git a/src/gallium/drivers/svga/svga_screen_texture.h b/src/gallium/drivers/svga/svga_screen_texture.h index 1e6fef59a3..1cc4063e65 100644 --- a/src/gallium/drivers/svga/svga_screen_texture.h +++ b/src/gallium/drivers/svga/svga_screen_texture.h @@ -29,7 +29,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_state.h" - +#include "svga_screen_cache.h" struct pipe_context; struct pipe_screen; @@ -68,6 +68,7 @@ struct svga_sampler_view unsigned age; + struct svga_host_surface_cache_key key; struct svga_winsys_surface *handle; }; @@ -76,8 +77,6 @@ struct svga_texture { struct pipe_texture base; - struct svga_winsys_surface *handle; - boolean defined[6][PIPE_MAX_TEXTURE_LEVELS]; struct svga_sampler_view *cached_view; @@ -86,6 +85,16 @@ struct svga_texture unsigned age; boolean views_modified; + + /** + * Creation key for the host surface handle. + * + * This structure describes all the host surface characteristics so that it + * can be looked up in cache, since creating a host surface is often a slow + * operation. + */ + struct svga_host_surface_cache_key key; + struct svga_winsys_surface *handle; }; @@ -93,6 +102,7 @@ struct svga_surface { struct pipe_surface base; + struct svga_host_surface_cache_key key; struct svga_winsys_surface *handle; unsigned real_face; -- cgit v1.2.3 From 55b0157860af0eb957262cb0d22ab47eccd85940 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 25 Nov 2009 11:44:41 +0000 Subject: svga: revert packing of surface key Over-ambitious packing of values broke my cursor. --- src/gallium/drivers/svga/svga_screen_cache.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_screen_cache.h b/src/gallium/drivers/svga/svga_screen_cache.h index b745769848..f5aa740d40 100644 --- a/src/gallium/drivers/svga/svga_screen_cache.h +++ b/src/gallium/drivers/svga/svga_screen_cache.h @@ -58,10 +58,10 @@ struct svga_screen; */ struct svga_host_surface_cache_key { + SVGA3dSurfaceFlags flags; + SVGA3dSurfaceFormat format; SVGA3dSize size; - uint32_t flags:8; - uint32_t format:8; - uint32_t numFaces:8; + uint32_t numFaces:24; uint32_t numMipLevels:7; uint32_t cachable:1; /* False if this is a shared surface */ }; -- cgit v1.2.3 From 2946aea110beda9c2e0382507b0dba7c508ff5eb Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 25 Nov 2009 17:13:04 +0000 Subject: svga: try harder to make the cachable flag work It doesn't though. --- src/gallium/drivers/svga/svga_screen_buffer.c | 2 ++ src/gallium/drivers/svga/svga_screen_texture.c | 23 +++++++++++++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_screen_buffer.c b/src/gallium/drivers/svga/svga_screen_buffer.c index 101c7878bf..c0b0f518bc 100644 --- a/src/gallium/drivers/svga/svga_screen_buffer.c +++ b/src/gallium/drivers/svga/svga_screen_buffer.c @@ -796,6 +796,8 @@ svga_screen_buffer_get_winsys_surface(struct pipe_buffer *buffer) struct svga_winsys_screen *sws = svga_winsys_screen(buffer->screen); struct svga_winsys_surface *vsurf = NULL; + assert(svga_buffer(buffer)->key.cachable == 0); + svga_buffer(buffer)->key.cachable = 0; sws->surface_reference(sws, &vsurf, svga_buffer(buffer)->handle); return vsurf; } diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c index 158a1e108d..d61d88114c 100644 --- a/src/gallium/drivers/svga/svga_screen_texture.c +++ b/src/gallium/drivers/svga/svga_screen_texture.c @@ -397,9 +397,6 @@ svga_texture_blanket(struct pipe_screen * screen, tex->base = *base; - /* We don't own this storage, so don't try to cache it. - */ - tex->key.cachable = 0; if (sbuf->key.format == 1) tex->base.format = PIPE_FORMAT_X8R8G8B8_UNORM; @@ -410,6 +407,11 @@ svga_texture_blanket(struct pipe_screen * screen, tex->base.screen = screen; SVGA_DBG(DEBUG_DMA, "blanket sid %p\n", sbuf->handle); + + /* We don't own this storage, so don't try to cache it. + */ + assert(sbuf->key.cachable == 0); + tex->key.cachable = 0; sws->surface_reference(sws, &tex->handle, sbuf->handle); return &tex->base; @@ -549,13 +551,17 @@ svga_texture_view_surface(struct pipe_context *pipe, key->numFaces = 1; } - if(key->format == SVGA3D_FORMAT_INVALID) + if(key->format == SVGA3D_FORMAT_INVALID) { + key->cachable = 0; return NULL; + } SVGA_DBG(DEBUG_DMA, "surface_create for texture view\n", handle); handle = svga_screen_surface_create(ss, key); - if (!handle) + if (!handle) { + key->cachable = 0; return NULL; + } SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture view)\n", handle); @@ -656,11 +662,11 @@ svga_get_tex_surface(struct pipe_screen *screen, SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, face %u, z %u, %p\n", pt, level, face, zslice, s); + memset(&s->key, 0, sizeof s->key); sws->surface_reference(sws, &s->handle, tex->handle); s->real_face = face; s->real_level = level; s->real_zslice = zslice; - memset(&s->key, 0, sizeof s->key); } return &s->base; @@ -674,6 +680,7 @@ svga_tex_surface_destroy(struct pipe_surface *surf) struct svga_screen *ss = svga_screen(surf->texture->screen); SVGA_DBG(DEBUG_DMA, "unref sid %p (tex surface)\n", s->handle); + assert(s->key.cachable == 0); svga_screen_surface_destroy(ss, &s->key, &s->handle); pipe_texture_reference(&surf->texture, NULL); FREE(surf); @@ -968,6 +975,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt, pt->height[0], pt->depth[0], pt->last_level); + sv->key.cachable = 0; sws->surface_reference(sws, &sv->handle, tex->handle); return sv; } @@ -990,6 +998,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt, if (!sv->handle) { assert(0); + sv->key.cachable = 0; sws->surface_reference(sws, &sv->handle, tex->handle); return sv; } @@ -1072,6 +1081,8 @@ svga_screen_texture_get_winsys_surface(struct pipe_texture *texture) struct svga_winsys_screen *sws = svga_winsys_screen(texture->screen); struct svga_winsys_surface *vsurf = NULL; + assert(svga_texture(texture)->key.cachable == 0); + svga_texture(texture)->key.cachable = 0; sws->surface_reference(sws, &vsurf, svga_texture(texture)->handle); return vsurf; } -- cgit v1.2.3 From ba1ca28cc62fed71c77902b95ae4ed36c6bf25f8 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 24 Nov 2009 13:41:03 +0000 Subject: gallium: simplify tgsi tokens further Drop anonymous 'Extended' fields, have every optional token named explicitly in its parent. Eg. there is now an Instruction.Label flag, etc. Drop destination modifiers and other functionality which cannot be generated by tgsi_ureg.c, which is now the primary way of creating shaders. Pull source modifiers into the source register token, drop the second negate flag. The source register token is now full - if we need to expand it, probably best to move all of the modifiers to a new token and have a single flag for it. --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 3 +- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 3 +- src/gallium/auxiliary/tgsi/tgsi_build.c | 271 ++++++------------------ src/gallium/auxiliary/tgsi/tgsi_build.h | 61 ++---- src/gallium/auxiliary/tgsi/tgsi_dump.c | 40 +--- src/gallium/auxiliary/tgsi/tgsi_exec.c | 10 +- src/gallium/auxiliary/tgsi/tgsi_parse.c | 83 +------- src/gallium/auxiliary/tgsi/tgsi_parse.h | 6 +- src/gallium/auxiliary/tgsi/tgsi_scan.c | 7 +- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 3 +- src/gallium/auxiliary/tgsi/tgsi_text.c | 155 +------------- src/gallium/auxiliary/tgsi/tgsi_ureg.c | 34 +-- src/gallium/auxiliary/tgsi/tgsi_util.c | 30 +-- src/gallium/auxiliary/vl/vl_shader_build.c | 3 +- src/gallium/drivers/i915/i915_fpc_translate.c | 2 +- src/gallium/drivers/svga/svga_tgsi_insn.c | 12 +- src/gallium/include/pipe/p_shader_tokens.h | 158 ++------------ 17 files changed, 160 insertions(+), 721 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 9f956715a2..e374010fee 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -268,7 +268,8 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; newInst.FullDstRegisters[0].DstRegister.Index = aactx->texTemp; newInst.Instruction.NumSrcRegs = 2; - newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + newInst.Instruction.Texture = TRUE; + newInst.InstructionTexture.Texture = TGSI_TEXTURE_2D; newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->maxInput + 1; newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 283502cdf3..9de06e37ed 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -296,7 +296,8 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; newInst.Instruction.NumSrcRegs = 2; - newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + newInst.Instruction.Texture = TRUE; + newInst.InstructionTexture.Texture = TGSI_TEXTURE_2D; newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 9791e58db3..ce9e72e8b5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -122,7 +122,6 @@ tgsi_default_declaration( void ) declaration.Centroid = 0; declaration.Invariant = 0; declaration.Padding = 0; - declaration.Extended = 0; return declaration; } @@ -311,7 +310,6 @@ tgsi_default_immediate( void ) immediate.NrTokens = 1; immediate.DataType = TGSI_IMM_FLOAT32; immediate.Padding = 0; - immediate.Extended = 0; return immediate; } @@ -422,8 +420,9 @@ tgsi_default_instruction( void ) instruction.Predicate = 0; instruction.NumDstRegs = 1; instruction.NumSrcRegs = 1; + instruction.Label = 0; + instruction.Texture = 0; instruction.Padding = 0; - instruction.Extended = 0; return instruction; } @@ -475,8 +474,8 @@ tgsi_default_full_instruction( void ) full_instruction.Instruction = tgsi_default_instruction(); full_instruction.InstructionPredicate = tgsi_default_instruction_predicate(); - full_instruction.InstructionExtLabel = tgsi_default_instruction_ext_label(); - full_instruction.InstructionExtTexture = tgsi_default_instruction_ext_texture(); + full_instruction.InstructionLabel = tgsi_default_instruction_label(); + full_instruction.InstructionTexture = tgsi_default_instruction_texture(); for( i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) { full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register(); } @@ -534,42 +533,42 @@ tgsi_build_full_instruction( header); } - if( tgsi_compare_instruction_ext_label( - full_inst->InstructionExtLabel, - tgsi_default_instruction_ext_label() ) ) { - struct tgsi_instruction_ext_label *instruction_ext_label; + if( tgsi_compare_instruction_label( + full_inst->InstructionLabel, + tgsi_default_instruction_label() ) ) { + struct tgsi_instruction_label *instruction_label; if( maxsize <= size ) return 0; - instruction_ext_label = - (struct tgsi_instruction_ext_label *) &tokens[size]; + instruction_label = + (struct tgsi_instruction_label *) &tokens[size]; size++; - *instruction_ext_label = tgsi_build_instruction_ext_label( - full_inst->InstructionExtLabel.Label, + *instruction_label = tgsi_build_instruction_label( + full_inst->InstructionLabel.Label, prev_token, instruction, header ); - prev_token = (struct tgsi_token *) instruction_ext_label; + prev_token = (struct tgsi_token *) instruction_label; } - if( tgsi_compare_instruction_ext_texture( - full_inst->InstructionExtTexture, - tgsi_default_instruction_ext_texture() ) ) { - struct tgsi_instruction_ext_texture *instruction_ext_texture; + if( tgsi_compare_instruction_texture( + full_inst->InstructionTexture, + tgsi_default_instruction_texture() ) ) { + struct tgsi_instruction_texture *instruction_texture; if( maxsize <= size ) return 0; - instruction_ext_texture = - (struct tgsi_instruction_ext_texture *) &tokens[size]; + instruction_texture = + (struct tgsi_instruction_texture *) &tokens[size]; size++; - *instruction_ext_texture = tgsi_build_instruction_ext_texture( - full_inst->InstructionExtTexture.Texture, + *instruction_texture = tgsi_build_instruction_texture( + full_inst->InstructionTexture.Texture, prev_token, instruction, header ); - prev_token = (struct tgsi_token *) instruction_ext_texture; + prev_token = (struct tgsi_token *) instruction_texture; } for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) { @@ -591,25 +590,6 @@ tgsi_build_full_instruction( header ); prev_token = (struct tgsi_token *) dst_register; - if( tgsi_compare_dst_register_ext_modulate( - reg->DstRegisterExtModulate, - tgsi_default_dst_register_ext_modulate() ) ) { - struct tgsi_dst_register_ext_modulate *dst_register_ext_modulate; - - if( maxsize <= size ) - return 0; - dst_register_ext_modulate = - (struct tgsi_dst_register_ext_modulate *) &tokens[size]; - size++; - - *dst_register_ext_modulate = tgsi_build_dst_register_ext_modulate( - reg->DstRegisterExtModulate.Modulate, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) dst_register_ext_modulate; - } - if( reg->DstRegister.Indirect ) { struct tgsi_src_register *ind; @@ -625,6 +605,7 @@ tgsi_build_full_instruction( reg->DstRegisterInd.SwizzleZ, reg->DstRegisterInd.SwizzleW, reg->DstRegisterInd.Negate, + reg->DstRegisterInd.Absolute, reg->DstRegisterInd.Indirect, reg->DstRegisterInd.Dimension, reg->DstRegisterInd.Index, @@ -650,6 +631,7 @@ tgsi_build_full_instruction( reg->SrcRegister.SwizzleZ, reg->SrcRegister.SwizzleW, reg->SrcRegister.Negate, + reg->SrcRegister.Absolute, reg->SrcRegister.Indirect, reg->SrcRegister.Dimension, reg->SrcRegister.Index, @@ -657,29 +639,6 @@ tgsi_build_full_instruction( header ); prev_token = (struct tgsi_token *) src_register; - if( tgsi_compare_src_register_ext_mod( - reg->SrcRegisterExtMod, - tgsi_default_src_register_ext_mod() ) ) { - struct tgsi_src_register_ext_mod *src_register_ext_mod; - - if( maxsize <= size ) - return 0; - src_register_ext_mod = - (struct tgsi_src_register_ext_mod *) &tokens[size]; - size++; - - *src_register_ext_mod = tgsi_build_src_register_ext_mod( - reg->SrcRegisterExtMod.Complement, - reg->SrcRegisterExtMod.Bias, - reg->SrcRegisterExtMod.Scale2X, - reg->SrcRegisterExtMod.Absolute, - reg->SrcRegisterExtMod.Negate, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) src_register_ext_mod; - } - if( reg->SrcRegister.Indirect ) { struct tgsi_src_register *ind; @@ -695,6 +654,7 @@ tgsi_build_full_instruction( reg->SrcRegisterInd.SwizzleZ, reg->SrcRegisterInd.SwizzleW, reg->SrcRegisterInd.Negate, + reg->SrcRegisterInd.Absolute, reg->SrcRegisterInd.Indirect, reg->SrcRegisterInd.Dimension, reg->SrcRegisterInd.Index, @@ -733,6 +693,7 @@ tgsi_build_full_instruction( reg->SrcRegisterDimInd.SwizzleZ, reg->SrcRegisterDimInd.SwizzleW, reg->SrcRegisterDimInd.Negate, + reg->SrcRegisterDimInd.Absolute, reg->SrcRegisterDimInd.Indirect, reg->SrcRegisterDimInd.Dimension, reg->SrcRegisterDimInd.Index, @@ -793,86 +754,80 @@ compare32(const void *a, const void *b) return *((uint32_t *) a) != *((uint32_t *) b); } -struct tgsi_instruction_ext_label -tgsi_default_instruction_ext_label( void ) +struct tgsi_instruction_label +tgsi_default_instruction_label( void ) { - struct tgsi_instruction_ext_label instruction_ext_label; + struct tgsi_instruction_label instruction_label; - instruction_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL; - instruction_ext_label.Label = 0; - instruction_ext_label.Padding = 0; - instruction_ext_label.Extended = 0; + instruction_label.Label = 0; + instruction_label.Padding = 0; - return instruction_ext_label; + return instruction_label; } unsigned -tgsi_compare_instruction_ext_label( - struct tgsi_instruction_ext_label a, - struct tgsi_instruction_ext_label b ) +tgsi_compare_instruction_label( + struct tgsi_instruction_label a, + struct tgsi_instruction_label b ) { a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; return compare32(&a, &b); } -struct tgsi_instruction_ext_label -tgsi_build_instruction_ext_label( +struct tgsi_instruction_label +tgsi_build_instruction_label( unsigned label, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, struct tgsi_header *header ) { - struct tgsi_instruction_ext_label instruction_ext_label; + struct tgsi_instruction_label instruction_label; - instruction_ext_label = tgsi_default_instruction_ext_label(); - instruction_ext_label.Label = label; + instruction_label = tgsi_default_instruction_label(); + instruction_label.Label = label; + instruction->Label = 1; - prev_token->Extended = 1; instruction_grow( instruction, header ); - return instruction_ext_label; + return instruction_label; } -struct tgsi_instruction_ext_texture -tgsi_default_instruction_ext_texture( void ) +struct tgsi_instruction_texture +tgsi_default_instruction_texture( void ) { - struct tgsi_instruction_ext_texture instruction_ext_texture; + struct tgsi_instruction_texture instruction_texture; - instruction_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE; - instruction_ext_texture.Texture = TGSI_TEXTURE_UNKNOWN; - instruction_ext_texture.Padding = 0; - instruction_ext_texture.Extended = 0; + instruction_texture.Texture = TGSI_TEXTURE_UNKNOWN; + instruction_texture.Padding = 0; - return instruction_ext_texture; + return instruction_texture; } unsigned -tgsi_compare_instruction_ext_texture( - struct tgsi_instruction_ext_texture a, - struct tgsi_instruction_ext_texture b ) +tgsi_compare_instruction_texture( + struct tgsi_instruction_texture a, + struct tgsi_instruction_texture b ) { a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; return compare32(&a, &b); } -struct tgsi_instruction_ext_texture -tgsi_build_instruction_ext_texture( +struct tgsi_instruction_texture +tgsi_build_instruction_texture( unsigned texture, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, struct tgsi_header *header ) { - struct tgsi_instruction_ext_texture instruction_ext_texture; + struct tgsi_instruction_texture instruction_texture; - instruction_ext_texture = tgsi_default_instruction_ext_texture(); - instruction_ext_texture.Texture = texture; + instruction_texture = tgsi_default_instruction_texture(); + instruction_texture.Texture = texture; + instruction->Texture = 1; - prev_token->Extended = 1; instruction_grow( instruction, header ); - return instruction_ext_texture; + return instruction_texture; } struct tgsi_src_register @@ -886,10 +841,10 @@ tgsi_default_src_register( void ) src_register.SwizzleZ = TGSI_SWIZZLE_Z; src_register.SwizzleW = TGSI_SWIZZLE_W; src_register.Negate = 0; + src_register.Absolute = 0; src_register.Indirect = 0; src_register.Dimension = 0; src_register.Index = 0; - src_register.Extended = 0; return src_register; } @@ -902,6 +857,7 @@ tgsi_build_src_register( unsigned swizzle_z, unsigned swizzle_w, unsigned negate, + unsigned absolute, unsigned indirect, unsigned dimension, int index, @@ -925,6 +881,7 @@ tgsi_build_src_register( src_register.SwizzleZ = swizzle_z; src_register.SwizzleW = swizzle_w; src_register.Negate = negate; + src_register.Absolute = absolute; src_register.Indirect = indirect; src_register.Dimension = dimension; src_register.Index = index; @@ -940,7 +897,6 @@ tgsi_default_full_src_register( void ) struct tgsi_full_src_register full_src_register; full_src_register.SrcRegister = tgsi_default_src_register(); - full_src_register.SrcRegisterExtMod = tgsi_default_src_register_ext_mod(); full_src_register.SrcRegisterInd = tgsi_default_src_register(); full_src_register.SrcRegisterDim = tgsi_default_dimension(); full_src_register.SrcRegisterDimInd = tgsi_default_src_register(); @@ -949,65 +905,6 @@ tgsi_default_full_src_register( void ) } -struct tgsi_src_register_ext_mod -tgsi_default_src_register_ext_mod( void ) -{ - struct tgsi_src_register_ext_mod src_register_ext_mod; - - src_register_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD; - src_register_ext_mod.Complement = 0; - src_register_ext_mod.Bias = 0; - src_register_ext_mod.Scale2X = 0; - src_register_ext_mod.Absolute = 0; - src_register_ext_mod.Negate = 0; - src_register_ext_mod.Padding = 0; - src_register_ext_mod.Extended = 0; - - return src_register_ext_mod; -} - -unsigned -tgsi_compare_src_register_ext_mod( - struct tgsi_src_register_ext_mod a, - struct tgsi_src_register_ext_mod b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return compare32(&a, &b); -} - -struct tgsi_src_register_ext_mod -tgsi_build_src_register_ext_mod( - unsigned complement, - unsigned bias, - unsigned scale_2x, - unsigned absolute, - unsigned negate, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_src_register_ext_mod src_register_ext_mod; - - assert( complement <= 1 ); - assert( bias <= 1 ); - assert( scale_2x <= 1 ); - assert( absolute <= 1 ); - assert( negate <= 1 ); - - src_register_ext_mod = tgsi_default_src_register_ext_mod(); - src_register_ext_mod.Complement = complement; - src_register_ext_mod.Bias = bias; - src_register_ext_mod.Scale2X = scale_2x; - src_register_ext_mod.Absolute = absolute; - src_register_ext_mod.Negate = negate; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return src_register_ext_mod; -} - struct tgsi_dimension tgsi_default_dimension( void ) { @@ -1017,7 +914,6 @@ tgsi_default_dimension( void ) dimension.Dimension = 0; dimension.Padding = 0; dimension.Index = 0; - dimension.Extended = 0; return dimension; } @@ -1051,7 +947,6 @@ tgsi_default_dst_register( void ) dst_register.Dimension = 0; dst_register.Index = 0; dst_register.Padding = 0; - dst_register.Extended = 0; return dst_register; } @@ -1089,51 +984,7 @@ tgsi_default_full_dst_register( void ) full_dst_register.DstRegister = tgsi_default_dst_register(); full_dst_register.DstRegisterInd = tgsi_default_src_register(); - full_dst_register.DstRegisterExtModulate = - tgsi_default_dst_register_ext_modulate(); return full_dst_register; } -struct tgsi_dst_register_ext_modulate -tgsi_default_dst_register_ext_modulate( void ) -{ - struct tgsi_dst_register_ext_modulate dst_register_ext_modulate; - - dst_register_ext_modulate.Type = TGSI_DST_REGISTER_EXT_TYPE_MODULATE; - dst_register_ext_modulate.Modulate = TGSI_MODULATE_1X; - dst_register_ext_modulate.Padding = 0; - dst_register_ext_modulate.Extended = 0; - - return dst_register_ext_modulate; -} - -unsigned -tgsi_compare_dst_register_ext_modulate( - struct tgsi_dst_register_ext_modulate a, - struct tgsi_dst_register_ext_modulate b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return compare32(&a, &b); -} - -struct tgsi_dst_register_ext_modulate -tgsi_build_dst_register_ext_modulate( - unsigned modulate, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_dst_register_ext_modulate dst_register_ext_modulate; - - assert( modulate <= TGSI_MODULATE_EIGHTH ); - - dst_register_ext_modulate = tgsi_default_dst_register_ext_modulate(); - dst_register_ext_modulate.Modulate = modulate; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return dst_register_ext_modulate; -} diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h index 0fe5f229d3..0fbc8b1b0a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -171,31 +171,31 @@ tgsi_build_instruction_predicate(int index, struct tgsi_instruction *instruction, struct tgsi_header *header); -struct tgsi_instruction_ext_label -tgsi_default_instruction_ext_label( void ); +struct tgsi_instruction_label +tgsi_default_instruction_label( void ); unsigned -tgsi_compare_instruction_ext_label( - struct tgsi_instruction_ext_label a, - struct tgsi_instruction_ext_label b ); +tgsi_compare_instruction_label( + struct tgsi_instruction_label a, + struct tgsi_instruction_label b ); -struct tgsi_instruction_ext_label -tgsi_build_instruction_ext_label( +struct tgsi_instruction_label +tgsi_build_instruction_label( unsigned label, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, struct tgsi_header *header ); -struct tgsi_instruction_ext_texture -tgsi_default_instruction_ext_texture( void ); +struct tgsi_instruction_texture +tgsi_default_instruction_texture( void ); unsigned -tgsi_compare_instruction_ext_texture( - struct tgsi_instruction_ext_texture a, - struct tgsi_instruction_ext_texture b ); +tgsi_compare_instruction_texture( + struct tgsi_instruction_texture a, + struct tgsi_instruction_texture b ); -struct tgsi_instruction_ext_texture -tgsi_build_instruction_ext_texture( +struct tgsi_instruction_texture +tgsi_build_instruction_texture( unsigned texture, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, @@ -212,6 +212,7 @@ tgsi_build_src_register( unsigned swizzle_z, unsigned swizzle_w, unsigned negate, + unsigned absolute, unsigned indirect, unsigned dimension, int index, @@ -221,24 +222,6 @@ tgsi_build_src_register( struct tgsi_full_src_register tgsi_default_full_src_register( void ); -struct tgsi_src_register_ext_mod -tgsi_default_src_register_ext_mod( void ); - -unsigned -tgsi_compare_src_register_ext_mod( - struct tgsi_src_register_ext_mod a, - struct tgsi_src_register_ext_mod b ); - -struct tgsi_src_register_ext_mod -tgsi_build_src_register_ext_mod( - unsigned complement, - unsigned bias, - unsigned scale_2x, - unsigned absolute, - unsigned negate, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); struct tgsi_dimension tgsi_default_dimension( void ); @@ -265,20 +248,6 @@ tgsi_build_dst_register( struct tgsi_full_dst_register tgsi_default_full_dst_register( void ); -struct tgsi_dst_register_ext_modulate -tgsi_default_dst_register_ext_modulate( void ); - -unsigned -tgsi_compare_dst_register_ext_modulate( - struct tgsi_dst_register_ext_modulate a, - struct tgsi_dst_register_ext_modulate b ); - -struct tgsi_dst_register_ext_modulate -tgsi_build_dst_register_ext_modulate( - unsigned modulate, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); #if defined __cplusplus } diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index d16e64f9c5..7eb64167fb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -150,17 +150,6 @@ static const char *texture_names[] = }; -static const char *modulate_names[TGSI_MODULATE_COUNT] = -{ - "", - "_2X", - "_4X", - "_8X", - "_D2", - "_D4", - "_D8" -}; - static void _dump_register( struct dump_ctx *ctx, @@ -385,7 +374,6 @@ iter_instruction( dst->DstRegister.Index, dst->DstRegister.Index ); } - ENM( dst->DstRegisterExtModulate.Modulate, modulate_names ); _dump_writemask( ctx, dst->DstRegister.WriteMask ); first_reg = FALSE; @@ -398,18 +386,10 @@ iter_instruction( CHR( ',' ); CHR( ' ' ); - if (src->SrcRegisterExtMod.Negate) + if (src->SrcRegister.Negate) TXT( "-(" ); - if (src->SrcRegisterExtMod.Absolute) + if (src->SrcRegister.Absolute) CHR( '|' ); - if (src->SrcRegisterExtMod.Scale2X) - TXT( "2*(" ); - if (src->SrcRegisterExtMod.Bias) - CHR( '(' ); - if (src->SrcRegisterExtMod.Complement) - TXT( "1-(" ); - if (src->SrcRegister.Negate) - CHR( '-' ); if (src->SrcRegister.Indirect) { _dump_register_ind( @@ -439,23 +419,17 @@ iter_instruction( ENM( src->SrcRegister.SwizzleW, swizzle_names ); } - if (src->SrcRegisterExtMod.Complement) - CHR( ')' ); - if (src->SrcRegisterExtMod.Bias) - TXT( ")-.5" ); - if (src->SrcRegisterExtMod.Scale2X) - CHR( ')' ); - if (src->SrcRegisterExtMod.Absolute) + if (src->SrcRegister.Absolute) CHR( '|' ); - if (src->SrcRegisterExtMod.Negate) + if (src->SrcRegister.Negate) CHR( ')' ); first_reg = FALSE; } - if (inst->InstructionExtTexture.Texture != TGSI_TEXTURE_UNKNOWN) { + if (inst->Instruction.Texture) { TXT( ", " ); - ENM( inst->InstructionExtTexture.Texture, texture_names ); + ENM( inst->InstructionTexture.Texture, texture_names ); } switch (inst->Instruction.Opcode) { @@ -465,7 +439,7 @@ iter_instruction( case TGSI_OPCODE_ENDLOOP: case TGSI_OPCODE_CAL: TXT( " :" ); - UID( inst->InstructionExtLabel.Label ); + UID( inst->InstructionLabel.Label ); break; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 89740cee89..61d38e57f1 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1397,10 +1397,6 @@ fetch_source( case TGSI_UTIL_SIGN_KEEP: break; } - - if (reg->SrcRegisterExtMod.Complement) { - micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); - } } static void @@ -1679,7 +1675,7 @@ exec_tex(struct tgsi_exec_machine *mach, /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ - switch (inst->InstructionExtTexture.Texture) { + switch (inst->InstructionTexture.Texture) { case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: @@ -1777,7 +1773,7 @@ exec_txd(struct tgsi_exec_machine *mach, * XXX: This is fake TXD -- the derivatives are not taken into account, yet. */ - switch (inst->InstructionExtTexture.Texture) { + switch (inst->InstructionTexture.Texture) { case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: @@ -2744,7 +2740,7 @@ exec_instruction( mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; /* Finally, jump to the subroutine */ - *pc = inst->InstructionExtLabel.Label; + *pc = inst->InstructionLabel.Label; } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index 9ca2993452..853485b48b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -28,7 +28,6 @@ #include "util/u_debug.h" #include "pipe/p_shader_tokens.h" #include "tgsi_parse.h" -#include "tgsi_build.h" #include "util/u_memory.h" void @@ -59,7 +58,7 @@ tgsi_parse_init( ctx->FullHeader.Processor = *(struct tgsi_processor *) &tokens[2]; } else { - ctx->FullHeader.Processor = tgsi_default_processor(); + return TGSI_PARSE_ERROR; } ctx->Tokens = tokens; @@ -129,7 +128,7 @@ tgsi_parse_token( { struct tgsi_full_declaration *decl = &ctx->FullToken.FullDeclaration; - *decl = tgsi_default_full_declaration(); + memset(decl, 0, sizeof *decl); copy_token(&decl->Declaration, &token); next_token( ctx, &decl->DeclarationRange ); @@ -145,9 +144,8 @@ tgsi_parse_token( { struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate; - *imm = tgsi_default_full_immediate(); + memset(imm, 0, sizeof *imm); copy_token(&imm->Immediate, &token); - assert( !imm->Immediate.Extended ); switch (imm->Immediate.DataType) { case TGSI_IMM_FLOAT32: @@ -169,41 +167,25 @@ tgsi_parse_token( case TGSI_TOKEN_TYPE_INSTRUCTION: { struct tgsi_full_instruction *inst = &ctx->FullToken.FullInstruction; - unsigned extended; - *inst = tgsi_default_full_instruction(); + memset(inst, 0, sizeof *inst); copy_token(&inst->Instruction, &token); - extended = inst->Instruction.Extended; if (inst->Instruction.Predicate) { next_token(ctx, &inst->InstructionPredicate); } - while( extended ) { - struct tgsi_src_register_ext token; - - next_token( ctx, &token ); - - switch( token.Type ) { - case TGSI_INSTRUCTION_EXT_TYPE_LABEL: - copy_token(&inst->InstructionExtLabel, &token); - break; - - case TGSI_INSTRUCTION_EXT_TYPE_TEXTURE: - copy_token(&inst->InstructionExtTexture, &token); - break; - - default: - assert( 0 ); - } + if (inst->Instruction.Label) { + next_token( ctx, &inst->InstructionLabel); + } - extended = token.Extended; + if (inst->Instruction.Texture) { + next_token( ctx, &inst->InstructionTexture); } assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS ); for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { - unsigned extended; next_token( ctx, &inst->FullDstRegisters[i].DstRegister ); @@ -212,65 +194,23 @@ tgsi_parse_token( */ assert( !inst->FullDstRegisters[i].DstRegister.Dimension ); - extended = inst->FullDstRegisters[i].DstRegister.Extended; - - while( extended ) { - struct tgsi_src_register_ext token; - - next_token( ctx, &token ); - - switch( token.Type ) { - case TGSI_DST_REGISTER_EXT_TYPE_MODULATE: - copy_token(&inst->FullDstRegisters[i].DstRegisterExtModulate, - &token); - break; - - default: - assert( 0 ); - } - - extended = token.Extended; - } - if( inst->FullDstRegisters[i].DstRegister.Indirect ) { next_token( ctx, &inst->FullDstRegisters[i].DstRegisterInd ); /* * No support for indirect or multi-dimensional addressing. */ - assert( !inst->FullDstRegisters[i].DstRegisterInd.Indirect ); assert( !inst->FullDstRegisters[i].DstRegisterInd.Dimension ); - assert( !inst->FullDstRegisters[i].DstRegisterInd.Extended ); + assert( !inst->FullDstRegisters[i].DstRegisterInd.Indirect ); } } assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS ); for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { - unsigned extended; next_token( ctx, &inst->FullSrcRegisters[i].SrcRegister ); - extended = inst->FullSrcRegisters[i].SrcRegister.Extended; - - while( extended ) { - struct tgsi_src_register_ext token; - - next_token( ctx, &token ); - - switch( token.Type ) { - case TGSI_SRC_REGISTER_EXT_TYPE_MOD: - copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtMod, - &token); - break; - - default: - assert( 0 ); - } - - extended = token.Extended; - } - if( inst->FullSrcRegisters[i].SrcRegister.Indirect ) { next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterInd ); @@ -279,7 +219,6 @@ tgsi_parse_token( */ assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect ); assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension ); - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended ); } if( inst->FullSrcRegisters[i].SrcRegister.Dimension ) { @@ -289,7 +228,6 @@ tgsi_parse_token( * No support for multi-dimensional addressing. */ assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Dimension ); - assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Extended ); if( inst->FullSrcRegisters[i].SrcRegisterDim.Indirect ) { next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDimInd ); @@ -299,7 +237,6 @@ tgsi_parse_token( */ assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect ); assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension ); - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended ); } } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h index cb4772ade8..ba9578c6a5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -49,13 +49,11 @@ struct tgsi_full_dst_register { struct tgsi_dst_register DstRegister; struct tgsi_src_register DstRegisterInd; - struct tgsi_dst_register_ext_modulate DstRegisterExtModulate; }; struct tgsi_full_src_register { struct tgsi_src_register SrcRegister; - struct tgsi_src_register_ext_mod SrcRegisterExtMod; struct tgsi_src_register SrcRegisterInd; struct tgsi_dimension SrcRegisterDim; struct tgsi_src_register SrcRegisterDimInd; @@ -81,8 +79,8 @@ struct tgsi_full_instruction { struct tgsi_instruction Instruction; struct tgsi_instruction_predicate InstructionPredicate; - struct tgsi_instruction_ext_label InstructionExtLabel; - struct tgsi_instruction_ext_texture InstructionExtTexture; + struct tgsi_instruction_label InstructionLabel; + struct tgsi_instruction_texture InstructionTexture; struct tgsi_full_dst_register FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS]; struct tgsi_full_src_register FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS]; uint Flags; /**< user-defined usage */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index f9c16f1b6c..55595539ec 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -35,7 +35,6 @@ #include "util/u_math.h" -#include "tgsi/tgsi_build.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_scan.h" @@ -217,11 +216,7 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens) src->SrcRegister.Index != dst->DstRegister.Index || src->SrcRegister.Negate || - src->SrcRegisterExtMod.Negate || - src->SrcRegisterExtMod.Absolute || - src->SrcRegisterExtMod.Scale2X || - src->SrcRegisterExtMod.Bias || - src->SrcRegisterExtMod.Complement || + src->SrcRegister.Absolute || src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index a96fc94c7a..a6cc3a5398 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1464,7 +1464,8 @@ emit_tex( struct x86_function *func, unsigned count; unsigned i; - switch (inst->InstructionExtTexture.Texture) { + assert(inst->Instruction.Texture); + switch (inst->InstructionTexture.Texture) { case TGSI_TEXTURE_1D: count = 1; break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index d2b03ffb2f..7250f98cc9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -486,16 +486,6 @@ parse_register_dcl( return TRUE; } -static const char *modulate_names[TGSI_MODULATE_COUNT] = -{ - "_1X", - "_2X", - "_4X", - "_8X", - "_D2", - "_D4", - "_D8" -}; static boolean parse_dst_operand( @@ -512,19 +502,6 @@ parse_dst_operand( cur = ctx->cur; eat_opt_white( &cur ); - if (*cur == '_') { - uint i; - - for (i = 0; i < TGSI_MODULATE_COUNT; i++) { - if (str_match_no_case( &cur, modulate_names[i] )) { - if (!is_digit_alpha_underscore( cur )) { - dst->DstRegisterExtModulate.Modulate = i; - ctx->cur = cur; - break; - } - } - } - } if (!parse_opt_writemask( ctx, &writemask )) return FALSE; @@ -577,92 +554,24 @@ parse_src_operand( struct translate_ctx *ctx, struct tgsi_full_src_register *src ) { - const char *cur; - float value; uint file; int index; uint ind_file; int ind_index; uint ind_comp; uint swizzle[4]; - boolean parsed_ext_negate_paren = FALSE; boolean parsed_swizzle; - if (*ctx->cur == '-') { - cur = ctx->cur; - cur++; - eat_opt_white( &cur ); - if (*cur == '(') { - cur++; - src->SrcRegisterExtMod.Negate = 1; - eat_opt_white( &cur ); - ctx->cur = cur; - parsed_ext_negate_paren = TRUE; - } - else if (*cur == '|') { - cur++; - src->SrcRegisterExtMod.Negate = 1; - src->SrcRegisterExtMod.Absolute = 1; - eat_opt_white(&cur); - ctx->cur = cur; - } - } - else if (*ctx->cur == '|') { - ctx->cur++; - eat_opt_white( &ctx->cur ); - src->SrcRegisterExtMod.Absolute = 1; - } - if (*ctx->cur == '-') { ctx->cur++; eat_opt_white( &ctx->cur ); src->SrcRegister.Negate = 1; } - - cur = ctx->cur; - if (parse_float( &cur, &value )) { - if (value == 2.0f) { - eat_opt_white( &cur ); - if (*cur != '*') { - report_error( ctx, "Expected `*'" ); - return FALSE; - } - cur++; - if (*cur != '(') { - report_error( ctx, "Expected `('" ); - return FALSE; - } - cur++; - src->SrcRegisterExtMod.Scale2X = 1; - eat_opt_white( &cur ); - ctx->cur = cur; - } - } - - if (*ctx->cur == '(') { + + if (*ctx->cur == '|') { ctx->cur++; eat_opt_white( &ctx->cur ); - src->SrcRegisterExtMod.Bias = 1; - } - - cur = ctx->cur; - if (parse_float( &cur, &value )) { - if (value == 1.0f) { - eat_opt_white( &cur ); - if (*cur != '-') { - report_error( ctx, "Expected `-'" ); - return FALSE; - } - cur++; - if (*cur != '(') { - report_error( ctx, "Expected `('" ); - return FALSE; - } - cur++; - src->SrcRegisterExtMod.Complement = 1; - eat_opt_white( &cur ); - ctx->cur = cur; - } + src->SrcRegister.Absolute = 1; } if (!parse_register_src(ctx, &file, &index, &ind_file, &ind_index, &ind_comp)) @@ -690,49 +599,7 @@ parse_src_operand( } } - if (src->SrcRegisterExtMod.Complement) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ')') { - report_error( ctx, "Expected `)'" ); - return FALSE; - } - ctx->cur++; - } - - if (src->SrcRegisterExtMod.Bias) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ')') { - report_error( ctx, "Expected `)'" ); - return FALSE; - } - ctx->cur++; - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '-') { - report_error( ctx, "Expected `-'" ); - return FALSE; - } - ctx->cur++; - eat_opt_white( &ctx->cur ); - if (!parse_float( &ctx->cur, &value )) { - report_error( ctx, "Expected literal floating point" ); - return FALSE; - } - if (value != 0.5f) { - report_error( ctx, "Expected 0.5" ); - return FALSE; - } - } - - if (src->SrcRegisterExtMod.Scale2X) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ')') { - report_error( ctx, "Expected `)'" ); - return FALSE; - } - ctx->cur++; - } - - if (src->SrcRegisterExtMod.Absolute) { + if (src->SrcRegister.Absolute) { eat_opt_white( &ctx->cur ); if (*ctx->cur != '|') { report_error( ctx, "Expected `|'" ); @@ -741,14 +608,6 @@ parse_src_operand( ctx->cur++; } - if (parsed_ext_negate_paren) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ')') { - report_error( ctx, "Expected `)'" ); - return FALSE; - } - ctx->cur++; - } return TRUE; } @@ -853,7 +712,8 @@ parse_instruction( for (j = 0; j < TGSI_TEXTURE_COUNT; j++) { if (str_match_no_case( &ctx->cur, texture_names[j] )) { if (!is_digit_alpha_underscore( ctx->cur )) { - inst.InstructionExtTexture.Texture = j; + inst.Instruction.Texture = 1; + inst.InstructionTexture.Texture = j; break; } } @@ -879,7 +739,8 @@ parse_instruction( report_error( ctx, "Expected a label" ); return FALSE; } - inst.InstructionExtLabel.Label = target; + inst.Instruction.Label = 1; + inst.InstructionLabel.Label = target; } advance = tgsi_build_full_instruction( diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 5526a5d034..de4bc6edb0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -48,13 +48,11 @@ union tgsi_any_token { union tgsi_immediate_data imm_data; struct tgsi_instruction insn; struct tgsi_instruction_predicate insn_predicate; - struct tgsi_instruction_ext_label insn_ext_label; - struct tgsi_instruction_ext_texture insn_ext_texture; + struct tgsi_instruction_label insn_label; + struct tgsi_instruction_texture insn_texture; struct tgsi_src_register src; - struct tgsi_src_register_ext_mod src_ext_mod; struct tgsi_dimension dim; struct tgsi_dst_register dst; - struct tgsi_dst_register_ext_modulate dst_ext_mod; unsigned value; }; @@ -575,17 +573,8 @@ ureg_emit_src( struct ureg_program *ureg, out[n].src.SwizzleW = src.SwizzleW; out[n].src.Index = src.Index; out[n].src.Negate = src.Negate; + out[0].src.Absolute = src.Absolute; n++; - - if (src.Absolute) { - out[0].src.Extended = 1; - out[0].src.Negate = 0; - out[n].value = 0; - out[n].src_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD; - out[n].src_ext_mod.Absolute = 1; - out[n].src_ext_mod.Negate = src.Negate; - n++; - } if (src.Indirect) { out[0].src.Indirect = 1; @@ -712,13 +701,11 @@ ureg_emit_label(struct ureg_program *ureg, return; out = get_tokens( ureg, DOMAIN_INSN, 1 ); - insn = retrieve_token( ureg, DOMAIN_INSN, extended_token ); + out[0].value = 0; - insn->token.Extended = 1; + insn = retrieve_token( ureg, DOMAIN_INSN, extended_token ); + insn->insn.Label = 1; - out[0].value = 0; - out[0].insn_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL; - *label_token = ureg->domain[DOMAIN_INSN].count - 1; } @@ -741,8 +728,7 @@ ureg_fixup_label(struct ureg_program *ureg, { union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, label_token ); - assert(out->insn_ext_label.Type == TGSI_INSTRUCTION_EXT_TYPE_LABEL); - out->insn_ext_label.Label = instruction_number; + out->insn_label.Label = instruction_number; } @@ -756,11 +742,10 @@ ureg_emit_texture(struct ureg_program *ureg, out = get_tokens( ureg, DOMAIN_INSN, 1 ); insn = retrieve_token( ureg, DOMAIN_INSN, extended_token ); - insn->token.Extended = 1; + insn->insn.Texture = 1; out[0].value = 0; - out[0].insn_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE; - out[0].insn_ext_texture.Texture = target; + out[0].insn_texture.Texture = target; } @@ -961,7 +946,6 @@ static void emit_immediate( struct ureg_program *ureg, out[0].imm.NrTokens = 5; out[0].imm.DataType = TGSI_IMM_FLOAT32; out[0].imm.Padding = 0; - out[0].imm.Extended = 0; out[1].imm_data.Float = v[0]; out[2].imm_data.Float = v[1]; diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index 4dee1be9e8..3544011b47 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -111,10 +111,10 @@ tgsi_util_get_full_src_register_sign_mode( { unsigned sign_mode; - if( reg->SrcRegisterExtMod.Absolute ) { + if( reg->SrcRegister.Absolute ) { /* Consider only the post-abs negation. */ - if( reg->SrcRegisterExtMod.Negate ) { + if( reg->SrcRegister.Negate ) { sign_mode = TGSI_UTIL_SIGN_SET; } else { @@ -122,17 +122,7 @@ tgsi_util_get_full_src_register_sign_mode( } } else { - /* Accumulate the three negations. */ - - unsigned negate; - - negate = reg->SrcRegister.Negate; - - if( reg->SrcRegisterExtMod.Negate ) { - negate = !negate; - } - - if( negate ) { + if( reg->SrcRegister.Negate ) { sign_mode = TGSI_UTIL_SIGN_TOGGLE; } else { @@ -152,26 +142,22 @@ tgsi_util_set_full_src_register_sign_mode( { case TGSI_UTIL_SIGN_CLEAR: reg->SrcRegister.Negate = 0; - reg->SrcRegisterExtMod.Absolute = 1; - reg->SrcRegisterExtMod.Negate = 0; + reg->SrcRegister.Absolute = 1; break; case TGSI_UTIL_SIGN_SET: - reg->SrcRegister.Negate = 0; - reg->SrcRegisterExtMod.Absolute = 1; - reg->SrcRegisterExtMod.Negate = 1; + reg->SrcRegister.Absolute = 1; + reg->SrcRegister.Negate = 1; break; case TGSI_UTIL_SIGN_TOGGLE: reg->SrcRegister.Negate = 1; - reg->SrcRegisterExtMod.Absolute = 0; - reg->SrcRegisterExtMod.Negate = 0; + reg->SrcRegister.Absolute = 0; break; case TGSI_UTIL_SIGN_KEEP: reg->SrcRegister.Negate = 0; - reg->SrcRegisterExtMod.Absolute = 0; - reg->SrcRegisterExtMod.Negate = 0; + reg->SrcRegister.Absolute = 0; break; default: diff --git a/src/gallium/auxiliary/vl/vl_shader_build.c b/src/gallium/auxiliary/vl/vl_shader_build.c index faa20a903c..9637cbed8a 100644 --- a/src/gallium/auxiliary/vl/vl_shader_build.c +++ b/src/gallium/auxiliary/vl/vl_shader_build.c @@ -191,7 +191,8 @@ struct tgsi_full_instruction vl_tex inst.FullDstRegisters[0].DstRegister.File = dst_file; inst.FullDstRegisters[0].DstRegister.Index = dst_index; inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = tex; + inst.Instruction.Texture = 1; + inst.InstructionTexture.Texture = tex; inst.FullSrcRegisters[0].SrcRegister.File = src1_file; inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; inst.FullSrcRegisters[1].SrcRegister.File = src2_file; diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 379d47e79a..a96ba8f192 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -339,7 +339,7 @@ emit_tex(struct i915_fp_compile *p, const struct tgsi_full_instruction *inst, uint opcode) { - uint texture = inst->InstructionExtTexture.Texture; + uint texture = inst->InstructionTexture.Texture; uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; uint tex = translate_tex_src_target( p, texture ); uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index ea409b7e16..3ef6cb1074 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -227,14 +227,14 @@ translate_src_register( const struct svga_shader_emitter *emit, /* src.mod isn't a bitfield, unfortunately: * See tgsi_util_get_full_src_register_sign_mode for implementation details. */ - if (reg->SrcRegisterExtMod.Absolute) { - if (reg->SrcRegisterExtMod.Negate) + if (reg->SrcRegister.Absolute) { + if (reg->SrcRegister.Negate) src.base.srcMod = SVGA3DSRCMOD_ABSNEG; else src.base.srcMod = SVGA3DSRCMOD_ABS; } else { - if (reg->SrcRegister.Negate != reg->SrcRegisterExtMod.Negate) + if (reg->SrcRegister.Negate) src.base.srcMod = SVGA3DSRCMOD_NEG; else src.base.srcMod = SVGA3DSRCMOD_NONE; @@ -986,8 +986,8 @@ static boolean emit_kil(struct svga_shader_emitter *emit, inst = inst_token( SVGA3DOP_TEXKILL ); src0 = translate_src_register( emit, reg ); - if (reg->SrcRegisterExtMod.Absolute || - reg->SrcRegister.Negate != reg->SrcRegisterExtMod.Negate || + if (reg->SrcRegister.Absolute || + reg->SrcRegister.Negate || reg->SrcRegister.Indirect || reg->SrcRegister.SwizzleX != 0 || reg->SrcRegister.SwizzleY != 1 || @@ -1953,7 +1953,7 @@ static boolean emit_bgnsub( struct svga_shader_emitter *emit, static boolean emit_call( struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn ) { - unsigned position = insn->InstructionExtLabel.Label; + unsigned position = insn->InstructionLabel.Label; unsigned i; for (i = 0; i < emit->nr_labels; i++) { diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index c4c28522c8..ac999e0c18 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -66,8 +66,7 @@ struct tgsi_token { unsigned Type : 4; /**< TGSI_TOKEN_TYPE_x */ unsigned NrTokens : 8; /**< UINT */ - unsigned Padding : 19; - unsigned Extended : 1; /**< BOOL */ + unsigned Padding : 20; }; enum tgsi_file_type { @@ -117,8 +116,7 @@ struct tgsi_declaration unsigned Semantic : 1; /**< BOOL, any semantic info? */ unsigned Centroid : 1; /**< centroid sampling? */ unsigned Invariant : 1; /**< invariant optimization? */ - unsigned Padding : 4; - unsigned Extended : 1; /**< BOOL */ + unsigned Padding : 5; }; struct tgsi_declaration_range @@ -151,8 +149,7 @@ struct tgsi_immediate unsigned Type : 4; /**< TGSI_TOKEN_TYPE_IMMEDIATE */ unsigned NrTokens : 8; /**< UINT */ unsigned DataType : 4; /**< one of TGSI_IMM_x */ - unsigned Padding : 15; - unsigned Extended : 1; /**< BOOL */ + unsigned Padding : 16; }; union tgsi_immediate_data @@ -295,8 +292,6 @@ union tgsi_immediate_data * * If Predicate is TRUE, tgsi_instruction_predicate token immediately follows. * - * If Extended is TRUE, it is now executed. - * * Saturate controls how are final results in destination registers modified. */ @@ -309,12 +304,15 @@ struct tgsi_instruction unsigned NumDstRegs : 2; /* UINT */ unsigned NumSrcRegs : 4; /* UINT */ unsigned Predicate : 1; /* BOOL */ - unsigned Padding : 2; - unsigned Extended : 1; /* BOOL */ + unsigned Label : 1; + unsigned Texture : 1; + unsigned Padding : 1; }; /* - * If tgsi_instruction::Extended is TRUE, tgsi_instruction_ext follows. + * If tgsi_instruction::Label is TRUE, tgsi_instruction_label follows. + * + * If tgsi_instruction::Texture is TRUE, tgsi_instruction_texture follows. * * Then, tgsi_instruction::NumDstRegs of tgsi_dst_register follow. * @@ -324,38 +322,15 @@ struct tgsi_instruction * instruction, including the instruction word. */ -#define TGSI_INSTRUCTION_EXT_TYPE_LABEL 1 -#define TGSI_INSTRUCTION_EXT_TYPE_TEXTURE 2 - -struct tgsi_instruction_ext -{ - unsigned Type : 4; /* TGSI_INSTRUCTION_EXT_TYPE_ */ - unsigned Padding : 27; - unsigned Extended : 1; /* BOOL */ -}; - -/* - * If tgsi_instruction_ext::Type is TGSI_INSTRUCTION_EXT_TYPE_LABEL, it - * should be cast to tgsi_instruction_ext_label. - * - * If tgsi_instruction_ext::Type is TGSI_INSTRUCTION_EXT_TYPE_TEXTURE, it - * should be cast to tgsi_instruction_ext_texture. - * - * If tgsi_instruction_ext::Extended is TRUE, another tgsi_instruction_ext - * follows. - */ - #define TGSI_SWIZZLE_X 0 #define TGSI_SWIZZLE_Y 1 #define TGSI_SWIZZLE_Z 2 #define TGSI_SWIZZLE_W 3 -struct tgsi_instruction_ext_label +struct tgsi_instruction_label { - unsigned Type : 4; /* TGSI_INSTRUCTION_EXT_TYPE_LABEL */ unsigned Label : 24; /* UINT */ - unsigned Padding : 3; - unsigned Extended : 1; /* BOOL */ + unsigned Padding : 8; }; #define TGSI_TEXTURE_UNKNOWN 0 @@ -369,12 +344,10 @@ struct tgsi_instruction_ext_label #define TGSI_TEXTURE_SHADOWRECT 8 #define TGSI_TEXTURE_COUNT 9 -struct tgsi_instruction_ext_texture +struct tgsi_instruction_texture { - unsigned Type : 4; /* TGSI_INSTRUCTION_EXT_TYPE_TEXTURE */ unsigned Texture : 8; /* TGSI_TEXTURE_ */ - unsigned Padding : 19; - unsigned Extended : 1; /* BOOL */ + unsigned Padding : 24; }; /* @@ -406,26 +379,24 @@ struct tgsi_instruction_predicate * The fetched register components are swizzled according to SwizzleX, SwizzleY, * SwizzleZ and SwizzleW. * - * If Extended is TRUE, any further modifications to the source register are - * made to this temporary storage. */ struct tgsi_src_register { unsigned File : 4; /* TGSI_FILE_ */ + unsigned Indirect : 1; /* BOOL */ + unsigned Dimension : 1; /* BOOL */ + int Index : 16; /* SINT */ unsigned SwizzleX : 2; /* TGSI_SWIZZLE_ */ unsigned SwizzleY : 2; /* TGSI_SWIZZLE_ */ unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_ */ unsigned SwizzleW : 2; /* TGSI_SWIZZLE_ */ - unsigned Negate : 1; /* BOOL */ - unsigned Indirect : 1; /* BOOL */ - unsigned Dimension : 1; /* BOOL */ - int Index : 16; /* SINT */ - unsigned Extended : 1; /* BOOL */ + unsigned Absolute : 1; /* BOOL */ + unsigned Negate : 1; /* BOOL */ }; /** - * If tgsi_src_register::Extended is TRUE, tgsi_src_register_ext follows. + * If tgsi_src_register::Modifier is TRUE, tgsi_src_register_modifier follows. * * Then, if tgsi_src_register::Indirect is TRUE, another tgsi_src_register * follows. @@ -433,58 +404,13 @@ struct tgsi_src_register * Then, if tgsi_src_register::Dimension is TRUE, tgsi_dimension follows. */ -#define TGSI_SRC_REGISTER_EXT_TYPE_MOD 1 - -struct tgsi_src_register_ext -{ - unsigned Type : 4; /* TGSI_SRC_REGISTER_EXT_TYPE_ */ - unsigned Padding : 27; - unsigned Extended : 1; /* BOOL */ -}; - -/** - * If tgsi_src_register_ext::Type is TGSI_SRC_REGISTER_EXT_TYPE_MOD, - * it should be cast to tgsi_src_register_ext_mod. - * - * If tgsi_dst_register_ext::Extended is TRUE, another tgsi_dst_register_ext - * follows. - */ - - -/** - * Extra src register modifiers - * - * If Complement is TRUE, the source register is modified by subtracting it - * from 1.0. - * - * If Bias is TRUE, the source register is modified by subtracting 0.5 from it. - * - * If Scale2X is TRUE, the source register is modified by multiplying it by 2.0. - * - * If Absolute is TRUE, the source register is modified by removing the sign. - * - * If Negate is TRUE, the source register is modified by negating it. - */ - -struct tgsi_src_register_ext_mod -{ - unsigned Type : 4; /* TGSI_SRC_REGISTER_EXT_TYPE_MOD */ - unsigned Complement : 1; /* BOOL */ - unsigned Bias : 1; /* BOOL */ - unsigned Scale2X : 1; /* BOOL */ - unsigned Absolute : 1; /* BOOL */ - unsigned Negate : 1; /* BOOL */ - unsigned Padding : 22; - unsigned Extended : 1; /* BOOL */ -}; struct tgsi_dimension { unsigned Indirect : 1; /* BOOL */ unsigned Dimension : 1; /* BOOL */ - unsigned Padding : 13; + unsigned Padding : 14; int Index : 16; /* SINT */ - unsigned Extended : 1; /* BOOL */ }; struct tgsi_dst_register @@ -494,51 +420,9 @@ struct tgsi_dst_register unsigned Indirect : 1; /* BOOL */ unsigned Dimension : 1; /* BOOL */ int Index : 16; /* SINT */ - unsigned Padding : 5; - unsigned Extended : 1; /* BOOL */ -}; - -/* - * If tgsi_dst_register::Extended is TRUE, tgsi_dst_register_ext follows. - * - * Then, if tgsi_dst_register::Indirect is TRUE, tgsi_src_register follows. - */ - -#define TGSI_DST_REGISTER_EXT_TYPE_MODULATE 1 - -struct tgsi_dst_register_ext -{ - unsigned Type : 4; /* TGSI_DST_REGISTER_EXT_TYPE_ */ - unsigned Padding : 27; - unsigned Extended : 1; /* BOOL */ + unsigned Padding : 6; }; -/** - * Extra destination register modifiers - * - * If tgsi_dst_register_ext::Type is TGSI_DST_REGISTER_EXT_TYPE_MODULATE, - * it should be cast to tgsi_dst_register_ext_modulate. - * - * If tgsi_dst_register_ext::Extended is TRUE, another tgsi_dst_register_ext - * follows. - */ - -#define TGSI_MODULATE_1X 0 -#define TGSI_MODULATE_2X 1 -#define TGSI_MODULATE_4X 2 -#define TGSI_MODULATE_8X 3 -#define TGSI_MODULATE_HALF 4 -#define TGSI_MODULATE_QUARTER 5 -#define TGSI_MODULATE_EIGHTH 6 -#define TGSI_MODULATE_COUNT 7 - -struct tgsi_dst_register_ext_modulate -{ - unsigned Type : 4; /* TGSI_DST_REGISTER_EXT_TYPE_MODULATE */ - unsigned Modulate : 4; /* TGSI_MODULATE_ */ - unsigned Padding : 23; - unsigned Extended : 1; /* BOOL */ -}; #ifdef __cplusplus } -- cgit v1.2.3 From 56ee132f9671f70ff2b3ee04659beac0dfc6126d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 24 Nov 2009 14:09:24 +0000 Subject: gallium: try and update r300 and nv drivers for tgsi changes It would be nice if these drivers built under the linux-debug header so that these types of interface changes can be minimally propogated into those drivers by people without the hardware. They don't have to generate a working driver -- though a command-dumping winsys would be an excellent for regression checking. --- src/gallium/drivers/i915/i915_fpc_translate.c | 5 ++--- src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 2 +- src/gallium/drivers/nv20/nv20_vertprog.c | 2 +- src/gallium/drivers/nv30/nv30_fragprog.c | 2 +- src/gallium/drivers/nv30/nv30_vertprog.c | 2 +- src/gallium/drivers/nv40/nv40_fragprog.c | 2 +- src/gallium/drivers/nv40/nv40_vertprog.c | 2 +- src/gallium/drivers/nv50/nv50_program.c | 10 +++++----- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 7 ++++--- 9 files changed, 17 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index a96ba8f192..f2554998a9 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -229,12 +229,11 @@ src_vector(struct i915_fp_compile *p, src = negate(src, n, n, n, n); } - /* no abs() or post-abs negation */ + /* no abs() */ #if 0 /* XXX assertions disabled to allow arbfplight.c to run */ /* XXX enable these assertions, or fix things */ - assert(!source->SrcRegisterExtMod.Absolute); - assert(!source->SrcRegisterExtMod.Negate); + assert(!source->SrcRegister.Absolute); #endif return src; } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index 64027de6aa..b2234ef679 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -326,7 +326,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, unsigned num_coords; unsigned i; - switch (inst->InstructionExtTexture.Texture) { + switch (inst->InstructionTexture.Texture) { case TGSI_TEXTURE_1D: num_coords = 1; break; diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c index 388245ecb0..48df356faf 100644 --- a/src/gallium/drivers/nv20/nv20_vertprog.c +++ b/src/gallium/drivers/nv20/nv20_vertprog.c @@ -273,7 +273,7 @@ tgsi_src(struct nv20_vpc *vpc, const struct tgsi_full_src_register *fsrc) { break; } - src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.abs = fsrc->SrcRegister.Absolute; src.negate = fsrc->SrcRegister.Negate; src.swz[0] = fsrc->SrcRegister.SwizzleX; src.swz[1] = fsrc->SrcRegister.SwizzleY; diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 0ce702d6f8..eb978b6838 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -268,7 +268,7 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) break; } - src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.abs = fsrc->SrcRegister.Absolute; src.negate = fsrc->SrcRegister.Negate; src.swz[0] = fsrc->SrcRegister.SwizzleX; src.swz[1] = fsrc->SrcRegister.SwizzleY; diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 14a5c0260d..b04fb229bc 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -273,7 +273,7 @@ tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) { break; } - src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.abs = fsrc->SrcRegister.Absolute; src.negate = fsrc->SrcRegister.Negate; src.swz[0] = fsrc->SrcRegister.SwizzleX; src.swz[1] = fsrc->SrcRegister.SwizzleY; diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 99277506fc..dbbb736670 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -279,7 +279,7 @@ tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) break; } - src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.abs = fsrc->SrcRegister.Absolute; src.negate = fsrc->SrcRegister.Negate; src.swz[0] = fsrc->SrcRegister.SwizzleX; src.swz[1] = fsrc->SrcRegister.SwizzleY; diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 31dae2457f..df9cb227a3 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -313,7 +313,7 @@ tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { break; } - src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.abs = fsrc->SrcRegister.Absolute; src.negate = fsrc->SrcRegister.Negate; src.swz[0] = fsrc->SrcRegister.SwizzleX; src.swz[1] = fsrc->SrcRegister.SwizzleY; diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index bf50982dd1..e40e37d07c 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1575,10 +1575,10 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c) case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXP: { - const struct tgsi_instruction_ext_texture *tex; + const struct tgsi_instruction_texture *tex; - assert(insn->Instruction.Extended); - tex = &insn->InstructionExtTexture; + assert(insn->Instruction.Texture); + tex = &insn->InstructionTexture; mask = 0x7; if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) @@ -2181,11 +2181,11 @@ nv50_program_tx_insn(struct nv50_pc *pc, break; case TGSI_OPCODE_TEX: emit_tex(pc, dst, mask, src[0], unit, - inst->InstructionExtTexture.Texture, FALSE); + inst->InstructionTexture.Texture, FALSE); break; case TGSI_OPCODE_TXP: emit_tex(pc, dst, mask, src[0], unit, - inst->InstructionExtTexture.Texture, TRUE); + inst->InstructionTexture.Texture, TRUE); break; case TGSI_OPCODE_TRUNC: for (c = 0; c < 4; c++) { diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 589f1984ee..25a634e5a2 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -208,11 +208,11 @@ static void transform_srcreg( dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 1) << 3; dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 2) << 6; dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 3) << 9; - dst->Abs = src->SrcRegisterExtMod.Absolute; + dst->Abs = src->SrcRegister.Absolute; dst->Negate = src->SrcRegister.Negate ? RC_MASK_XYZW : 0; } -static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_ext_texture src) +static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_texture src) { switch(src.Texture) { case TGSI_TEXTURE_1D: @@ -268,7 +268,8 @@ static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_inst } /* Texturing. */ - transform_texture(dst, src->InstructionExtTexture); + if (src->Instruction.Texture) + transform_texture(dst, src->InstructionTexture); } static void handle_immediate(struct tgsi_to_rc * ttr, struct tgsi_full_immediate * imm) -- cgit v1.2.3 From 52df532b02594e624bddd58ee60fd25075f8ec42 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 10 Nov 2009 16:55:44 -0800 Subject: llvmpipe: Fix typo in comparison operator. --- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 98ec1cb1b9..d438c0e63d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -90,7 +90,7 @@ lp_depth_type(const struct util_format_description *format_desc, if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { type.floating = TRUE; - assert(swizzle = 0); + assert(swizzle == 0); assert(format_desc->channel[swizzle].size == format_desc->block.bits); } else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { -- cgit v1.2.3 From b375526b50271317868a20484c8a1f36707e6005 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 10 Nov 2009 17:51:06 -0800 Subject: llvmpipe: Be more conservative with the supported formats. We'll likely support much more formats, but doing this allows to run more testsuites without immediately hit assertion failures. --- src/gallium/drivers/llvmpipe/lp_screen.c | 58 ++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 0518927458..0fb133486a 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -27,6 +27,7 @@ #include "util/u_memory.h" +#include "util/u_format.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" @@ -131,17 +132,17 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); struct llvmpipe_winsys *winsys = screen->winsys; + const struct util_format_description *format_desc; + + format_desc = util_format_description(format); + if(!format_desc) + return FALSE; assert(target == PIPE_TEXTURE_1D || target == PIPE_TEXTURE_2D || target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE); - if(format == PIPE_FORMAT_Z16_UNORM) - return FALSE; - if(format == PIPE_FORMAT_S8_UNORM) - return FALSE; - switch(format) { case PIPE_FORMAT_DXT1_RGB: case PIPE_FORMAT_DXT1_RGBA: @@ -152,8 +153,51 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, break; } - if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) - return winsys->is_displaytarget_format_supported(winsys, format); + if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + if(format_desc->block.width != 1 || + format_desc->block.height != 1) + return FALSE; + + if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR && + format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH && + format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) + return FALSE; + + if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB && + format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) + return FALSE; + } + + if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { + if(!winsys->is_displaytarget_format_supported(winsys, format)) + return FALSE; + } + + if(tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { + if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + return FALSE; + + /* FIXME: Temporary restriction. See lp_state_fs.c. */ + if(format_desc->block.bits != 32) + return FALSE; + } + + /* FIXME: Temporary restrictions. See lp_bld_sample_soa.c */ + if(tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) { + if(format_desc->block.width != 1 || + format_desc->block.height != 1) + return FALSE; + + if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR && + format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH && + format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) + return FALSE; + + if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB && + format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB && + format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + return FALSE; + } return TRUE; } -- cgit v1.2.3 From 2282fb7710d386bd10ccdd18f030069fae0a5d55 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 10 Nov 2009 17:52:53 -0800 Subject: llvmpipe: Use the generic conversion routine for depths. This allows for z32f depth format to work correctly. --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 2e9aa9fffe..2bde24430e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -148,6 +148,20 @@ generate_depth(LLVMBuilderRef builder, format_desc = util_format_description(key->zsbuf_format); assert(format_desc); + /* + * Depths are expected to be between 0 and 1, even if they are stored in + * floats. Setting these bits here will ensure that the lp_build_conv() call + * below won't try to unnecessarily clamp the incoming values. + */ + if(src_type.floating) { + src_type.sign = FALSE; + src_type.norm = TRUE; + } + else { + assert(!src_type.sign); + assert(src_type.norm); + } + /* Pick the depth type. */ dst_type = lp_depth_type(format_desc, src_type.width*src_type.length); @@ -155,14 +169,11 @@ generate_depth(LLVMBuilderRef builder, assert(dst_type.width == src_type.width); assert(dst_type.length == src_type.length); -#if 1 - src = lp_build_clamped_float_to_unsigned_norm(builder, - src_type, - dst_type.width, - src); -#else lp_build_conv(builder, src_type, dst_type, &src, 1, &src, 1); -#endif + + dst_ptr = LLVMBuildBitCast(builder, + dst_ptr, + LLVMPointerType(lp_build_vec_type(dst_type), 0), ""); lp_build_depth_test(builder, &key->depth, -- cgit v1.2.3 From 066991c8d147db94b9661361bb191919b962fc4e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 15 Nov 2009 06:46:48 -0800 Subject: llvmpipe: Fix memory leak. --- src/gallium/drivers/llvmpipe/lp_state_vs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c index 15c3029614..8a761648e7 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c @@ -92,5 +92,6 @@ llvmpipe_delete_vs_state(struct pipe_context *pipe, void *vs) (struct lp_vertex_shader *)vs; draw_delete_vertex_shader(llvmpipe->draw, state->draw_data); + FREE( (void *)state->shader.tokens ); FREE( state ); } -- cgit v1.2.3 From 4ae3e88dc9856f2f32c37dd04a3321765ed61e07 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 23 Nov 2009 11:21:11 +0000 Subject: llvmpipe: Use assert instead of abort. Only verify functions on debug builds. --- src/gallium/drivers/llvmpipe/lp_jit.c | 2 +- src/gallium/drivers/llvmpipe/lp_state_fs.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 13535dd638..c601c79480 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -167,7 +167,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) { _debug_printf("%s\n", error); LLVMDisposeMessage(error); - abort(); + assert(0); } screen->target = LLVMGetExecutionEngineTargetData(screen->engine); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 2bde24430e..ee0f69b2af 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -622,10 +622,12 @@ generate_fragment(struct llvmpipe_context *lp, * Translate the LLVM IR into machine code. */ +#ifdef DEBUG if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { LLVMDumpValue(variant->function); - abort(); + assert(0); } +#endif LLVMRunFunctionPassManager(screen->pass, variant->function); -- cgit v1.2.3 From a71f8365049fb81f63245089b5438dcad6e83b19 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 24 Nov 2009 14:37:45 +0000 Subject: svga: Use consistent file names for dumping facilities. --- src/gallium/drivers/svga/Makefile | 4 +- src/gallium/drivers/svga/SConscript | 4 +- src/gallium/drivers/svga/svga_tgsi.c | 2 +- src/gallium/drivers/svga/svgadump/st_shader.h | 214 ------- src/gallium/drivers/svga/svgadump/st_shader_dump.c | 649 --------------------- src/gallium/drivers/svga/svgadump/st_shader_dump.h | 42 -- src/gallium/drivers/svga/svgadump/st_shader_op.c | 168 ------ src/gallium/drivers/svga/svgadump/st_shader_op.h | 46 -- src/gallium/drivers/svga/svgadump/svga_dump.c | 2 +- src/gallium/drivers/svga/svgadump/svga_dump.py | 2 +- src/gallium/drivers/svga/svgadump/svga_shader.h | 214 +++++++ .../drivers/svga/svgadump/svga_shader_dump.c | 649 +++++++++++++++++++++ .../drivers/svga/svgadump/svga_shader_dump.h | 42 ++ src/gallium/drivers/svga/svgadump/svga_shader_op.c | 168 ++++++ src/gallium/drivers/svga/svgadump/svga_shader_op.h | 46 ++ 15 files changed, 1126 insertions(+), 1126 deletions(-) delete mode 100644 src/gallium/drivers/svga/svgadump/st_shader.h delete mode 100644 src/gallium/drivers/svga/svgadump/st_shader_dump.c delete mode 100644 src/gallium/drivers/svga/svgadump/st_shader_dump.h delete mode 100644 src/gallium/drivers/svga/svgadump/st_shader_op.c delete mode 100644 src/gallium/drivers/svga/svgadump/st_shader_op.h create mode 100644 src/gallium/drivers/svga/svgadump/svga_shader.h create mode 100644 src/gallium/drivers/svga/svgadump/svga_shader_dump.c create mode 100644 src/gallium/drivers/svga/svgadump/svga_shader_dump.h create mode 100644 src/gallium/drivers/svga/svgadump/svga_shader_op.c create mode 100644 src/gallium/drivers/svga/svgadump/svga_shader_op.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/Makefile b/src/gallium/drivers/svga/Makefile index d1413319c9..38b63394e3 100644 --- a/src/gallium/drivers/svga/Makefile +++ b/src/gallium/drivers/svga/Makefile @@ -4,8 +4,8 @@ include $(TOP)/configs/current LIBNAME = svga C_SOURCES = \ - svgadump/st_shader_dump.c \ - svgadump/st_shader_op.c \ + svgadump/svga_shader_dump.c \ + svgadump/svga_shader_op.c \ svgadump/svga_dump.c \ svga_cmd.c \ svga_context.c \ diff --git a/src/gallium/drivers/svga/SConscript b/src/gallium/drivers/svga/SConscript index ff9645fc03..737b791ceb 100644 --- a/src/gallium/drivers/svga/SConscript +++ b/src/gallium/drivers/svga/SConscript @@ -60,8 +60,8 @@ sources = [ 'svga_tgsi_insn.c', 'svgadump/svga_dump.c', - 'svgadump/st_shader_dump.c', - 'svgadump/st_shader_op.c', + 'svgadump/svga_shader_dump.c', + 'svgadump/svga_shader_op.c', ] svga = env.ConvenienceLibrary( diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c index 44d0930bc0..81eea1a145 100644 --- a/src/gallium/drivers/svga/svga_tgsi.c +++ b/src/gallium/drivers/svga/svga_tgsi.c @@ -32,7 +32,7 @@ #include "tgsi/tgsi_scan.h" #include "util/u_memory.h" -#include "svgadump/st_shader_dump.h" +#include "svgadump/svga_shader_dump.h" #include "svga_context.h" #include "svga_tgsi.h" diff --git a/src/gallium/drivers/svga/svgadump/st_shader.h b/src/gallium/drivers/svga/svgadump/st_shader.h deleted file mode 100644 index 2fc1796a90..0000000000 --- a/src/gallium/drivers/svga/svgadump/st_shader.h +++ /dev/null @@ -1,214 +0,0 @@ -/********************************************************** - * Copyright 2007-2009 VMware, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **********************************************************/ - -/** - * @file - * SVGA Shader Token Definitions - * - * @author Michal Krol - */ - -#ifndef ST_SHADER_SVGA_H -#define ST_SHADER_SVGA_H - -#include "pipe/p_compiler.h" - -struct sh_op -{ - unsigned opcode:16; - unsigned control:8; - unsigned length:4; - unsigned predicated:1; - unsigned unused:1; - unsigned coissue:1; - unsigned is_reg:1; -}; - -struct sh_reg -{ - unsigned number:11; - unsigned type_hi:2; - unsigned relative:1; - unsigned unused:14; - unsigned type_lo:3; - unsigned is_reg:1; -}; - -static INLINE unsigned -sh_reg_type( struct sh_reg reg ) -{ - return reg.type_lo | (reg.type_hi << 3); -} - -struct sh_cdata -{ - float xyzw[4]; -}; - -struct sh_def -{ - struct sh_op op; - struct sh_reg reg; - struct sh_cdata cdata; -}; - -struct sh_defb -{ - struct sh_op op; - struct sh_reg reg; - uint data; -}; - -struct sh_idata -{ - int xyzw[4]; -}; - -struct sh_defi -{ - struct sh_op op; - struct sh_reg reg; - struct sh_idata idata; -}; - -#define PS_TEXTURETYPE_UNKNOWN SVGA3DSAMP_UNKNOWN -#define PS_TEXTURETYPE_2D SVGA3DSAMP_2D -#define PS_TEXTURETYPE_CUBE SVGA3DSAMP_CUBE -#define PS_TEXTURETYPE_VOLUME SVGA3DSAMP_VOLUME - -struct ps_sampleinfo -{ - unsigned unused:27; - unsigned texture_type:4; - unsigned is_reg:1; -}; - -struct vs_semantic -{ - unsigned usage:5; - unsigned unused1:11; - unsigned usage_index:4; - unsigned unused2:12; -}; - -struct sh_dstreg -{ - unsigned number:11; - unsigned type_hi:2; - unsigned relative:1; - unsigned unused:2; - unsigned write_mask:4; - unsigned modifier:4; - unsigned shift_scale:4; - unsigned type_lo:3; - unsigned is_reg:1; -}; - -static INLINE unsigned -sh_dstreg_type( struct sh_dstreg reg ) -{ - return reg.type_lo | (reg.type_hi << 3); -} - -struct sh_dcl -{ - struct sh_op op; - union { - struct { - struct ps_sampleinfo sampleinfo; - } ps; - struct { - struct vs_semantic semantic; - } vs; - } u; - struct sh_dstreg reg; -}; - - -struct sh_srcreg -{ - unsigned number:11; - unsigned type_hi:2; - unsigned relative:1; - unsigned unused:2; - unsigned swizzle_x:2; - unsigned swizzle_y:2; - unsigned swizzle_z:2; - unsigned swizzle_w:2; - unsigned modifier:4; - unsigned type_lo:3; - unsigned is_reg:1; -}; - -static INLINE unsigned -sh_srcreg_type( struct sh_srcreg reg ) -{ - return reg.type_lo | (reg.type_hi << 3); -} - -struct sh_dstop -{ - struct sh_op op; - struct sh_dstreg dst; -}; - -struct sh_srcop -{ - struct sh_op op; - struct sh_srcreg src; -}; - -struct sh_src2op -{ - struct sh_op op; - struct sh_srcreg src0; - struct sh_srcreg src1; -}; - -struct sh_unaryop -{ - struct sh_op op; - struct sh_dstreg dst; - struct sh_srcreg src; -}; - -struct sh_binaryop -{ - struct sh_op op; - struct sh_dstreg dst; - struct sh_srcreg src0; - struct sh_srcreg src1; -}; - -struct sh_trinaryop -{ - struct sh_op op; - struct sh_dstreg dst; - struct sh_srcreg src0; - struct sh_srcreg src1; - struct sh_srcreg src2; -}; - -#endif /* ST_SHADER_SVGA_H */ diff --git a/src/gallium/drivers/svga/svgadump/st_shader_dump.c b/src/gallium/drivers/svga/svgadump/st_shader_dump.c deleted file mode 100644 index d65cc93bfd..0000000000 --- a/src/gallium/drivers/svga/svgadump/st_shader_dump.c +++ /dev/null @@ -1,649 +0,0 @@ -/********************************************************** - * Copyright 2008-2009 VMware, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **********************************************************/ - -/** - * @file - * SVGA Shader Dump Facilities - * - * @author Michal Krol - */ - -#include "st_shader.h" -#include "st_shader_dump.h" -#include "st_shader_op.h" -#include "util/u_debug.h" - -#include "../svga_hw_reg.h" -#include "svga3d_shaderdefs.h" - -struct dump_info -{ - SVGA3dShaderVersion version; - boolean is_ps; -}; - -static void dump_op( struct sh_op op, const char *mnemonic ) -{ - assert( op.predicated == 0 ); - assert( op.is_reg == 0 ); - - if (op.coissue) - debug_printf( "+" ); - debug_printf( "%s", mnemonic ); - switch (op.control) { - case 0: - break; - case SVGA3DOPCONT_PROJECT: - debug_printf( "p" ); - break; - case SVGA3DOPCONT_BIAS: - debug_printf( "b" ); - break; - default: - assert( 0 ); - } -} - - -static void dump_comp_op( struct sh_op op, const char *mnemonic ) -{ - assert( op.is_reg == 0 ); - - if (op.coissue) - debug_printf( "+" ); - debug_printf( "%s", mnemonic ); - switch (op.control) { - case SVGA3DOPCOMP_RESERVED0: - break; - case SVGA3DOPCOMP_GT: - debug_printf("_gt"); - break; - case SVGA3DOPCOMP_EQ: - debug_printf("_eq"); - break; - case SVGA3DOPCOMP_GE: - debug_printf("_ge"); - break; - case SVGA3DOPCOMP_LT: - debug_printf("_lt"); - break; - case SVGA3DOPCOMPC_NE: - debug_printf("_ne"); - break; - case SVGA3DOPCOMP_LE: - debug_printf("_le"); - break; - case SVGA3DOPCOMP_RESERVED1: - default: - assert( 0 ); - } -} - - -static void dump_reg( struct sh_reg reg, struct sh_srcreg *indreg, const struct dump_info *di ) -{ - assert( sh_reg_type( reg ) == SVGA3DREG_CONST || reg.relative == 0 ); - assert( reg.is_reg == 1 ); - - switch (sh_reg_type( reg )) { - case SVGA3DREG_TEMP: - debug_printf( "r%u", reg.number ); - break; - - case SVGA3DREG_INPUT: - debug_printf( "v%u", reg.number ); - break; - - case SVGA3DREG_CONST: - if (reg.relative) { - if (sh_srcreg_type( *indreg ) == SVGA3DREG_LOOP) - debug_printf( "c[aL+%u]", reg.number ); - else - debug_printf( "c[a%u.x+%u]", indreg->number, reg.number ); - } - else - debug_printf( "c%u", reg.number ); - break; - - case SVGA3DREG_ADDR: /* VS */ - /* SVGA3DREG_TEXTURE */ /* PS */ - if (di->is_ps) - debug_printf( "t%u", reg.number ); - else - debug_printf( "a%u", reg.number ); - break; - - case SVGA3DREG_RASTOUT: - switch (reg.number) { - case 0 /*POSITION*/: - debug_printf( "oPos" ); - break; - case 1 /*FOG*/: - debug_printf( "oFog" ); - break; - case 2 /*POINT_SIZE*/: - debug_printf( "oPts" ); - break; - default: - assert( 0 ); - debug_printf( "???" ); - } - break; - - case SVGA3DREG_ATTROUT: - assert( reg.number < 2 ); - debug_printf( "oD%u", reg.number ); - break; - - case SVGA3DREG_TEXCRDOUT: - /* SVGA3DREG_OUTPUT */ - debug_printf( "oT%u", reg.number ); - break; - - case SVGA3DREG_COLOROUT: - debug_printf( "oC%u", reg.number ); - break; - - case SVGA3DREG_DEPTHOUT: - debug_printf( "oD%u", reg.number ); - break; - - case SVGA3DREG_SAMPLER: - debug_printf( "s%u", reg.number ); - break; - - case SVGA3DREG_CONSTBOOL: - assert( !reg.relative ); - debug_printf( "b%u", reg.number ); - break; - - case SVGA3DREG_CONSTINT: - assert( !reg.relative ); - debug_printf( "i%u", reg.number ); - break; - - case SVGA3DREG_LOOP: - assert( reg.number == 0 ); - debug_printf( "aL" ); - break; - - case SVGA3DREG_MISCTYPE: - switch (reg.number) { - case SVGA3DMISCREG_POSITION: - debug_printf( "vPos" ); - break; - case SVGA3DMISCREG_FACE: - debug_printf( "vFace" ); - break; - default: - assert(0); - break; - } - break; - - case SVGA3DREG_LABEL: - debug_printf( "l%u", reg.number ); - break; - - case SVGA3DREG_PREDICATE: - debug_printf( "p%u", reg.number ); - break; - - - default: - assert( 0 ); - debug_printf( "???" ); - } -} - -static void dump_cdata( struct sh_cdata cdata ) -{ - debug_printf( "%f, %f, %f, %f", cdata.xyzw[0], cdata.xyzw[1], cdata.xyzw[2], cdata.xyzw[3] ); -} - -static void dump_idata( struct sh_idata idata ) -{ - debug_printf( "%d, %d, %d, %d", idata.xyzw[0], idata.xyzw[1], idata.xyzw[2], idata.xyzw[3] ); -} - -static void dump_bdata( boolean bdata ) -{ - debug_printf( bdata ? "TRUE" : "FALSE" ); -} - -static void dump_sampleinfo( struct ps_sampleinfo sampleinfo ) -{ - switch (sampleinfo.texture_type) { - case SVGA3DSAMP_2D: - debug_printf( "_2d" ); - break; - case SVGA3DSAMP_CUBE: - debug_printf( "_cube" ); - break; - case SVGA3DSAMP_VOLUME: - debug_printf( "_volume" ); - break; - default: - assert( 0 ); - } -} - - -static void dump_usageinfo( struct vs_semantic semantic ) -{ - switch (semantic.usage) { - case SVGA3D_DECLUSAGE_POSITION: - debug_printf("_position" ); - break; - case SVGA3D_DECLUSAGE_BLENDWEIGHT: - debug_printf("_blendweight" ); - break; - case SVGA3D_DECLUSAGE_BLENDINDICES: - debug_printf("_blendindices" ); - break; - case SVGA3D_DECLUSAGE_NORMAL: - debug_printf("_normal" ); - break; - case SVGA3D_DECLUSAGE_PSIZE: - debug_printf("_psize" ); - break; - case SVGA3D_DECLUSAGE_TEXCOORD: - debug_printf("_texcoord"); - break; - case SVGA3D_DECLUSAGE_TANGENT: - debug_printf("_tangent" ); - break; - case SVGA3D_DECLUSAGE_BINORMAL: - debug_printf("_binormal" ); - break; - case SVGA3D_DECLUSAGE_TESSFACTOR: - debug_printf("_tessfactor" ); - break; - case SVGA3D_DECLUSAGE_POSITIONT: - debug_printf("_positiont" ); - break; - case SVGA3D_DECLUSAGE_COLOR: - debug_printf("_color" ); - break; - case SVGA3D_DECLUSAGE_FOG: - debug_printf("_fog" ); - break; - case SVGA3D_DECLUSAGE_DEPTH: - debug_printf("_depth" ); - break; - case SVGA3D_DECLUSAGE_SAMPLE: - debug_printf("_sample"); - break; - default: - assert( 0 ); - return; - } - - if (semantic.usage_index != 0) { - debug_printf("%d", semantic.usage_index ); - } -} - -static void dump_dstreg( struct sh_dstreg dstreg, const struct dump_info *di ) -{ - union { - struct sh_reg reg; - struct sh_dstreg dstreg; - } u; - - assert( (dstreg.modifier & (SVGA3DDSTMOD_SATURATE | SVGA3DDSTMOD_PARTIALPRECISION)) == dstreg.modifier ); - - if (dstreg.modifier & SVGA3DDSTMOD_SATURATE) - debug_printf( "_sat" ); - if (dstreg.modifier & SVGA3DDSTMOD_PARTIALPRECISION) - debug_printf( "_pp" ); - switch (dstreg.shift_scale) { - case 0: - break; - case 1: - debug_printf( "_x2" ); - break; - case 2: - debug_printf( "_x4" ); - break; - case 3: - debug_printf( "_x8" ); - break; - case 13: - debug_printf( "_d8" ); - break; - case 14: - debug_printf( "_d4" ); - break; - case 15: - debug_printf( "_d2" ); - break; - default: - assert( 0 ); - } - debug_printf( " " ); - - u.dstreg = dstreg; - dump_reg( u.reg, NULL, di ); - if (dstreg.write_mask != SVGA3DWRITEMASK_ALL) { - debug_printf( "." ); - if (dstreg.write_mask & SVGA3DWRITEMASK_0) - debug_printf( "x" ); - if (dstreg.write_mask & SVGA3DWRITEMASK_1) - debug_printf( "y" ); - if (dstreg.write_mask & SVGA3DWRITEMASK_2) - debug_printf( "z" ); - if (dstreg.write_mask & SVGA3DWRITEMASK_3) - debug_printf( "w" ); - } -} - -static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, const struct dump_info *di ) -{ - union { - struct sh_reg reg; - struct sh_srcreg srcreg; - } u; - - switch (srcreg.modifier) { - case SVGA3DSRCMOD_NEG: - case SVGA3DSRCMOD_BIASNEG: - case SVGA3DSRCMOD_SIGNNEG: - case SVGA3DSRCMOD_X2NEG: - debug_printf( "-" ); - break; - case SVGA3DSRCMOD_ABS: - debug_printf( "|" ); - break; - case SVGA3DSRCMOD_ABSNEG: - debug_printf( "-|" ); - break; - case SVGA3DSRCMOD_COMP: - debug_printf( "1-" ); - break; - case SVGA3DSRCMOD_NOT: - debug_printf( "!" ); - } - - u.srcreg = srcreg; - dump_reg( u.reg, indreg, di ); - switch (srcreg.modifier) { - case SVGA3DSRCMOD_NONE: - case SVGA3DSRCMOD_NEG: - case SVGA3DSRCMOD_COMP: - case SVGA3DSRCMOD_NOT: - break; - case SVGA3DSRCMOD_ABS: - case SVGA3DSRCMOD_ABSNEG: - debug_printf( "|" ); - break; - case SVGA3DSRCMOD_BIAS: - case SVGA3DSRCMOD_BIASNEG: - debug_printf( "_bias" ); - break; - case SVGA3DSRCMOD_SIGN: - case SVGA3DSRCMOD_SIGNNEG: - debug_printf( "_bx2" ); - break; - case SVGA3DSRCMOD_X2: - case SVGA3DSRCMOD_X2NEG: - debug_printf( "_x2" ); - break; - case SVGA3DSRCMOD_DZ: - debug_printf( "_dz" ); - break; - case SVGA3DSRCMOD_DW: - debug_printf( "_dw" ); - break; - default: - assert( 0 ); - } - if (srcreg.swizzle_x != 0 || srcreg.swizzle_y != 1 || srcreg.swizzle_z != 2 || srcreg.swizzle_w != 3) { - debug_printf( "." ); - if (srcreg.swizzle_x == srcreg.swizzle_y && srcreg.swizzle_y == srcreg.swizzle_z && srcreg.swizzle_z == srcreg.swizzle_w) { - debug_printf( "%c", "xyzw"[srcreg.swizzle_x] ); - } - else { - debug_printf( "%c", "xyzw"[srcreg.swizzle_x] ); - debug_printf( "%c", "xyzw"[srcreg.swizzle_y] ); - debug_printf( "%c", "xyzw"[srcreg.swizzle_z] ); - debug_printf( "%c", "xyzw"[srcreg.swizzle_w] ); - } - } -} - -void -sh_svga_dump( - const unsigned *assem, - unsigned dwords, - unsigned do_binary ) -{ - const unsigned *start = assem; - boolean finished = FALSE; - struct dump_info di; - unsigned i; - - if (do_binary) { - for (i = 0; i < dwords; i++) - debug_printf(" 0x%08x,\n", assem[i]); - - debug_printf("\n\n"); - } - - di.version.value = *assem++; - di.is_ps = (di.version.type == SVGA3D_PS_TYPE); - - debug_printf( - "%s_%u_%u\n", - di.is_ps ? "ps" : "vs", - di.version.major, - di.version.minor ); - - while (!finished) { - struct sh_op op = *(struct sh_op *) assem; - - if (assem - start >= dwords) { - debug_printf("... ran off end of buffer\n"); - assert(0); - return; - } - - switch (op.opcode) { - case SVGA3DOP_DCL: - { - struct sh_dcl dcl = *(struct sh_dcl *) assem; - - debug_printf( "dcl" ); - if (sh_dstreg_type( dcl.reg ) == SVGA3DREG_SAMPLER) - dump_sampleinfo( dcl.u.ps.sampleinfo ); - else if (di.is_ps) { - if (di.version.major == 3 && - sh_dstreg_type( dcl.reg ) != SVGA3DREG_MISCTYPE) - dump_usageinfo( dcl.u.vs.semantic ); - } - else - dump_usageinfo( dcl.u.vs.semantic ); - dump_dstreg( dcl.reg, &di ); - debug_printf( "\n" ); - assem += sizeof( struct sh_dcl ) / sizeof( unsigned ); - } - break; - - case SVGA3DOP_DEFB: - { - struct sh_defb defb = *(struct sh_defb *) assem; - - debug_printf( "defb " ); - dump_reg( defb.reg, NULL, &di ); - debug_printf( ", " ); - dump_bdata( defb.data ); - debug_printf( "\n" ); - assem += sizeof( struct sh_defb ) / sizeof( unsigned ); - } - break; - - case SVGA3DOP_DEFI: - { - struct sh_defi defi = *(struct sh_defi *) assem; - - debug_printf( "defi " ); - dump_reg( defi.reg, NULL, &di ); - debug_printf( ", " ); - dump_idata( defi.idata ); - debug_printf( "\n" ); - assem += sizeof( struct sh_defi ) / sizeof( unsigned ); - } - break; - - case SVGA3DOP_TEXCOORD: - assert( di.is_ps ); - dump_op( op, "texcoord" ); - if (0) { - struct sh_dstop dstop = *(struct sh_dstop *) assem; - dump_dstreg( dstop.dst, &di ); - assem += sizeof( struct sh_dstop ) / sizeof( unsigned ); - } - else { - struct sh_unaryop unaryop = *(struct sh_unaryop *) assem; - dump_dstreg( unaryop.dst, &di ); - debug_printf( ", " ); - dump_srcreg( unaryop.src, NULL, &di ); - assem += sizeof( struct sh_unaryop ) / sizeof( unsigned ); - } - debug_printf( "\n" ); - break; - - case SVGA3DOP_TEX: - assert( di.is_ps ); - if (0) { - dump_op( op, "tex" ); - if (0) { - struct sh_dstop dstop = *(struct sh_dstop *) assem; - - dump_dstreg( dstop.dst, &di ); - assem += sizeof( struct sh_dstop ) / sizeof( unsigned ); - } - else { - struct sh_unaryop unaryop = *(struct sh_unaryop *) assem; - - dump_dstreg( unaryop.dst, &di ); - debug_printf( ", " ); - dump_srcreg( unaryop.src, NULL, &di ); - assem += sizeof( struct sh_unaryop ) / sizeof( unsigned ); - } - } - else { - struct sh_binaryop binaryop = *(struct sh_binaryop *) assem; - - dump_op( op, "texld" ); - dump_dstreg( binaryop.dst, &di ); - debug_printf( ", " ); - dump_srcreg( binaryop.src0, NULL, &di ); - debug_printf( ", " ); - dump_srcreg( binaryop.src1, NULL, &di ); - assem += sizeof( struct sh_binaryop ) / sizeof( unsigned ); - } - debug_printf( "\n" ); - break; - - case SVGA3DOP_DEF: - { - struct sh_def def = *(struct sh_def *) assem; - - debug_printf( "def " ); - dump_reg( def.reg, NULL, &di ); - debug_printf( ", " ); - dump_cdata( def.cdata ); - debug_printf( "\n" ); - assem += sizeof( struct sh_def ) / sizeof( unsigned ); - } - break; - - case SVGA3DOP_PHASE: - debug_printf( "phase\n" ); - assem += sizeof( struct sh_op ) / sizeof( unsigned ); - break; - - case SVGA3DOP_COMMENT: - assert( 0 ); - break; - - case SVGA3DOP_RET: - debug_printf( "ret\n" ); - assem += sizeof( struct sh_op ) / sizeof( unsigned ); - break; - - case SVGA3DOP_END: - debug_printf( "end\n" ); - finished = TRUE; - break; - - default: - { - const struct sh_opcode_info *info = sh_svga_opcode_info( op.opcode ); - uint i; - uint num_src = info->num_src + op.predicated; - boolean not_first_arg = FALSE; - - assert( info->num_dst <= 1 ); - - if (op.opcode == SVGA3DOP_SINCOS && di.version.major < 3) - num_src += 2; - - dump_comp_op( op, info->mnemonic ); - assem += sizeof( struct sh_op ) / sizeof( unsigned ); - - if (info->num_dst > 0) { - struct sh_dstreg dstreg = *(struct sh_dstreg *) assem; - - dump_dstreg( dstreg, &di ); - assem += sizeof( struct sh_dstreg ) / sizeof( unsigned ); - not_first_arg = TRUE; - } - - for (i = 0; i < num_src; i++) { - struct sh_srcreg srcreg; - struct sh_srcreg indreg; - - srcreg = *(struct sh_srcreg *) assem; - assem += sizeof( struct sh_srcreg ) / sizeof( unsigned ); - if (srcreg.relative && !di.is_ps && di.version.major >= 2) { - indreg = *(struct sh_srcreg *) assem; - assem += sizeof( struct sh_srcreg ) / sizeof( unsigned ); - } - - if (not_first_arg) - debug_printf( ", " ); - else - debug_printf( " " ); - dump_srcreg( srcreg, &indreg, &di ); - not_first_arg = TRUE; - } - - debug_printf( "\n" ); - } - } - } -} diff --git a/src/gallium/drivers/svga/svgadump/st_shader_dump.h b/src/gallium/drivers/svga/svgadump/st_shader_dump.h deleted file mode 100644 index af5549cdba..0000000000 --- a/src/gallium/drivers/svga/svgadump/st_shader_dump.h +++ /dev/null @@ -1,42 +0,0 @@ -/********************************************************** - * Copyright 2008-2009 VMware, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **********************************************************/ - -/** - * @file - * SVGA Shader Dump Facilities - * - * @author Michal Krol - */ - -#ifndef ST_SHADER_SVGA_DUMP_H -#define ST_SHADER_SVGA_DUMP_H - -void -sh_svga_dump( - const unsigned *assem, - unsigned dwords, - unsigned do_binary ); - -#endif /* ST_SHADER_SVGA_DUMP_H */ diff --git a/src/gallium/drivers/svga/svgadump/st_shader_op.c b/src/gallium/drivers/svga/svgadump/st_shader_op.c deleted file mode 100644 index 2c05382ab9..0000000000 --- a/src/gallium/drivers/svga/svgadump/st_shader_op.c +++ /dev/null @@ -1,168 +0,0 @@ -/********************************************************** - * Copyright 2008-2009 VMware, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **********************************************************/ - -/** - * @file - * SVGA Shader Token Opcode Info - * - * @author Michal Krol - */ - -#include "util/u_debug.h" -#include "st_shader_op.h" - -#include "../svga_hw_reg.h" -#include "svga3d_shaderdefs.h" - -#define SVGA3DOP_INVALID SVGA3DOP_END -#define TGSI_OPCODE_INVALID TGSI_OPCODE_LAST - -static struct sh_opcode_info opcode_info[] = -{ - { "nop", 0, 0, SVGA3DOP_NOP }, - { "mov", 1, 1, SVGA3DOP_MOV, }, - { "add", 1, 2, SVGA3DOP_ADD, }, - { "sub", 1, 2, SVGA3DOP_SUB, }, - { "mad", 1, 3, SVGA3DOP_MAD, }, - { "mul", 1, 2, SVGA3DOP_MUL, }, - { "rcp", 1, 1, SVGA3DOP_RCP, }, - { "rsq", 1, 1, SVGA3DOP_RSQ, }, - { "dp3", 1, 2, SVGA3DOP_DP3, }, - { "dp4", 1, 2, SVGA3DOP_DP4, }, - { "min", 1, 2, SVGA3DOP_MIN, }, - { "max", 1, 2, SVGA3DOP_MAX, }, - { "slt", 1, 2, SVGA3DOP_SLT, }, - { "sge", 1, 2, SVGA3DOP_SGE, }, - { "exp", 1, 1, SVGA3DOP_EXP, }, - { "log", 1, 1, SVGA3DOP_LOG, }, - { "lit", 1, 1, SVGA3DOP_LIT, }, - { "dst", 1, 2, SVGA3DOP_DST, }, - { "lrp", 1, 3, SVGA3DOP_LRP, }, - { "frc", 1, 1, SVGA3DOP_FRC, }, - { "m4x4", 1, 2, SVGA3DOP_M4x4, }, - { "m4x3", 1, 2, SVGA3DOP_M4x3, }, - { "m3x4", 1, 2, SVGA3DOP_M3x4, }, - { "m3x3", 1, 2, SVGA3DOP_M3x3, }, - { "m3x2", 1, 2, SVGA3DOP_M3x2, }, - { "call", 0, 1, SVGA3DOP_CALL, }, - { "callnz", 0, 2, SVGA3DOP_CALLNZ, }, - { "loop", 0, 2, SVGA3DOP_LOOP, }, - { "ret", 0, 0, SVGA3DOP_RET, }, - { "endloop", 0, 0, SVGA3DOP_ENDLOOP, }, - { "label", 0, 1, SVGA3DOP_LABEL, }, - { "dcl", 0, 0, SVGA3DOP_DCL, }, - { "pow", 1, 2, SVGA3DOP_POW, }, - { "crs", 1, 2, SVGA3DOP_CRS, }, - { "sgn", 1, 3, SVGA3DOP_SGN, }, - { "abs", 1, 1, SVGA3DOP_ABS, }, - { "nrm", 1, 1, SVGA3DOP_NRM, }, /* 3-componenet normalization */ - { "sincos", 1, 1, SVGA3DOP_SINCOS, }, - { "rep", 0, 1, SVGA3DOP_REP, }, - { "endrep", 0, 0, SVGA3DOP_ENDREP, }, - { "if", 0, 1, SVGA3DOP_IF, }, - { "ifc", 0, 2, SVGA3DOP_IFC, }, - { "else", 0, 0, SVGA3DOP_ELSE, }, - { "endif", 0, 0, SVGA3DOP_ENDIF, }, - { "break", 0, 0, SVGA3DOP_BREAK, }, - { "breakc", 0, 0, SVGA3DOP_BREAKC, }, - { "mova", 1, 1, SVGA3DOP_MOVA, }, - { "defb", 0, 0, SVGA3DOP_DEFB, }, - { "defi", 0, 0, SVGA3DOP_DEFI, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "texcoord", 0, 0, SVGA3DOP_TEXCOORD, }, - { "texkill", 1, 0, SVGA3DOP_TEXKILL, }, - { "tex", 0, 0, SVGA3DOP_TEX, }, - { "texbem", 1, 1, SVGA3DOP_TEXBEM, }, - { "texbeml", 1, 1, SVGA3DOP_TEXBEML, }, - { "texreg2ar", 1, 1, SVGA3DOP_TEXREG2AR, }, - { "texreg2gb", 1, 1, SVGA3DOP_TEXREG2GB, }, - { "texm3x2pad", 1, 1, SVGA3DOP_TEXM3x2PAD, }, - { "texm3x2tex", 1, 1, SVGA3DOP_TEXM3x2TEX, }, - { "texm3x3pad", 1, 1, SVGA3DOP_TEXM3x3PAD, }, - { "texm3x3tex", 1, 1, SVGA3DOP_TEXM3x3TEX, }, - { "reserved0", 0, 0, SVGA3DOP_RESERVED0, }, - { "texm3x3spec", 1, 2, SVGA3DOP_TEXM3x3SPEC, }, - { "texm3x3vspec", 1, 1, SVGA3DOP_TEXM3x3VSPEC,}, - { "expp", 1, 1, SVGA3DOP_EXPP, }, - { "logp", 1, 1, SVGA3DOP_LOGP, }, - { "cnd", 1, 3, SVGA3DOP_CND, }, - { "def", 0, 0, SVGA3DOP_DEF, }, - { "texreg2rgb", 1, 1, SVGA3DOP_TEXREG2RGB, }, - { "texdp3tex", 1, 1, SVGA3DOP_TEXDP3TEX, }, - { "texm3x2depth", 1, 1, SVGA3DOP_TEXM3x2DEPTH,}, - { "texdp3", 1, 1, SVGA3DOP_TEXDP3, }, - { "texm3x3", 1, 1, SVGA3DOP_TEXM3x3, }, - { "texdepth", 1, 0, SVGA3DOP_TEXDEPTH, }, - { "cmp", 1, 3, SVGA3DOP_CMP, }, - { "bem", 1, 2, SVGA3DOP_BEM, }, - { "dp2add", 1, 3, SVGA3DOP_DP2ADD, }, - { "dsx", 1, 1, SVGA3DOP_INVALID, }, - { "dsy", 1, 1, SVGA3DOP_INVALID, }, - { "texldd", 1, 1, SVGA3DOP_INVALID, }, - { "setp", 1, 2, SVGA3DOP_SETP, }, - { "texldl", 1, 1, SVGA3DOP_INVALID, }, - { "breakp", 1, 1, SVGA3DOP_INVALID, }, -}; - -const struct sh_opcode_info *sh_svga_opcode_info( uint op ) -{ - struct sh_opcode_info *info; - - if (op >= sizeof( opcode_info ) / sizeof( opcode_info[0] )) { - /* The opcode is either PHASE, COMMENT, END or out of range. - */ - assert( 0 ); - return NULL; - } - - info = &opcode_info[op]; - - if (info->svga_opcode == SVGA3DOP_INVALID) { - /* No valid information. Please provide number of dst/src registers. - */ - assert( 0 ); - return NULL; - } - - /* Sanity check. - */ - assert( op == info->svga_opcode ); - - return info; -} diff --git a/src/gallium/drivers/svga/svgadump/st_shader_op.h b/src/gallium/drivers/svga/svgadump/st_shader_op.h deleted file mode 100644 index 01d39dca84..0000000000 --- a/src/gallium/drivers/svga/svgadump/st_shader_op.h +++ /dev/null @@ -1,46 +0,0 @@ -/********************************************************** - * Copyright 2008-2009 VMware, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **********************************************************/ - -/** - * @file - * SVGA Shader Token Opcode Info - * - * @author Michal Krol - */ - -#ifndef ST_SHADER_SVGA_OP_H -#define ST_SHADER_SVGA_OP_H - -struct sh_opcode_info -{ - const char *mnemonic; - unsigned num_dst:8; - unsigned num_src:8; - unsigned svga_opcode:16; -}; - -const struct sh_opcode_info *sh_svga_opcode_info( unsigned op ); - -#endif /* ST_SHADER_SVGA_OP_H */ diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c index 180dde8dc1..c6c353f58e 100644 --- a/src/gallium/drivers/svga/svgadump/svga_dump.c +++ b/src/gallium/drivers/svga/svgadump/svga_dump.c @@ -31,7 +31,7 @@ */ #include "svga_types.h" -#include "st_shader_dump.h" +#include "svga_shader_dump.h" #include "svga3d_reg.h" #include "util/u_debug.h" diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.py b/src/gallium/drivers/svga/svgadump/svga_dump.py index 3cb29c395b..288e753296 100755 --- a/src/gallium/drivers/svga/svgadump/svga_dump.py +++ b/src/gallium/drivers/svga/svgadump/svga_dump.py @@ -291,7 +291,7 @@ def main(): print ' */' print print '#include "svga_types.h"' - print '#include "shader_dump/st_shader_dump.h"' + print '#include "svga_shader_dump.h"' print '#include "svga3d_reg.h"' print print '#include "pipe/p_debug.h"' diff --git a/src/gallium/drivers/svga/svgadump/svga_shader.h b/src/gallium/drivers/svga/svgadump/svga_shader.h new file mode 100644 index 0000000000..2fc1796a90 --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/svga_shader.h @@ -0,0 +1,214 @@ +/********************************************************** + * Copyright 2007-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * SVGA Shader Token Definitions + * + * @author Michal Krol + */ + +#ifndef ST_SHADER_SVGA_H +#define ST_SHADER_SVGA_H + +#include "pipe/p_compiler.h" + +struct sh_op +{ + unsigned opcode:16; + unsigned control:8; + unsigned length:4; + unsigned predicated:1; + unsigned unused:1; + unsigned coissue:1; + unsigned is_reg:1; +}; + +struct sh_reg +{ + unsigned number:11; + unsigned type_hi:2; + unsigned relative:1; + unsigned unused:14; + unsigned type_lo:3; + unsigned is_reg:1; +}; + +static INLINE unsigned +sh_reg_type( struct sh_reg reg ) +{ + return reg.type_lo | (reg.type_hi << 3); +} + +struct sh_cdata +{ + float xyzw[4]; +}; + +struct sh_def +{ + struct sh_op op; + struct sh_reg reg; + struct sh_cdata cdata; +}; + +struct sh_defb +{ + struct sh_op op; + struct sh_reg reg; + uint data; +}; + +struct sh_idata +{ + int xyzw[4]; +}; + +struct sh_defi +{ + struct sh_op op; + struct sh_reg reg; + struct sh_idata idata; +}; + +#define PS_TEXTURETYPE_UNKNOWN SVGA3DSAMP_UNKNOWN +#define PS_TEXTURETYPE_2D SVGA3DSAMP_2D +#define PS_TEXTURETYPE_CUBE SVGA3DSAMP_CUBE +#define PS_TEXTURETYPE_VOLUME SVGA3DSAMP_VOLUME + +struct ps_sampleinfo +{ + unsigned unused:27; + unsigned texture_type:4; + unsigned is_reg:1; +}; + +struct vs_semantic +{ + unsigned usage:5; + unsigned unused1:11; + unsigned usage_index:4; + unsigned unused2:12; +}; + +struct sh_dstreg +{ + unsigned number:11; + unsigned type_hi:2; + unsigned relative:1; + unsigned unused:2; + unsigned write_mask:4; + unsigned modifier:4; + unsigned shift_scale:4; + unsigned type_lo:3; + unsigned is_reg:1; +}; + +static INLINE unsigned +sh_dstreg_type( struct sh_dstreg reg ) +{ + return reg.type_lo | (reg.type_hi << 3); +} + +struct sh_dcl +{ + struct sh_op op; + union { + struct { + struct ps_sampleinfo sampleinfo; + } ps; + struct { + struct vs_semantic semantic; + } vs; + } u; + struct sh_dstreg reg; +}; + + +struct sh_srcreg +{ + unsigned number:11; + unsigned type_hi:2; + unsigned relative:1; + unsigned unused:2; + unsigned swizzle_x:2; + unsigned swizzle_y:2; + unsigned swizzle_z:2; + unsigned swizzle_w:2; + unsigned modifier:4; + unsigned type_lo:3; + unsigned is_reg:1; +}; + +static INLINE unsigned +sh_srcreg_type( struct sh_srcreg reg ) +{ + return reg.type_lo | (reg.type_hi << 3); +} + +struct sh_dstop +{ + struct sh_op op; + struct sh_dstreg dst; +}; + +struct sh_srcop +{ + struct sh_op op; + struct sh_srcreg src; +}; + +struct sh_src2op +{ + struct sh_op op; + struct sh_srcreg src0; + struct sh_srcreg src1; +}; + +struct sh_unaryop +{ + struct sh_op op; + struct sh_dstreg dst; + struct sh_srcreg src; +}; + +struct sh_binaryop +{ + struct sh_op op; + struct sh_dstreg dst; + struct sh_srcreg src0; + struct sh_srcreg src1; +}; + +struct sh_trinaryop +{ + struct sh_op op; + struct sh_dstreg dst; + struct sh_srcreg src0; + struct sh_srcreg src1; + struct sh_srcreg src2; +}; + +#endif /* ST_SHADER_SVGA_H */ diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c new file mode 100644 index 0000000000..c654126d3a --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c @@ -0,0 +1,649 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * SVGA Shader Dump Facilities + * + * @author Michal Krol + */ + +#include "svga_shader.h" +#include "svga_shader_dump.h" +#include "svga_shader_op.h" +#include "util/u_debug.h" + +#include "../svga_hw_reg.h" +#include "svga3d_shaderdefs.h" + +struct dump_info +{ + SVGA3dShaderVersion version; + boolean is_ps; +}; + +static void dump_op( struct sh_op op, const char *mnemonic ) +{ + assert( op.predicated == 0 ); + assert( op.is_reg == 0 ); + + if (op.coissue) + debug_printf( "+" ); + debug_printf( "%s", mnemonic ); + switch (op.control) { + case 0: + break; + case SVGA3DOPCONT_PROJECT: + debug_printf( "p" ); + break; + case SVGA3DOPCONT_BIAS: + debug_printf( "b" ); + break; + default: + assert( 0 ); + } +} + + +static void dump_comp_op( struct sh_op op, const char *mnemonic ) +{ + assert( op.is_reg == 0 ); + + if (op.coissue) + debug_printf( "+" ); + debug_printf( "%s", mnemonic ); + switch (op.control) { + case SVGA3DOPCOMP_RESERVED0: + break; + case SVGA3DOPCOMP_GT: + debug_printf("_gt"); + break; + case SVGA3DOPCOMP_EQ: + debug_printf("_eq"); + break; + case SVGA3DOPCOMP_GE: + debug_printf("_ge"); + break; + case SVGA3DOPCOMP_LT: + debug_printf("_lt"); + break; + case SVGA3DOPCOMPC_NE: + debug_printf("_ne"); + break; + case SVGA3DOPCOMP_LE: + debug_printf("_le"); + break; + case SVGA3DOPCOMP_RESERVED1: + default: + assert( 0 ); + } +} + + +static void dump_reg( struct sh_reg reg, struct sh_srcreg *indreg, const struct dump_info *di ) +{ + assert( sh_reg_type( reg ) == SVGA3DREG_CONST || reg.relative == 0 ); + assert( reg.is_reg == 1 ); + + switch (sh_reg_type( reg )) { + case SVGA3DREG_TEMP: + debug_printf( "r%u", reg.number ); + break; + + case SVGA3DREG_INPUT: + debug_printf( "v%u", reg.number ); + break; + + case SVGA3DREG_CONST: + if (reg.relative) { + if (sh_srcreg_type( *indreg ) == SVGA3DREG_LOOP) + debug_printf( "c[aL+%u]", reg.number ); + else + debug_printf( "c[a%u.x+%u]", indreg->number, reg.number ); + } + else + debug_printf( "c%u", reg.number ); + break; + + case SVGA3DREG_ADDR: /* VS */ + /* SVGA3DREG_TEXTURE */ /* PS */ + if (di->is_ps) + debug_printf( "t%u", reg.number ); + else + debug_printf( "a%u", reg.number ); + break; + + case SVGA3DREG_RASTOUT: + switch (reg.number) { + case 0 /*POSITION*/: + debug_printf( "oPos" ); + break; + case 1 /*FOG*/: + debug_printf( "oFog" ); + break; + case 2 /*POINT_SIZE*/: + debug_printf( "oPts" ); + break; + default: + assert( 0 ); + debug_printf( "???" ); + } + break; + + case SVGA3DREG_ATTROUT: + assert( reg.number < 2 ); + debug_printf( "oD%u", reg.number ); + break; + + case SVGA3DREG_TEXCRDOUT: + /* SVGA3DREG_OUTPUT */ + debug_printf( "oT%u", reg.number ); + break; + + case SVGA3DREG_COLOROUT: + debug_printf( "oC%u", reg.number ); + break; + + case SVGA3DREG_DEPTHOUT: + debug_printf( "oD%u", reg.number ); + break; + + case SVGA3DREG_SAMPLER: + debug_printf( "s%u", reg.number ); + break; + + case SVGA3DREG_CONSTBOOL: + assert( !reg.relative ); + debug_printf( "b%u", reg.number ); + break; + + case SVGA3DREG_CONSTINT: + assert( !reg.relative ); + debug_printf( "i%u", reg.number ); + break; + + case SVGA3DREG_LOOP: + assert( reg.number == 0 ); + debug_printf( "aL" ); + break; + + case SVGA3DREG_MISCTYPE: + switch (reg.number) { + case SVGA3DMISCREG_POSITION: + debug_printf( "vPos" ); + break; + case SVGA3DMISCREG_FACE: + debug_printf( "vFace" ); + break; + default: + assert(0); + break; + } + break; + + case SVGA3DREG_LABEL: + debug_printf( "l%u", reg.number ); + break; + + case SVGA3DREG_PREDICATE: + debug_printf( "p%u", reg.number ); + break; + + + default: + assert( 0 ); + debug_printf( "???" ); + } +} + +static void dump_cdata( struct sh_cdata cdata ) +{ + debug_printf( "%f, %f, %f, %f", cdata.xyzw[0], cdata.xyzw[1], cdata.xyzw[2], cdata.xyzw[3] ); +} + +static void dump_idata( struct sh_idata idata ) +{ + debug_printf( "%d, %d, %d, %d", idata.xyzw[0], idata.xyzw[1], idata.xyzw[2], idata.xyzw[3] ); +} + +static void dump_bdata( boolean bdata ) +{ + debug_printf( bdata ? "TRUE" : "FALSE" ); +} + +static void dump_sampleinfo( struct ps_sampleinfo sampleinfo ) +{ + switch (sampleinfo.texture_type) { + case SVGA3DSAMP_2D: + debug_printf( "_2d" ); + break; + case SVGA3DSAMP_CUBE: + debug_printf( "_cube" ); + break; + case SVGA3DSAMP_VOLUME: + debug_printf( "_volume" ); + break; + default: + assert( 0 ); + } +} + + +static void dump_usageinfo( struct vs_semantic semantic ) +{ + switch (semantic.usage) { + case SVGA3D_DECLUSAGE_POSITION: + debug_printf("_position" ); + break; + case SVGA3D_DECLUSAGE_BLENDWEIGHT: + debug_printf("_blendweight" ); + break; + case SVGA3D_DECLUSAGE_BLENDINDICES: + debug_printf("_blendindices" ); + break; + case SVGA3D_DECLUSAGE_NORMAL: + debug_printf("_normal" ); + break; + case SVGA3D_DECLUSAGE_PSIZE: + debug_printf("_psize" ); + break; + case SVGA3D_DECLUSAGE_TEXCOORD: + debug_printf("_texcoord"); + break; + case SVGA3D_DECLUSAGE_TANGENT: + debug_printf("_tangent" ); + break; + case SVGA3D_DECLUSAGE_BINORMAL: + debug_printf("_binormal" ); + break; + case SVGA3D_DECLUSAGE_TESSFACTOR: + debug_printf("_tessfactor" ); + break; + case SVGA3D_DECLUSAGE_POSITIONT: + debug_printf("_positiont" ); + break; + case SVGA3D_DECLUSAGE_COLOR: + debug_printf("_color" ); + break; + case SVGA3D_DECLUSAGE_FOG: + debug_printf("_fog" ); + break; + case SVGA3D_DECLUSAGE_DEPTH: + debug_printf("_depth" ); + break; + case SVGA3D_DECLUSAGE_SAMPLE: + debug_printf("_sample"); + break; + default: + assert( 0 ); + return; + } + + if (semantic.usage_index != 0) { + debug_printf("%d", semantic.usage_index ); + } +} + +static void dump_dstreg( struct sh_dstreg dstreg, const struct dump_info *di ) +{ + union { + struct sh_reg reg; + struct sh_dstreg dstreg; + } u; + + assert( (dstreg.modifier & (SVGA3DDSTMOD_SATURATE | SVGA3DDSTMOD_PARTIALPRECISION)) == dstreg.modifier ); + + if (dstreg.modifier & SVGA3DDSTMOD_SATURATE) + debug_printf( "_sat" ); + if (dstreg.modifier & SVGA3DDSTMOD_PARTIALPRECISION) + debug_printf( "_pp" ); + switch (dstreg.shift_scale) { + case 0: + break; + case 1: + debug_printf( "_x2" ); + break; + case 2: + debug_printf( "_x4" ); + break; + case 3: + debug_printf( "_x8" ); + break; + case 13: + debug_printf( "_d8" ); + break; + case 14: + debug_printf( "_d4" ); + break; + case 15: + debug_printf( "_d2" ); + break; + default: + assert( 0 ); + } + debug_printf( " " ); + + u.dstreg = dstreg; + dump_reg( u.reg, NULL, di ); + if (dstreg.write_mask != SVGA3DWRITEMASK_ALL) { + debug_printf( "." ); + if (dstreg.write_mask & SVGA3DWRITEMASK_0) + debug_printf( "x" ); + if (dstreg.write_mask & SVGA3DWRITEMASK_1) + debug_printf( "y" ); + if (dstreg.write_mask & SVGA3DWRITEMASK_2) + debug_printf( "z" ); + if (dstreg.write_mask & SVGA3DWRITEMASK_3) + debug_printf( "w" ); + } +} + +static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, const struct dump_info *di ) +{ + union { + struct sh_reg reg; + struct sh_srcreg srcreg; + } u; + + switch (srcreg.modifier) { + case SVGA3DSRCMOD_NEG: + case SVGA3DSRCMOD_BIASNEG: + case SVGA3DSRCMOD_SIGNNEG: + case SVGA3DSRCMOD_X2NEG: + debug_printf( "-" ); + break; + case SVGA3DSRCMOD_ABS: + debug_printf( "|" ); + break; + case SVGA3DSRCMOD_ABSNEG: + debug_printf( "-|" ); + break; + case SVGA3DSRCMOD_COMP: + debug_printf( "1-" ); + break; + case SVGA3DSRCMOD_NOT: + debug_printf( "!" ); + } + + u.srcreg = srcreg; + dump_reg( u.reg, indreg, di ); + switch (srcreg.modifier) { + case SVGA3DSRCMOD_NONE: + case SVGA3DSRCMOD_NEG: + case SVGA3DSRCMOD_COMP: + case SVGA3DSRCMOD_NOT: + break; + case SVGA3DSRCMOD_ABS: + case SVGA3DSRCMOD_ABSNEG: + debug_printf( "|" ); + break; + case SVGA3DSRCMOD_BIAS: + case SVGA3DSRCMOD_BIASNEG: + debug_printf( "_bias" ); + break; + case SVGA3DSRCMOD_SIGN: + case SVGA3DSRCMOD_SIGNNEG: + debug_printf( "_bx2" ); + break; + case SVGA3DSRCMOD_X2: + case SVGA3DSRCMOD_X2NEG: + debug_printf( "_x2" ); + break; + case SVGA3DSRCMOD_DZ: + debug_printf( "_dz" ); + break; + case SVGA3DSRCMOD_DW: + debug_printf( "_dw" ); + break; + default: + assert( 0 ); + } + if (srcreg.swizzle_x != 0 || srcreg.swizzle_y != 1 || srcreg.swizzle_z != 2 || srcreg.swizzle_w != 3) { + debug_printf( "." ); + if (srcreg.swizzle_x == srcreg.swizzle_y && srcreg.swizzle_y == srcreg.swizzle_z && srcreg.swizzle_z == srcreg.swizzle_w) { + debug_printf( "%c", "xyzw"[srcreg.swizzle_x] ); + } + else { + debug_printf( "%c", "xyzw"[srcreg.swizzle_x] ); + debug_printf( "%c", "xyzw"[srcreg.swizzle_y] ); + debug_printf( "%c", "xyzw"[srcreg.swizzle_z] ); + debug_printf( "%c", "xyzw"[srcreg.swizzle_w] ); + } + } +} + +void +sh_svga_dump( + const unsigned *assem, + unsigned dwords, + unsigned do_binary ) +{ + const unsigned *start = assem; + boolean finished = FALSE; + struct dump_info di; + unsigned i; + + if (do_binary) { + for (i = 0; i < dwords; i++) + debug_printf(" 0x%08x,\n", assem[i]); + + debug_printf("\n\n"); + } + + di.version.value = *assem++; + di.is_ps = (di.version.type == SVGA3D_PS_TYPE); + + debug_printf( + "%s_%u_%u\n", + di.is_ps ? "ps" : "vs", + di.version.major, + di.version.minor ); + + while (!finished) { + struct sh_op op = *(struct sh_op *) assem; + + if (assem - start >= dwords) { + debug_printf("... ran off end of buffer\n"); + assert(0); + return; + } + + switch (op.opcode) { + case SVGA3DOP_DCL: + { + struct sh_dcl dcl = *(struct sh_dcl *) assem; + + debug_printf( "dcl" ); + if (sh_dstreg_type( dcl.reg ) == SVGA3DREG_SAMPLER) + dump_sampleinfo( dcl.u.ps.sampleinfo ); + else if (di.is_ps) { + if (di.version.major == 3 && + sh_dstreg_type( dcl.reg ) != SVGA3DREG_MISCTYPE) + dump_usageinfo( dcl.u.vs.semantic ); + } + else + dump_usageinfo( dcl.u.vs.semantic ); + dump_dstreg( dcl.reg, &di ); + debug_printf( "\n" ); + assem += sizeof( struct sh_dcl ) / sizeof( unsigned ); + } + break; + + case SVGA3DOP_DEFB: + { + struct sh_defb defb = *(struct sh_defb *) assem; + + debug_printf( "defb " ); + dump_reg( defb.reg, NULL, &di ); + debug_printf( ", " ); + dump_bdata( defb.data ); + debug_printf( "\n" ); + assem += sizeof( struct sh_defb ) / sizeof( unsigned ); + } + break; + + case SVGA3DOP_DEFI: + { + struct sh_defi defi = *(struct sh_defi *) assem; + + debug_printf( "defi " ); + dump_reg( defi.reg, NULL, &di ); + debug_printf( ", " ); + dump_idata( defi.idata ); + debug_printf( "\n" ); + assem += sizeof( struct sh_defi ) / sizeof( unsigned ); + } + break; + + case SVGA3DOP_TEXCOORD: + assert( di.is_ps ); + dump_op( op, "texcoord" ); + if (0) { + struct sh_dstop dstop = *(struct sh_dstop *) assem; + dump_dstreg( dstop.dst, &di ); + assem += sizeof( struct sh_dstop ) / sizeof( unsigned ); + } + else { + struct sh_unaryop unaryop = *(struct sh_unaryop *) assem; + dump_dstreg( unaryop.dst, &di ); + debug_printf( ", " ); + dump_srcreg( unaryop.src, NULL, &di ); + assem += sizeof( struct sh_unaryop ) / sizeof( unsigned ); + } + debug_printf( "\n" ); + break; + + case SVGA3DOP_TEX: + assert( di.is_ps ); + if (0) { + dump_op( op, "tex" ); + if (0) { + struct sh_dstop dstop = *(struct sh_dstop *) assem; + + dump_dstreg( dstop.dst, &di ); + assem += sizeof( struct sh_dstop ) / sizeof( unsigned ); + } + else { + struct sh_unaryop unaryop = *(struct sh_unaryop *) assem; + + dump_dstreg( unaryop.dst, &di ); + debug_printf( ", " ); + dump_srcreg( unaryop.src, NULL, &di ); + assem += sizeof( struct sh_unaryop ) / sizeof( unsigned ); + } + } + else { + struct sh_binaryop binaryop = *(struct sh_binaryop *) assem; + + dump_op( op, "texld" ); + dump_dstreg( binaryop.dst, &di ); + debug_printf( ", " ); + dump_srcreg( binaryop.src0, NULL, &di ); + debug_printf( ", " ); + dump_srcreg( binaryop.src1, NULL, &di ); + assem += sizeof( struct sh_binaryop ) / sizeof( unsigned ); + } + debug_printf( "\n" ); + break; + + case SVGA3DOP_DEF: + { + struct sh_def def = *(struct sh_def *) assem; + + debug_printf( "def " ); + dump_reg( def.reg, NULL, &di ); + debug_printf( ", " ); + dump_cdata( def.cdata ); + debug_printf( "\n" ); + assem += sizeof( struct sh_def ) / sizeof( unsigned ); + } + break; + + case SVGA3DOP_PHASE: + debug_printf( "phase\n" ); + assem += sizeof( struct sh_op ) / sizeof( unsigned ); + break; + + case SVGA3DOP_COMMENT: + assert( 0 ); + break; + + case SVGA3DOP_RET: + debug_printf( "ret\n" ); + assem += sizeof( struct sh_op ) / sizeof( unsigned ); + break; + + case SVGA3DOP_END: + debug_printf( "end\n" ); + finished = TRUE; + break; + + default: + { + const struct sh_opcode_info *info = sh_svga_opcode_info( op.opcode ); + uint i; + uint num_src = info->num_src + op.predicated; + boolean not_first_arg = FALSE; + + assert( info->num_dst <= 1 ); + + if (op.opcode == SVGA3DOP_SINCOS && di.version.major < 3) + num_src += 2; + + dump_comp_op( op, info->mnemonic ); + assem += sizeof( struct sh_op ) / sizeof( unsigned ); + + if (info->num_dst > 0) { + struct sh_dstreg dstreg = *(struct sh_dstreg *) assem; + + dump_dstreg( dstreg, &di ); + assem += sizeof( struct sh_dstreg ) / sizeof( unsigned ); + not_first_arg = TRUE; + } + + for (i = 0; i < num_src; i++) { + struct sh_srcreg srcreg; + struct sh_srcreg indreg; + + srcreg = *(struct sh_srcreg *) assem; + assem += sizeof( struct sh_srcreg ) / sizeof( unsigned ); + if (srcreg.relative && !di.is_ps && di.version.major >= 2) { + indreg = *(struct sh_srcreg *) assem; + assem += sizeof( struct sh_srcreg ) / sizeof( unsigned ); + } + + if (not_first_arg) + debug_printf( ", " ); + else + debug_printf( " " ); + dump_srcreg( srcreg, &indreg, &di ); + not_first_arg = TRUE; + } + + debug_printf( "\n" ); + } + } + } +} diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.h b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h new file mode 100644 index 0000000000..af5549cdba --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h @@ -0,0 +1,42 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * SVGA Shader Dump Facilities + * + * @author Michal Krol + */ + +#ifndef ST_SHADER_SVGA_DUMP_H +#define ST_SHADER_SVGA_DUMP_H + +void +sh_svga_dump( + const unsigned *assem, + unsigned dwords, + unsigned do_binary ); + +#endif /* ST_SHADER_SVGA_DUMP_H */ diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.c b/src/gallium/drivers/svga/svgadump/svga_shader_op.c new file mode 100644 index 0000000000..cecc22106b --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.c @@ -0,0 +1,168 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * SVGA Shader Token Opcode Info + * + * @author Michal Krol + */ + +#include "util/u_debug.h" +#include "svga_shader_op.h" + +#include "../svga_hw_reg.h" +#include "svga3d_shaderdefs.h" + +#define SVGA3DOP_INVALID SVGA3DOP_END +#define TGSI_OPCODE_INVALID TGSI_OPCODE_LAST + +static struct sh_opcode_info opcode_info[] = +{ + { "nop", 0, 0, SVGA3DOP_NOP }, + { "mov", 1, 1, SVGA3DOP_MOV, }, + { "add", 1, 2, SVGA3DOP_ADD, }, + { "sub", 1, 2, SVGA3DOP_SUB, }, + { "mad", 1, 3, SVGA3DOP_MAD, }, + { "mul", 1, 2, SVGA3DOP_MUL, }, + { "rcp", 1, 1, SVGA3DOP_RCP, }, + { "rsq", 1, 1, SVGA3DOP_RSQ, }, + { "dp3", 1, 2, SVGA3DOP_DP3, }, + { "dp4", 1, 2, SVGA3DOP_DP4, }, + { "min", 1, 2, SVGA3DOP_MIN, }, + { "max", 1, 2, SVGA3DOP_MAX, }, + { "slt", 1, 2, SVGA3DOP_SLT, }, + { "sge", 1, 2, SVGA3DOP_SGE, }, + { "exp", 1, 1, SVGA3DOP_EXP, }, + { "log", 1, 1, SVGA3DOP_LOG, }, + { "lit", 1, 1, SVGA3DOP_LIT, }, + { "dst", 1, 2, SVGA3DOP_DST, }, + { "lrp", 1, 3, SVGA3DOP_LRP, }, + { "frc", 1, 1, SVGA3DOP_FRC, }, + { "m4x4", 1, 2, SVGA3DOP_M4x4, }, + { "m4x3", 1, 2, SVGA3DOP_M4x3, }, + { "m3x4", 1, 2, SVGA3DOP_M3x4, }, + { "m3x3", 1, 2, SVGA3DOP_M3x3, }, + { "m3x2", 1, 2, SVGA3DOP_M3x2, }, + { "call", 0, 1, SVGA3DOP_CALL, }, + { "callnz", 0, 2, SVGA3DOP_CALLNZ, }, + { "loop", 0, 2, SVGA3DOP_LOOP, }, + { "ret", 0, 0, SVGA3DOP_RET, }, + { "endloop", 0, 0, SVGA3DOP_ENDLOOP, }, + { "label", 0, 1, SVGA3DOP_LABEL, }, + { "dcl", 0, 0, SVGA3DOP_DCL, }, + { "pow", 1, 2, SVGA3DOP_POW, }, + { "crs", 1, 2, SVGA3DOP_CRS, }, + { "sgn", 1, 3, SVGA3DOP_SGN, }, + { "abs", 1, 1, SVGA3DOP_ABS, }, + { "nrm", 1, 1, SVGA3DOP_NRM, }, /* 3-componenet normalization */ + { "sincos", 1, 1, SVGA3DOP_SINCOS, }, + { "rep", 0, 1, SVGA3DOP_REP, }, + { "endrep", 0, 0, SVGA3DOP_ENDREP, }, + { "if", 0, 1, SVGA3DOP_IF, }, + { "ifc", 0, 2, SVGA3DOP_IFC, }, + { "else", 0, 0, SVGA3DOP_ELSE, }, + { "endif", 0, 0, SVGA3DOP_ENDIF, }, + { "break", 0, 0, SVGA3DOP_BREAK, }, + { "breakc", 0, 0, SVGA3DOP_BREAKC, }, + { "mova", 1, 1, SVGA3DOP_MOVA, }, + { "defb", 0, 0, SVGA3DOP_DEFB, }, + { "defi", 0, 0, SVGA3DOP_DEFI, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "texcoord", 0, 0, SVGA3DOP_TEXCOORD, }, + { "texkill", 1, 0, SVGA3DOP_TEXKILL, }, + { "tex", 0, 0, SVGA3DOP_TEX, }, + { "texbem", 1, 1, SVGA3DOP_TEXBEM, }, + { "texbeml", 1, 1, SVGA3DOP_TEXBEML, }, + { "texreg2ar", 1, 1, SVGA3DOP_TEXREG2AR, }, + { "texreg2gb", 1, 1, SVGA3DOP_TEXREG2GB, }, + { "texm3x2pad", 1, 1, SVGA3DOP_TEXM3x2PAD, }, + { "texm3x2tex", 1, 1, SVGA3DOP_TEXM3x2TEX, }, + { "texm3x3pad", 1, 1, SVGA3DOP_TEXM3x3PAD, }, + { "texm3x3tex", 1, 1, SVGA3DOP_TEXM3x3TEX, }, + { "reserved0", 0, 0, SVGA3DOP_RESERVED0, }, + { "texm3x3spec", 1, 2, SVGA3DOP_TEXM3x3SPEC, }, + { "texm3x3vspec", 1, 1, SVGA3DOP_TEXM3x3VSPEC,}, + { "expp", 1, 1, SVGA3DOP_EXPP, }, + { "logp", 1, 1, SVGA3DOP_LOGP, }, + { "cnd", 1, 3, SVGA3DOP_CND, }, + { "def", 0, 0, SVGA3DOP_DEF, }, + { "texreg2rgb", 1, 1, SVGA3DOP_TEXREG2RGB, }, + { "texdp3tex", 1, 1, SVGA3DOP_TEXDP3TEX, }, + { "texm3x2depth", 1, 1, SVGA3DOP_TEXM3x2DEPTH,}, + { "texdp3", 1, 1, SVGA3DOP_TEXDP3, }, + { "texm3x3", 1, 1, SVGA3DOP_TEXM3x3, }, + { "texdepth", 1, 0, SVGA3DOP_TEXDEPTH, }, + { "cmp", 1, 3, SVGA3DOP_CMP, }, + { "bem", 1, 2, SVGA3DOP_BEM, }, + { "dp2add", 1, 3, SVGA3DOP_DP2ADD, }, + { "dsx", 1, 1, SVGA3DOP_INVALID, }, + { "dsy", 1, 1, SVGA3DOP_INVALID, }, + { "texldd", 1, 1, SVGA3DOP_INVALID, }, + { "setp", 1, 2, SVGA3DOP_SETP, }, + { "texldl", 1, 1, SVGA3DOP_INVALID, }, + { "breakp", 1, 1, SVGA3DOP_INVALID, }, +}; + +const struct sh_opcode_info *sh_svga_opcode_info( uint op ) +{ + struct sh_opcode_info *info; + + if (op >= sizeof( opcode_info ) / sizeof( opcode_info[0] )) { + /* The opcode is either PHASE, COMMENT, END or out of range. + */ + assert( 0 ); + return NULL; + } + + info = &opcode_info[op]; + + if (info->svga_opcode == SVGA3DOP_INVALID) { + /* No valid information. Please provide number of dst/src registers. + */ + assert( 0 ); + return NULL; + } + + /* Sanity check. + */ + assert( op == info->svga_opcode ); + + return info; +} diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.h b/src/gallium/drivers/svga/svgadump/svga_shader_op.h new file mode 100644 index 0000000000..01d39dca84 --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.h @@ -0,0 +1,46 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * SVGA Shader Token Opcode Info + * + * @author Michal Krol + */ + +#ifndef ST_SHADER_SVGA_OP_H +#define ST_SHADER_SVGA_OP_H + +struct sh_opcode_info +{ + const char *mnemonic; + unsigned num_dst:8; + unsigned num_src:8; + unsigned svga_opcode:16; +}; + +const struct sh_opcode_info *sh_svga_opcode_info( unsigned op ); + +#endif /* ST_SHADER_SVGA_OP_H */ -- cgit v1.2.3 From d185c2fd1318bd41f303ab4a5f6e0a048b76c11c Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 24 Nov 2009 14:43:30 +0000 Subject: svga: Use consistent names for public symbol names of shader dumping facilities. --- src/gallium/drivers/svga/svgadump/svga_dump.c | 2 +- src/gallium/drivers/svga/svgadump/svga_shader_dump.c | 4 ++-- src/gallium/drivers/svga/svgadump/svga_shader_dump.h | 8 ++++---- src/gallium/drivers/svga/svgadump/svga_shader_op.c | 2 +- src/gallium/drivers/svga/svgadump/svga_shader_op.h | 8 ++++---- 5 files changed, 12 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c index c6c353f58e..910afa2528 100644 --- a/src/gallium/drivers/svga/svgadump/svga_dump.c +++ b/src/gallium/drivers/svga/svgadump/svga_dump.c @@ -1627,7 +1627,7 @@ svga_dump_commands(const void *commands, uint32_t size) const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body; dump_SVGA3dCmdDefineShader(cmd); body = (const uint8_t *)&cmd[1]; - sh_svga_dump((const uint32_t *)body, + svga_shader_dump((const uint32_t *)body, (unsigned)(next - body)/sizeof(uint32_t), FALSE ); body = next; diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c index c654126d3a..7718bdf757 100644 --- a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c +++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c @@ -435,7 +435,7 @@ static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, cons } void -sh_svga_dump( +svga_shader_dump( const unsigned *assem, unsigned dwords, unsigned do_binary ) @@ -602,7 +602,7 @@ sh_svga_dump( default: { - const struct sh_opcode_info *info = sh_svga_opcode_info( op.opcode ); + const struct sh_opcode_info *info = svga_opcode_info( op.opcode ); uint i; uint num_src = info->num_src + op.predicated; boolean not_first_arg = FALSE; diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.h b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h index af5549cdba..a2657acb2f 100644 --- a/src/gallium/drivers/svga/svgadump/svga_shader_dump.h +++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h @@ -30,13 +30,13 @@ * @author Michal Krol */ -#ifndef ST_SHADER_SVGA_DUMP_H -#define ST_SHADER_SVGA_DUMP_H +#ifndef SVGA_SHADER_DUMP_H +#define SVGA_SHADER_DUMP_H void -sh_svga_dump( +svga_shader_dump( const unsigned *assem, unsigned dwords, unsigned do_binary ); -#endif /* ST_SHADER_SVGA_DUMP_H */ +#endif /* SVGA_SHADER_DUMP_H */ diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.c b/src/gallium/drivers/svga/svgadump/svga_shader_op.c index cecc22106b..8343bfdaab 100644 --- a/src/gallium/drivers/svga/svgadump/svga_shader_op.c +++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.c @@ -140,7 +140,7 @@ static struct sh_opcode_info opcode_info[] = { "breakp", 1, 1, SVGA3DOP_INVALID, }, }; -const struct sh_opcode_info *sh_svga_opcode_info( uint op ) +const struct sh_opcode_info *svga_opcode_info( uint op ) { struct sh_opcode_info *info; diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.h b/src/gallium/drivers/svga/svgadump/svga_shader_op.h index 01d39dca84..e558de02c5 100644 --- a/src/gallium/drivers/svga/svgadump/svga_shader_op.h +++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.h @@ -30,8 +30,8 @@ * @author Michal Krol */ -#ifndef ST_SHADER_SVGA_OP_H -#define ST_SHADER_SVGA_OP_H +#ifndef SVGA_SHADER_OP_H +#define SVGA_SHADER_OP_H struct sh_opcode_info { @@ -41,6 +41,6 @@ struct sh_opcode_info unsigned svga_opcode:16; }; -const struct sh_opcode_info *sh_svga_opcode_info( unsigned op ); +const struct sh_opcode_info *svga_opcode_info( unsigned op ); -#endif /* ST_SHADER_SVGA_OP_H */ +#endif /* SVGA_SHADER_OP_H */ -- cgit v1.2.3 From f3a0615fb0452f11f4db88861b30b2177bdd948a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 15 Nov 2009 12:14:03 -0800 Subject: svga: Handle comment tokens when dumping. --- src/gallium/drivers/svga/svgadump/svga_shader.h | 6 ++++++ src/gallium/drivers/svga/svgadump/svga_shader_dump.c | 7 ++++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svgadump/svga_shader.h b/src/gallium/drivers/svga/svgadump/svga_shader.h index 2fc1796a90..9217af2dd9 100644 --- a/src/gallium/drivers/svga/svgadump/svga_shader.h +++ b/src/gallium/drivers/svga/svgadump/svga_shader.h @@ -211,4 +211,10 @@ struct sh_trinaryop struct sh_srcreg src2; }; +struct sh_comment +{ + unsigned opcode:16; + unsigned size:16; +}; + #endif /* ST_SHADER_SVGA_H */ diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c index 7718bdf757..b0e7fdf378 100644 --- a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c +++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c @@ -587,7 +587,12 @@ svga_shader_dump( break; case SVGA3DOP_COMMENT: - assert( 0 ); + { + struct sh_comment comment = *(struct sh_comment *)assem; + + /* Ignore comment contents. */ + assem += sizeof(struct sh_comment) / sizeof(unsigned) + comment.size; + } break; case SVGA3DOP_RET: -- cgit v1.2.3 From 763426a0256f0ab06f8af53947bd630f8600183a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 24 Nov 2009 14:53:29 +0000 Subject: tgsi: reduce repetition of structure name in its members Rename Semantic.SemanticName to Semantic.Name. Similar for SemanticIndex, and the members of the tgsi_version struct. --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 14 ++++----- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 14 ++++----- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 6 ++-- src/gallium/auxiliary/tgsi/tgsi_build.c | 16 +++++------ src/gallium/auxiliary/tgsi/tgsi_dump.c | 12 ++++---- src/gallium/auxiliary/tgsi/tgsi_dump_c.c | 16 +++++------ src/gallium/auxiliary/tgsi/tgsi_exec.c | 8 +++--- src/gallium/auxiliary/tgsi/tgsi_parse.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_scan.c | 10 +++---- src/gallium/auxiliary/tgsi/tgsi_text.c | 4 +-- src/gallium/auxiliary/tgsi/tgsi_ureg.c | 8 +++--- src/gallium/auxiliary/vl/vl_shader_build.c | 16 +++++------ src/gallium/drivers/nv20/nv20_vertprog.c | 14 ++++----- src/gallium/drivers/nv30/nv30_fragprog.c | 12 ++++---- src/gallium/drivers/nv30/nv30_vertprog.c | 14 ++++----- src/gallium/drivers/nv40/nv40_fragprog.c | 14 ++++----- src/gallium/drivers/nv40/nv40_vertprog.c | 14 ++++----- src/gallium/drivers/nv50/nv50_program.c | 4 +-- src/gallium/drivers/r300/r300_vs.c | 4 +-- src/gallium/drivers/svga/svga_tgsi_decl_sm20.c | 34 +++++++++++----------- src/gallium/drivers/svga/svga_tgsi_decl_sm30.c | 38 ++++++++++++------------- src/gallium/include/pipe/p_shader_tokens.h | 8 +++--- 22 files changed, 141 insertions(+), 141 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index e374010fee..7b0b236a3d 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -139,8 +139,8 @@ aa_transform_decl(struct tgsi_transform_context *ctx, struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; if (decl->Declaration.File == TGSI_FILE_OUTPUT && - decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && - decl->Semantic.SemanticIndex == 0) { + decl->Semantic.Name == TGSI_SEMANTIC_COLOR && + decl->Semantic.Index == 0) { aactx->colorOutput = decl->DeclarationRange.First; } else if (decl->Declaration.File == TGSI_FILE_SAMPLER) { @@ -153,9 +153,9 @@ aa_transform_decl(struct tgsi_transform_context *ctx, else if (decl->Declaration.File == TGSI_FILE_INPUT) { if ((int) decl->DeclarationRange.Last > aactx->maxInput) aactx->maxInput = decl->DeclarationRange.Last; - if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && - (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) { - aactx->maxGeneric = decl->Semantic.SemanticIndex; + if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC && + (int) decl->Semantic.Index > aactx->maxGeneric) { + aactx->maxGeneric = decl->Semantic.Index; } } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { @@ -228,8 +228,8 @@ aa_transform_inst(struct tgsi_transform_context *ctx, /* XXX this could be linear... */ decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; + decl.Semantic.Name = TGSI_SEMANTIC_GENERIC; + decl.Semantic.Index = aactx->maxGeneric + 1; decl.DeclarationRange.First = decl.DeclarationRange.Last = aactx->maxInput + 1; ctx->emit_declaration(ctx, &decl); diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index ae1712fe12..1f29448ff8 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -131,16 +131,16 @@ aa_transform_decl(struct tgsi_transform_context *ctx, struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; if (decl->Declaration.File == TGSI_FILE_OUTPUT && - decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && - decl->Semantic.SemanticIndex == 0) { + decl->Semantic.Name == TGSI_SEMANTIC_COLOR && + decl->Semantic.Index == 0) { aactx->colorOutput = decl->DeclarationRange.First; } else if (decl->Declaration.File == TGSI_FILE_INPUT) { if ((int) decl->DeclarationRange.Last > aactx->maxInput) aactx->maxInput = decl->DeclarationRange.Last; - if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && - (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) { - aactx->maxGeneric = decl->Semantic.SemanticIndex; + if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC && + (int) decl->Semantic.Index > aactx->maxGeneric) { + aactx->maxGeneric = decl->Semantic.Index; } } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { @@ -198,8 +198,8 @@ aa_transform_inst(struct tgsi_transform_context *ctx, /* XXX this could be linear... */ decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; + decl.Semantic.Name = TGSI_SEMANTIC_GENERIC; + decl.Semantic.Index = aactx->maxGeneric + 1; decl.DeclarationRange.First = decl.DeclarationRange.Last = texInput; ctx->emit_declaration(ctx, &decl); diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 9de06e37ed..75774e6626 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -140,7 +140,7 @@ pstip_transform_decl(struct tgsi_transform_context *ctx, } else if (decl->Declaration.File == TGSI_FILE_INPUT) { pctx->maxInput = MAX2(pctx->maxInput, (int) decl->DeclarationRange.Last); - if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) + if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) pctx->wincoordInput = (int) decl->DeclarationRange.First; } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { @@ -226,8 +226,8 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, decl.Declaration.File = TGSI_FILE_INPUT; decl.Declaration.Interpolate = TGSI_INTERPOLATE_LINEAR; /* XXX? */ decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION; - decl.Semantic.SemanticIndex = 0; + decl.Semantic.Name = TGSI_SEMANTIC_POSITION; + decl.Semantic.Index = 0; decl.DeclarationRange.First = decl.DeclarationRange.Last = wincoordInput; ctx->emit_declaration(ctx, &decl); diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index fbac265640..2e6c5b38b4 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -39,8 +39,8 @@ tgsi_build_version( void ) { struct tgsi_version version; - version.MajorVersion = 1; - version.MinorVersion = 1; + version.Major = 1; + version.Minor = 1; version.Padding = 0; return version; @@ -223,8 +223,8 @@ tgsi_build_full_declaration( size++; *ds = tgsi_build_declaration_semantic( - full_decl->Semantic.SemanticName, - full_decl->Semantic.SemanticIndex, + full_decl->Semantic.Name, + full_decl->Semantic.Index, declaration, header ); } @@ -269,8 +269,8 @@ tgsi_default_declaration_semantic( void ) { struct tgsi_declaration_semantic ds; - ds.SemanticName = TGSI_SEMANTIC_POSITION; - ds.SemanticIndex = 0; + ds.Name = TGSI_SEMANTIC_POSITION; + ds.Index = 0; ds.Padding = 0; return ds; @@ -289,8 +289,8 @@ tgsi_build_declaration_semantic( assert( semantic_index <= 0xFFFF ); ds = tgsi_default_declaration_semantic(); - ds.SemanticName = semantic_name; - ds.SemanticIndex = semantic_index; + ds.Name = semantic_name; + ds.Index = semantic_index; declaration_grow( declaration, header ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 7eb64167fb..8f26d5dae3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -232,11 +232,11 @@ iter_declaration( if (decl->Declaration.Semantic) { TXT( ", " ); - ENM( decl->Semantic.SemanticName, semantic_names ); - if (decl->Semantic.SemanticIndex != 0 || - decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC) { + ENM( decl->Semantic.Name, semantic_names ); + if (decl->Semantic.Index != 0 || + decl->Semantic.Name == TGSI_SEMANTIC_GENERIC) { CHR( '[' ); - UID( decl->Semantic.SemanticIndex ); + UID( decl->Semantic.Index ); CHR( ']' ); } } @@ -477,9 +477,9 @@ prolog( { struct dump_ctx *ctx = (struct dump_ctx *) iter; ENM( iter->processor.Processor, processor_type_names ); - UID( iter->version.MajorVersion ); + UID( iter->version.Major ); CHR( '.' ); - UID( iter->version.MinorVersion ); + UID( iter->version.Minor ); EOL(); return TRUE; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c index 4648051e29..11d28b1653 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -229,10 +229,10 @@ dump_declaration_verbose( if( decl->Declaration.Semantic ) { EOL(); - TXT( "\nSemanticName : " ); - ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS ); - TXT( "\nSemanticIndex: " ); - UID( decl->Semantic.SemanticIndex ); + TXT( "\nName : " ); + ENM( decl->Semantic.Name, TGSI_SEMANTICS ); + TXT( "\nIndex: " ); + UID( decl->Semantic.Index ); if( ignored ) { TXT( "\nPadding : " ); UIX( decl->Semantic.Padding ); @@ -485,10 +485,10 @@ tgsi_dump_c( TXT( "tgsi-dump begin -----------------" ); - TXT( "\nMajorVersion: " ); - UID( parse.FullVersion.Version.MajorVersion ); - TXT( "\nMinorVersion: " ); - UID( parse.FullVersion.Version.MinorVersion ); + TXT( "\nMajor: " ); + UID( parse.FullVersion.Version.Major ); + TXT( "\nMinor: " ); + UID( parse.FullVersion.Version.Minor ); EOL(); TXT( "\nHeaderSize: " ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 61d38e57f1..c113f4a3bc 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1899,16 +1899,16 @@ exec_declaration(struct tgsi_exec_machine *mach, last = decl->DeclarationRange.Last; mask = decl->Declaration.UsageMask; - if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) { - assert(decl->Semantic.SemanticIndex == 0); + if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { + assert(decl->Semantic.Index == 0); assert(first == last); assert(mask = TGSI_WRITEMASK_XYZW); mach->Inputs[first] = mach->QuadPos; - } else if (decl->Semantic.SemanticName == TGSI_SEMANTIC_FACE) { + } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { uint i; - assert(decl->Semantic.SemanticIndex == 0); + assert(decl->Semantic.Index == 0); assert(first == last); for (i = 0; i < QUAD_SIZE; i++) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index 52c714ebbd..d4f27499b8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -36,7 +36,7 @@ tgsi_parse_init( const struct tgsi_token *tokens ) { ctx->FullVersion.Version = *(struct tgsi_version *) &tokens[0]; - if( ctx->FullVersion.Version.MajorVersion > 1 ) { + if( ctx->FullVersion.Version.Major > 1 ) { return TGSI_PARSE_ERROR; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 55595539ec..69567130e3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -129,21 +129,21 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->file_max[file] = MAX2(info->file_max[file], (int)reg); if (file == TGSI_FILE_INPUT) { - info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName; - info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex; + info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name; + info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index; info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate; info->num_inputs++; } else if (file == TGSI_FILE_OUTPUT) { - info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName; - info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex; + info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name; + info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index; info->num_outputs++; } /* special case */ if (procType == TGSI_PROCESSOR_FRAGMENT && file == TGSI_FILE_OUTPUT && - fulldecl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) { + fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) { info->writes_z = TRUE; } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 7250f98cc9..d25f590df7 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -831,13 +831,13 @@ static boolean parse_declaration( struct translate_ctx *ctx ) } cur2++; - decl.Semantic.SemanticIndex = index; + decl.Semantic.Index = index; cur = cur2; } decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = i; + decl.Semantic.Name = i; ctx->cur = cur; break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index de4bc6edb0..6d646a529a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -910,8 +910,8 @@ static void emit_decl( struct ureg_program *ureg, out[1].decl_range.Last = index; out[2].value = 0; - out[2].decl_semantic.SemanticName = semantic_name; - out[2].decl_semantic.SemanticIndex = semantic_index; + out[2].decl_semantic.Name = semantic_name; + out[2].decl_semantic.Index = semantic_index; } @@ -1064,8 +1064,8 @@ emit_header( struct ureg_program *ureg ) { union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 3 ); - out[0].version.MajorVersion = 1; - out[0].version.MinorVersion = 1; + out[0].version.Major = 1; + out[0].version.Minor = 1; out[0].version.Padding = 0; out[1].header.HeaderSize = 2; diff --git a/src/gallium/auxiliary/vl/vl_shader_build.c b/src/gallium/auxiliary/vl/vl_shader_build.c index 9637cbed8a..d052e2c797 100644 --- a/src/gallium/auxiliary/vl/vl_shader_build.c +++ b/src/gallium/auxiliary/vl/vl_shader_build.c @@ -36,8 +36,8 @@ struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index decl.Declaration.File = TGSI_FILE_INPUT; decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = name; - decl.Semantic.SemanticIndex = index; + decl.Semantic.Name = name; + decl.Semantic.Index = index; decl.DeclarationRange.First = first; decl.DeclarationRange.Last = last; @@ -64,8 +64,8 @@ struct tgsi_full_declaration vl_decl_interpolated_input decl.Declaration.File = TGSI_FILE_INPUT; decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = name; - decl.Semantic.SemanticIndex = index; + decl.Semantic.Name = name; + decl.Semantic.Index = index; decl.Declaration.Interpolate = interpolation;; decl.DeclarationRange.First = first; decl.DeclarationRange.Last = last; @@ -79,8 +79,8 @@ struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int i decl.Declaration.File = TGSI_FILE_CONSTANT; decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = name; - decl.Semantic.SemanticIndex = index; + decl.Semantic.Name = name; + decl.Semantic.Index = index; decl.DeclarationRange.First = first; decl.DeclarationRange.Last = last; @@ -93,8 +93,8 @@ struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int inde decl.Declaration.File = TGSI_FILE_OUTPUT; decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = name; - decl.Semantic.SemanticIndex = index; + decl.Semantic.Name = name; + decl.Semantic.Index = index; decl.DeclarationRange.First = first; decl.DeclarationRange.Last = last; diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c index 48df356faf..cd76910744 100644 --- a/src/gallium/drivers/nv20/nv20_vertprog.c +++ b/src/gallium/drivers/nv20/nv20_vertprog.c @@ -490,15 +490,15 @@ nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc, { int hw; - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: hw = NV30_VP_INST_DEST_POS; break; case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV30_VP_INST_DEST_COL0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV30_VP_INST_DEST_COL1; } else { NOUVEAU_ERR("bad colour semantic index\n"); @@ -506,10 +506,10 @@ nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc, } break; case TGSI_SEMANTIC_BCOLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV30_VP_INST_DEST_BFC0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV30_VP_INST_DEST_BFC1; } else { NOUVEAU_ERR("bad bcolour semantic index\n"); @@ -523,8 +523,8 @@ nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc, hw = NV30_VP_INST_DEST_PSZ; break; case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.SemanticIndex <= 7) { - hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); + if (fdec->Semantic.Index <= 7) { + hw = NV30_VP_INST_DEST_TC(fdec->Semantic.Index); } else { NOUVEAU_ERR("bad generic semantic index\n"); return FALSE; diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index eb978b6838..acf216bb61 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -572,15 +572,15 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, { int hw; - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: hw = NV30_FP_OP_INPUT_SRC_POSITION; break; case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV30_FP_OP_INPUT_SRC_COL0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV30_FP_OP_INPUT_SRC_COL1; } else { NOUVEAU_ERR("bad colour semantic index\n"); @@ -591,9 +591,9 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, hw = NV30_FP_OP_INPUT_SRC_FOGC; break; case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.SemanticIndex <= 7) { + if (fdec->Semantic.Index <= 7) { hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic. - SemanticIndex); + Index); } else { NOUVEAU_ERR("bad generic semantic index\n"); return FALSE; @@ -612,7 +612,7 @@ static boolean nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc, const struct tgsi_full_declaration *fdec) { - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: fpc->depth_id = fdec->DeclarationRange.First; break; diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index b04fb229bc..e8fba8ab16 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -490,15 +490,15 @@ nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc, { int hw; - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: hw = NV30_VP_INST_DEST_POS; break; case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV30_VP_INST_DEST_COL0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV30_VP_INST_DEST_COL1; } else { NOUVEAU_ERR("bad colour semantic index\n"); @@ -506,10 +506,10 @@ nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc, } break; case TGSI_SEMANTIC_BCOLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV30_VP_INST_DEST_BFC0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV30_VP_INST_DEST_BFC1; } else { NOUVEAU_ERR("bad bcolour semantic index\n"); @@ -523,8 +523,8 @@ nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc, hw = NV30_VP_INST_DEST_PSZ; break; case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.SemanticIndex <= 7) { - hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); + if (fdec->Semantic.Index <= 7) { + hw = NV30_VP_INST_DEST_TC(fdec->Semantic.Index); } else { NOUVEAU_ERR("bad generic semantic index\n"); return FALSE; diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index dbbb736670..ca6a957fc1 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -644,15 +644,15 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, { int hw; - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: hw = NV40_FP_OP_INPUT_SRC_POSITION; break; case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV40_FP_OP_INPUT_SRC_COL0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV40_FP_OP_INPUT_SRC_COL1; } else { NOUVEAU_ERR("bad colour semantic index\n"); @@ -663,9 +663,9 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, hw = NV40_FP_OP_INPUT_SRC_FOGC; break; case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.SemanticIndex <= 7) { + if (fdec->Semantic.Index <= 7) { hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic. - SemanticIndex); + Index); } else { NOUVEAU_ERR("bad generic semantic index\n"); return FALSE; @@ -687,12 +687,12 @@ nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, unsigned idx = fdec->DeclarationRange.First; unsigned hw; - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: hw = 1; break; case TGSI_SEMANTIC_COLOR: - switch (fdec->Semantic.SemanticIndex) { + switch (fdec->Semantic.Index) { case 0: hw = 0; break; case 1: hw = 2; break; case 2: hw = 3; break; diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index df9cb227a3..ed0f1d857d 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -580,16 +580,16 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, unsigned idx = fdec->DeclarationRange.First; int hw; - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: hw = NV40_VP_INST_DEST_POS; vpc->hpos_idx = idx; break; case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV40_VP_INST_DEST_COL0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV40_VP_INST_DEST_COL1; } else { NOUVEAU_ERR("bad colour semantic index\n"); @@ -597,10 +597,10 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, } break; case TGSI_SEMANTIC_BCOLOR: - if (fdec->Semantic.SemanticIndex == 0) { + if (fdec->Semantic.Index == 0) { hw = NV40_VP_INST_DEST_BFC0; } else - if (fdec->Semantic.SemanticIndex == 1) { + if (fdec->Semantic.Index == 1) { hw = NV40_VP_INST_DEST_BFC1; } else { NOUVEAU_ERR("bad bcolour semantic index\n"); @@ -614,8 +614,8 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, hw = NV40_VP_INST_DEST_PSZ; break; case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.SemanticIndex <= 7) { - hw = NV40_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); + if (fdec->Semantic.Index <= 7) { + hw = NV40_VP_INST_DEST_TC(fdec->Semantic.Index); } else { NOUVEAU_ERR("bad generic semantic index\n"); return FALSE; diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index e40e37d07c..00518af8c0 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -2558,8 +2558,8 @@ nv50_program_tx_prep(struct nv50_pc *pc) p->type == PIPE_SHADER_FRAGMENT) break; - si = d->Semantic.SemanticIndex; - switch (d->Semantic.SemanticName) { + si = d->Semantic.Index; + switch (d->Semantic.Name) { case TGSI_SEMANTIC_BCOLOR: p->cfg.two_side[si].hw = first; if (p->cfg.io_nr > first) diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 74ef416dc1..939b13e4b3 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -77,7 +77,7 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) if (decl->Declaration.File != TGSI_FILE_OUTPUT) continue; - switch (decl->Semantic.SemanticName) { + switch (decl->Semantic.Name) { case TGSI_SEMANTIC_POSITION: c->code->outputs[decl->DeclarationRange.First] = 0; break; @@ -98,7 +98,7 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) break; default: debug_printf("r300: vs: Bad semantic declaration %d\n", - decl->Semantic.SemanticName); + decl->Semantic.Name); break; } } diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c index 54457082a0..6f4822a89d 100644 --- a/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c +++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c @@ -46,7 +46,7 @@ static boolean ps20_input( struct svga_shader_emitter *emit, dcl.values[0] = 0; dcl.values[1] = 0; - switch (semantic.SemanticName) { + switch (semantic.Name) { case TGSI_SEMANTIC_POSITION: /* Special case: */ @@ -55,15 +55,15 @@ static boolean ps20_input( struct svga_shader_emitter *emit, break; case TGSI_SEMANTIC_COLOR: reg = src_register( SVGA3DREG_INPUT, - semantic.SemanticIndex ); + semantic.Index ); break; case TGSI_SEMANTIC_FOG: - assert(semantic.SemanticIndex == 0); + assert(semantic.Index == 0); reg = src_register( SVGA3DREG_TEXTURE, 0 ); break; case TGSI_SEMANTIC_GENERIC: reg = src_register( SVGA3DREG_TEXTURE, - semantic.SemanticIndex + 1 ); + semantic.Index + 1 ); break; default: assert(0); @@ -90,16 +90,16 @@ static boolean ps20_output( struct svga_shader_emitter *emit, { SVGA3dShaderDestToken reg; - switch (semantic.SemanticName) { + switch (semantic.Name) { case TGSI_SEMANTIC_COLOR: - if (semantic.SemanticIndex < PIPE_MAX_COLOR_BUFS) { - unsigned cbuf = semantic.SemanticIndex; + if (semantic.Index < PIPE_MAX_COLOR_BUFS) { + unsigned cbuf = semantic.Index; emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ ); emit->temp_col[cbuf] = emit->output_map[idx]; emit->true_col[cbuf] = dst_register( SVGA3DREG_COLOROUT, - semantic.SemanticIndex ); + semantic.Index ); } else { assert(0); @@ -111,7 +111,7 @@ static boolean ps20_output( struct svga_shader_emitter *emit, emit->nr_hw_temp++ ); emit->temp_pos = emit->output_map[idx]; emit->true_pos = dst_register( SVGA3DREG_DEPTHOUT, - semantic.SemanticIndex ); + semantic.Index ); break; default: assert(0); @@ -169,9 +169,9 @@ static boolean vs20_output( struct svga_shader_emitter *emit, /* Just build the register map table: */ - switch (semantic.SemanticName) { + switch (semantic.Name) { case TGSI_SEMANTIC_POSITION: - assert(semantic.SemanticIndex == 0); + assert(semantic.Index == 0); emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ ); emit->temp_pos = emit->output_map[idx]; @@ -179,7 +179,7 @@ static boolean vs20_output( struct svga_shader_emitter *emit, SVGA3DRASTOUT_POSITION); break; case TGSI_SEMANTIC_PSIZE: - assert(semantic.SemanticIndex == 0); + assert(semantic.Index == 0); emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ ); emit->temp_psiz = emit->output_map[idx]; @@ -187,17 +187,17 @@ static boolean vs20_output( struct svga_shader_emitter *emit, SVGA3DRASTOUT_PSIZE ); break; case TGSI_SEMANTIC_FOG: - assert(semantic.SemanticIndex == 0); + assert(semantic.Index == 0); emit->output_map[idx] = dst_register( SVGA3DREG_TEXCRDOUT, 0 ); break; case TGSI_SEMANTIC_COLOR: /* oD0 */ emit->output_map[idx] = dst_register( SVGA3DREG_ATTROUT, - semantic.SemanticIndex ); + semantic.Index ); break; case TGSI_SEMANTIC_GENERIC: emit->output_map[idx] = dst_register( SVGA3DREG_TEXCRDOUT, - semantic.SemanticIndex + 1 ); + semantic.Index + 1 ); break; default: assert(0); @@ -237,8 +237,8 @@ boolean svga_translate_decl_sm20( struct svga_shader_emitter *emit, unsigned idx; if (decl->Declaration.Semantic) { - semantic = decl->Semantic.SemanticName; - semantic_idx = decl->Semantic.SemanticIndex; + semantic = decl->Semantic.Name; + semantic_idx = decl->Semantic.Index; } for( idx = first; idx <= last; idx++ ) { diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c index 08e7dfb117..65aa23ce3e 100644 --- a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c +++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c @@ -35,35 +35,35 @@ static boolean translate_vs_ps_semantic( struct tgsi_declaration_semantic semant unsigned *usage, unsigned *idx ) { - switch (semantic.SemanticName) { + switch (semantic.Name) { case TGSI_SEMANTIC_POSITION: - *idx = semantic.SemanticIndex; + *idx = semantic.Index; *usage = SVGA3D_DECLUSAGE_POSITION; break; case TGSI_SEMANTIC_COLOR: - *idx = semantic.SemanticIndex; + *idx = semantic.Index; *usage = SVGA3D_DECLUSAGE_COLOR; break; case TGSI_SEMANTIC_BCOLOR: - *idx = semantic.SemanticIndex + 2; /* sharing with COLOR */ + *idx = semantic.Index + 2; /* sharing with COLOR */ *usage = SVGA3D_DECLUSAGE_COLOR; break; case TGSI_SEMANTIC_FOG: *idx = 0; - assert(semantic.SemanticIndex == 0); + assert(semantic.Index == 0); *usage = SVGA3D_DECLUSAGE_TEXCOORD; break; case TGSI_SEMANTIC_PSIZE: - *idx = semantic.SemanticIndex; + *idx = semantic.Index; *usage = SVGA3D_DECLUSAGE_PSIZE; break; case TGSI_SEMANTIC_GENERIC: - *idx = semantic.SemanticIndex + 1; /* texcoord[0] is reserved for fog */ + *idx = semantic.Index + 1; /* texcoord[0] is reserved for fog */ *usage = SVGA3D_DECLUSAGE_TEXCOORD; break; case TGSI_SEMANTIC_NORMAL: - *idx = semantic.SemanticIndex; + *idx = semantic.Index; *usage = SVGA3D_DECLUSAGE_NORMAL; break; default: @@ -120,7 +120,7 @@ static boolean ps30_input( struct svga_shader_emitter *emit, unsigned usage, index; SVGA3dShaderDestToken reg; - if (semantic.SemanticName == TGSI_SEMANTIC_POSITION) { + if (semantic.Name == TGSI_SEMANTIC_POSITION) { emit->input_map[idx] = src_register( SVGA3DREG_MISCTYPE, SVGA3DMISCREG_POSITION ); @@ -135,7 +135,7 @@ static boolean ps30_input( struct svga_shader_emitter *emit, return emit_decl( emit, reg, 0, 0 ); } else if (emit->key.fkey.light_twoside && - (semantic.SemanticName == TGSI_SEMANTIC_COLOR)) { + (semantic.Name == TGSI_SEMANTIC_COLOR)) { if (!translate_vs_ps_semantic( semantic, &usage, &index )) return FALSE; @@ -150,7 +150,7 @@ static boolean ps30_input( struct svga_shader_emitter *emit, if (!emit_decl( emit, reg, usage, index )) return FALSE; - semantic.SemanticName = TGSI_SEMANTIC_BCOLOR; + semantic.Name = TGSI_SEMANTIC_BCOLOR; if (!translate_vs_ps_semantic( semantic, &usage, &index )) return FALSE; @@ -164,7 +164,7 @@ static boolean ps30_input( struct svga_shader_emitter *emit, return TRUE; } - else if (semantic.SemanticName == TGSI_SEMANTIC_FACE) { + else if (semantic.Name == TGSI_SEMANTIC_FACE) { if (!emit_vface_decl( emit )) return FALSE; emit->emit_frontface = TRUE; @@ -193,17 +193,17 @@ static boolean ps30_output( struct svga_shader_emitter *emit, { SVGA3dShaderDestToken reg; - switch (semantic.SemanticName) { + switch (semantic.Name) { case TGSI_SEMANTIC_COLOR: emit->output_map[idx] = dst_register( SVGA3DREG_COLOROUT, - semantic.SemanticIndex ); + semantic.Index ); break; case TGSI_SEMANTIC_POSITION: emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ ); emit->temp_pos = emit->output_map[idx]; emit->true_pos = dst_register( SVGA3DREG_DEPTHOUT, - semantic.SemanticIndex ); + semantic.Index ); break; default: assert(0); @@ -283,14 +283,14 @@ static boolean vs30_output( struct svga_shader_emitter *emit, dcl.index = index; dcl.values[0] |= 1<<31; - if (semantic.SemanticName == TGSI_SEMANTIC_POSITION) { + if (semantic.Name == TGSI_SEMANTIC_POSITION) { assert(idx == 0); emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ ); emit->temp_pos = emit->output_map[idx]; emit->true_pos = dcl.dst; } - else if (semantic.SemanticName == TGSI_SEMANTIC_PSIZE) { + else if (semantic.Name == TGSI_SEMANTIC_PSIZE) { emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ ); emit->temp_psiz = emit->output_map[idx]; @@ -342,8 +342,8 @@ boolean svga_translate_decl_sm30( struct svga_shader_emitter *emit, unsigned idx; if (decl->Declaration.Semantic) { - semantic = decl->Semantic.SemanticName; - semantic_idx = decl->Semantic.SemanticIndex; + semantic = decl->Semantic.Name; + semantic_idx = decl->Semantic.Index; } for( idx = first; idx <= last; idx++ ) { diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index ac999e0c18..7d73d7df85 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -37,8 +37,8 @@ extern "C" { struct tgsi_version { - unsigned MajorVersion : 8; - unsigned MinorVersion : 8; + unsigned Major : 8; + unsigned Minor : 8; unsigned Padding : 16; }; @@ -137,8 +137,8 @@ struct tgsi_declaration_range struct tgsi_declaration_semantic { - unsigned SemanticName : 8; /**< one of TGSI_SEMANTIC_x */ - unsigned SemanticIndex : 16; /**< UINT */ + unsigned Name : 8; /**< one of TGSI_SEMANTIC_x */ + unsigned Index : 16; /**< UINT */ unsigned Padding : 8; }; -- cgit v1.2.3 From 7d6c8f980d1e23ad6f557d650e89c715861a3b0c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 24 Nov 2009 15:02:23 +0000 Subject: tgsi: rename fields of tgsi_full_instruction to avoid excessive verbosity InstructionPredicate -> Predicate InstructionLabel -> Label InstructionTexture -> Texture FullSrcRegisters -> Src FullDstRegisters -> Dst --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 40 ++-- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 238 +++++++++++------------ src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 32 +-- src/gallium/auxiliary/draw/draw_vs_aos.c | 176 ++++++++--------- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 8 +- src/gallium/auxiliary/tgsi/tgsi_build.c | 34 ++-- src/gallium/auxiliary/tgsi/tgsi_dump.c | 8 +- src/gallium/auxiliary/tgsi/tgsi_dump_c.c | 8 +- src/gallium/auxiliary/tgsi/tgsi_exec.c | 46 ++--- src/gallium/auxiliary/tgsi/tgsi_parse.c | 42 ++-- src/gallium/auxiliary/tgsi/tgsi_parse.h | 10 +- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 8 +- src/gallium/auxiliary/tgsi/tgsi_sanity.c | 24 +-- src/gallium/auxiliary/tgsi/tgsi_scan.c | 6 +- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 16 +- src/gallium/auxiliary/tgsi/tgsi_text.c | 8 +- src/gallium/auxiliary/vl/vl_compositor.c | 2 +- src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 32 +-- src/gallium/auxiliary/vl/vl_shader_build.c | 50 ++--- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 226 ++++++++++----------- src/gallium/drivers/cell/spu/spu_exec.c | 12 +- src/gallium/drivers/i915/i915_fpc_translate.c | 116 +++++------ src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 16 +- src/gallium/drivers/nv20/nv20_vertprog.c | 8 +- src/gallium/drivers/nv30/nv30_fragprog.c | 8 +- src/gallium/drivers/nv30/nv30_vertprog.c | 8 +- src/gallium/drivers/nv40/nv40_fragprog.c | 8 +- src/gallium/drivers/nv40/nv40_vertprog.c | 10 +- src/gallium/drivers/nv50/nv50_program.c | 36 ++-- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 10 +- src/gallium/drivers/svga/svga_tgsi_insn.c | 110 +++++------ 31 files changed, 678 insertions(+), 678 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 7b0b236a3d..58d867faeb 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -265,15 +265,15 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_TEX; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = aactx->texTemp; + newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].DstRegister.Index = aactx->texTemp; newInst.Instruction.NumSrcRegs = 2; newInst.Instruction.Texture = TRUE; - newInst.InstructionTexture.Texture = TGSI_TEXTURE_2D; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->maxInput + 1; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->freeSampler; + newInst.Texture.Texture = TGSI_TEXTURE_2D; + newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.Src[0].SrcRegister.Index = aactx->maxInput + 1; + newInst.Src[1].SrcRegister.File = TGSI_FILE_SAMPLER; + newInst.Src[1].SrcRegister.Index = aactx->freeSampler; ctx->emit_instruction(ctx, &newInst); @@ -281,26 +281,26 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MOV; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; + newInst.Dst[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.Dst[0].DstRegister.Index = aactx->colorOutput; + newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].SrcRegister.Index = aactx->colorTemp; ctx->emit_instruction(ctx, &newInst); /* MUL alpha */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MUL; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Dst[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.Dst[0].DstRegister.Index = aactx->colorOutput; + newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->texTemp; + newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].SrcRegister.Index = aactx->colorTemp; + newInst.Src[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.Src[1].SrcRegister.Index = aactx->texTemp; ctx->emit_instruction(ctx, &newInst); /* END */ @@ -317,7 +317,7 @@ aa_transform_inst(struct tgsi_transform_context *ctx, uint i; for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + struct tgsi_full_dst_register *dst = &inst->Dst[i]; if (dst->DstRegister.File == TGSI_FILE_OUTPUT && dst->DstRegister.Index == aactx->colorOutput) { dst->DstRegister.File = TGSI_FILE_TEMPORARY; diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 1f29448ff8..09fc55cb5e 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -234,30 +234,30 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MUL; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY; + newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].DstRegister.Index = tmp0; + newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.Src[0].SrcRegister.Index = texInput; + newInst.Src[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.Src[1].SrcRegister.Index = texInput; ctx->emit_instruction(ctx, &newInst); /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_ADD; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].DstRegister.Index = tmp0; + newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].SrcRegister.Index = tmp0; + newInst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + newInst.Src[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.Src[1].SrcRegister.Index = tmp0; + newInst.Src[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; ctx->emit_instruction(ctx, &newInst); #if NORMALIZE /* OPTIONAL normalization of length */ @@ -265,24 +265,24 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_RSQ; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].DstRegister.Index = tmp0; + newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].SrcRegister.Index = tmp0; ctx->emit_instruction(ctx, &newInst); /* RCP t0.x, t0.x; */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_RCP; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].DstRegister.Index = tmp0; + newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].SrcRegister.Index = tmp0; ctx->emit_instruction(ctx, &newInst); #endif @@ -290,16 +290,16 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_SGT; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].DstRegister.Index = tmp0; + newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].SrcRegister.Index = tmp0; + newInst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + newInst.Src[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.Src[1].SrcRegister.Index = texInput; + newInst.Src[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; ctx->emit_instruction(ctx, &newInst); /* KIL -tmp0.yyyy; # if -tmp0.y < 0, KILL */ @@ -307,13 +307,13 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.Instruction.Opcode = TGSI_OPCODE_KIL; newInst.Instruction.NumDstRegs = 0; newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].SrcRegister.Index = tmp0; + newInst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + newInst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + newInst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + newInst.Src[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + newInst.Src[0].SrcRegister.Negate = 1; ctx->emit_instruction(ctx, &newInst); @@ -323,77 +323,77 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_SUB; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].DstRegister.Index = tmp0; + newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; + newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.Src[0].SrcRegister.Index = texInput; + newInst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + newInst.Src[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.Src[1].SrcRegister.Index = texInput; + newInst.Src[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; ctx->emit_instruction(ctx, &newInst); /* RCP t0.z, t0.z; # t0.z = 1 / m */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_RCP; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].DstRegister.Index = tmp0; + newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z; + newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].SrcRegister.Index = tmp0; + newInst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z; ctx->emit_instruction(ctx, &newInst); /* SUB t0.y, 1, t0.x; # d = 1 - d */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_SUB; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].DstRegister.Index = tmp0; + newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.Src[0].SrcRegister.Index = texInput; + newInst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.Src[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.Src[1].SrcRegister.Index = tmp0; + newInst.Src[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; ctx->emit_instruction(ctx, &newInst); /* MUL t0.w, t0.y, t0.z; # coverage = d * m */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MUL; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].DstRegister.Index = tmp0; + newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z; + newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].SrcRegister.Index = tmp0; + newInst.Src[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + newInst.Src[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.Src[1].SrcRegister.Index = tmp0; + newInst.Src[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z; ctx->emit_instruction(ctx, &newInst); /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_SLE; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].DstRegister.Index = tmp0; + newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z; + newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].SrcRegister.Index = tmp0; + newInst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + newInst.Src[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.Src[1].SrcRegister.Index = texInput; + newInst.Src[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z; ctx->emit_instruction(ctx, &newInst); /* CMP t0.w, -t0.y, tex.w, t0.w; @@ -405,29 +405,29 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_CMP; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].DstRegister.Index = tmp0; + newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; newInst.Instruction.NumSrcRegs = 3; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[2].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].SrcRegister.Index = tmp0; + newInst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + newInst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + newInst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + newInst.Src[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + newInst.Src[0].SrcRegister.Negate = 1; + newInst.Src[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.Src[1].SrcRegister.Index = texInput; + newInst.Src[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + newInst.Src[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.Src[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + newInst.Src[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + newInst.Src[2].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.Src[2].SrcRegister.Index = tmp0; + newInst.Src[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + newInst.Src[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.Src[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + newInst.Src[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; ctx->emit_instruction(ctx, &newInst); } @@ -439,26 +439,26 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MOV; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; + newInst.Dst[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.Dst[0].DstRegister.Index = aactx->colorOutput; + newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].SrcRegister.Index = aactx->colorTemp; ctx->emit_instruction(ctx, &newInst); /* MUL result.color.w, colorTemp, tmp0.w; */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MUL; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Dst[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.Dst[0].DstRegister.Index = aactx->colorOutput; + newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->tmp0; + newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].SrcRegister.Index = aactx->colorTemp; + newInst.Src[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.Src[1].SrcRegister.Index = aactx->tmp0; ctx->emit_instruction(ctx, &newInst); } else { @@ -468,7 +468,7 @@ aa_transform_inst(struct tgsi_transform_context *ctx, uint i; for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + struct tgsi_full_dst_register *dst = &inst->Dst[i]; if (dst->DstRegister.File == TGSI_FILE_OUTPUT && dst->DstRegister.Index == aactx->colorOutput) { dst->DstRegister.File = TGSI_FILE_TEMPORARY; diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 75774e6626..fe0d511218 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -280,28 +280,28 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MUL; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; + newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].DstRegister.Index = pctx->texTemp; newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = wincoordInput; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_IMMEDIATE; - newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->numImmed; + newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.Src[0].SrcRegister.Index = wincoordInput; + newInst.Src[1].SrcRegister.File = TGSI_FILE_IMMEDIATE; + newInst.Src[1].SrcRegister.Index = pctx->numImmed; ctx->emit_instruction(ctx, &newInst); /* TEX texTemp, texTemp, sampler; */ newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_TEX; newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; + newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].DstRegister.Index = pctx->texTemp; newInst.Instruction.NumSrcRegs = 2; newInst.Instruction.Texture = TRUE; - newInst.InstructionTexture.Texture = TGSI_TEXTURE_2D; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->freeSampler; + newInst.Texture.Texture = TGSI_TEXTURE_2D; + newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].SrcRegister.Index = pctx->texTemp; + newInst.Src[1].SrcRegister.File = TGSI_FILE_SAMPLER; + newInst.Src[1].SrcRegister.Index = pctx->freeSampler; ctx->emit_instruction(ctx, &newInst); /* KIL -texTemp; # if -texTemp < 0, KILL fragment */ @@ -309,9 +309,9 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, newInst.Instruction.Opcode = TGSI_OPCODE_KIL; newInst.Instruction.NumDstRegs = 0; newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; - newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].SrcRegister.Index = pctx->texTemp; + newInst.Src[0].SrcRegister.Negate = 1; ctx->emit_instruction(ctx, &newInst); } diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 88bc790b62..a9c8715bc8 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -956,7 +956,7 @@ static void emit_print( struct aos_compilation *cp, static boolean emit_ABS( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); struct x86_reg neg = aos_get_internal(cp, IMM_NEGS); struct x86_reg tmp = aos_get_xmm_reg(cp); @@ -964,27 +964,27 @@ static boolean emit_ABS( struct aos_compilation *cp, const struct tgsi_full_inst sse_mulps(cp->func, tmp, neg); sse_maxps(cp->func, tmp, arg0); - store_dest(cp, &op->FullDstRegisters[0], tmp); + store_dest(cp, &op->Dst[0], tmp); return TRUE; } static boolean emit_ADD( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg dst = get_xmm_writable(cp, arg0); sse_addps(cp->func, dst, arg1); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_COS( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - x87_fld_src(cp, &op->FullSrcRegisters[0], 0); + x87_fld_src(cp, &op->Src[0], 0); x87_fcos(cp->func); - x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + x87_fstp_dest4(cp, &op->Dst[0]); return TRUE; } @@ -993,8 +993,8 @@ static boolean emit_COS( struct aos_compilation *cp, const struct tgsi_full_inst */ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg tmp = aos_get_xmm_reg(cp); struct x86_reg dst = get_xmm_writable(cp, arg0); @@ -1007,14 +1007,14 @@ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_inst sse_addss(cp->func, dst, tmp); aos_release_xmm_reg(cp, tmp.idx); - store_scalar_dest(cp, &op->FullDstRegisters[0], dst); + store_scalar_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg tmp = aos_get_xmm_reg(cp); struct x86_reg dst = get_xmm_writable(cp, arg0); @@ -1028,14 +1028,14 @@ static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_inst sse_addss(cp->func, dst, tmp); aos_release_xmm_reg(cp, tmp.idx); - store_scalar_dest(cp, &op->FullDstRegisters[0], dst); + store_scalar_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg tmp = aos_get_xmm_reg(cp); struct x86_reg dst = get_xmm_writable(cp, arg0); @@ -1051,14 +1051,14 @@ static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_inst sse_addss(cp->func, dst, tmp); aos_release_xmm_reg(cp, tmp.idx); - store_scalar_dest(cp, &op->FullDstRegisters[0], dst); + store_scalar_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_DST( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg dst = aos_get_xmm_reg(cp); struct x86_reg tmp = aos_get_xmm_reg(cp); struct x86_reg ones = aos_get_internal(cp, IMM_ONES); @@ -1073,25 +1073,25 @@ static boolean emit_DST( struct aos_compilation *cp, const struct tgsi_full_inst sse_mulps(cp->func, dst, tmp); aos_release_xmm_reg(cp, tmp.idx); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_LG2( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { x87_fld1(cp->func); /* 1 */ - x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0 1 */ + x87_fld_src(cp, &op->Src[0], 0); /* a0 1 */ x87_fyl2x(cp->func); /* log2(a0) */ - x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + x87_fstp_dest4(cp, &op->Dst[0]); return TRUE; } #if 0 static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - x87_fld_src(cp, &op->FullSrcRegisters[0], 0); + x87_fld_src(cp, &op->Src[0], 0); x87_emit_ex2(cp); - x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + x87_fstp_dest4(cp, &op->Dst[0]); return TRUE; } #endif @@ -1099,8 +1099,8 @@ static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); - unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); + unsigned writemask = op->Dst[0].DstRegister.WriteMask; int i; set_fpu_round_neg_inf( cp ); @@ -1109,7 +1109,7 @@ static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_inst */ for (i = 3; i >= 0; i--) { if (writemask & (1<FullSrcRegisters[0], i); + x87_fld_src(cp, &op->Src[0], i); } } @@ -1126,8 +1126,8 @@ static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_RND( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); - unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); + unsigned writemask = op->Dst[0].DstRegister.WriteMask; int i; set_fpu_round_nearest( cp ); @@ -1136,7 +1136,7 @@ static boolean emit_RND( struct aos_compilation *cp, const struct tgsi_full_inst */ for (i = 3; i >= 0; i--) { if (writemask & (1<FullSrcRegisters[0], i); + x87_fld_src(cp, &op->Src[0], i); } } @@ -1153,10 +1153,10 @@ static boolean emit_RND( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); + struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); struct x86_reg st0 = x86_make_reg(file_x87, 0); struct x86_reg st1 = x86_make_reg(file_x87, 1); - unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + unsigned writemask = op->Dst[0].DstRegister.WriteMask; int i; set_fpu_round_neg_inf( cp ); @@ -1166,7 +1166,7 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst */ for (i = 3; i >= 0; i--) { if (writemask & (1<FullSrcRegisters[0], i); + x87_fld_src(cp, &op->Src[0], i); } } @@ -1190,7 +1190,7 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); - unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + unsigned writemask = op->Dst[0].DstRegister.WriteMask; unsigned lit_count = cp->lit_count++; struct x86_reg result, arg0; unsigned i; @@ -1209,10 +1209,10 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst if (writemask != TGSI_WRITEMASK_XYZW) result = x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, tmp[0])); else - result = get_dst_ptr(cp, &op->FullDstRegisters[0]); + result = get_dst_ptr(cp, &op->Dst[0]); - arg0 = fetch_src( cp, &op->FullSrcRegisters[0] ); + arg0 = fetch_src( cp, &op->Src[0] ); if (arg0.file == file_XMM) { struct x86_reg tmp = x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, tmp[1])); @@ -1259,7 +1259,7 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst if (writemask != TGSI_WRITEMASK_XYZW) { store_dest( cp, - &op->FullDstRegisters[0], + &op->Dst[0], get_xmm_writable( cp, result ) ); } @@ -1269,8 +1269,8 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst #if 0 static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); - unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); + unsigned writemask = op->Dst[0].DstRegister.WriteMask; if (writemask & TGSI_WRITEMASK_YZ) { struct x86_reg st1 = x86_make_reg(file_x87, 1); @@ -1286,13 +1286,13 @@ static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_fu */ x87_fldz(cp->func); /* 1 0 */ #endif - x87_fld_src(cp, &op->FullSrcRegisters[0], 1); /* a1 1 0 */ + x87_fld_src(cp, &op->Src[0], 1); /* a1 1 0 */ x87_fcomi(cp->func, st2); /* a1 1 0 */ x87_fcmovb(cp->func, st1); /* a1' 1 0 */ x87_fstp(cp->func, st1); /* a1' 0 */ x87_fstp(cp->func, st1); /* a1' */ - x87_fld_src(cp, &op->FullSrcRegisters[0], 3); /* a3 a1' */ + x87_fld_src(cp, &op->Src[0], 3); /* a3 a1' */ x87_fxch(cp->func, st1); /* a1' a3 */ @@ -1305,7 +1305,7 @@ static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_fu /* a0' = max2(a0, 0): */ x87_fldz(cp->func); /* 0 r2 */ - x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0 0 r2 */ + x87_fld_src(cp, &op->Src[0], 0); /* a0 0 r2 */ x87_fcomi(cp->func, st1); x87_fcmovb(cp->func, st1); /* a0' 0 r2 */ @@ -1333,58 +1333,58 @@ static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_fu static boolean emit_MAX( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg dst = get_xmm_writable(cp, arg0); sse_maxps(cp->func, dst, arg1); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg dst = get_xmm_writable(cp, arg0); sse_minps(cp->func, dst, arg1); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_MOV( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); struct x86_reg dst = get_xmm_writable(cp, arg0); /* potentially nothing to do */ - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_MUL( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg dst = get_xmm_writable(cp, arg0); sse_mulps(cp->func, dst, arg1); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg arg2 = fetch_src(cp, &op->FullSrcRegisters[2]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); + struct x86_reg arg2 = fetch_src(cp, &op->Src[2]); /* If we can't clobber old contents of arg0, get a temporary & copy * it there, then clobber it... @@ -1393,7 +1393,7 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst sse_mulps(cp->func, arg0, arg1); sse_addps(cp->func, arg0, arg2); - store_dest(cp, &op->FullDstRegisters[0], arg0); + store_dest(cp, &op->Dst[0], arg0); return TRUE; } @@ -1425,13 +1425,13 @@ static float PIPE_CDECL _exp2(float x) static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { #if 0 - x87_fld_src(cp, &op->FullSrcRegisters[1], 0); /* a1.x */ - x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0.x a1.x */ + x87_fld_src(cp, &op->Src[1], 0); /* a1.x */ + x87_fld_src(cp, &op->Src[0], 0); /* a0.x a1.x */ x87_fyl2x(cp->func); /* a1*log2(a0) */ x87_emit_ex2( cp ); /* 2^(a1*log2(a0)) */ - x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + x87_fstp_dest4(cp, &op->Dst[0]); #else uint i; @@ -1450,9 +1450,9 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -8) ); - x87_fld_src( cp, &op->FullSrcRegisters[1], 0 ); + x87_fld_src( cp, &op->Src[1], 0 ); x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 4 ) ); - x87_fld_src( cp, &op->FullSrcRegisters[0], 0 ); + x87_fld_src( cp, &op->Src[0], 0 ); x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) ); /* tmp_EAX has been pushed & will be restored below */ @@ -1467,7 +1467,7 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst */ cp->func->x87_stack++; - x87_fstp_dest4( cp, &op->FullDstRegisters[0] ); + x87_fstp_dest4( cp, &op->Dst[0] ); #endif return TRUE; } @@ -1493,7 +1493,7 @@ static boolean emit_EXPBASE2( struct aos_compilation *cp, const struct tgsi_full x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -4) ); - x87_fld_src( cp, &op->FullSrcRegisters[0], 0 ); + x87_fld_src( cp, &op->Src[0], 0 ); x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) ); /* tmp_EAX has been pushed & will be restored below */ @@ -1508,7 +1508,7 @@ static boolean emit_EXPBASE2( struct aos_compilation *cp, const struct tgsi_full */ cp->func->x87_stack++; - x87_fstp_dest4( cp, &op->FullDstRegisters[0] ); + x87_fstp_dest4( cp, &op->Dst[0] ); return TRUE; } @@ -1517,7 +1517,7 @@ static boolean emit_EXPBASE2( struct aos_compilation *cp, const struct tgsi_full static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); struct x86_reg dst = aos_get_xmm_reg(cp); if (cp->have_sse2) { @@ -1531,7 +1531,7 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst sse_divss(cp->func, dst, arg0); } - store_scalar_dest(cp, &op->FullDstRegisters[0], dst); + store_scalar_dest(cp, &op->Dst[0], dst); return TRUE; } @@ -1551,14 +1551,14 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { if (0) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); struct x86_reg r = aos_get_xmm_reg(cp); sse_rsqrtss(cp->func, r, arg0); - store_scalar_dest(cp, &op->FullDstRegisters[0], r); + store_scalar_dest(cp, &op->Dst[0], r); return TRUE; } else { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); struct x86_reg r = aos_get_xmm_reg(cp); struct x86_reg neg_half = get_reg_ptr( cp, AOS_FILE_INTERNAL, IMM_RSQ ); @@ -1578,7 +1578,7 @@ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_inst sse_addss( cp->func, tmp, one_point_five ); /* 1.5 - .5 * a * r * r */ sse_mulss( cp->func, r, tmp ); /* r * (1.5 - .5 * a * r * r) */ - store_scalar_dest(cp, &op->FullDstRegisters[0], r); + store_scalar_dest(cp, &op->Dst[0], r); aos_release_xmm_reg(cp, tmp.idx); @@ -1589,23 +1589,23 @@ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_SGE( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg ones = aos_get_internal(cp, IMM_ONES); struct x86_reg dst = get_xmm_writable(cp, arg0); sse_cmpps(cp->func, dst, arg1, cc_NotLessThan); sse_andps(cp->func, dst, ones); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_SIN( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - x87_fld_src(cp, &op->FullSrcRegisters[0], 0); + x87_fld_src(cp, &op->Src[0], 0); x87_fsin(cp->func); - x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + x87_fstp_dest4(cp, &op->Dst[0]); return TRUE; } @@ -1613,46 +1613,46 @@ static boolean emit_SIN( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_SLT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg ones = aos_get_internal(cp, IMM_ONES); struct x86_reg dst = get_xmm_writable(cp, arg0); sse_cmpps(cp->func, dst, arg1, cc_LessThan); sse_andps(cp->func, dst, ones); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_SUB( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg dst = get_xmm_writable(cp, arg0); sse_subps(cp->func, dst, arg1); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->Dst[0], dst); return TRUE; } static boolean emit_TRUNC( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); struct x86_reg tmp0 = aos_get_xmm_reg(cp); sse2_cvttps2dq(cp->func, tmp0, arg0); sse2_cvtdq2ps(cp->func, tmp0, tmp0); - store_dest(cp, &op->FullDstRegisters[0], tmp0); + store_dest(cp, &op->Dst[0], tmp0); return TRUE; } static boolean emit_XPD( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg0 = fetch_src(cp, &op->Src[0]); + struct x86_reg arg1 = fetch_src(cp, &op->Src[1]); struct x86_reg tmp0 = aos_get_xmm_reg(cp); struct x86_reg tmp1 = aos_get_xmm_reg(cp); @@ -1670,7 +1670,7 @@ static boolean emit_XPD( struct aos_compilation *cp, const struct tgsi_full_inst aos_release_xmm_reg(cp, tmp0.idx); - store_dest(cp, &op->FullDstRegisters[0], tmp1); + store_dest(cp, &op->Dst[0], tmp1); return TRUE; } @@ -1897,10 +1897,10 @@ static void find_last_write_outputs( struct aos_compilation *cp ) continue; for (i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++) { - if (parse.FullToken.FullInstruction.FullDstRegisters[i].DstRegister.File == + if (parse.FullToken.FullInstruction.Dst[i].DstRegister.File == TGSI_FILE_OUTPUT) { - unsigned idx = parse.FullToken.FullInstruction.FullDstRegisters[i].DstRegister.Index; + unsigned idx = parse.FullToken.FullInstruction.Dst[i].DstRegister.Index; cp->output_last_write[idx] = this_instruction; } } diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index bf84401e11..fbf4d2636d 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -234,7 +234,7 @@ translate_instruction(llvm::Module *module, inputs[3] = 0; for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { - struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + struct tgsi_full_src_register *src = &inst->Src[i]; llvm::Value *val = 0; llvm::Value *indIdx = 0; @@ -656,7 +656,7 @@ translate_instruction(llvm::Module *module, /* store results */ for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + struct tgsi_full_dst_register *dst = &inst->Dst[i]; if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); @@ -683,7 +683,7 @@ translate_instructionir(llvm::Module *module, std::vector< std::vector > inputs(inst->Instruction.NumSrcRegs); for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { - struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + struct tgsi_full_src_register *src = &inst->Src[i]; std::vector val; llvm::Value *indIdx = 0; int swizzle = swizzleInt(src); @@ -993,7 +993,7 @@ translate_instructionir(llvm::Module *module, /* store results */ for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + struct tgsi_full_dst_register *dst = &inst->Dst[i]; storage->store((enum tgsi_file_type)dst->DstRegister.File, dst->DstRegister.Index, out, dst->DstRegister.WriteMask, instr->getIRBuilder() ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 2e6c5b38b4..7ec832aad9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -473,14 +473,14 @@ tgsi_default_full_instruction( void ) unsigned i; full_instruction.Instruction = tgsi_default_instruction(); - full_instruction.InstructionPredicate = tgsi_default_instruction_predicate(); - full_instruction.InstructionLabel = tgsi_default_instruction_label(); - full_instruction.InstructionTexture = tgsi_default_instruction_texture(); + full_instruction.Predicate = tgsi_default_instruction_predicate(); + full_instruction.Label = tgsi_default_instruction_label(); + full_instruction.Texture = tgsi_default_instruction_texture(); for( i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) { - full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register(); + full_instruction.Dst[i] = tgsi_default_full_dst_register(); } for( i = 0; i < TGSI_FULL_MAX_SRC_REGISTERS; i++ ) { - full_instruction.FullSrcRegisters[i] = tgsi_default_full_src_register(); + full_instruction.Src[i] = tgsi_default_full_src_register(); } return full_instruction; @@ -521,18 +521,18 @@ tgsi_build_full_instruction( size++; *instruction_predicate = - tgsi_build_instruction_predicate(full_inst->InstructionPredicate.Index, - full_inst->InstructionPredicate.Negate, - full_inst->InstructionPredicate.SwizzleX, - full_inst->InstructionPredicate.SwizzleY, - full_inst->InstructionPredicate.SwizzleZ, - full_inst->InstructionPredicate.SwizzleW, + tgsi_build_instruction_predicate(full_inst->Predicate.Index, + full_inst->Predicate.Negate, + full_inst->Predicate.SwizzleX, + full_inst->Predicate.SwizzleY, + full_inst->Predicate.SwizzleZ, + full_inst->Predicate.SwizzleW, instruction, header); } if( tgsi_compare_instruction_label( - full_inst->InstructionLabel, + full_inst->Label, tgsi_default_instruction_label() ) ) { struct tgsi_instruction_label *instruction_label; @@ -543,7 +543,7 @@ tgsi_build_full_instruction( size++; *instruction_label = tgsi_build_instruction_label( - full_inst->InstructionLabel.Label, + full_inst->Label.Label, prev_token, instruction, header ); @@ -551,7 +551,7 @@ tgsi_build_full_instruction( } if( tgsi_compare_instruction_texture( - full_inst->InstructionTexture, + full_inst->Texture, tgsi_default_instruction_texture() ) ) { struct tgsi_instruction_texture *instruction_texture; @@ -562,7 +562,7 @@ tgsi_build_full_instruction( size++; *instruction_texture = tgsi_build_instruction_texture( - full_inst->InstructionTexture.Texture, + full_inst->Texture.Texture, prev_token, instruction, header ); @@ -570,7 +570,7 @@ tgsi_build_full_instruction( } for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) { - const struct tgsi_full_dst_register *reg = &full_inst->FullDstRegisters[i]; + const struct tgsi_full_dst_register *reg = &full_inst->Dst[i]; struct tgsi_dst_register *dst_register; struct tgsi_token *prev_token; @@ -613,7 +613,7 @@ tgsi_build_full_instruction( } for( i = 0; i < full_inst->Instruction.NumSrcRegs; i++ ) { - const struct tgsi_full_src_register *reg = &full_inst->FullSrcRegisters[i]; + const struct tgsi_full_src_register *reg = &full_inst->Src[i]; struct tgsi_src_register *src_register; struct tgsi_token *prev_token; diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 8f26d5dae3..4ff7f4b11e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -352,7 +352,7 @@ iter_instruction( } for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + const struct tgsi_full_dst_register *dst = &inst->Dst[i]; if (!first_reg) CHR( ',' ); @@ -380,7 +380,7 @@ iter_instruction( } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + const struct tgsi_full_src_register *src = &inst->Src[i]; if (!first_reg) CHR( ',' ); @@ -429,7 +429,7 @@ iter_instruction( if (inst->Instruction.Texture) { TXT( ", " ); - ENM( inst->InstructionTexture.Texture, texture_names ); + ENM( inst->Texture.Texture, texture_names ); } switch (inst->Instruction.Opcode) { @@ -439,7 +439,7 @@ iter_instruction( case TGSI_OPCODE_ENDLOOP: case TGSI_OPCODE_CAL: TXT( " :" ); - UID( inst->InstructionLabel.Label ); + UID( inst->Label.Label ); break; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c index 11d28b1653..194b2473bc 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -334,8 +334,8 @@ dump_instruction_verbose( } for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i]; + struct tgsi_full_dst_register *dst = &inst->Dst[i]; + struct tgsi_full_dst_register *fd = &fi->Dst[i]; EOL(); TXT( "\nFile : " ); @@ -387,8 +387,8 @@ dump_instruction_verbose( } for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { - struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; - struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i]; + struct tgsi_full_src_register *src = &inst->Src[i]; + struct tgsi_full_src_register *fs = &fi->Src[i]; EOL(); TXT( "\nFile : "); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index c113f4a3bc..a9bfb0d6df 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -107,10 +107,10 @@ #define TEMP_P0 TGSI_EXEC_TEMP_P0 #define IS_CHANNEL_ENABLED(INST, CHAN)\ - ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[0].DstRegister.WriteMask & (1 << (CHAN))) #define IS_CHANNEL_ENABLED2(INST, CHAN)\ - ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[1].DstRegister.WriteMask & (1 << (CHAN))) #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ @@ -188,7 +188,7 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) { uint i, chan; - uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; + uint writemask = inst->Dst[0].DstRegister.WriteMask; if (writemask == TGSI_WRITEMASK_X || writemask == TGSI_WRITEMASK_Y || writemask == TGSI_WRITEMASK_Z || @@ -200,15 +200,15 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) /* loop over src regs */ for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - if ((inst->FullSrcRegisters[i].SrcRegister.File == - inst->FullDstRegisters[0].DstRegister.File) && - (inst->FullSrcRegisters[i].SrcRegister.Index == - inst->FullDstRegisters[0].DstRegister.Index)) { + if ((inst->Src[i].SrcRegister.File == + inst->Dst[0].DstRegister.File) && + (inst->Src[i].SrcRegister.Index == + inst->Dst[0].DstRegister.Index)) { /* loop over dest channels */ uint channelsWritten = 0x0; FOR_EACH_ENABLED_CHANNEL(*inst, chan) { /* check if we're reading a channel that's been written */ - uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->FullSrcRegisters[i], chan); + uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); if (channelsWritten & (1 << swizzle)) { return TRUE; } @@ -1500,27 +1500,27 @@ store_dest( switch (chan_index) { case CHAN_X: - swizzle = inst->InstructionPredicate.SwizzleX; + swizzle = inst->Predicate.SwizzleX; break; case CHAN_Y: - swizzle = inst->InstructionPredicate.SwizzleY; + swizzle = inst->Predicate.SwizzleY; break; case CHAN_Z: - swizzle = inst->InstructionPredicate.SwizzleZ; + swizzle = inst->Predicate.SwizzleZ; break; case CHAN_W: - swizzle = inst->InstructionPredicate.SwizzleW; + swizzle = inst->Predicate.SwizzleW; break; default: assert(0); return; } - assert(inst->InstructionPredicate.Index == 0); + assert(inst->Predicate.Index == 0); - pred = &mach->Predicates[inst->InstructionPredicate.Index].xyzw[swizzle]; + pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle]; - if (inst->InstructionPredicate.Negate) { + if (inst->Predicate.Negate) { for (i = 0; i < QUAD_SIZE; i++) { if (pred->u[i]) { execmask &= ~(1 << i); @@ -1572,10 +1572,10 @@ store_dest( } #define FETCH(VAL,INDEX,CHAN)\ - fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) + fetch_source (mach, VAL, &inst->Src[INDEX], CHAN) #define STORE(VAL,INDEX,CHAN)\ - store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) + store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN ) /** @@ -1601,7 +1601,7 @@ exec_kil(struct tgsi_exec_machine *mach, /* unswizzle channel */ swizzle = tgsi_util_get_full_src_register_swizzle ( - &inst->FullSrcRegisters[0], + &inst->Src[0], chan_index); /* check if the component has not been already tested */ @@ -1668,14 +1668,14 @@ exec_tex(struct tgsi_exec_machine *mach, boolean biasLod, boolean projected) { - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + const uint unit = inst->Src[1].SrcRegister.Index; union tgsi_exec_channel r[4]; uint chan_index; float lodBias; /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ - switch (inst->InstructionTexture.Texture) { + switch (inst->Texture.Texture) { case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: @@ -1765,7 +1765,7 @@ static void exec_txd(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { - const uint unit = inst->FullSrcRegisters[3].SrcRegister.Index; + const uint unit = inst->Src[3].SrcRegister.Index; union tgsi_exec_channel r[4]; uint chan_index; @@ -1773,7 +1773,7 @@ exec_txd(struct tgsi_exec_machine *mach, * XXX: This is fake TXD -- the derivatives are not taken into account, yet. */ - switch (inst->InstructionTexture.Texture) { + switch (inst->Texture.Texture) { case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: @@ -2740,7 +2740,7 @@ exec_instruction( mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; /* Finally, jump to the subroutine */ - *pc = inst->InstructionLabel.Label; + *pc = inst->Label.Label; } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index d4f27499b8..ff593fdc32 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -153,36 +153,36 @@ tgsi_parse_token( copy_token(&inst->Instruction, &token); if (inst->Instruction.Predicate) { - next_token(ctx, &inst->InstructionPredicate); + next_token(ctx, &inst->Predicate); } if (inst->Instruction.Label) { - next_token( ctx, &inst->InstructionLabel); + next_token( ctx, &inst->Label); } if (inst->Instruction.Texture) { - next_token( ctx, &inst->InstructionTexture); + next_token( ctx, &inst->Texture); } assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS ); for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { - next_token( ctx, &inst->FullDstRegisters[i].DstRegister ); + next_token( ctx, &inst->Dst[i].DstRegister ); /* * No support for indirect or multi-dimensional addressing. */ - assert( !inst->FullDstRegisters[i].DstRegister.Dimension ); + assert( !inst->Dst[i].DstRegister.Dimension ); - if( inst->FullDstRegisters[i].DstRegister.Indirect ) { - next_token( ctx, &inst->FullDstRegisters[i].DstRegisterInd ); + if( inst->Dst[i].DstRegister.Indirect ) { + next_token( ctx, &inst->Dst[i].DstRegisterInd ); /* * No support for indirect or multi-dimensional addressing. */ - assert( !inst->FullDstRegisters[i].DstRegisterInd.Dimension ); - assert( !inst->FullDstRegisters[i].DstRegisterInd.Indirect ); + assert( !inst->Dst[i].DstRegisterInd.Dimension ); + assert( !inst->Dst[i].DstRegisterInd.Indirect ); } } @@ -190,34 +190,34 @@ tgsi_parse_token( for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { - next_token( ctx, &inst->FullSrcRegisters[i].SrcRegister ); + next_token( ctx, &inst->Src[i].SrcRegister ); - if( inst->FullSrcRegisters[i].SrcRegister.Indirect ) { - next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterInd ); + if( inst->Src[i].SrcRegister.Indirect ) { + next_token( ctx, &inst->Src[i].SrcRegisterInd ); /* * No support for indirect or multi-dimensional addressing. */ - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect ); - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension ); + assert( !inst->Src[i].SrcRegisterInd.Indirect ); + assert( !inst->Src[i].SrcRegisterInd.Dimension ); } - if( inst->FullSrcRegisters[i].SrcRegister.Dimension ) { - next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDim ); + if( inst->Src[i].SrcRegister.Dimension ) { + next_token( ctx, &inst->Src[i].SrcRegisterDim ); /* * No support for multi-dimensional addressing. */ - assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Dimension ); + assert( !inst->Src[i].SrcRegisterDim.Dimension ); - if( inst->FullSrcRegisters[i].SrcRegisterDim.Indirect ) { - next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDimInd ); + if( inst->Src[i].SrcRegisterDim.Indirect ) { + next_token( ctx, &inst->Src[i].SrcRegisterDimInd ); /* * No support for indirect or multi-dimensional addressing. */ - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect ); - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension ); + assert( !inst->Src[i].SrcRegisterInd.Indirect ); + assert( !inst->Src[i].SrcRegisterInd.Dimension ); } } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h index 48e6987ab7..2f8f4d488b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -78,11 +78,11 @@ struct tgsi_full_immediate struct tgsi_full_instruction { struct tgsi_instruction Instruction; - struct tgsi_instruction_predicate InstructionPredicate; - struct tgsi_instruction_label InstructionLabel; - struct tgsi_instruction_texture InstructionTexture; - struct tgsi_full_dst_register FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS]; - struct tgsi_full_src_register FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS]; + struct tgsi_instruction_predicate Predicate; + struct tgsi_instruction_label Label; + struct tgsi_instruction_texture Texture; + struct tgsi_full_dst_register Dst[TGSI_FULL_MAX_DST_REGISTERS]; + struct tgsi_full_src_register Src[TGSI_FULL_MAX_SRC_REGISTERS]; }; union tgsi_full_token diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 617fd7f6be..8397f432f9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -60,7 +60,7 @@ const float ppc_builtin_constants[] ALIGN16_ATTRIB = { for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[0].DstRegister.WriteMask & (1 << (CHAN))) #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) @@ -431,7 +431,7 @@ get_src_vec(struct gen_context *gen, struct tgsi_full_instruction *inst, int src_reg, uint chan) { const const struct tgsi_full_src_register *src = - &inst->FullSrcRegisters[src_reg]; + &inst->Src[src_reg]; int vec; uint i; @@ -482,7 +482,7 @@ get_dst_vec(struct gen_context *gen, const struct tgsi_full_instruction *inst, unsigned chan_index) { - const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0]; + const struct tgsi_full_dst_register *reg = &inst->Dst[0]; if (is_ppc_vec_temporary_dst(reg)) { int vec = gen->temps_map[reg->DstRegister.Index][chan_index]; @@ -505,7 +505,7 @@ emit_store(struct gen_context *gen, unsigned chan_index, boolean free_vec) { - const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0]; + const struct tgsi_full_dst_register *reg = &inst->Dst[0]; switch (reg->DstRegister.File) { case TGSI_FILE_OUTPUT: diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 36e27ea52f..8422b91a30 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -212,24 +212,24 @@ iter_instruction( for (i = 0; i < inst->Instruction.NumDstRegs; i++) { check_register_usage( ctx, - inst->FullDstRegisters[i].DstRegister.File, - inst->FullDstRegisters[i].DstRegister.Index, + inst->Dst[i].DstRegister.File, + inst->Dst[i].DstRegister.Index, "destination", FALSE ); } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { check_register_usage( ctx, - inst->FullSrcRegisters[i].SrcRegister.File, - inst->FullSrcRegisters[i].SrcRegister.Index, + inst->Src[i].SrcRegister.File, + inst->Src[i].SrcRegister.Index, "source", - (boolean)inst->FullSrcRegisters[i].SrcRegister.Indirect ); - if (inst->FullSrcRegisters[i].SrcRegister.Indirect) { + (boolean)inst->Src[i].SrcRegister.Indirect ); + if (inst->Src[i].SrcRegister.Indirect) { uint file; int index; - file = inst->FullSrcRegisters[i].SrcRegisterInd.File; - index = inst->FullSrcRegisters[i].SrcRegisterInd.Index; + file = inst->Src[i].SrcRegisterInd.File; + index = inst->Src[i].SrcRegisterInd.Index; check_register_usage( ctx, file, @@ -245,8 +245,8 @@ iter_instruction( switch (inst->Instruction.Opcode) { case TGSI_OPCODE_BGNFOR: case TGSI_OPCODE_ENDFOR: - if (inst->FullDstRegisters[0].DstRegister.File != TGSI_FILE_LOOP || - inst->FullDstRegisters[0].DstRegister.Index != 0) { + if (inst->Dst[0].DstRegister.File != TGSI_FILE_LOOP || + inst->Dst[0].DstRegister.Index != 0) { report_error(ctx, "Destination register must be LOOP[0]"); } break; @@ -254,8 +254,8 @@ iter_instruction( switch (inst->Instruction.Opcode) { case TGSI_OPCODE_BGNFOR: - if (inst->FullSrcRegisters[0].SrcRegister.File != TGSI_FILE_CONSTANT && - inst->FullSrcRegisters[0].SrcRegister.File != TGSI_FILE_IMMEDIATE) { + if (inst->Src[0].SrcRegister.File != TGSI_FILE_CONSTANT && + inst->Src[0].SrcRegister.File != TGSI_FILE_IMMEDIATE) { report_error(ctx, "Source register file must be either CONST or IMM"); } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 69567130e3..be25b3dc5c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -96,7 +96,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, uint i; for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *src = - &fullinst->FullSrcRegisters[i]; + &fullinst->Src[i]; if (src->SrcRegister.File == TGSI_FILE_INPUT) { const int ind = src->SrcRegister.Index; if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FOG) { @@ -205,9 +205,9 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens) struct tgsi_full_instruction *fullinst = &parse.FullToken.FullInstruction; const struct tgsi_full_src_register *src = - &fullinst->FullSrcRegisters[0]; + &fullinst->Src[0]; const struct tgsi_full_dst_register *dst = - &fullinst->FullDstRegisters[0]; + &fullinst->Dst[0]; /* Do a whole bunch of checks for a simple move */ if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV || diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index a6cc3a5398..2d2ee321c9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -58,7 +58,7 @@ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[0].DstRegister.WriteMask & (1 << (CHAN))) #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) @@ -1331,7 +1331,7 @@ emit_fetch( } #define FETCH( FUNC, INST, XMM, INDEX, CHAN )\ - emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN ) + emit_fetch( FUNC, XMM, &(INST).Src[INDEX], CHAN ) /** * Register store. @@ -1402,7 +1402,7 @@ emit_store( } #define STORE( FUNC, INST, XMM, INDEX, CHAN )\ - emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) + emit_store( FUNC, XMM, &(INST).Dst[INDEX], &(INST), CHAN ) static void PIPE_CDECL @@ -1459,13 +1459,13 @@ emit_tex( struct x86_function *func, boolean lodbias, boolean projected) { - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + const uint unit = inst->Src[1].SrcRegister.Index; struct x86_reg args[2]; unsigned count; unsigned i; assert(inst->Instruction.Texture); - switch (inst->InstructionTexture.Texture) { + switch (inst->Texture.Texture) { case TGSI_TEXTURE_1D: count = 1; break; @@ -1720,13 +1720,13 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst) { uint i; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i]; + const struct tgsi_full_src_register *reg = &inst->Src[i]; if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY && reg->SrcRegister.Indirect) return TRUE; } for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i]; + const struct tgsi_full_dst_register *reg = &inst->Dst[i]; if (reg->DstRegister.File == TGSI_FILE_TEMPORARY && reg->DstRegister.Indirect) return TRUE; @@ -2244,7 +2244,7 @@ emit_instruction( case TGSI_OPCODE_KIL: /* conditional kill */ - emit_kil( func, &inst->FullSrcRegisters[0] ); + emit_kil( func, &inst->Src[0] ); break; case TGSI_OPCODE_PK2H: diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index d25f590df7..e9b1a21fb4 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -699,11 +699,11 @@ parse_instruction( } if (i < info->num_dst) { - if (!parse_dst_operand( ctx, &inst.FullDstRegisters[i] )) + if (!parse_dst_operand( ctx, &inst.Dst[i] )) return FALSE; } else if (i < info->num_dst + info->num_src) { - if (!parse_src_operand( ctx, &inst.FullSrcRegisters[i - info->num_dst] )) + if (!parse_src_operand( ctx, &inst.Src[i - info->num_dst] )) return FALSE; } else { @@ -713,7 +713,7 @@ parse_instruction( if (str_match_no_case( &ctx->cur, texture_names[j] )) { if (!is_digit_alpha_underscore( ctx->cur )) { inst.Instruction.Texture = 1; - inst.InstructionTexture.Texture = j; + inst.Texture.Texture = j; break; } } @@ -740,7 +740,7 @@ parse_instruction( return FALSE; } inst.Instruction.Label = 1; - inst.InstructionLabel.Label = target; + inst.Label.Label = target; } advance = tgsi_build_full_instruction( diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index cda6dc134a..34a02b5042 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -213,7 +213,7 @@ create_frag_shader(struct vl_compositor *c) */ for (i = 0; i < 4; ++i) { inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i); - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index c4ba69817f..93e79e7f37 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -237,10 +237,10 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r) ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -415,10 +415,10 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -620,10 +620,10 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -642,10 +642,10 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.Src[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ diff --git a/src/gallium/auxiliary/vl/vl_shader_build.c b/src/gallium/auxiliary/vl/vl_shader_build.c index d052e2c797..82300b1da2 100644 --- a/src/gallium/auxiliary/vl/vl_shader_build.c +++ b/src/gallium/auxiliary/vl/vl_shader_build.c @@ -138,11 +138,11 @@ struct tgsi_full_instruction vl_inst2 inst.Instruction.Opcode = opcode; inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = dst_file; - inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Dst[0].DstRegister.File = dst_file; + inst.Dst[0].DstRegister.Index = dst_index; inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = src_file; - inst.FullSrcRegisters[0].SrcRegister.Index = src_index; + inst.Src[0].SrcRegister.File = src_file; + inst.Src[0].SrcRegister.Index = src_index; return inst; } @@ -162,13 +162,13 @@ struct tgsi_full_instruction vl_inst3 inst.Instruction.Opcode = opcode; inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = dst_file; - inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Dst[0].DstRegister.File = dst_file; + inst.Dst[0].DstRegister.Index = dst_index; inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = src1_file; - inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; - inst.FullSrcRegisters[1].SrcRegister.File = src2_file; - inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; + inst.Src[0].SrcRegister.File = src1_file; + inst.Src[0].SrcRegister.Index = src1_index; + inst.Src[1].SrcRegister.File = src2_file; + inst.Src[1].SrcRegister.Index = src2_index; return inst; } @@ -188,15 +188,15 @@ struct tgsi_full_instruction vl_tex inst.Instruction.Opcode = TGSI_OPCODE_TEX; inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = dst_file; - inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Dst[0].DstRegister.File = dst_file; + inst.Dst[0].DstRegister.Index = dst_index; inst.Instruction.NumSrcRegs = 2; inst.Instruction.Texture = 1; - inst.InstructionTexture.Texture = tex; - inst.FullSrcRegisters[0].SrcRegister.File = src1_file; - inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; - inst.FullSrcRegisters[1].SrcRegister.File = src2_file; - inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; + inst.Texture.Texture = tex; + inst.Src[0].SrcRegister.File = src1_file; + inst.Src[0].SrcRegister.Index = src1_index; + inst.Src[1].SrcRegister.File = src2_file; + inst.Src[1].SrcRegister.Index = src2_index; return inst; } @@ -218,15 +218,15 @@ struct tgsi_full_instruction vl_inst4 inst.Instruction.Opcode = opcode; inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = dst_file; - inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Dst[0].DstRegister.File = dst_file; + inst.Dst[0].DstRegister.Index = dst_index; inst.Instruction.NumSrcRegs = 3; - inst.FullSrcRegisters[0].SrcRegister.File = src1_file; - inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; - inst.FullSrcRegisters[1].SrcRegister.File = src2_file; - inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; - inst.FullSrcRegisters[2].SrcRegister.File = src3_file; - inst.FullSrcRegisters[2].SrcRegister.Index = src3_index; + inst.Src[0].SrcRegister.File = src1_file; + inst.Src[0].SrcRegister.Index = src1_index; + inst.Src[1].SrcRegister.File = src2_file; + inst.Src[1].SrcRegister.Index = src2_index; + inst.Src[2].SrcRegister.File = src3_file; + inst.Src[2].SrcRegister.Index = src3_index; return inst; } diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 19e3ab0844..b0afad349f 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -544,7 +544,7 @@ emit_epilogue(struct codegen *gen) #define FOR_EACH_ENABLED_CHANNEL(inst, ch) \ for (ch = 0; ch < 4; ch++) \ - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) + if (inst->Dst[0].DstRegister.WriteMask & (1 << ch)) static boolean @@ -552,7 +552,7 @@ emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch = 0, src_reg, addr_reg; - src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + src_reg = get_src_reg(gen, ch, &inst->Src[0]); addr_reg = get_address_reg(gen); /* convert float to int */ @@ -570,19 +570,19 @@ emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) int ch, src_reg[4], dst_reg[4]; FOR_EACH_ENABLED_CHANNEL(inst, ch) { - src_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + src_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + dst_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - if (is_register_src(gen, ch, &inst->FullSrcRegisters[0]) && - is_memory_dst(gen, ch, &inst->FullDstRegisters[0])) { + if (is_register_src(gen, ch, &inst->Src[0]) && + is_memory_dst(gen, ch, &inst->Dst[0])) { /* special-case: register to memory store */ - store_dest_reg(gen, src_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, src_reg[ch], ch, &inst->Dst[0]); } else { spe_move(gen->f, dst_reg[ch], src_reg[ch]); - store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, dst_reg[ch], ch, &inst->Dst[0]); } } @@ -601,9 +601,9 @@ emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst) /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */ FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); } /* Loop over Red/Green/Blue/Alpha channels, do the op, store results */ @@ -626,7 +626,7 @@ emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst) /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } /* Free any intermediate temps we allocated */ @@ -645,16 +645,16 @@ emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst) int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4]; FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); + s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); } FOR_EACH_ENABLED_CHANNEL(inst, ch) { spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]); } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); return TRUE; @@ -671,10 +671,10 @@ emit_LRP(struct codegen *gen, const struct tgsi_full_instruction *inst) /* setup/get src/dst/temp regs */ FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); + s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); tmp_reg[ch] = get_itemp(gen); } @@ -687,7 +687,7 @@ emit_LRP(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]); } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); return TRUE; @@ -704,8 +704,8 @@ emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst) int ch, s1_reg[4], d_reg[4], tmp_reg[4]; FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); tmp_reg[ch] = get_itemp(gen); } @@ -726,7 +726,7 @@ emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst) } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); @@ -747,8 +747,8 @@ emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); } /* d = sign bit cleared in s1 */ @@ -757,7 +757,7 @@ emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst) } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); @@ -775,12 +775,12 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) int s2x_reg, s2y_reg, s2z_reg; int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); - s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); - s2x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); - s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); - s2y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); - s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); - s2z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); + s2x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); + s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); + s2y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); + s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); + s2z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); /* t0 = x0 * x1 */ spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg); @@ -795,9 +795,9 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_fa(gen->f, t0_reg, t0_reg, t1_reg); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); spe_move(gen->f, d_reg, t0_reg); - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); } free_itemps(gen); @@ -815,14 +815,14 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) int s1x_reg, s1y_reg, s1z_reg, s1w_reg; int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); - s0x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); - s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); - s0y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); - s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); - s0z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); - s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); - s0w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]); - s1w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]); + s0x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); + s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); + s0y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); + s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); + s0z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); + s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); + s0w_reg = get_src_reg(gen, CHAN_W, &inst->Src[0]); + s1w_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]); /* t0 = x0 * x1 */ spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg); @@ -840,9 +840,9 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_fa(gen->f, t0_reg, t0_reg, t1_reg); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); spe_move(gen->f, d_reg, t0_reg); - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); } free_itemps(gen); @@ -857,31 +857,31 @@ emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst) { /* XXX rewrite this function to look more like DP3/DP4 */ int ch; - int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + int s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); + int s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); int tmp_reg = get_itemp(gen); /* t = x0 * x1 */ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); + s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); /* t = y0 * y1 + t */ spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); /* t = z0 * z1 + t */ spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]); + s2_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]); /* t = w1 + t */ spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); spe_move(gen->f, d_reg, tmp_reg); - store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, tmp_reg, ch, &inst->Dst[0]); } free_itemps(gen); @@ -898,9 +898,9 @@ emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst) int src_reg[3]; int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); - src_reg[0] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); - src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); - src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + src_reg[0] = get_src_reg(gen, CHAN_X, &inst->Src[0]); + src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->Src[0]); + src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->Src[0]); /* t0 = x * x */ spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]); @@ -919,10 +919,10 @@ emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_fi(gen->f, t1_reg, t0_reg, t1_reg); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); /* dst = src[ch] * t1 */ spe_fm(gen->f, d_reg, src_reg[ch], t1_reg); - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); } free_itemps(gen); @@ -936,48 +936,48 @@ emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst) static boolean emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst) { - int s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + int s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); + int s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); int tmp_reg = get_itemp(gen); /* t = z0 * y1 */ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); /* t = y0 * z1 - t */ spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_X)) { - store_dest_reg(gen, tmp_reg, CHAN_X, &inst->FullDstRegisters[0]); + if (inst->Dst[0].DstRegister.WriteMask & (1 << CHAN_X)) { + store_dest_reg(gen, tmp_reg, CHAN_X, &inst->Dst[0]); } - s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); /* t = x0 * z1 */ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); + s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); /* t = z0 * x1 - t */ spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Y)) { - store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->FullDstRegisters[0]); + if (inst->Dst[0].DstRegister.WriteMask & (1 << CHAN_Y)) { + store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->Dst[0]); } - s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); + s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); /* t = y0 * x1 */ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); + s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); /* t = x0 * y1 - t */ spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Z)) { - store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->FullDstRegisters[0]); + if (inst->Dst[0].DstRegister.WriteMask & (1 << CHAN_Z)) { + store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->Dst[0]); } free_itemps(gen); @@ -1000,9 +1000,9 @@ emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst) one_reg = get_const_one_reg(gen); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); } FOR_EACH_ENABLED_CHANNEL(inst, ch) { @@ -1043,7 +1043,7 @@ emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst) } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); @@ -1060,10 +1060,10 @@ emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) int ch; FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int s1_reg = get_src_reg(gen, ch, &inst->Src[0]); + int s2_reg = get_src_reg(gen, ch, &inst->Src[1]); + int s3_reg = get_src_reg(gen, ch, &inst->Src[2]); + int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); int zero_reg = get_itemp(gen); spe_zero(gen->f, zero_reg); @@ -1072,7 +1072,7 @@ emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_fcgt(gen->f, d_reg, zero_reg, s1_reg); spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg); - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); free_itemps(gen); } @@ -1090,8 +1090,8 @@ emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst) int ch, s1_reg[4], d_reg[4]; FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); } /* Convert float to int */ @@ -1105,7 +1105,7 @@ emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst) } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); @@ -1129,8 +1129,8 @@ emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) one_reg = get_const_one_reg(gen); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); tmp_reg[ch] = get_itemp(gen); } @@ -1156,7 +1156,7 @@ emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); @@ -1177,8 +1177,8 @@ emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) one_reg = get_const_one_reg(gen); FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); tmp_reg[ch] = get_itemp(gen); } @@ -1210,7 +1210,7 @@ emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) /* store result */ FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); @@ -1272,7 +1272,7 @@ emit_function_call(struct codegen *gen, if (scalar) { for (a = 0; a < num_args; a++) { - s_regs[a] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[a]); + s_regs[a] = get_src_reg(gen, CHAN_X, &inst->Src[a]); } /* we'll call the function, put the return value in this register, * then replicate it across all write-enabled components in d_reg. @@ -1287,11 +1287,11 @@ emit_function_call(struct codegen *gen, if (!scalar) { for (a = 0; a < num_args; a++) { - s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]); + s_regs[a] = get_src_reg(gen, ch, &inst->Src[a]); } } - d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); if (!scalar || !func_called) { /* for a scalar function, we'll really only call the function once */ @@ -1336,7 +1336,7 @@ emit_function_call(struct codegen *gen, spe_move(gen->f, d_reg, retval_reg); } - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); free_itemps(gen); } @@ -1352,7 +1352,7 @@ static boolean emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) { const uint target = inst->InstructionExtTexture.Texture; - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + const uint unit = inst->Src[1].SrcRegister.Index; uint addr; int ch; int coord_regs[4], d_regs[4]; @@ -1373,14 +1373,14 @@ emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) return FALSE; } - assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER); + assert(inst->Src[1].SrcRegister.File == TGSI_FILE_SAMPLER); spe_comment(gen->f, -4, "CALL tex:"); /* get src/dst reg info */ for (ch = 0; ch < 4; ch++) { - coord_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - d_regs[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + coord_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]); + d_regs[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); } { @@ -1425,7 +1425,7 @@ emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_regs[ch], ch, &inst->Dst[0]); free_itemps(gen); } @@ -1452,7 +1452,7 @@ emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst) /* get src regs */ FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]); } /* test if any src regs are < 0 */ @@ -1500,9 +1500,9 @@ emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4]; FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s0_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); tmp_reg[ch] = get_itemp(gen); } @@ -1518,7 +1518,7 @@ emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) } FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); } free_itemps(gen); @@ -1575,7 +1575,7 @@ emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) /* update conditional execution mask with the predicate register */ int tmp_reg = get_itemp(gen); - int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]); + int s1_reg = get_src_reg(gen, channel, &inst->Src[0]); /* tmp = (s1_reg == 0) */ spe_ceqi(gen->f, tmp_reg, s1_reg, 0); @@ -1699,8 +1699,8 @@ emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst, int ch; FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int s_reg = get_src_reg(gen, ch, &inst->Src[0]); + int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); int t1_reg = get_itemp(gen); int t2_reg = get_itemp(gen); diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 4c32b2d06d..8d58c534be 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -108,10 +108,10 @@ for (CHAN = 0; CHAN < 4; CHAN++) #define IS_CHANNEL_ENABLED(INST, CHAN)\ - ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[0].DstRegister.WriteMask & (1 << (CHAN))) #define IS_CHANNEL_ENABLED2(INST, CHAN)\ - ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[1].DstRegister.WriteMask & (1 << (CHAN))) #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ FOR_EACH_CHANNEL( CHAN )\ @@ -583,10 +583,10 @@ store_dest( } #define FETCH(VAL,INDEX,CHAN)\ - fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) + fetch_source (mach, VAL, &inst->Src[INDEX], CHAN) #define STORE(VAL,INDEX,CHAN)\ - store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) + store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN ) /** @@ -612,7 +612,7 @@ exec_kil(struct spu_exec_machine *mach, /* unswizzle channel */ swizzle = tgsi_util_get_full_src_register_swizzle ( - &inst->FullSrcRegisters[0], + &inst->Src[0], chan_index); /* check if the component has not been already tested */ @@ -677,7 +677,7 @@ exec_tex(struct spu_exec_machine *mach, const struct tgsi_full_instruction *inst, boolean biasLod, boolean projected) { - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + const uint unit = inst->Src[1].SrcRegister.Index; union spu_exec_channel r[8]; uint chan_index; float lodBias; diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index f2554998a9..9e626c85c0 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -276,7 +276,7 @@ static uint get_result_flags(const struct tgsi_full_instruction *inst) { const uint writeMask - = inst->FullDstRegisters[0].DstRegister.WriteMask; + = inst->Dst[0].DstRegister.WriteMask; uint flags = 0x0; if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) @@ -338,14 +338,14 @@ emit_tex(struct i915_fp_compile *p, const struct tgsi_full_instruction *inst, uint opcode) { - uint texture = inst->InstructionTexture.Texture; - uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + uint texture = inst->Texture.Texture; + uint unit = inst->Src[1].SrcRegister.Index; uint tex = translate_tex_src_target( p, texture ); uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); - uint coord = src_vector( p, &inst->FullSrcRegisters[0]); + uint coord = src_vector( p, &inst->Src[0]); i915_emit_texld( p, - get_result_vector( p, &inst->FullDstRegisters[0] ), + get_result_vector( p, &inst->Dst[0] ), get_result_flags( inst ), sampler, coord, @@ -367,13 +367,13 @@ emit_simple_arith(struct i915_fp_compile *p, assert(numArgs <= 3); - arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->FullSrcRegisters[0] ); - arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->FullSrcRegisters[1] ); - arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->FullSrcRegisters[2] ); + arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0] ); + arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1] ); + arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2] ); i915_emit_arith( p, opcode, - get_result_vector( p, &inst->FullDstRegisters[0]), + get_result_vector( p, &inst->Dst[0]), get_result_flags( inst ), 0, arg1, arg2, @@ -393,8 +393,8 @@ emit_simple_arith_swap2(struct i915_fp_compile *p, /* transpose first two registers */ inst2 = *inst; - inst2.FullSrcRegisters[0] = inst->FullSrcRegisters[1]; - inst2.FullSrcRegisters[1] = inst->FullSrcRegisters[0]; + inst2.Src[0] = inst->Src[1]; + inst2.Src[1] = inst->Src[0]; emit_simple_arith(p, &inst2, opcode, numArgs); } @@ -423,10 +423,10 @@ i915_translate_instruction(struct i915_fp_compile *p, switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ABS: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); i915_emit_arith(p, A0_MAX, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, src0, negate(src0, 1, 1, 1, 1), 0); break; @@ -436,17 +436,17 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_CMP: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); - src2 = src_vector(p, &inst->FullSrcRegisters[2]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); + src2 = src_vector(p, &inst->Src[2]); i915_emit_arith(p, A0_CMP, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, src0, src2, src1); /* NOTE: order of src2, src1 */ break; case TGSI_OPCODE_COS: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); tmp = i915_get_utemp(p); i915_emit_arith(p, @@ -489,7 +489,7 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_DP4, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(tmp, ONE, Z, Y, X), i915_emit_const4fv(p, cos_constants), 0); @@ -504,19 +504,19 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_DPH: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); i915_emit_arith(p, A0_DP4, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(src0, X, Y, Z, ONE), src1, 0); break; case TGSI_OPCODE_DST: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); /* result[0] = 1 * 1; * result[1] = a[1] * b[1]; @@ -525,7 +525,7 @@ i915_translate_instruction(struct i915_fp_compile *p, */ i915_emit_arith(p, A0_MUL, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(src0, ONE, Y, Z, ONE), swizzle(src1, ONE, Y, ONE, W), 0); @@ -536,11 +536,11 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_EX2: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); i915_emit_arith(p, A0_EXP, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0, 0); break; @@ -555,7 +555,7 @@ i915_translate_instruction(struct i915_fp_compile *p, case TGSI_OPCODE_KIL: /* kill if src[0].x < 0 || src[0].y < 0 ... */ - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); tmp = i915_get_utemp(p); i915_emit_texld(p, @@ -571,17 +571,17 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_LG2: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); i915_emit_arith(p, A0_LOG, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0, 0); break; case TGSI_OPCODE_LIT: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); tmp = i915_get_utemp(p); /* tmp = max( a.xyzw, a.00zw ) @@ -605,7 +605,7 @@ i915_translate_instruction(struct i915_fp_compile *p, swizzle(tmp, Y, Y, Y, Y), 0, 0); i915_emit_arith(p, A0_CMP, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), swizzle(tmp, ONE, X, ZERO, ONE), @@ -614,9 +614,9 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_LRP: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); - src2 = src_vector(p, &inst->FullSrcRegisters[2]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); + src2 = src_vector(p, &inst->Src[2]); flags = get_result_flags(inst); tmp = i915_get_utemp(p); @@ -631,7 +631,7 @@ i915_translate_instruction(struct i915_fp_compile *p, flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); i915_emit_arith(p, A0_MAD, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); break; @@ -644,8 +644,8 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_MIN: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); tmp = i915_get_utemp(p); flags = get_result_flags(inst); @@ -657,7 +657,7 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_MOV, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0); break; @@ -670,8 +670,8 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_POW: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); tmp = i915_get_utemp(p); flags = get_result_flags(inst); @@ -686,7 +686,7 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_EXP, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), flags, 0, swizzle(tmp, X, X, X, X), 0, 0); break; @@ -695,27 +695,27 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_RCP: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); i915_emit_arith(p, A0_RCP, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0, 0); break; case TGSI_OPCODE_RSQ: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); i915_emit_arith(p, A0_RSQ, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0, 0); break; case TGSI_OPCODE_SCS: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); tmp = i915_get_utemp(p); /* @@ -738,7 +738,7 @@ i915_translate_instruction(struct i915_fp_compile *p, swizzle(tmp, X, Y, X, Y), swizzle(tmp, X, X, ONE, ONE), 0); - writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; + writemask = inst->Dst[0].DstRegister.WriteMask; if (writemask & TGSI_WRITEMASK_Y) { uint tmp1; @@ -756,7 +756,7 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_DP4, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), A0_DEST_CHANNEL_Y, 0, swizzle(tmp1, W, Z, Y, X), i915_emit_const4fv(p, sin_constants), 0); @@ -771,7 +771,7 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_DP4, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), A0_DEST_CHANNEL_X, 0, swizzle(tmp, ONE, Z, Y, X), i915_emit_const4fv(p, cos_constants), 0); @@ -788,7 +788,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_SIN: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src0 = src_vector(p, &inst->Src[0]); tmp = i915_get_utemp(p); i915_emit_arith(p, @@ -831,7 +831,7 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_DP4, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(tmp, W, Z, Y, X), i915_emit_const4fv(p, sin_constants), 0); @@ -847,12 +847,12 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_SUB: - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); i915_emit_arith(p, A0_ADD, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, src0, negate(src1, 1, 1, 1, 1), 0); break; @@ -876,8 +876,8 @@ i915_translate_instruction(struct i915_fp_compile *p, * result.z = src0.x * src1.y - src0.y * src1.x; * result.w = undef; */ - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src0 = src_vector(p, &inst->Src[0]); + src1 = src_vector(p, &inst->Src[1]); tmp = i915_get_utemp(p); i915_emit_arith(p, @@ -888,7 +888,7 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_arith(p, A0_MAD, - get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0, swizzle(src0, Y, Z, X, ONE), swizzle(src1, Z, X, Y, ONE), diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index b2234ef679..893e665e69 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -64,7 +64,7 @@ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - ((INST)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST)->Dst[0].DstRegister.WriteMask & (1 << (CHAN))) #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) @@ -157,7 +157,7 @@ emit_fetch( unsigned index, const unsigned chan_index ) { - const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index]; + const struct tgsi_full_src_register *reg = &inst->Src[index]; unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); LLVMValueRef res; @@ -267,7 +267,7 @@ emit_store( unsigned chan_index, LLVMValueRef value) { - const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[index]; + const struct tgsi_full_dst_register *reg = &inst->Dst[index]; switch( inst->Instruction.Saturate ) { case TGSI_SAT_NONE: @@ -319,14 +319,14 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, boolean projected, LLVMValueRef *texel) { - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + const uint unit = inst->Src[1].SrcRegister.Index; LLVMValueRef lodbias; LLVMValueRef oow; LLVMValueRef coords[3]; unsigned num_coords; unsigned i; - switch (inst->InstructionTexture.Texture) { + switch (inst->Texture.Texture) { case TGSI_TEXTURE_1D: num_coords = 1; break; @@ -375,7 +375,7 @@ emit_kil( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst ) { - const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[0]; + const struct tgsi_full_src_register *reg = &inst->Src[0]; LLVMValueRef terms[NUM_CHANNELS]; LLVMValueRef mask; unsigned chan_index; @@ -423,13 +423,13 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst) { uint i; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i]; + const struct tgsi_full_src_register *reg = &inst->Src[i]; if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY && reg->SrcRegister.Indirect) return TRUE; } for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i]; + const struct tgsi_full_dst_register *reg = &inst->Dst[i]; if (reg->DstRegister.File == TGSI_FILE_TEMPORARY && reg->DstRegister.Indirect) return TRUE; diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c index cd76910744..e82a23d475 100644 --- a/src/gallium/drivers/nv20/nv20_vertprog.c +++ b/src/gallium/drivers/nv20/nv20_vertprog.c @@ -334,7 +334,7 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; + fsrc = &finst->Src[i]; if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(vpc, fsrc); } @@ -343,7 +343,7 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; + fsrc = &finst->Src[i]; switch (fsrc->SrcRegister.File) { case TGSI_FILE_INPUT: if (ai == -1 || ai == fsrc->SrcRegister.Index) { @@ -378,8 +378,8 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc, } } - dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); - mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + dst = tgsi_dst(vpc, &finst->Dst[0]); + mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask); switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index acf216bb61..dfffeb3263 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -363,7 +363,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; + fsrc = &finst->Src[i]; if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(fpc, fsrc); } @@ -372,7 +372,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; + fsrc = &finst->Src[i]; switch (fsrc->SrcRegister.File) { case TGSI_FILE_INPUT: @@ -423,8 +423,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, } } - dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]); - mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + dst = tgsi_dst(fpc, &finst->Dst[0]); + mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask); sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); switch (finst->Instruction.Opcode) { diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index e8fba8ab16..41bd45ad29 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -334,7 +334,7 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; + fsrc = &finst->Src[i]; if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(vpc, fsrc); } @@ -343,7 +343,7 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; + fsrc = &finst->Src[i]; switch (fsrc->SrcRegister.File) { case TGSI_FILE_INPUT: if (ai == -1 || ai == fsrc->SrcRegister.Index) { @@ -378,8 +378,8 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, } } - dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); - mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + dst = tgsi_dst(vpc, &finst->Dst[0]); + mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask); switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index ca6a957fc1..6addc45247 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -364,7 +364,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; + fsrc = &finst->Src[i]; if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(fpc, fsrc); } @@ -373,7 +373,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; + fsrc = &finst->Src[i]; switch (fsrc->SrcRegister.File) { case TGSI_FILE_INPUT: @@ -433,8 +433,8 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, } } - dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]); - mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + dst = tgsi_dst(fpc, &finst->Dst[0]); + mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask); sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); switch (finst->Instruction.Opcode) { diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index ed0f1d857d..0cdc511166 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -405,7 +405,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; + fsrc = &finst->Src[i]; if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(vpc, fsrc); } @@ -414,7 +414,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; + fsrc = &finst->Src[i]; switch (fsrc->SrcRegister.File) { case TGSI_FILE_INPUT: @@ -469,8 +469,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, } } - dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); - mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + dst = tgsi_dst(vpc, &finst->Dst[0]); + mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask); switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: @@ -681,7 +681,7 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) const struct tgsi_full_dst_register *fdst; finst = &p.FullToken.FullInstruction; - fdst = &finst->FullDstRegisters[0]; + fdst = &finst->Dst[0]; if (fdst->DstRegister.File == TGSI_FILE_ADDRESS) { if (fdst->DstRegister.Index > high_addr) diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 00518af8c0..9fbf918601 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1535,10 +1535,10 @@ negate_supported(const struct tgsi_full_instruction *insn, int i) for (s = 0; s < insn->Instruction.NumSrcRegs; ++s) { if (s == i) continue; - if ((insn->FullSrcRegisters[s].SrcRegister.Index == - insn->FullSrcRegisters[i].SrcRegister.Index) && - (insn->FullSrcRegisters[s].SrcRegister.File == - insn->FullSrcRegisters[i].SrcRegister.File)) + if ((insn->Src[s].SrcRegister.Index == + insn->Src[i].SrcRegister.Index) && + (insn->Src[s].SrcRegister.File == + insn->Src[i].SrcRegister.File)) return FALSE; } @@ -1549,7 +1549,7 @@ negate_supported(const struct tgsi_full_instruction *insn, int i) static unsigned nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c) { - unsigned x, mask = insn->FullDstRegisters[0].DstRegister.WriteMask; + unsigned x, mask = insn->Dst[0].DstRegister.WriteMask; switch (insn->Instruction.Opcode) { case TGSI_OPCODE_COS: @@ -1578,7 +1578,7 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c) const struct tgsi_instruction_texture *tex; assert(insn->Instruction.Texture); - tex = &insn->InstructionTexture; + tex = &insn->Texture; mask = 0x7; if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) @@ -1850,21 +1850,21 @@ nv50_program_tx_insn(struct nv50_pc *pc, unsigned mask, sat, unit; int i, c; - mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + mask = inst->Dst[0].DstRegister.WriteMask; sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; memset(src, 0, sizeof(src)); for (c = 0; c < 4; c++) { if ((mask & (1 << c)) && !pc->r_dst[c]) - dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); + dst[c] = tgsi_dst(pc, c, &inst->Dst[0]); else dst[c] = pc->r_dst[c]; rdst[c] = dst[c]; } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i]; + const struct tgsi_full_src_register *fs = &inst->Src[i]; unsigned src_mask; boolean neg_supp; @@ -2181,11 +2181,11 @@ nv50_program_tx_insn(struct nv50_pc *pc, break; case TGSI_OPCODE_TEX: emit_tex(pc, dst, mask, src[0], unit, - inst->InstructionTexture.Texture, FALSE); + inst->Texture.Texture, FALSE); break; case TGSI_OPCODE_TXP: emit_tex(pc, dst, mask, src[0], unit, - inst->InstructionTexture.Texture, TRUE); + inst->Texture.Texture, TRUE); break; case TGSI_OPCODE_TRUNC: for (c = 0; c < 4; c++) { @@ -2264,7 +2264,7 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) const struct tgsi_dst_register *dst; unsigned i, c, k, mask; - dst = &insn->FullDstRegisters[0].DstRegister; + dst = &insn->Dst[0].DstRegister; mask = dst->WriteMask; if (dst->File == TGSI_FILE_TEMPORARY) @@ -2282,7 +2282,7 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) } for (i = 0; i < insn->Instruction.NumSrcRegs; i++) { - src = &insn->FullSrcRegisters[i]; + src = &insn->Src[i]; if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) reg = pc->temp; @@ -2379,7 +2379,7 @@ static unsigned nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, unsigned rdep[4]) { - const struct tgsi_full_dst_register *fd = &insn->FullDstRegisters[0]; + const struct tgsi_full_dst_register *fd = &insn->Dst[0]; const struct tgsi_full_src_register *fs; unsigned i, deqs = 0; @@ -2390,7 +2390,7 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, unsigned chn, mask = nv50_tgsi_src_mask(insn, i); boolean neg_supp = negate_supported(insn, i); - fs = &insn->FullSrcRegisters[i]; + fs = &insn->Src[i]; if (fs->SrcRegister.File != fd->DstRegister.File || fs->SrcRegister.Index != fd->DstRegister.Index) continue; @@ -2427,7 +2427,7 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) const struct tgsi_full_dst_register *fd; unsigned i, deqs, rdep[4], m[4]; - fd = &tok->FullInstruction.FullDstRegisters[0]; + fd = &tok->FullInstruction.Dst[0]; deqs = nv50_tgsi_scan_swizzle(&insn, rdep); if (is_scalar_op(insn.Instruction.Opcode)) { @@ -2446,10 +2446,10 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) for (i = 0; i < 4; ++i) { assert(pc->r_dst[m[i]] == NULL); - insn.FullDstRegisters[0].DstRegister.WriteMask = + insn.Dst[0].DstRegister.WriteMask = fd->DstRegister.WriteMask & (1 << m[i]); - if (!insn.FullDstRegisters[0].DstRegister.WriteMask) + if (!insn.Dst[0].DstRegister.WriteMask) continue; if (deqs & (1 << i)) diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 25a634e5a2..82466e245a 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -258,18 +258,18 @@ static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_inst dst->U.I.SaturateMode = translate_saturate(src->Instruction.Saturate); if (src->Instruction.NumDstRegs) - transform_dstreg(ttr, &dst->U.I.DstReg, &src->FullDstRegisters[0]); + transform_dstreg(ttr, &dst->U.I.DstReg, &src->Dst[0]); for(i = 0; i < src->Instruction.NumSrcRegs; ++i) { - if (src->FullSrcRegisters[i].SrcRegister.File == TGSI_FILE_SAMPLER) - dst->U.I.TexSrcUnit = src->FullSrcRegisters[i].SrcRegister.Index; + if (src->Src[i].SrcRegister.File == TGSI_FILE_SAMPLER) + dst->U.I.TexSrcUnit = src->Src[i].SrcRegister.Index; else - transform_srcreg(ttr, &dst->U.I.SrcReg[i], &src->FullSrcRegisters[i]); + transform_srcreg(ttr, &dst->U.I.SrcReg[i], &src->Src[i]); } /* Texturing. */ if (src->Instruction.Texture) - transform_texture(dst, src->InstructionTexture); + transform_texture(dst, src->Texture); } static void handle_immediate(struct tgsi_to_rc * ttr, struct tgsi_full_immediate * imm) diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 3ef6cb1074..39fd7a6025 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -96,7 +96,7 @@ translate_dst_register( struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn, unsigned idx ) { - const struct tgsi_full_dst_register *reg = &insn->FullDstRegisters[idx]; + const struct tgsi_full_dst_register *reg = &insn->Dst[idx]; SVGA3dShaderDestToken dest; switch (reg->DstRegister.File) { @@ -629,7 +629,7 @@ static boolean emit_fake_arl(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); struct src_register src1 = get_fake_arl_const( emit ); SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); SVGA3dShaderDestToken tmp = get_temp( emit ); @@ -653,7 +653,7 @@ static boolean emit_if(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { const struct src_register src = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); struct src_register zero = get_zero_immediate( emit ); SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC ); @@ -690,7 +690,7 @@ static boolean emit_floor(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); SVGA3dShaderDestToken temp = get_temp( emit ); /* FRC TMP, SRC */ @@ -716,11 +716,11 @@ static boolean emit_cmp(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); const struct src_register src1 = translate_src_register( - emit, &insn->FullSrcRegisters[1] ); + emit, &insn->Src[1] ); const struct src_register src2 = translate_src_register( - emit, &insn->FullSrcRegisters[2] ); + emit, &insn->Src[2] ); /* CMP DST, SRC0, SRC2, SRC1 */ return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, src0, src2, src1); @@ -740,9 +740,9 @@ static boolean emit_div(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); const struct src_register src1 = translate_src_register( - emit, &insn->FullSrcRegisters[1] ); + emit, &insn->Src[1] ); SVGA3dShaderDestToken temp = get_temp( emit ); int i; @@ -782,9 +782,9 @@ static boolean emit_dp2(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); const struct src_register src1 = translate_src_register( - emit, &insn->FullSrcRegisters[1] ); + emit, &insn->Src[1] ); SVGA3dShaderDestToken temp = get_temp( emit ); struct src_register temp_src0, temp_src1; @@ -815,9 +815,9 @@ static boolean emit_dph(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); struct src_register src1 = translate_src_register( - emit, &insn->FullSrcRegisters[1] ); + emit, &insn->Src[1] ); SVGA3dShaderDestToken temp = get_temp( emit ); /* DP3 TMP, SRC1, SRC2 */ @@ -846,7 +846,7 @@ static boolean emit_nrm(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); SVGA3dShaderDestToken temp = get_temp( emit ); /* DP3 TMP, SRC, SRC */ @@ -889,7 +889,7 @@ static boolean emit_sincos(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); SVGA3dShaderDestToken temp = get_temp( emit ); /* SCS TMP SRC */ @@ -912,7 +912,7 @@ static boolean emit_sin(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); SVGA3dShaderDestToken temp = get_temp( emit ); /* SCS TMP SRC */ @@ -937,7 +937,7 @@ static boolean emit_cos(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); SVGA3dShaderDestToken temp = get_temp( emit ); /* SCS TMP SRC */ @@ -962,9 +962,9 @@ static boolean emit_sub(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); struct src_register src1 = translate_src_register( - emit, &insn->FullSrcRegisters[1] ); + emit, &insn->Src[1] ); src1 = negate(src1); @@ -980,7 +980,7 @@ static boolean emit_kil(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn ) { SVGA3dShaderInstToken inst; - const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[0]; + const struct tgsi_full_src_register *reg = &insn->Src[0]; struct src_register src0; inst = inst_token( SVGA3DOP_TEXKILL ); @@ -1154,9 +1154,9 @@ static boolean emit_select_op(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); struct src_register src1 = translate_src_register( - emit, &insn->FullSrcRegisters[1] ); + emit, &insn->Src[1] ); return emit_select( emit, compare, dst, src0, src1 ); } @@ -1189,8 +1189,8 @@ static boolean emit_tex2(struct svga_shader_emitter *emit, return FALSE; } - src0 = translate_src_register( emit, &insn->FullSrcRegisters[0] ); - src1 = translate_src_register( emit, &insn->FullSrcRegisters[1] ); + src0 = translate_src_register( emit, &insn->Src[0] ); + src1 = translate_src_register( emit, &insn->Src[1] ); if (emit->key.fkey.tex[src1.base.num].unnormalized) { struct src_register wh = get_tex_dimensions( emit, src1.base.num ); @@ -1231,9 +1231,9 @@ static boolean emit_tex3(struct svga_shader_emitter *emit, break; } - src0 = translate_src_register( emit, &insn->FullSrcRegisters[0] ); - src1 = translate_src_register( emit, &insn->FullSrcRegisters[1] ); - src2 = translate_src_register( emit, &insn->FullSrcRegisters[2] ); + src0 = translate_src_register( emit, &insn->Src[0] ); + src1 = translate_src_register( emit, &insn->Src[1] ); + src2 = translate_src_register( emit, &insn->Src[2] ); return submit_op3( emit, inst, dst, src0, src1, src2 ); } @@ -1245,9 +1245,9 @@ static boolean emit_tex(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); struct src_register src0 = - translate_src_register( emit, &insn->FullSrcRegisters[0] ); + translate_src_register( emit, &insn->Src[0] ); struct src_register src1 = - translate_src_register( emit, &insn->FullSrcRegisters[1] ); + translate_src_register( emit, &insn->Src[1] ); SVGA3dShaderDestToken tex_result; @@ -1359,7 +1359,7 @@ static boolean emit_scalar_op1( struct svga_shader_emitter *emit, inst = inst_token( opcode ); dst = translate_dst_register( emit, insn, 0 ); - src = translate_src_register( emit, &insn->FullSrcRegisters[0] ); + src = translate_src_register( emit, &insn->Src[0] ); src = scalar( src, TGSI_SWIZZLE_X ); return submit_op1( emit, inst, dst, src ); @@ -1370,7 +1370,7 @@ static boolean emit_simple_instruction(struct svga_shader_emitter *emit, unsigned opcode, const struct tgsi_full_instruction *insn ) { - const struct tgsi_full_src_register *src = insn->FullSrcRegisters; + const struct tgsi_full_src_register *src = insn->Src; SVGA3dShaderInstToken inst; SVGA3dShaderDestToken dst; @@ -1428,13 +1428,13 @@ static boolean emit_pow(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); struct src_register src1 = translate_src_register( - emit, &insn->FullSrcRegisters[1] ); + emit, &insn->Src[1] ); boolean need_tmp = FALSE; /* POW can only output to a temporary */ - if (insn->FullDstRegisters[0].DstRegister.File != TGSI_FILE_TEMPORARY) + if (insn->Dst[0].DstRegister.File != TGSI_FILE_TEMPORARY) need_tmp = TRUE; /* POW src1 must not be the same register as dst */ @@ -1463,9 +1463,9 @@ static boolean emit_xpd(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); const struct src_register src1 = translate_src_register( - emit, &insn->FullSrcRegisters[1] ); + emit, &insn->Src[1] ); boolean need_dst_tmp = FALSE; /* XPD can only output to a temporary */ @@ -1517,11 +1517,11 @@ static boolean emit_lrp(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); SVGA3dShaderDestToken tmp; const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); const struct src_register src1 = translate_src_register( - emit, &insn->FullSrcRegisters[1] ); + emit, &insn->Src[1] ); const struct src_register src2 = translate_src_register( - emit, &insn->FullSrcRegisters[2] ); + emit, &insn->Src[2] ); boolean need_dst_tmp = FALSE; /* The dst reg must not be the same as src0 or src2 */ @@ -1568,9 +1568,9 @@ static boolean emit_dst_insn(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); SVGA3dShaderDestToken tmp; const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); const struct src_register src1 = translate_src_register( - emit, &insn->FullSrcRegisters[1] ); + emit, &insn->Src[1] ); struct src_register zero = get_zero_immediate( emit ); boolean need_tmp = FALSE; @@ -1633,7 +1633,7 @@ static boolean emit_exp(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); struct src_register src0 = - translate_src_register( emit, &insn->FullSrcRegisters[0] ); + translate_src_register( emit, &insn->Src[0] ); struct src_register zero = get_zero_immediate( emit ); SVGA3dShaderDestToken fraction; @@ -1723,7 +1723,7 @@ static boolean emit_lit(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); SVGA3dShaderDestToken tmp = get_temp( emit ); const struct src_register src0 = translate_src_register( - emit, &insn->FullSrcRegisters[0] ); + emit, &insn->Src[0] ); struct src_register zero = get_zero_immediate( emit ); /* tmp = pow(src.y, src.w) @@ -1806,7 +1806,7 @@ static boolean emit_ex2( struct svga_shader_emitter *emit, inst = inst_token( SVGA3DOP_EXP ); dst = translate_dst_register( emit, insn, 0 ); - src0 = translate_src_register( emit, &insn->FullSrcRegisters[0] ); + src0 = translate_src_register( emit, &insn->Src[0] ); src0 = scalar( src0, TGSI_SWIZZLE_X ); if (dst.mask != TGSI_WRITEMASK_XYZW) { @@ -1829,7 +1829,7 @@ static boolean emit_log(struct svga_shader_emitter *emit, { SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); struct src_register src0 = - translate_src_register( emit, &insn->FullSrcRegisters[0] ); + translate_src_register( emit, &insn->Src[0] ); struct src_register zero = get_zero_immediate( emit ); SVGA3dShaderDestToken abs_tmp; struct src_register abs_src0; @@ -1953,7 +1953,7 @@ static boolean emit_bgnsub( struct svga_shader_emitter *emit, static boolean emit_call( struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn ) { - unsigned position = insn->InstructionLabel.Label; + unsigned position = insn->Label.Label; unsigned i; for (i = 0; i < emit->nr_labels; i++) { @@ -2543,25 +2543,25 @@ pre_parse_instruction( struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn, int current_arl) { - if (insn->FullSrcRegisters[0].SrcRegister.Indirect && - insn->FullSrcRegisters[0].SrcRegisterInd.File == TGSI_FILE_ADDRESS) { - const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[0]; + if (insn->Src[0].SrcRegister.Indirect && + insn->Src[0].SrcRegisterInd.File == TGSI_FILE_ADDRESS) { + const struct tgsi_full_src_register *reg = &insn->Src[0]; if (reg->SrcRegister.Index < 0) { pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl); } } - if (insn->FullSrcRegisters[1].SrcRegister.Indirect && - insn->FullSrcRegisters[1].SrcRegisterInd.File == TGSI_FILE_ADDRESS) { - const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[1]; + if (insn->Src[1].SrcRegister.Indirect && + insn->Src[1].SrcRegisterInd.File == TGSI_FILE_ADDRESS) { + const struct tgsi_full_src_register *reg = &insn->Src[1]; if (reg->SrcRegister.Index < 0) { pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl); } } - if (insn->FullSrcRegisters[2].SrcRegister.Indirect && - insn->FullSrcRegisters[2].SrcRegisterInd.File == TGSI_FILE_ADDRESS) { - const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[2]; + if (insn->Src[2].SrcRegister.Indirect && + insn->Src[2].SrcRegisterInd.File == TGSI_FILE_ADDRESS) { + const struct tgsi_full_src_register *reg = &insn->Src[2]; if (reg->SrcRegister.Index < 0) { pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl); } -- cgit v1.2.3 From fe2b31e4a896167a33d267822b36eb2de0ceecba Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 24 Nov 2009 15:04:18 +0000 Subject: tgsi: rename fields of tgsi_full_declaration to reduce verbosity DeclarationRange -> Range --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 30 ++++++++++++------------- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 22 +++++++++--------- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 24 ++++++++++---------- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 6 ++--- src/gallium/auxiliary/tgsi/tgsi_build.c | 6 ++--- src/gallium/auxiliary/tgsi/tgsi_dump.c | 4 ++-- src/gallium/auxiliary/tgsi/tgsi_dump_c.c | 4 ++-- src/gallium/auxiliary/tgsi/tgsi_exec.c | 4 ++-- src/gallium/auxiliary/tgsi/tgsi_parse.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_parse.h | 2 +- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 4 ++-- src/gallium/auxiliary/tgsi/tgsi_sanity.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_scan.c | 4 ++-- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 4 ++-- src/gallium/auxiliary/tgsi/tgsi_text.c | 4 ++-- src/gallium/auxiliary/vl/vl_shader_build.c | 24 ++++++++++---------- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 4 ++-- src/gallium/drivers/cell/spu/spu_exec.c | 4 ++-- src/gallium/drivers/i915/i915_fpc_translate.c | 8 +++---- src/gallium/drivers/llvmpipe/lp_bld_interp.c | 4 ++-- src/gallium/drivers/nv20/nv20_vertprog.c | 2 +- src/gallium/drivers/nv30/nv30_fragprog.c | 10 ++++----- src/gallium/drivers/nv30/nv30_vertprog.c | 2 +- src/gallium/drivers/nv40/nv40_fragprog.c | 8 +++---- src/gallium/drivers/nv40/nv40_vertprog.c | 10 ++++----- src/gallium/drivers/nv50/nv50_program.c | 4 ++-- src/gallium/drivers/r300/r300_vs.c | 8 +++---- src/gallium/drivers/svga/svga_tgsi_decl_sm20.c | 4 ++-- src/gallium/drivers/svga/svga_tgsi_decl_sm30.c | 4 ++-- 29 files changed, 109 insertions(+), 109 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 58d867faeb..f1d1715237 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -141,18 +141,18 @@ aa_transform_decl(struct tgsi_transform_context *ctx, if (decl->Declaration.File == TGSI_FILE_OUTPUT && decl->Semantic.Name == TGSI_SEMANTIC_COLOR && decl->Semantic.Index == 0) { - aactx->colorOutput = decl->DeclarationRange.First; + aactx->colorOutput = decl->Range.First; } else if (decl->Declaration.File == TGSI_FILE_SAMPLER) { uint i; - for (i = decl->DeclarationRange.First; - i <= decl->DeclarationRange.Last; i++) { + for (i = decl->Range.First; + i <= decl->Range.Last; i++) { aactx->samplersUsed |= 1 << i; } } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - if ((int) decl->DeclarationRange.Last > aactx->maxInput) - aactx->maxInput = decl->DeclarationRange.Last; + if ((int) decl->Range.Last > aactx->maxInput) + aactx->maxInput = decl->Range.Last; if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC && (int) decl->Semantic.Index > aactx->maxGeneric) { aactx->maxGeneric = decl->Semantic.Index; @@ -160,8 +160,8 @@ aa_transform_decl(struct tgsi_transform_context *ctx, } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { uint i; - for (i = decl->DeclarationRange.First; - i <= decl->DeclarationRange.Last; i++) { + for (i = decl->Range.First; + i <= decl->Range.Last; i++) { aactx->tempsUsed |= (1 << i); } } @@ -230,28 +230,28 @@ aa_transform_inst(struct tgsi_transform_context *ctx, decl.Declaration.Semantic = 1; decl.Semantic.Name = TGSI_SEMANTIC_GENERIC; decl.Semantic.Index = aactx->maxGeneric + 1; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = aactx->maxInput + 1; + decl.Range.First = + decl.Range.Last = aactx->maxInput + 1; ctx->emit_declaration(ctx, &decl); /* declare new sampler */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = aactx->freeSampler; + decl.Range.First = + decl.Range.Last = aactx->freeSampler; ctx->emit_declaration(ctx, &decl); /* declare new temp regs */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = aactx->texTemp; + decl.Range.First = + decl.Range.Last = aactx->texTemp; ctx->emit_declaration(ctx, &decl); decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = aactx->colorTemp; + decl.Range.First = + decl.Range.Last = aactx->colorTemp; ctx->emit_declaration(ctx, &decl); aactx->firstInstruction = FALSE; diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 09fc55cb5e..e9e2402c23 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -133,11 +133,11 @@ aa_transform_decl(struct tgsi_transform_context *ctx, if (decl->Declaration.File == TGSI_FILE_OUTPUT && decl->Semantic.Name == TGSI_SEMANTIC_COLOR && decl->Semantic.Index == 0) { - aactx->colorOutput = decl->DeclarationRange.First; + aactx->colorOutput = decl->Range.First; } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - if ((int) decl->DeclarationRange.Last > aactx->maxInput) - aactx->maxInput = decl->DeclarationRange.Last; + if ((int) decl->Range.Last > aactx->maxInput) + aactx->maxInput = decl->Range.Last; if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC && (int) decl->Semantic.Index > aactx->maxGeneric) { aactx->maxGeneric = decl->Semantic.Index; @@ -145,8 +145,8 @@ aa_transform_decl(struct tgsi_transform_context *ctx, } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { uint i; - for (i = decl->DeclarationRange.First; - i <= decl->DeclarationRange.Last; i++) { + for (i = decl->Range.First; + i <= decl->Range.Last; i++) { aactx->tempsUsed |= (1 << i); } } @@ -200,21 +200,21 @@ aa_transform_inst(struct tgsi_transform_context *ctx, decl.Declaration.Semantic = 1; decl.Semantic.Name = TGSI_SEMANTIC_GENERIC; decl.Semantic.Index = aactx->maxGeneric + 1; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = texInput; + decl.Range.First = + decl.Range.Last = texInput; ctx->emit_declaration(ctx, &decl); /* declare new temp regs */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = tmp0; + decl.Range.First = + decl.Range.Last = tmp0; ctx->emit_declaration(ctx, &decl); decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = aactx->colorTemp; + decl.Range.First = + decl.Range.Last = aactx->colorTemp; ctx->emit_declaration(ctx, &decl); aactx->firstInstruction = FALSE; diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index fe0d511218..218dcb9d12 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -133,20 +133,20 @@ pstip_transform_decl(struct tgsi_transform_context *ctx, if (decl->Declaration.File == TGSI_FILE_SAMPLER) { uint i; - for (i = decl->DeclarationRange.First; - i <= decl->DeclarationRange.Last; i++) { + for (i = decl->Range.First; + i <= decl->Range.Last; i++) { pctx->samplersUsed |= 1 << i; } } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - pctx->maxInput = MAX2(pctx->maxInput, (int) decl->DeclarationRange.Last); + pctx->maxInput = MAX2(pctx->maxInput, (int) decl->Range.Last); if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) - pctx->wincoordInput = (int) decl->DeclarationRange.First; + pctx->wincoordInput = (int) decl->Range.First; } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { uint i; - for (i = decl->DeclarationRange.First; - i <= decl->DeclarationRange.Last; i++) { + for (i = decl->Range.First; + i <= decl->Range.Last; i++) { pctx->tempsUsed |= (1 << i); } } @@ -228,23 +228,23 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, decl.Declaration.Semantic = 1; decl.Semantic.Name = TGSI_SEMANTIC_POSITION; decl.Semantic.Index = 0; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = wincoordInput; + decl.Range.First = + decl.Range.Last = wincoordInput; ctx->emit_declaration(ctx, &decl); } /* declare new sampler */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = pctx->freeSampler; + decl.Range.First = + decl.Range.Last = pctx->freeSampler; ctx->emit_declaration(ctx, &decl); /* declare new temp regs */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.DeclarationRange.First = - decl.DeclarationRange.Last = pctx->texTemp; + decl.Range.First = + decl.Range.Last = pctx->texTemp; ctx->emit_declaration(ctx, &decl); /* emit immediate = {1/32, 1/32, 1, 1} diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index fbf4d2636d..3edff0e5b2 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -94,8 +94,8 @@ translate_declaration(struct gallivm_ir *prog, unsigned first, last, mask; uint interp_method; - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; /* Do not touch WPOS.xy */ @@ -149,7 +149,7 @@ translate_declarationir(struct gallivm_ir *, struct tgsi_full_declaration *) { if (decl->Declaration.File == TGSI_FILE_ADDRESS) { - int idx = decl->DeclarationRange.First; + int idx = decl->Range.First; storage->addAddress(idx); } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 7ec832aad9..094d8d52d8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -172,7 +172,7 @@ tgsi_default_full_declaration( void ) struct tgsi_full_declaration full_declaration; full_declaration.Declaration = tgsi_default_declaration(); - full_declaration.DeclarationRange = tgsi_default_declaration_range(); + full_declaration.Range = tgsi_default_declaration_range(); full_declaration.Semantic = tgsi_default_declaration_semantic(); return full_declaration; @@ -209,8 +209,8 @@ tgsi_build_full_declaration( size++; *dr = tgsi_build_declaration_range( - full_decl->DeclarationRange.First, - full_decl->DeclarationRange.Last, + full_decl->Range.First, + full_decl->Range.Last, declaration, header ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 4ff7f4b11e..7791f9f4fc 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -224,8 +224,8 @@ iter_declaration( _dump_register( ctx, decl->Declaration.File, - decl->DeclarationRange.First, - decl->DeclarationRange.Last ); + decl->Range.First, + decl->Range.Last ); _dump_writemask( ctx, decl->Declaration.UsageMask ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c index 194b2473bc..5593942154 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -223,9 +223,9 @@ dump_declaration_verbose( EOL(); TXT( "\nFirst: " ); - UID( decl->DeclarationRange.First ); + UID( decl->Range.First ); TXT( "\nLast : " ); - UID( decl->DeclarationRange.Last ); + UID( decl->Range.Last ); if( decl->Declaration.Semantic ) { EOL(); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index a9bfb0d6df..3f8d59e46a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1895,8 +1895,8 @@ exec_declaration(struct tgsi_exec_machine *mach, if (decl->Declaration.File == TGSI_FILE_INPUT) { uint first, last, mask; - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index ff593fdc32..7946fdd732 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -112,7 +112,7 @@ tgsi_parse_token( memset(decl, 0, sizeof *decl); copy_token(&decl->Declaration, &token); - next_token( ctx, &decl->DeclarationRange ); + next_token( ctx, &decl->Range ); if( decl->Declaration.Semantic ) { next_token( ctx, &decl->Semantic ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h index 2f8f4d488b..1965c5181d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -62,7 +62,7 @@ struct tgsi_full_src_register struct tgsi_full_declaration { struct tgsi_declaration Declaration; - struct tgsi_declaration_range DeclarationRange; + struct tgsi_declaration_range Range; struct tgsi_declaration_semantic Semantic; }; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 8397f432f9..ec5f235143 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -1178,8 +1178,8 @@ emit_declaration( unsigned first, last, mask; unsigned i, j; - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; for( i = first; i <= last; i++ ) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 8422b91a30..005894e604 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -286,7 +286,7 @@ iter_declaration( file = decl->Declaration.File; if (!check_file_name( ctx, file )) return TRUE; - for (i = decl->DeclarationRange.First; i <= decl->DeclarationRange.Last; i++) { + for (i = decl->Range.First; i <= decl->Range.Last; i++) { if (is_register_declared( ctx, file, i )) report_error( ctx, "%s[%u]: The same register declared more than once", file_names[file], i ); ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG)); diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index be25b3dc5c..6ca25c36ec 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -119,8 +119,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens, = &parse.FullToken.FullDeclaration; const uint file = fulldecl->Declaration.File; uint reg; - for (reg = fulldecl->DeclarationRange.First; - reg <= fulldecl->DeclarationRange.Last; + for (reg = fulldecl->Range.First; + reg <= fulldecl->Range.Last; reg++) { /* only first 32 regs will appear in this bitfield */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 2d2ee321c9..c23b0cc343 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -2637,8 +2637,8 @@ emit_declaration( unsigned first, last, mask; unsigned i, j; - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; for( i = first; i <= last; i++ ) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index e9b1a21fb4..295ded9664 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -799,8 +799,8 @@ static boolean parse_declaration( struct translate_ctx *ctx ) decl = tgsi_default_full_declaration(); decl.Declaration.File = file; decl.Declaration.UsageMask = writemask; - decl.DeclarationRange.First = first; - decl.DeclarationRange.Last = last; + decl.Range.First = first; + decl.Range.Last = last; cur = ctx->cur; eat_opt_white( &cur ); diff --git a/src/gallium/auxiliary/vl/vl_shader_build.c b/src/gallium/auxiliary/vl/vl_shader_build.c index 82300b1da2..548dfca05a 100644 --- a/src/gallium/auxiliary/vl/vl_shader_build.c +++ b/src/gallium/auxiliary/vl/vl_shader_build.c @@ -38,8 +38,8 @@ struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index decl.Declaration.Semantic = 1; decl.Semantic.Name = name; decl.Semantic.Index = index; - decl.DeclarationRange.First = first; - decl.DeclarationRange.Last = last; + decl.Range.First = first; + decl.Range.Last = last; return decl; } @@ -67,8 +67,8 @@ struct tgsi_full_declaration vl_decl_interpolated_input decl.Semantic.Name = name; decl.Semantic.Index = index; decl.Declaration.Interpolate = interpolation;; - decl.DeclarationRange.First = first; - decl.DeclarationRange.Last = last; + decl.Range.First = first; + decl.Range.Last = last; return decl; } @@ -81,8 +81,8 @@ struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int i decl.Declaration.Semantic = 1; decl.Semantic.Name = name; decl.Semantic.Index = index; - decl.DeclarationRange.First = first; - decl.DeclarationRange.Last = last; + decl.Range.First = first; + decl.Range.Last = last; return decl; } @@ -95,8 +95,8 @@ struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int inde decl.Declaration.Semantic = 1; decl.Semantic.Name = name; decl.Semantic.Index = index; - decl.DeclarationRange.First = first; - decl.DeclarationRange.Last = last; + decl.Range.First = first; + decl.Range.Last = last; return decl; } @@ -107,8 +107,8 @@ struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.DeclarationRange.First = first; - decl.DeclarationRange.Last = last; + decl.Range.First = first; + decl.Range.Last = last; return decl; } @@ -119,8 +119,8 @@ struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int l decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.DeclarationRange.First = first; - decl.DeclarationRange.Last = last; + decl.Range.First = first; + decl.Range.Last = last; return decl; } diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index b0afad349f..aeabe002d0 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -1909,8 +1909,8 @@ emit_declaration(struct cell_context *cell, switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: - for (i = decl->DeclarationRange.First; - i <= decl->DeclarationRange.Last; + for (i = decl->Range.First; + i <= decl->Range.Last; i++) { assert(i < MAX_TEMPS); for (ch = 0; ch < 4; ch++) { diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 8d58c534be..ee5e3432d5 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -833,8 +833,8 @@ exec_declaration(struct spu_exec_machine *mach, unsigned first, last, mask; interpolation_func interp; - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; switch( decl->Declaration.Interpolate ) { diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 9e626c85c0..1a4a7bbe62 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -928,8 +928,8 @@ i915_translate_instructions(struct i915_fp_compile *p, if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_CONSTANT) { uint i; - for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; - i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; + for (i = parse.FullToken.FullDeclaration.Range.First; + i <= parse.FullToken.FullDeclaration.Range.Last; i++) { assert(ifs->constant_flags[i] == 0x0); ifs->constant_flags[i] = I915_CONSTFLAG_USER; @@ -939,8 +939,8 @@ i915_translate_instructions(struct i915_fp_compile *p, else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_TEMPORARY) { uint i; - for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; - i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; + for (i = parse.FullToken.FullDeclaration.Range.First; + i <= parse.FullToken.FullDeclaration.Range.Last; i++) { assert(i < I915_MAX_TEMPORARY); /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 818c0e943e..49dab8ab61 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -303,8 +303,8 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, unsigned first, last, mask; unsigned attrib; - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; for( attrib = first; attrib <= last; ++attrib ) { diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c index e82a23d475..abffbe33a8 100644 --- a/src/gallium/drivers/nv20/nv20_vertprog.c +++ b/src/gallium/drivers/nv20/nv20_vertprog.c @@ -535,7 +535,7 @@ nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc, return FALSE; } - vpc->output_map[fdec->DeclarationRange.First] = hw; + vpc->output_map[fdec->Range.First] = hw; return TRUE; } diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index dfffeb3263..20f7d4152c 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -604,7 +604,7 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, return FALSE; } - fpc->attrib_map[fdec->DeclarationRange.First] = hw; + fpc->attrib_map[fdec->Range.First] = hw; return TRUE; } @@ -614,10 +614,10 @@ nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc, { switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: - fpc->depth_id = fdec->DeclarationRange.First; + fpc->depth_id = fdec->Range.First; break; case TGSI_SEMANTIC_COLOR: - fpc->colour_id = fdec->DeclarationRange.First; + fpc->colour_id = fdec->Range.First; break; default: NOUVEAU_ERR("bad output semantic\n"); @@ -653,9 +653,9 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc) goto out_err; break; /*case TGSI_FILE_TEMPORARY: - if (fdec->DeclarationRange.Last > high_temp) { + if (fdec->Range.Last > high_temp) { high_temp = - fdec->DeclarationRange.Last; + fdec->Range.Last; } break;*/ default: diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 41bd45ad29..99fde93245 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -535,7 +535,7 @@ nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc, return FALSE; } - vpc->output_map[fdec->DeclarationRange.First] = hw; + vpc->output_map[fdec->Range.First] = hw; return TRUE; } diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 6addc45247..8e8cba1a0c 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -676,7 +676,7 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, return FALSE; } - fpc->attrib_map[fdec->DeclarationRange.First] = hw; + fpc->attrib_map[fdec->Range.First] = hw; return TRUE; } @@ -684,7 +684,7 @@ static boolean nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, const struct tgsi_full_declaration *fdec) { - unsigned idx = fdec->DeclarationRange.First; + unsigned idx = fdec->Range.First; unsigned hw; switch (fdec->Semantic.Name) { @@ -738,9 +738,9 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc) goto out_err; break; case TGSI_FILE_TEMPORARY: - if (fdec->DeclarationRange.Last > high_temp) { + if (fdec->Range.Last > high_temp) { high_temp = - fdec->DeclarationRange.Last; + fdec->Range.Last; } break; default: diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 0cdc511166..913e050389 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -577,7 +577,7 @@ static boolean nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, const struct tgsi_full_declaration *fdec) { - unsigned idx = fdec->DeclarationRange.First; + unsigned idx = fdec->Range.First; int hw; switch (fdec->Semantic.Name) { @@ -652,16 +652,16 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) fdec = &p.FullToken.FullDeclaration; switch (fdec->Declaration.File) { case TGSI_FILE_TEMPORARY: - if (fdec->DeclarationRange.Last > high_temp) { + if (fdec->Range.Last > high_temp) { high_temp = - fdec->DeclarationRange.Last; + fdec->Range.Last; } break; #if 0 /* this would be nice.. except gallium doesn't track it */ case TGSI_FILE_ADDRESS: - if (fdec->DeclarationRange.Last > high_addr) { + if (fdec->Range.Last > high_addr) { high_addr = - fdec->DeclarationRange.Last; + fdec->Range.Last; } break; #endif diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 9fbf918601..57747a1840 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -2547,8 +2547,8 @@ nv50_program_tx_prep(struct nv50_pc *pc) unsigned si, last, first, mode; d = &tp.FullToken.FullDeclaration; - first = d->DeclarationRange.First; - last = d->DeclarationRange.Last; + first = d->Range.First; + last = d->Range.Last; switch (d->Declaration.File) { case TGSI_FILE_TEMPORARY: diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 939b13e4b3..096707dda4 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -79,19 +79,19 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) switch (decl->Semantic.Name) { case TGSI_SEMANTIC_POSITION: - c->code->outputs[decl->DeclarationRange.First] = 0; + c->code->outputs[decl->Range.First] = 0; break; case TGSI_SEMANTIC_PSIZE: - c->code->outputs[decl->DeclarationRange.First] = 1; + c->code->outputs[decl->Range.First] = 1; break; case TGSI_SEMANTIC_COLOR: - c->code->outputs[decl->DeclarationRange.First] = 1 + + c->code->outputs[decl->Range.First] = 1 + (pointsize ? 1 : 0) + colors++; break; case TGSI_SEMANTIC_FOG: case TGSI_SEMANTIC_GENERIC: - c->code->outputs[decl->DeclarationRange.First] = 1 + + c->code->outputs[decl->Range.First] = 1 + (pointsize ? 1 : 0) + out_colors + generic++; diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c index 6f4822a89d..23b3ace7f3 100644 --- a/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c +++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c @@ -230,8 +230,8 @@ static boolean ps20_sampler( struct svga_shader_emitter *emit, boolean svga_translate_decl_sm20( struct svga_shader_emitter *emit, const struct tgsi_full_declaration *decl ) { - unsigned first = decl->DeclarationRange.First; - unsigned last = decl->DeclarationRange.Last; + unsigned first = decl->Range.First; + unsigned last = decl->Range.Last; unsigned semantic = 0; unsigned semantic_idx = 0; unsigned idx; diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c index 65aa23ce3e..d1c7336dec 100644 --- a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c +++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c @@ -335,8 +335,8 @@ static boolean ps30_sampler( struct svga_shader_emitter *emit, boolean svga_translate_decl_sm30( struct svga_shader_emitter *emit, const struct tgsi_full_declaration *decl ) { - unsigned first = decl->DeclarationRange.First; - unsigned last = decl->DeclarationRange.Last; + unsigned first = decl->Range.First; + unsigned last = decl->Range.Last; unsigned semantic = 0; unsigned semantic_idx = 0; unsigned idx; -- cgit v1.2.3 From 5b0824dfe5eaf59fa87134e7482b3d147b262901 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 24 Nov 2009 15:08:55 +0000 Subject: tgsi: rename fields of tgsi_full_dst_register to reduce verbosity DstRegister -> Register DstRegisterInd -> Indirect --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 24 +++---- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 86 ++++++++++++------------ src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 8 +-- src/gallium/auxiliary/draw/draw_vs_aos.c | 50 +++++++------- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 16 ++--- src/gallium/auxiliary/tgsi/tgsi_build.c | 34 +++++----- src/gallium/auxiliary/tgsi/tgsi_dump.c | 20 +++--- src/gallium/auxiliary/tgsi/tgsi_dump_c.c | 38 +++++------ src/gallium/auxiliary/tgsi/tgsi_exec.c | 36 +++++----- src/gallium/auxiliary/tgsi/tgsi_parse.c | 12 ++-- src/gallium/auxiliary/tgsi/tgsi_parse.h | 4 +- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 18 ++--- src/gallium/auxiliary/tgsi/tgsi_sanity.c | 8 +-- src/gallium/auxiliary/tgsi/tgsi_scan.c | 6 +- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 14 ++-- src/gallium/auxiliary/tgsi/tgsi_text.c | 6 +- src/gallium/auxiliary/vl/vl_compositor.c | 2 +- src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 6 +- src/gallium/auxiliary/vl/vl_shader_build.c | 16 ++--- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 20 +++--- src/gallium/drivers/cell/spu/spu_exec.c | 12 ++-- src/gallium/drivers/i915/i915_fpc_translate.c | 10 +-- src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 12 ++-- src/gallium/drivers/nv20/nv20_vertprog.c | 8 +-- src/gallium/drivers/nv30/nv30_fragprog.c | 10 +-- src/gallium/drivers/nv30/nv30_vertprog.c | 8 +-- src/gallium/drivers/nv40/nv40_fragprog.c | 10 +-- src/gallium/drivers/nv40/nv40_vertprog.c | 16 ++--- src/gallium/drivers/nv50/nv50_program.c | 36 +++++----- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 8 +-- src/gallium/drivers/svga/svga_tgsi_insn.c | 12 ++-- 31 files changed, 283 insertions(+), 283 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index f1d1715237..fe200983ca 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -265,8 +265,8 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_TEX; newInst.Instruction.NumDstRegs = 1; - newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.Dst[0].DstRegister.Index = aactx->texTemp; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = aactx->texTemp; newInst.Instruction.NumSrcRegs = 2; newInst.Instruction.Texture = TRUE; newInst.Texture.Texture = TGSI_TEXTURE_2D; @@ -281,9 +281,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MOV; newInst.Instruction.NumDstRegs = 1; - newInst.Dst[0].DstRegister.File = TGSI_FILE_OUTPUT; - newInst.Dst[0].DstRegister.Index = aactx->colorOutput; - newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; + newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT; + newInst.Dst[0].Register.Index = aactx->colorOutput; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ; newInst.Instruction.NumSrcRegs = 1; newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; newInst.Src[0].SrcRegister.Index = aactx->colorTemp; @@ -293,9 +293,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MUL; newInst.Instruction.NumDstRegs = 1; - newInst.Dst[0].DstRegister.File = TGSI_FILE_OUTPUT; - newInst.Dst[0].DstRegister.Index = aactx->colorOutput; - newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT; + newInst.Dst[0].Register.Index = aactx->colorOutput; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W; newInst.Instruction.NumSrcRegs = 2; newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; newInst.Src[0].SrcRegister.Index = aactx->colorTemp; @@ -318,10 +318,10 @@ aa_transform_inst(struct tgsi_transform_context *ctx, for (i = 0; i < inst->Instruction.NumDstRegs; i++) { struct tgsi_full_dst_register *dst = &inst->Dst[i]; - if (dst->DstRegister.File == TGSI_FILE_OUTPUT && - dst->DstRegister.Index == aactx->colorOutput) { - dst->DstRegister.File = TGSI_FILE_TEMPORARY; - dst->DstRegister.Index = aactx->colorTemp; + if (dst->Register.File == TGSI_FILE_OUTPUT && + dst->Register.Index == aactx->colorOutput) { + dst->Register.File = TGSI_FILE_TEMPORARY; + dst->Register.Index = aactx->colorTemp; } } diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index e9e2402c23..39e1406e96 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -234,9 +234,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MUL; newInst.Instruction.NumDstRegs = 1; - newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.Dst[0].DstRegister.Index = tmp0; - newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XY; newInst.Instruction.NumSrcRegs = 2; newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT; newInst.Src[0].SrcRegister.Index = texInput; @@ -248,9 +248,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_ADD; newInst.Instruction.NumDstRegs = 1; - newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.Dst[0].DstRegister.Index = tmp0; - newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X; newInst.Instruction.NumSrcRegs = 2; newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; newInst.Src[0].SrcRegister.Index = tmp0; @@ -265,9 +265,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_RSQ; newInst.Instruction.NumDstRegs = 1; - newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.Dst[0].DstRegister.Index = tmp0; - newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X; newInst.Instruction.NumSrcRegs = 1; newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; newInst.Src[0].SrcRegister.Index = tmp0; @@ -277,9 +277,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_RCP; newInst.Instruction.NumDstRegs = 1; - newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.Dst[0].DstRegister.Index = tmp0; - newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X; newInst.Instruction.NumSrcRegs = 1; newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; newInst.Src[0].SrcRegister.Index = tmp0; @@ -290,9 +290,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_SGT; newInst.Instruction.NumDstRegs = 1; - newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.Dst[0].DstRegister.Index = tmp0; - newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y; newInst.Instruction.NumSrcRegs = 2; newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; newInst.Src[0].SrcRegister.Index = tmp0; @@ -323,9 +323,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_SUB; newInst.Instruction.NumDstRegs = 1; - newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.Dst[0].DstRegister.Index = tmp0; - newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z; newInst.Instruction.NumSrcRegs = 2; newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT; newInst.Src[0].SrcRegister.Index = texInput; @@ -339,9 +339,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_RCP; newInst.Instruction.NumDstRegs = 1; - newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.Dst[0].DstRegister.Index = tmp0; - newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z; newInst.Instruction.NumSrcRegs = 1; newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; newInst.Src[0].SrcRegister.Index = tmp0; @@ -352,9 +352,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_SUB; newInst.Instruction.NumDstRegs = 1; - newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.Dst[0].DstRegister.Index = tmp0; - newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y; newInst.Instruction.NumSrcRegs = 2; newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT; newInst.Src[0].SrcRegister.Index = texInput; @@ -368,9 +368,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MUL; newInst.Instruction.NumDstRegs = 1; - newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.Dst[0].DstRegister.Index = tmp0; - newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W; newInst.Instruction.NumSrcRegs = 2; newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; newInst.Src[0].SrcRegister.Index = tmp0; @@ -384,9 +384,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_SLE; newInst.Instruction.NumDstRegs = 1; - newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.Dst[0].DstRegister.Index = tmp0; - newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y; newInst.Instruction.NumSrcRegs = 2; newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; newInst.Src[0].SrcRegister.Index = tmp0; @@ -405,9 +405,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_CMP; newInst.Instruction.NumDstRegs = 1; - newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.Dst[0].DstRegister.Index = tmp0; - newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = tmp0; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W; newInst.Instruction.NumSrcRegs = 3; newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; newInst.Src[0].SrcRegister.Index = tmp0; @@ -439,9 +439,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MOV; newInst.Instruction.NumDstRegs = 1; - newInst.Dst[0].DstRegister.File = TGSI_FILE_OUTPUT; - newInst.Dst[0].DstRegister.Index = aactx->colorOutput; - newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; + newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT; + newInst.Dst[0].Register.Index = aactx->colorOutput; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ; newInst.Instruction.NumSrcRegs = 1; newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; newInst.Src[0].SrcRegister.Index = aactx->colorTemp; @@ -451,9 +451,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MUL; newInst.Instruction.NumDstRegs = 1; - newInst.Dst[0].DstRegister.File = TGSI_FILE_OUTPUT; - newInst.Dst[0].DstRegister.Index = aactx->colorOutput; - newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT; + newInst.Dst[0].Register.Index = aactx->colorOutput; + newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W; newInst.Instruction.NumSrcRegs = 2; newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; newInst.Src[0].SrcRegister.Index = aactx->colorTemp; @@ -469,10 +469,10 @@ aa_transform_inst(struct tgsi_transform_context *ctx, for (i = 0; i < inst->Instruction.NumDstRegs; i++) { struct tgsi_full_dst_register *dst = &inst->Dst[i]; - if (dst->DstRegister.File == TGSI_FILE_OUTPUT && - dst->DstRegister.Index == aactx->colorOutput) { - dst->DstRegister.File = TGSI_FILE_TEMPORARY; - dst->DstRegister.Index = aactx->colorTemp; + if (dst->Register.File == TGSI_FILE_OUTPUT && + dst->Register.Index == aactx->colorOutput) { + dst->Register.File = TGSI_FILE_TEMPORARY; + dst->Register.Index = aactx->colorTemp; } } } diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 218dcb9d12..99165b1006 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -280,8 +280,8 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_MUL; newInst.Instruction.NumDstRegs = 1; - newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.Dst[0].DstRegister.Index = pctx->texTemp; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = pctx->texTemp; newInst.Instruction.NumSrcRegs = 2; newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT; newInst.Src[0].SrcRegister.Index = wincoordInput; @@ -293,8 +293,8 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, newInst = tgsi_default_full_instruction(); newInst.Instruction.Opcode = TGSI_OPCODE_TEX; newInst.Instruction.NumDstRegs = 1; - newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.Dst[0].DstRegister.Index = pctx->texTemp; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = pctx->texTemp; newInst.Instruction.NumSrcRegs = 2; newInst.Instruction.Texture = TRUE; newInst.Texture.Texture = TGSI_TEXTURE_2D; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index a9c8715bc8..8c93642954 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -361,8 +361,8 @@ static struct x86_reg aos_get_shader_reg_ptr( struct aos_compilation *cp, static struct x86_reg get_dst_ptr( struct aos_compilation *cp, const struct tgsi_full_dst_register *dst ) { - unsigned file = dst->DstRegister.File; - unsigned idx = dst->DstRegister.Index; + unsigned file = dst->Register.File; + unsigned idx = dst->Register.Index; unsigned i; @@ -669,15 +669,15 @@ static void store_dest( struct aos_compilation *cp, { struct x86_reg dst; - switch (reg->DstRegister.WriteMask) { + switch (reg->Register.WriteMask) { case 0: return; case TGSI_WRITEMASK_XYZW: aos_adopt_xmm_reg(cp, get_xmm_writable(cp, result), - reg->DstRegister.File, - reg->DstRegister.Index, + reg->Register.File, + reg->Register.Index, TRUE); return; default: @@ -685,10 +685,10 @@ static void store_dest( struct aos_compilation *cp, } dst = aos_get_shader_reg_xmm(cp, - reg->DstRegister.File, - reg->DstRegister.Index); + reg->Register.File, + reg->Register.Index); - switch (reg->DstRegister.WriteMask) { + switch (reg->Register.WriteMask) { case TGSI_WRITEMASK_X: sse_movss(cp->func, dst, get_xmm(cp, result)); break; @@ -710,14 +710,14 @@ static void store_dest( struct aos_compilation *cp, break; default: - mask_write(cp, dst, result, reg->DstRegister.WriteMask); + mask_write(cp, dst, result, reg->Register.WriteMask); break; } aos_adopt_xmm_reg(cp, dst, - reg->DstRegister.File, - reg->DstRegister.Index, + reg->Register.File, + reg->Register.Index, TRUE); } @@ -737,7 +737,7 @@ static void store_scalar_dest( struct aos_compilation *cp, const struct tgsi_full_dst_register *reg, struct x86_reg result ) { - unsigned writemask = reg->DstRegister.WriteMask; + unsigned writemask = reg->Register.WriteMask; struct x86_reg dst; if (writemask != TGSI_WRITEMASK_X && @@ -754,12 +754,12 @@ static void store_scalar_dest( struct aos_compilation *cp, result = get_xmm(cp, result); dst = aos_get_shader_reg_xmm(cp, - reg->DstRegister.File, - reg->DstRegister.Index); + reg->Register.File, + reg->Register.Index); - switch (reg->DstRegister.WriteMask) { + switch (reg->Register.WriteMask) { case TGSI_WRITEMASK_X: sse_movss(cp->func, dst, result); break; @@ -782,8 +782,8 @@ static void store_scalar_dest( struct aos_compilation *cp, aos_adopt_xmm_reg(cp, dst, - reg->DstRegister.File, - reg->DstRegister.Index, + reg->Register.File, + reg->Register.Index, TRUE); } @@ -819,7 +819,7 @@ static void x87_fstp_dest4( struct aos_compilation *cp, const struct tgsi_full_dst_register *dst ) { struct x86_reg ptr = get_dst_ptr(cp, dst); - unsigned writemask = dst->DstRegister.WriteMask; + unsigned writemask = dst->Register.WriteMask; x87_fst_or_nop(cp->func, writemask, 0, ptr); x87_fst_or_nop(cp->func, writemask, 1, ptr); @@ -1100,7 +1100,7 @@ static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); - unsigned writemask = op->Dst[0].DstRegister.WriteMask; + unsigned writemask = op->Dst[0].Register.WriteMask; int i; set_fpu_round_neg_inf( cp ); @@ -1127,7 +1127,7 @@ static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_RND( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); - unsigned writemask = op->Dst[0].DstRegister.WriteMask; + unsigned writemask = op->Dst[0].Register.WriteMask; int i; set_fpu_round_nearest( cp ); @@ -1156,7 +1156,7 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); struct x86_reg st0 = x86_make_reg(file_x87, 0); struct x86_reg st1 = x86_make_reg(file_x87, 1); - unsigned writemask = op->Dst[0].DstRegister.WriteMask; + unsigned writemask = op->Dst[0].Register.WriteMask; int i; set_fpu_round_neg_inf( cp ); @@ -1190,7 +1190,7 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); - unsigned writemask = op->Dst[0].DstRegister.WriteMask; + unsigned writemask = op->Dst[0].Register.WriteMask; unsigned lit_count = cp->lit_count++; struct x86_reg result, arg0; unsigned i; @@ -1270,7 +1270,7 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); - unsigned writemask = op->Dst[0].DstRegister.WriteMask; + unsigned writemask = op->Dst[0].Register.WriteMask; if (writemask & TGSI_WRITEMASK_YZ) { struct x86_reg st1 = x86_make_reg(file_x87, 1); @@ -1897,10 +1897,10 @@ static void find_last_write_outputs( struct aos_compilation *cp ) continue; for (i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++) { - if (parse.FullToken.FullInstruction.Dst[i].DstRegister.File == + if (parse.FullToken.FullInstruction.Dst[i].Register.File == TGSI_FILE_OUTPUT) { - unsigned idx = parse.FullToken.FullInstruction.Dst[i].DstRegister.Index; + unsigned idx = parse.FullToken.FullInstruction.Dst[i].Register.Index; cp->output_last_write[idx] = this_instruction; } } diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 3edff0e5b2..135d307ce1 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -658,12 +658,12 @@ translate_instruction(llvm::Module *module, for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { struct tgsi_full_dst_register *dst = &inst->Dst[i]; - if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { - storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) { - storage->setTempElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) { - storage->setAddrElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + if (dst->Register.File == TGSI_FILE_OUTPUT) { + storage->setOutputElement(dst->Register.Index, out, dst->Register.WriteMask); + } else if (dst->Register.File == TGSI_FILE_TEMPORARY) { + storage->setTempElement(dst->Register.Index, out, dst->Register.WriteMask); + } else if (dst->Register.File == TGSI_FILE_ADDRESS) { + storage->setAddrElement(dst->Register.Index, out, dst->Register.WriteMask); } else { fprintf(stderr, "ERROR: unsupported LLVM destination!"); assert(!"wrong destination"); @@ -994,8 +994,8 @@ translate_instructionir(llvm::Module *module, /* store results */ for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { struct tgsi_full_dst_register *dst = &inst->Dst[i]; - storage->store((enum tgsi_file_type)dst->DstRegister.File, - dst->DstRegister.Index, out, dst->DstRegister.WriteMask, + storage->store((enum tgsi_file_type)dst->Register.File, + dst->Register.Index, out, dst->Register.WriteMask, instr->getIRBuilder() ); } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 094d8d52d8..91fb4f68e5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -580,15 +580,15 @@ tgsi_build_full_instruction( size++; *dst_register = tgsi_build_dst_register( - reg->DstRegister.File, - reg->DstRegister.WriteMask, - reg->DstRegister.Indirect, - reg->DstRegister.Index, + reg->Register.File, + reg->Register.WriteMask, + reg->Register.Indirect, + reg->Register.Index, instruction, header ); prev_token = (struct tgsi_token *) dst_register; - if( reg->DstRegister.Indirect ) { + if( reg->Register.Indirect ) { struct tgsi_src_register *ind; if( maxsize <= size ) @@ -597,16 +597,16 @@ tgsi_build_full_instruction( size++; *ind = tgsi_build_src_register( - reg->DstRegisterInd.File, - reg->DstRegisterInd.SwizzleX, - reg->DstRegisterInd.SwizzleY, - reg->DstRegisterInd.SwizzleZ, - reg->DstRegisterInd.SwizzleW, - reg->DstRegisterInd.Negate, - reg->DstRegisterInd.Absolute, - reg->DstRegisterInd.Indirect, - reg->DstRegisterInd.Dimension, - reg->DstRegisterInd.Index, + reg->Indirect.File, + reg->Indirect.SwizzleX, + reg->Indirect.SwizzleY, + reg->Indirect.SwizzleZ, + reg->Indirect.SwizzleW, + reg->Indirect.Negate, + reg->Indirect.Absolute, + reg->Indirect.Indirect, + reg->Indirect.Dimension, + reg->Indirect.Index, instruction, header ); } @@ -980,8 +980,8 @@ tgsi_default_full_dst_register( void ) { struct tgsi_full_dst_register full_dst_register; - full_dst_register.DstRegister = tgsi_default_dst_register(); - full_dst_register.DstRegisterInd = tgsi_default_src_register(); + full_dst_register.Register = tgsi_default_dst_register(); + full_dst_register.Indirect = tgsi_default_src_register(); return full_dst_register; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 7791f9f4fc..6141865f03 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -358,23 +358,23 @@ iter_instruction( CHR( ',' ); CHR( ' ' ); - if (dst->DstRegister.Indirect) { + if (dst->Register.Indirect) { _dump_register_ind( ctx, - dst->DstRegister.File, - dst->DstRegister.Index, - dst->DstRegisterInd.File, - dst->DstRegisterInd.Index, - dst->DstRegisterInd.SwizzleX ); + dst->Register.File, + dst->Register.Index, + dst->Indirect.File, + dst->Indirect.Index, + dst->Indirect.SwizzleX ); } else { _dump_register( ctx, - dst->DstRegister.File, - dst->DstRegister.Index, - dst->DstRegister.Index ); + dst->Register.File, + dst->Register.Index, + dst->Register.Index ); } - _dump_writemask( ctx, dst->DstRegister.WriteMask ); + _dump_writemask( ctx, dst->Register.WriteMask ); first_reg = FALSE; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c index 5593942154..5fae5a225f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -339,48 +339,48 @@ dump_instruction_verbose( EOL(); TXT( "\nFile : " ); - ENM( dst->DstRegister.File, TGSI_FILES ); - if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) { + ENM( dst->Register.File, TGSI_FILES ); + if( deflt || fd->Register.WriteMask != dst->Register.WriteMask ) { TXT( "\nWriteMask: " ); - ENM( dst->DstRegister.WriteMask, TGSI_WRITEMASKS ); + ENM( dst->Register.WriteMask, TGSI_WRITEMASKS ); } if( ignored ) { - if( deflt || fd->DstRegister.Indirect != dst->DstRegister.Indirect ) { + if( deflt || fd->Register.Indirect != dst->Register.Indirect ) { TXT( "\nIndirect : " ); - UID( dst->DstRegister.Indirect ); + UID( dst->Register.Indirect ); } - if( deflt || fd->DstRegister.Dimension != dst->DstRegister.Dimension ) { + if( deflt || fd->Register.Dimension != dst->Register.Dimension ) { TXT( "\nDimension: " ); - UID( dst->DstRegister.Dimension ); + UID( dst->Register.Dimension ); } } - if( deflt || fd->DstRegister.Index != dst->DstRegister.Index ) { + if( deflt || fd->Register.Index != dst->Register.Index ) { TXT( "\nIndex : " ); - SID( dst->DstRegister.Index ); + SID( dst->Register.Index ); } if( ignored ) { TXT( "\nPadding : " ); - UIX( dst->DstRegister.Padding ); - if( deflt || fd->DstRegister.Extended != dst->DstRegister.Extended ) { + UIX( dst->Register.Padding ); + if( deflt || fd->Register.Extended != dst->Register.Extended ) { TXT( "\nExtended : " ); - UID( dst->DstRegister.Extended ); + UID( dst->Register.Extended ); } } - if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) { + if( deflt || tgsi_compare_dst_register_ext_modulate( dst->RegisterExtModulate, fd->RegisterExtModulate ) ) { EOL(); TXT( "\nType : " ); - ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS ); - if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) { + ENM( dst->RegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS ); + if( deflt || fd->RegisterExtModulate.Modulate != dst->RegisterExtModulate.Modulate ) { TXT( "\nModulate: " ); - ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES ); + ENM( dst->RegisterExtModulate.Modulate, TGSI_MODULATES ); } if( ignored ) { TXT( "\nPadding : " ); - UIX( dst->DstRegisterExtModulate.Padding ); - if( deflt || fd->DstRegisterExtModulate.Extended != dst->DstRegisterExtModulate.Extended ) { + UIX( dst->RegisterExtModulate.Padding ); + if( deflt || fd->RegisterExtModulate.Extended != dst->RegisterExtModulate.Extended ) { TXT( "\nExtended: " ); - UID( dst->DstRegisterExtModulate.Extended ); + UID( dst->RegisterExtModulate.Extended ); } } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 3f8d59e46a..a6bd1a784f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -107,10 +107,10 @@ #define TEMP_P0 TGSI_EXEC_TEMP_P0 #define IS_CHANNEL_ENABLED(INST, CHAN)\ - ((INST).Dst[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) #define IS_CHANNEL_ENABLED2(INST, CHAN)\ - ((INST).Dst[1].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[1].Register.WriteMask & (1 << (CHAN))) #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ @@ -188,7 +188,7 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) { uint i, chan; - uint writemask = inst->Dst[0].DstRegister.WriteMask; + uint writemask = inst->Dst[0].Register.WriteMask; if (writemask == TGSI_WRITEMASK_X || writemask == TGSI_WRITEMASK_Y || writemask == TGSI_WRITEMASK_Z || @@ -201,9 +201,9 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) /* loop over src regs */ for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { if ((inst->Src[i].SrcRegister.File == - inst->Dst[0].DstRegister.File) && + inst->Dst[0].Register.File) && (inst->Src[i].SrcRegister.Index == - inst->Dst[0].DstRegister.Index)) { + inst->Dst[0].Register.Index)) { /* loop over dest channels */ uint channelsWritten = 0x0; FOR_EACH_ENABLED_CHANNEL(*inst, chan) { @@ -1424,11 +1424,11 @@ store_dest( * * file[ind[2].x+1], * where: - * ind = DstRegisterInd.File - * [2] = DstRegisterInd.Index - * .x = DstRegisterInd.SwizzleX + * ind = Indirect.File + * [2] = Indirect.Index + * .x = Indirect.SwizzleX */ - if (reg->DstRegister.Indirect) { + if (reg->Register.Indirect) { union tgsi_exec_channel index; union tgsi_exec_channel indir_index; uint swizzle; @@ -1437,15 +1437,15 @@ store_dest( index.i[0] = index.i[1] = index.i[2] = - index.i[3] = reg->DstRegisterInd.Index; + index.i[3] = reg->Indirect.Index; /* get current value of address register[swizzle] */ - swizzle = tgsi_util_get_src_register_swizzle( ®->DstRegisterInd, CHAN_X ); + swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); /* fetch values from the address/indirection register */ fetch_src_file_channel( mach, - reg->DstRegisterInd.File, + reg->Indirect.File, swizzle, &index, &indir_index ); @@ -1454,37 +1454,37 @@ store_dest( offset = (int) indir_index.f[0]; } - switch (reg->DstRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_NULL: dst = &null; break; case TGSI_FILE_OUTPUT: index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] - + reg->DstRegister.Index; + + reg->Register.Index; dst = &mach->Outputs[offset + index].xyzw[chan_index]; break; case TGSI_FILE_TEMPORARY: - index = reg->DstRegister.Index; + index = reg->Register.Index; assert( index < TGSI_EXEC_NUM_TEMPS ); dst = &mach->Temps[offset + index].xyzw[chan_index]; break; case TGSI_FILE_ADDRESS: - index = reg->DstRegister.Index; + index = reg->Register.Index; dst = &mach->Addrs[index].xyzw[chan_index]; break; case TGSI_FILE_LOOP: - assert(reg->DstRegister.Index == 0); + assert(reg->Register.Index == 0); assert(mach->LoopCounterStackTop > 0); assert(chan_index == CHAN_X); dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index]; break; case TGSI_FILE_PREDICATE: - index = reg->DstRegister.Index; + index = reg->Register.Index; assert(index < TGSI_EXEC_NUM_PREDS); dst = &mach->Predicates[index].xyzw[chan_index]; break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index 7946fdd732..e3a6bc0f54 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -168,21 +168,21 @@ tgsi_parse_token( for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { - next_token( ctx, &inst->Dst[i].DstRegister ); + next_token( ctx, &inst->Dst[i].Register ); /* * No support for indirect or multi-dimensional addressing. */ - assert( !inst->Dst[i].DstRegister.Dimension ); + assert( !inst->Dst[i].Register.Dimension ); - if( inst->Dst[i].DstRegister.Indirect ) { - next_token( ctx, &inst->Dst[i].DstRegisterInd ); + if( inst->Dst[i].Register.Indirect ) { + next_token( ctx, &inst->Dst[i].Indirect ); /* * No support for indirect or multi-dimensional addressing. */ - assert( !inst->Dst[i].DstRegisterInd.Dimension ); - assert( !inst->Dst[i].DstRegisterInd.Indirect ); + assert( !inst->Dst[i].Indirect.Dimension ); + assert( !inst->Dst[i].Indirect.Indirect ); } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h index 1965c5181d..331a533dd9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -47,8 +47,8 @@ struct tgsi_full_header struct tgsi_full_dst_register { - struct tgsi_dst_register DstRegister; - struct tgsi_src_register DstRegisterInd; + struct tgsi_dst_register Register; + struct tgsi_src_register Indirect; }; struct tgsi_full_src_register diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index ec5f235143..adb16f6ac9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -60,7 +60,7 @@ const float ppc_builtin_constants[] ALIGN16_ATTRIB = { for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - ((INST).Dst[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) @@ -167,8 +167,8 @@ is_ppc_vec_temporary(const struct tgsi_full_src_register *reg) static boolean is_ppc_vec_temporary_dst(const struct tgsi_full_dst_register *reg) { - return (reg->DstRegister.File == TGSI_FILE_TEMPORARY && - reg->DstRegister.Index < MAX_PPC_TEMPS); + return (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Index < MAX_PPC_TEMPS); } @@ -485,7 +485,7 @@ get_dst_vec(struct gen_context *gen, const struct tgsi_full_dst_register *reg = &inst->Dst[0]; if (is_ppc_vec_temporary_dst(reg)) { - int vec = gen->temps_map[reg->DstRegister.Index][chan_index]; + int vec = gen->temps_map[reg->Register.Index][chan_index]; return vec; } else { @@ -507,10 +507,10 @@ emit_store(struct gen_context *gen, { const struct tgsi_full_dst_register *reg = &inst->Dst[0]; - switch (reg->DstRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_OUTPUT: { - int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; + int offset = (reg->Register.Index * 4 + chan_index) * 16; int offset_reg = emit_li_offset(gen, offset); ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg); } @@ -518,14 +518,14 @@ emit_store(struct gen_context *gen, case TGSI_FILE_TEMPORARY: if (is_ppc_vec_temporary_dst(reg)) { if (!free_vec) { - int dst_vec = gen->temps_map[reg->DstRegister.Index][chan_index]; + int dst_vec = gen->temps_map[reg->Register.Index][chan_index]; if (dst_vec != src_vec) ppc_vmove(gen->f, dst_vec, src_vec); } free_vec = FALSE; } else { - int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; + int offset = (reg->Register.Index * 4 + chan_index) * 16; int offset_reg = emit_li_offset(gen, offset); ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg); } @@ -535,7 +535,7 @@ emit_store(struct gen_context *gen, emit_addrs( func, xmm, - reg->DstRegister.Index, + reg->Register.Index, chan_index ); break; #endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 005894e604..7e50e25353 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -212,8 +212,8 @@ iter_instruction( for (i = 0; i < inst->Instruction.NumDstRegs; i++) { check_register_usage( ctx, - inst->Dst[i].DstRegister.File, - inst->Dst[i].DstRegister.Index, + inst->Dst[i].Register.File, + inst->Dst[i].Register.Index, "destination", FALSE ); } @@ -245,8 +245,8 @@ iter_instruction( switch (inst->Instruction.Opcode) { case TGSI_OPCODE_BGNFOR: case TGSI_OPCODE_ENDFOR: - if (inst->Dst[0].DstRegister.File != TGSI_FILE_LOOP || - inst->Dst[0].DstRegister.Index != 0) { + if (inst->Dst[0].Register.File != TGSI_FILE_LOOP || + inst->Dst[0].Register.Index != 0) { report_error(ctx, "Destination register must be LOOP[0]"); } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 6ca25c36ec..90832e71bb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -212,8 +212,8 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens) /* Do a whole bunch of checks for a simple move */ if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV || src->SrcRegister.File != TGSI_FILE_INPUT || - dst->DstRegister.File != TGSI_FILE_OUTPUT || - src->SrcRegister.Index != dst->DstRegister.Index || + dst->Register.File != TGSI_FILE_OUTPUT || + src->SrcRegister.Index != dst->Register.Index || src->SrcRegister.Negate || src->SrcRegister.Absolute || @@ -223,7 +223,7 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens) src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W || - dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW) + dst->Register.WriteMask != TGSI_WRITEMASK_XYZW) { tgsi_parse_free(&parse); return FALSE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index c23b0cc343..785076a520 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -58,7 +58,7 @@ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - ((INST).Dst[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) @@ -1371,12 +1371,12 @@ emit_store( } - switch( reg->DstRegister.File ) { + switch( reg->Register.File ) { case TGSI_FILE_OUTPUT: emit_output( func, xmm, - reg->DstRegister.Index, + reg->Register.Index, chan_index ); break; @@ -1384,7 +1384,7 @@ emit_store( emit_temps( func, xmm, - reg->DstRegister.Index, + reg->Register.Index, chan_index ); break; @@ -1392,7 +1392,7 @@ emit_store( emit_addrs( func, xmm, - reg->DstRegister.Index, + reg->Register.Index, chan_index ); break; @@ -1727,8 +1727,8 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst) } for (i = 0; i < inst->Instruction.NumDstRegs; i++) { const struct tgsi_full_dst_register *reg = &inst->Dst[i]; - if (reg->DstRegister.File == TGSI_FILE_TEMPORARY && - reg->DstRegister.Indirect) + if (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Indirect) return TRUE; } return FALSE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 295ded9664..27b90f5ab7 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -506,9 +506,9 @@ parse_dst_operand( if (!parse_opt_writemask( ctx, &writemask )) return FALSE; - dst->DstRegister.File = file; - dst->DstRegister.Index = index; - dst->DstRegister.WriteMask = writemask; + dst->Register.File = file; + dst->Register.Index = index; + dst->Register.WriteMask = writemask; return TRUE; } diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index 34a02b5042..e31a46ba46 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -213,7 +213,7 @@ create_frag_shader(struct vl_compositor *c) */ for (i = 0; i < 4; ++i) { inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i); - inst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index 93e79e7f37..4564a6c67f 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -240,7 +240,7 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r) inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -418,7 +418,7 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -623,7 +623,7 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } diff --git a/src/gallium/auxiliary/vl/vl_shader_build.c b/src/gallium/auxiliary/vl/vl_shader_build.c index 548dfca05a..9ebb4a9171 100644 --- a/src/gallium/auxiliary/vl/vl_shader_build.c +++ b/src/gallium/auxiliary/vl/vl_shader_build.c @@ -138,8 +138,8 @@ struct tgsi_full_instruction vl_inst2 inst.Instruction.Opcode = opcode; inst.Instruction.NumDstRegs = 1; - inst.Dst[0].DstRegister.File = dst_file; - inst.Dst[0].DstRegister.Index = dst_index; + inst.Dst[0].Register.File = dst_file; + inst.Dst[0].Register.Index = dst_index; inst.Instruction.NumSrcRegs = 1; inst.Src[0].SrcRegister.File = src_file; inst.Src[0].SrcRegister.Index = src_index; @@ -162,8 +162,8 @@ struct tgsi_full_instruction vl_inst3 inst.Instruction.Opcode = opcode; inst.Instruction.NumDstRegs = 1; - inst.Dst[0].DstRegister.File = dst_file; - inst.Dst[0].DstRegister.Index = dst_index; + inst.Dst[0].Register.File = dst_file; + inst.Dst[0].Register.Index = dst_index; inst.Instruction.NumSrcRegs = 2; inst.Src[0].SrcRegister.File = src1_file; inst.Src[0].SrcRegister.Index = src1_index; @@ -188,8 +188,8 @@ struct tgsi_full_instruction vl_tex inst.Instruction.Opcode = TGSI_OPCODE_TEX; inst.Instruction.NumDstRegs = 1; - inst.Dst[0].DstRegister.File = dst_file; - inst.Dst[0].DstRegister.Index = dst_index; + inst.Dst[0].Register.File = dst_file; + inst.Dst[0].Register.Index = dst_index; inst.Instruction.NumSrcRegs = 2; inst.Instruction.Texture = 1; inst.Texture.Texture = tex; @@ -218,8 +218,8 @@ struct tgsi_full_instruction vl_inst4 inst.Instruction.Opcode = opcode; inst.Instruction.NumDstRegs = 1; - inst.Dst[0].DstRegister.File = dst_file; - inst.Dst[0].DstRegister.Index = dst_index; + inst.Dst[0].Register.File = dst_file; + inst.Dst[0].Register.Index = dst_index; inst.Instruction.NumSrcRegs = 3; inst.Src[0].SrcRegister.File = src1_file; inst.Src[0].SrcRegister.Index = src1_index; diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index aeabe002d0..f639c62605 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -249,7 +249,7 @@ static boolean is_memory_dst(struct codegen *gen, int channel, const struct tgsi_full_dst_register *dst) { - if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { + if (dst->Register.File == TGSI_FILE_OUTPUT) { return TRUE; } else { @@ -374,12 +374,12 @@ get_dst_reg(struct codegen *gen, { int reg = -1; - switch (dest->DstRegister.File) { + switch (dest->Register.File) { case TGSI_FILE_TEMPORARY: if (gen->if_nesting > 0 || gen->loop_nesting > 0) reg = get_itemp(gen); else - reg = gen->temp_regs[dest->DstRegister.Index][channel]; + reg = gen->temp_regs[dest->Register.Index][channel]; break; case TGSI_FILE_OUTPUT: reg = get_itemp(gen); @@ -419,10 +419,10 @@ store_dest_reg(struct codegen *gen, } #endif - switch (dest->DstRegister.File) { + switch (dest->Register.File) { case TGSI_FILE_TEMPORARY: if (gen->if_nesting > 0 || gen->loop_nesting > 0) { - int d_reg = gen->temp_regs[dest->DstRegister.Index][channel]; + int d_reg = gen->temp_regs[dest->Register.Index][channel]; int exec_reg = get_exec_mask_reg(gen); /* Mix d with new value according to exec mask: * d[i] = mask_reg[i] ? value_reg : d_reg @@ -437,7 +437,7 @@ store_dest_reg(struct codegen *gen, case TGSI_FILE_OUTPUT: { /* offset is measured in quadwords, not bytes */ - int offset = dest->DstRegister.Index * 4 + channel; + int offset = dest->Register.Index * 4 + channel; if (gen->if_nesting > 0 || gen->loop_nesting > 0) { int exec_reg = get_exec_mask_reg(gen); int curval_reg = get_itemp(gen); @@ -544,7 +544,7 @@ emit_epilogue(struct codegen *gen) #define FOR_EACH_ENABLED_CHANNEL(inst, ch) \ for (ch = 0; ch < 4; ch++) \ - if (inst->Dst[0].DstRegister.WriteMask & (1 << ch)) + if (inst->Dst[0].Register.WriteMask & (1 << ch)) static boolean @@ -948,7 +948,7 @@ emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst) /* t = y0 * z1 - t */ spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - if (inst->Dst[0].DstRegister.WriteMask & (1 << CHAN_X)) { + if (inst->Dst[0].Register.WriteMask & (1 << CHAN_X)) { store_dest_reg(gen, tmp_reg, CHAN_X, &inst->Dst[0]); } @@ -962,7 +962,7 @@ emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst) /* t = z0 * x1 - t */ spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - if (inst->Dst[0].DstRegister.WriteMask & (1 << CHAN_Y)) { + if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Y)) { store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->Dst[0]); } @@ -976,7 +976,7 @@ emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst) /* t = x0 * y1 - t */ spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - if (inst->Dst[0].DstRegister.WriteMask & (1 << CHAN_Z)) { + if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Z)) { store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->Dst[0]); } diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index ee5e3432d5..1b4792a316 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -108,10 +108,10 @@ for (CHAN = 0; CHAN < 4; CHAN++) #define IS_CHANNEL_ENABLED(INST, CHAN)\ - ((INST).Dst[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) #define IS_CHANNEL_ENABLED2(INST, CHAN)\ - ((INST).Dst[1].DstRegister.WriteMask & (1 << (CHAN))) + ((INST).Dst[1].Register.WriteMask & (1 << (CHAN))) #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ FOR_EACH_CHANNEL( CHAN )\ @@ -532,21 +532,21 @@ store_dest( { union spu_exec_channel *dst; - switch( reg->DstRegister.File ) { + switch( reg->Register.File ) { case TGSI_FILE_NULL: return; case TGSI_FILE_OUTPUT: dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] - + reg->DstRegister.Index].xyzw[chan_index]; + + reg->Register.Index].xyzw[chan_index]; break; case TGSI_FILE_TEMPORARY: - dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; + dst = &mach->Temps[reg->Register.Index].xyzw[chan_index]; break; case TGSI_FILE_ADDRESS: - dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; + dst = &mach->Addrs[reg->Register.Index].xyzw[chan_index]; break; default: diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 1a4a7bbe62..13c280827a 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -246,10 +246,10 @@ static uint get_result_vector(struct i915_fp_compile *p, const struct tgsi_full_dst_register *dest) { - switch (dest->DstRegister.File) { + switch (dest->Register.File) { case TGSI_FILE_OUTPUT: { - uint sem_name = p->shader->info.output_semantic_name[dest->DstRegister.Index]; + uint sem_name = p->shader->info.output_semantic_name[dest->Register.Index]; switch (sem_name) { case TGSI_SEMANTIC_POSITION: return UREG(REG_TYPE_OD, 0); @@ -261,7 +261,7 @@ get_result_vector(struct i915_fp_compile *p, } } case TGSI_FILE_TEMPORARY: - return UREG(REG_TYPE_R, dest->DstRegister.Index); + return UREG(REG_TYPE_R, dest->Register.Index); default: i915_program_error(p, "Bad inst->DstReg.File"); return 0; @@ -276,7 +276,7 @@ static uint get_result_flags(const struct tgsi_full_instruction *inst) { const uint writeMask - = inst->Dst[0].DstRegister.WriteMask; + = inst->Dst[0].Register.WriteMask; uint flags = 0x0; if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) @@ -738,7 +738,7 @@ i915_translate_instruction(struct i915_fp_compile *p, swizzle(tmp, X, Y, X, Y), swizzle(tmp, X, X, ONE, ONE), 0); - writemask = inst->Dst[0].DstRegister.WriteMask; + writemask = inst->Dst[0].Register.WriteMask; if (writemask & TGSI_WRITEMASK_Y) { uint tmp1; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index 893e665e69..99266f34ed 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -64,7 +64,7 @@ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - ((INST)->Dst[0].DstRegister.WriteMask & (1 << (CHAN))) + ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN))) #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) @@ -287,13 +287,13 @@ emit_store( assert(0); } - switch( reg->DstRegister.File ) { + switch( reg->Register.File ) { case TGSI_FILE_OUTPUT: - bld->outputs[reg->DstRegister.Index][chan_index] = value; + bld->outputs[reg->Register.Index][chan_index] = value; break; case TGSI_FILE_TEMPORARY: - bld->temps[reg->DstRegister.Index][chan_index] = value; + bld->temps[reg->Register.Index][chan_index] = value; break; case TGSI_FILE_ADDRESS: @@ -430,8 +430,8 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst) } for (i = 0; i < inst->Instruction.NumDstRegs; i++) { const struct tgsi_full_dst_register *reg = &inst->Dst[i]; - if (reg->DstRegister.File == TGSI_FILE_TEMPORARY && - reg->DstRegister.Indirect) + if (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Indirect) return TRUE; } return FALSE; diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c index abffbe33a8..e3bb9f9d7f 100644 --- a/src/gallium/drivers/nv20/nv20_vertprog.c +++ b/src/gallium/drivers/nv20/nv20_vertprog.c @@ -286,14 +286,14 @@ static INLINE struct nv20_sreg tgsi_dst(struct nv20_vpc *vpc, const struct tgsi_full_dst_register *fdst) { struct nv20_sreg dst; - switch (fdst->DstRegister.File) { + switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: dst = nv20_sr(NV30SR_OUTPUT, - vpc->output_map[fdst->DstRegister.Index]); + vpc->output_map[fdst->Register.Index]); break; case TGSI_FILE_TEMPORARY: - dst = nv20_sr(NV30SR_TEMP, fdst->DstRegister.Index); + dst = nv20_sr(NV30SR_TEMP, fdst->Register.Index); if (vpc->high_temp < dst.index) vpc->high_temp = dst.index; break; @@ -379,7 +379,7 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc, } dst = tgsi_dst(vpc, &finst->Dst[0]); - mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask); + mask = tgsi_mask(finst->Dst[0].Register.WriteMask); switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 20f7d4152c..14dc884b3a 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -281,22 +281,22 @@ static INLINE struct nv30_sreg tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) { int idx; - switch (fdst->DstRegister.File) { + switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: - if (fdst->DstRegister.Index == fpc->colour_id) + if (fdst->Register.Index == fpc->colour_id) return nv30_sr(NV30SR_OUTPUT, 0); else return nv30_sr(NV30SR_OUTPUT, 1); break; case TGSI_FILE_TEMPORARY: - idx = fdst->DstRegister.Index + 1; + idx = fdst->Register.Index + 1; if (fpc->high_temp < idx) fpc->high_temp = idx; return nv30_sr(NV30SR_TEMP, idx); case TGSI_FILE_NULL: return nv30_sr(NV30SR_NONE, 0); default: - NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File); + NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); return nv30_sr(NV30SR_NONE, 0); } } @@ -424,7 +424,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, } dst = tgsi_dst(fpc, &finst->Dst[0]); - mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask); + mask = tgsi_mask(finst->Dst[0].Register.WriteMask); sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); switch (finst->Instruction.Opcode) { diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 99fde93245..41e4161dda 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -286,14 +286,14 @@ static INLINE struct nv30_sreg tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) { struct nv30_sreg dst; - switch (fdst->DstRegister.File) { + switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: dst = nv30_sr(NV30SR_OUTPUT, - vpc->output_map[fdst->DstRegister.Index]); + vpc->output_map[fdst->Register.Index]); break; case TGSI_FILE_TEMPORARY: - dst = nv30_sr(NV30SR_TEMP, fdst->DstRegister.Index); + dst = nv30_sr(NV30SR_TEMP, fdst->Register.Index); if (vpc->high_temp < dst.index) vpc->high_temp = dst.index; break; @@ -379,7 +379,7 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, } dst = tgsi_dst(vpc, &finst->Dst[0]); - mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask); + mask = tgsi_mask(finst->Dst[0].Register.WriteMask); switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 8e8cba1a0c..02c23e92c0 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -290,15 +290,15 @@ tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) static INLINE struct nv40_sreg tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) { - switch (fdst->DstRegister.File) { + switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: - return fpc->r_result[fdst->DstRegister.Index]; + return fpc->r_result[fdst->Register.Index]; case TGSI_FILE_TEMPORARY: - return fpc->r_temp[fdst->DstRegister.Index]; + return fpc->r_temp[fdst->Register.Index]; case TGSI_FILE_NULL: return nv40_sr(NV40SR_NONE, 0); default: - NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File); + NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); return nv40_sr(NV40SR_NONE, 0); } } @@ -434,7 +434,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, } dst = tgsi_dst(fpc, &finst->Dst[0]); - mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask); + mask = tgsi_mask(finst->Dst[0].Register.WriteMask); sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); switch (finst->Instruction.Opcode) { diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 913e050389..c4f51d622c 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -326,15 +326,15 @@ static INLINE struct nv40_sreg tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) { struct nv40_sreg dst; - switch (fdst->DstRegister.File) { + switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: - dst = vpc->r_result[fdst->DstRegister.Index]; + dst = vpc->r_result[fdst->Register.Index]; break; case TGSI_FILE_TEMPORARY: - dst = vpc->r_temp[fdst->DstRegister.Index]; + dst = vpc->r_temp[fdst->Register.Index]; break; case TGSI_FILE_ADDRESS: - dst = vpc->r_address[fdst->DstRegister.Index]; + dst = vpc->r_address[fdst->Register.Index]; break; default: NOUVEAU_ERR("bad dst file\n"); @@ -470,7 +470,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, } dst = tgsi_dst(vpc, &finst->Dst[0]); - mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask); + mask = tgsi_mask(finst->Dst[0].Register.WriteMask); switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: @@ -683,9 +683,9 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) finst = &p.FullToken.FullInstruction; fdst = &finst->Dst[0]; - if (fdst->DstRegister.File == TGSI_FILE_ADDRESS) { - if (fdst->DstRegister.Index > high_addr) - high_addr = fdst->DstRegister.Index; + if (fdst->Register.File == TGSI_FILE_ADDRESS) { + if (fdst->Register.Index > high_addr) + high_addr = fdst->Register.Index; } } diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 57747a1840..3409edb4c8 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1549,7 +1549,7 @@ negate_supported(const struct tgsi_full_instruction *insn, int i) static unsigned nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c) { - unsigned x, mask = insn->Dst[0].DstRegister.WriteMask; + unsigned x, mask = insn->Dst[0].Register.WriteMask; switch (insn->Instruction.Opcode) { case TGSI_OPCODE_COS: @@ -1612,17 +1612,17 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c) static struct nv50_reg * tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { - switch (dst->DstRegister.File) { + switch (dst->Register.File) { case TGSI_FILE_TEMPORARY: - return &pc->temp[dst->DstRegister.Index * 4 + c]; + return &pc->temp[dst->Register.Index * 4 + c]; case TGSI_FILE_OUTPUT: - return &pc->result[dst->DstRegister.Index * 4 + c]; + return &pc->result[dst->Register.Index * 4 + c]; case TGSI_FILE_ADDRESS: { - struct nv50_reg *r = pc->addr[dst->DstRegister.Index * 4 + c]; + struct nv50_reg *r = pc->addr[dst->Register.Index * 4 + c]; if (!r) { r = alloc_addr(pc, NULL); - pc->addr[dst->DstRegister.Index * 4 + c] = r; + pc->addr[dst->Register.Index * 4 + c] = r; } assert(r); return r; @@ -1850,7 +1850,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, unsigned mask, sat, unit; int i, c; - mask = inst->Dst[0].DstRegister.WriteMask; + mask = inst->Dst[0].Register.WriteMask; sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; memset(src, 0, sizeof(src)); @@ -2264,7 +2264,7 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) const struct tgsi_dst_register *dst; unsigned i, c, k, mask; - dst = &insn->Dst[0].DstRegister; + dst = &insn->Dst[0].Register; mask = dst->WriteMask; if (dst->File == TGSI_FILE_TEMPORARY) @@ -2359,13 +2359,13 @@ static struct nv50_reg * tgsi_broadcast_dst(struct nv50_pc *pc, const struct tgsi_full_dst_register *fd, unsigned mask) { - if (fd->DstRegister.File == TGSI_FILE_TEMPORARY) { - int c = ffs(~mask & fd->DstRegister.WriteMask); + if (fd->Register.File == TGSI_FILE_TEMPORARY) { + int c = ffs(~mask & fd->Register.WriteMask); if (c) return tgsi_dst(pc, c - 1, fd); } else { - int c = ffs(fd->DstRegister.WriteMask) - 1; - if ((1 << c) == fd->DstRegister.WriteMask) + int c = ffs(fd->Register.WriteMask) - 1; + if ((1 << c) == fd->Register.WriteMask) return tgsi_dst(pc, c, fd); } @@ -2391,8 +2391,8 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, boolean neg_supp = negate_supported(insn, i); fs = &insn->Src[i]; - if (fs->SrcRegister.File != fd->DstRegister.File || - fs->SrcRegister.Index != fd->DstRegister.Index) + if (fs->SrcRegister.File != fd->Register.File || + fs->SrcRegister.Index != fd->Register.Index) continue; for (chn = 0; chn < 4; ++chn) { @@ -2403,7 +2403,7 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, c = tgsi_util_get_full_src_register_swizzle(fs, chn); s = tgsi_util_get_full_src_register_sign_mode(fs, chn); - if (!(fd->DstRegister.WriteMask & (1 << c))) + if (!(fd->Register.WriteMask & (1 << c))) continue; /* no danger if src is copied to TEMP first */ @@ -2446,10 +2446,10 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) for (i = 0; i < 4; ++i) { assert(pc->r_dst[m[i]] == NULL); - insn.Dst[0].DstRegister.WriteMask = - fd->DstRegister.WriteMask & (1 << m[i]); + insn.Dst[0].Register.WriteMask = + fd->Register.WriteMask & (1 << m[i]); - if (!insn.Dst[0].DstRegister.WriteMask) + if (!insn.Dst[0].Register.WriteMask) continue; if (deqs & (1 << i)) diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 82466e245a..92796d150b 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -190,10 +190,10 @@ static void transform_dstreg( struct rc_dst_register * dst, struct tgsi_full_dst_register * src) { - dst->File = translate_register_file(src->DstRegister.File); - dst->Index = translate_register_index(ttr, src->DstRegister.File, src->DstRegister.Index); - dst->WriteMask = src->DstRegister.WriteMask; - dst->RelAddr = src->DstRegister.Indirect; + dst->File = translate_register_file(src->Register.File); + dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index); + dst->WriteMask = src->Register.WriteMask; + dst->RelAddr = src->Register.Indirect; } static void transform_srcreg( diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 39fd7a6025..9ca89f1cdd 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -99,21 +99,21 @@ translate_dst_register( struct svga_shader_emitter *emit, const struct tgsi_full_dst_register *reg = &insn->Dst[idx]; SVGA3dShaderDestToken dest; - switch (reg->DstRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_OUTPUT: /* Output registers encode semantic information in their name. * Need to lookup a table built at decl time: */ - dest = emit->output_map[reg->DstRegister.Index]; + dest = emit->output_map[reg->Register.Index]; break; default: - dest = dst_register( translate_file( reg->DstRegister.File ), - reg->DstRegister.Index ); + dest = dst_register( translate_file( reg->Register.File ), + reg->Register.Index ); break; } - dest.mask = reg->DstRegister.WriteMask; + dest.mask = reg->Register.WriteMask; if (insn->Instruction.Saturate) dest.dstMod = SVGA3DDSTMOD_SATURATE; @@ -1434,7 +1434,7 @@ static boolean emit_pow(struct svga_shader_emitter *emit, boolean need_tmp = FALSE; /* POW can only output to a temporary */ - if (insn->Dst[0].DstRegister.File != TGSI_FILE_TEMPORARY) + if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY) need_tmp = TRUE; /* POW src1 must not be the same register as dst */ -- cgit v1.2.3 From 91a4e6d53f83c45c1da9240b6325011d96b61386 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 24 Nov 2009 15:13:17 +0000 Subject: tgsi: rename fields of tgsi_full_src_register to reduce verbosity SrcRegister -> Register SrcRegisterInd -> Indirect SrcRegisterDim -> Dimension SrcRegisterDimInd -> DimIndirect --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 20 +-- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 158 +++++++++++------------ src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 22 ++-- src/gallium/auxiliary/draw/draw_vs_aos.c | 8 +- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 34 ++--- src/gallium/auxiliary/tgsi/tgsi_build.c | 80 ++++++------ src/gallium/auxiliary/tgsi/tgsi_dump.c | 42 +++--- src/gallium/auxiliary/tgsi/tgsi_dump_c.c | 68 +++++----- src/gallium/auxiliary/tgsi/tgsi_exec.c | 60 ++++----- src/gallium/auxiliary/tgsi/tgsi_parse.c | 24 ++-- src/gallium/auxiliary/tgsi/tgsi_parse.h | 8 +- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 20 +-- src/gallium/auxiliary/tgsi/tgsi_sanity.c | 16 +-- src/gallium/auxiliary/tgsi/tgsi_scan.c | 24 ++-- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 22 ++-- src/gallium/auxiliary/tgsi/tgsi_text.c | 32 ++--- src/gallium/auxiliary/tgsi/tgsi_util.c | 24 ++-- src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 26 ++-- src/gallium/auxiliary/vl/vl_shader_build.c | 32 ++--- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 14 +- src/gallium/drivers/cell/spu/spu_exec.c | 36 +++--- src/gallium/drivers/cell/spu/spu_util.c | 10 +- src/gallium/drivers/i915/i915_fpc_translate.c | 20 +-- src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 16 +-- src/gallium/drivers/nv20/nv20_vertprog.c | 38 +++--- src/gallium/drivers/nv30/nv30_fragprog.c | 44 +++---- src/gallium/drivers/nv30/nv30_vertprog.c | 38 +++--- src/gallium/drivers/nv40/nv40_fragprog.c | 46 +++---- src/gallium/drivers/nv40/nv40_vertprog.c | 40 +++--- src/gallium/drivers/nv50/nv50_program.c | 46 +++---- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 14 +- src/gallium/drivers/svga/svga_tgsi_insn.c | 68 +++++----- 32 files changed, 575 insertions(+), 575 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index fe200983ca..3bb9616122 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -270,10 +270,10 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.Instruction.NumSrcRegs = 2; newInst.Instruction.Texture = TRUE; newInst.Texture.Texture = TGSI_TEXTURE_2D; - newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.Src[0].SrcRegister.Index = aactx->maxInput + 1; - newInst.Src[1].SrcRegister.File = TGSI_FILE_SAMPLER; - newInst.Src[1].SrcRegister.Index = aactx->freeSampler; + newInst.Src[0].Register.File = TGSI_FILE_INPUT; + newInst.Src[0].Register.Index = aactx->maxInput + 1; + newInst.Src[1].Register.File = TGSI_FILE_SAMPLER; + newInst.Src[1].Register.Index = aactx->freeSampler; ctx->emit_instruction(ctx, &newInst); @@ -285,8 +285,8 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.Dst[0].Register.Index = aactx->colorOutput; newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ; newInst.Instruction.NumSrcRegs = 1; - newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.Src[0].SrcRegister.Index = aactx->colorTemp; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = aactx->colorTemp; ctx->emit_instruction(ctx, &newInst); /* MUL alpha */ @@ -297,10 +297,10 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.Dst[0].Register.Index = aactx->colorOutput; newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W; newInst.Instruction.NumSrcRegs = 2; - newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.Src[0].SrcRegister.Index = aactx->colorTemp; - newInst.Src[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.Src[1].SrcRegister.Index = aactx->texTemp; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = aactx->colorTemp; + newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[1].Register.Index = aactx->texTemp; ctx->emit_instruction(ctx, &newInst); /* END */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 39e1406e96..75130a8fb0 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -238,10 +238,10 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.Dst[0].Register.Index = tmp0; newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XY; newInst.Instruction.NumSrcRegs = 2; - newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.Src[0].SrcRegister.Index = texInput; - newInst.Src[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.Src[1].SrcRegister.Index = texInput; + newInst.Src[0].Register.File = TGSI_FILE_INPUT; + newInst.Src[0].Register.Index = texInput; + newInst.Src[1].Register.File = TGSI_FILE_INPUT; + newInst.Src[1].Register.Index = texInput; ctx->emit_instruction(ctx, &newInst); /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */ @@ -252,12 +252,12 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.Dst[0].Register.Index = tmp0; newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X; newInst.Instruction.NumSrcRegs = 2; - newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.Src[0].SrcRegister.Index = tmp0; - newInst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - newInst.Src[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.Src[1].SrcRegister.Index = tmp0; - newInst.Src[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; + newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; + newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[1].Register.Index = tmp0; + newInst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_Y; ctx->emit_instruction(ctx, &newInst); #if NORMALIZE /* OPTIONAL normalization of length */ @@ -269,8 +269,8 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.Dst[0].Register.Index = tmp0; newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X; newInst.Instruction.NumSrcRegs = 1; - newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.Src[0].SrcRegister.Index = tmp0; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; ctx->emit_instruction(ctx, &newInst); /* RCP t0.x, t0.x; */ @@ -281,8 +281,8 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.Dst[0].Register.Index = tmp0; newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X; newInst.Instruction.NumSrcRegs = 1; - newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.Src[0].SrcRegister.Index = tmp0; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; ctx->emit_instruction(ctx, &newInst); #endif @@ -294,12 +294,12 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.Dst[0].Register.Index = tmp0; newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y; newInst.Instruction.NumSrcRegs = 2; - newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.Src[0].SrcRegister.Index = tmp0; - newInst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - newInst.Src[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.Src[1].SrcRegister.Index = texInput; - newInst.Src[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; + newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; + newInst.Src[1].Register.File = TGSI_FILE_INPUT; + newInst.Src[1].Register.Index = texInput; + newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_W; ctx->emit_instruction(ctx, &newInst); /* KIL -tmp0.yyyy; # if -tmp0.y < 0, KILL */ @@ -307,13 +307,13 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.Instruction.Opcode = TGSI_OPCODE_KIL; newInst.Instruction.NumDstRegs = 0; newInst.Instruction.NumSrcRegs = 1; - newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.Src[0].SrcRegister.Index = tmp0; - newInst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - newInst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - newInst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - newInst.Src[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - newInst.Src[0].SrcRegister.Negate = 1; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; + newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.Negate = 1; ctx->emit_instruction(ctx, &newInst); @@ -327,12 +327,12 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.Dst[0].Register.Index = tmp0; newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z; newInst.Instruction.NumSrcRegs = 2; - newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.Src[0].SrcRegister.Index = texInput; - newInst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - newInst.Src[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.Src[1].SrcRegister.Index = texInput; - newInst.Src[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; + newInst.Src[0].Register.File = TGSI_FILE_INPUT; + newInst.Src[0].Register.Index = texInput; + newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_W; + newInst.Src[1].Register.File = TGSI_FILE_INPUT; + newInst.Src[1].Register.Index = texInput; + newInst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z; ctx->emit_instruction(ctx, &newInst); /* RCP t0.z, t0.z; # t0.z = 1 / m */ @@ -343,9 +343,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.Dst[0].Register.Index = tmp0; newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z; newInst.Instruction.NumSrcRegs = 1; - newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.Src[0].SrcRegister.Index = tmp0; - newInst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; + newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Z; ctx->emit_instruction(ctx, &newInst); /* SUB t0.y, 1, t0.x; # d = 1 - d */ @@ -356,12 +356,12 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.Dst[0].Register.Index = tmp0; newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y; newInst.Instruction.NumSrcRegs = 2; - newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.Src[0].SrcRegister.Index = texInput; - newInst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - newInst.Src[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.Src[1].SrcRegister.Index = tmp0; - newInst.Src[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + newInst.Src[0].Register.File = TGSI_FILE_INPUT; + newInst.Src[0].Register.Index = texInput; + newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_W; + newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[1].Register.Index = tmp0; + newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_X; ctx->emit_instruction(ctx, &newInst); /* MUL t0.w, t0.y, t0.z; # coverage = d * m */ @@ -372,12 +372,12 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.Dst[0].Register.Index = tmp0; newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W; newInst.Instruction.NumSrcRegs = 2; - newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.Src[0].SrcRegister.Index = tmp0; - newInst.Src[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - newInst.Src[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.Src[1].SrcRegister.Index = tmp0; - newInst.Src[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; + newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y; + newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[1].Register.Index = tmp0; + newInst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_Z; ctx->emit_instruction(ctx, &newInst); /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */ @@ -388,12 +388,12 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.Dst[0].Register.Index = tmp0; newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y; newInst.Instruction.NumSrcRegs = 2; - newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.Src[0].SrcRegister.Index = tmp0; - newInst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - newInst.Src[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.Src[1].SrcRegister.Index = texInput; - newInst.Src[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; + newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; + newInst.Src[1].Register.File = TGSI_FILE_INPUT; + newInst.Src[1].Register.Index = texInput; + newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Z; ctx->emit_instruction(ctx, &newInst); /* CMP t0.w, -t0.y, tex.w, t0.w; @@ -409,25 +409,25 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.Dst[0].Register.Index = tmp0; newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W; newInst.Instruction.NumSrcRegs = 3; - newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.Src[0].SrcRegister.Index = tmp0; - newInst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - newInst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - newInst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - newInst.Src[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - newInst.Src[0].SrcRegister.Negate = 1; - newInst.Src[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.Src[1].SrcRegister.Index = texInput; - newInst.Src[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; - newInst.Src[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - newInst.Src[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - newInst.Src[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; - newInst.Src[2].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.Src[2].SrcRegister.Index = tmp0; - newInst.Src[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; - newInst.Src[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - newInst.Src[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - newInst.Src[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = tmp0; + newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y; + newInst.Src[0].Register.Negate = 1; + newInst.Src[1].Register.File = TGSI_FILE_INPUT; + newInst.Src[1].Register.Index = texInput; + newInst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_W; + newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_W; + newInst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_W; + newInst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W; + newInst.Src[2].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[2].Register.Index = tmp0; + newInst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_W; + newInst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_W; + newInst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_W; + newInst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W; ctx->emit_instruction(ctx, &newInst); } @@ -443,8 +443,8 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.Dst[0].Register.Index = aactx->colorOutput; newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ; newInst.Instruction.NumSrcRegs = 1; - newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.Src[0].SrcRegister.Index = aactx->colorTemp; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = aactx->colorTemp; ctx->emit_instruction(ctx, &newInst); /* MUL result.color.w, colorTemp, tmp0.w; */ @@ -455,10 +455,10 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.Dst[0].Register.Index = aactx->colorOutput; newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W; newInst.Instruction.NumSrcRegs = 2; - newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.Src[0].SrcRegister.Index = aactx->colorTemp; - newInst.Src[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.Src[1].SrcRegister.Index = aactx->tmp0; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = aactx->colorTemp; + newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[1].Register.Index = aactx->tmp0; ctx->emit_instruction(ctx, &newInst); } else { diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 99165b1006..45317227a8 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -283,10 +283,10 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; newInst.Dst[0].Register.Index = pctx->texTemp; newInst.Instruction.NumSrcRegs = 2; - newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.Src[0].SrcRegister.Index = wincoordInput; - newInst.Src[1].SrcRegister.File = TGSI_FILE_IMMEDIATE; - newInst.Src[1].SrcRegister.Index = pctx->numImmed; + newInst.Src[0].Register.File = TGSI_FILE_INPUT; + newInst.Src[0].Register.Index = wincoordInput; + newInst.Src[1].Register.File = TGSI_FILE_IMMEDIATE; + newInst.Src[1].Register.Index = pctx->numImmed; ctx->emit_instruction(ctx, &newInst); /* TEX texTemp, texTemp, sampler; */ @@ -298,10 +298,10 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, newInst.Instruction.NumSrcRegs = 2; newInst.Instruction.Texture = TRUE; newInst.Texture.Texture = TGSI_TEXTURE_2D; - newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.Src[0].SrcRegister.Index = pctx->texTemp; - newInst.Src[1].SrcRegister.File = TGSI_FILE_SAMPLER; - newInst.Src[1].SrcRegister.Index = pctx->freeSampler; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = pctx->texTemp; + newInst.Src[1].Register.File = TGSI_FILE_SAMPLER; + newInst.Src[1].Register.Index = pctx->freeSampler; ctx->emit_instruction(ctx, &newInst); /* KIL -texTemp; # if -texTemp < 0, KILL fragment */ @@ -309,9 +309,9 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, newInst.Instruction.Opcode = TGSI_OPCODE_KIL; newInst.Instruction.NumDstRegs = 0; newInst.Instruction.NumSrcRegs = 1; - newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.Src[0].SrcRegister.Index = pctx->texTemp; - newInst.Src[0].SrcRegister.Negate = 1; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = pctx->texTemp; + newInst.Src[0].Register.Negate = 1; ctx->emit_instruction(ctx, &newInst); } diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 8c93642954..1aaae4ab7a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -529,8 +529,8 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, const struct tgsi_full_src_register *src ) { struct x86_reg arg0 = aos_get_shader_reg(cp, - src->SrcRegister.File, - src->SrcRegister.Index); + src->Register.File, + src->Register.Index); unsigned i; ubyte swz = 0; unsigned negs = 0; @@ -620,8 +620,8 @@ static void x87_fld_src( struct aos_compilation *cp, unsigned channel ) { struct x86_reg arg0 = aos_get_shader_reg_ptr(cp, - src->SrcRegister.File, - src->SrcRegister.Index); + src->Register.File, + src->Register.Index); unsigned swizzle = tgsi_util_get_full_src_register_swizzle( src, channel ); unsigned neg = tgsi_util_get_full_src_register_sign_mode( src, channel ); diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 135d307ce1..5cafe8c3f0 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -238,22 +238,22 @@ translate_instruction(llvm::Module *module, llvm::Value *val = 0; llvm::Value *indIdx = 0; - if (src->SrcRegister.Indirect) { - indIdx = storage->addrElement(src->SrcRegisterInd.Index); + if (src->Register.Indirect) { + indIdx = storage->addrElement(src->Indirect.Index); indIdx = storage->extractIndex(indIdx); } - if (src->SrcRegister.File == TGSI_FILE_CONSTANT) { - val = storage->constElement(src->SrcRegister.Index, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_INPUT) { - val = storage->inputElement(src->SrcRegister.Index, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) { - val = storage->tempElement(src->SrcRegister.Index); - } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) { - val = storage->outputElement(src->SrcRegister.Index, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { - val = storage->immediateElement(src->SrcRegister.Index); + if (src->Register.File == TGSI_FILE_CONSTANT) { + val = storage->constElement(src->Register.Index, indIdx); + } else if (src->Register.File == TGSI_FILE_INPUT) { + val = storage->inputElement(src->Register.Index, indIdx); + } else if (src->Register.File == TGSI_FILE_TEMPORARY) { + val = storage->tempElement(src->Register.Index); + } else if (src->Register.File == TGSI_FILE_OUTPUT) { + val = storage->outputElement(src->Register.Index, indIdx); + } else if (src->Register.File == TGSI_FILE_IMMEDIATE) { + val = storage->immediateElement(src->Register.Index); } else { - fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File); + fprintf(stderr, "ERROR: not supported llvm source %d\n", src->Register.File); return; } @@ -688,11 +688,11 @@ translate_instructionir(llvm::Module *module, llvm::Value *indIdx = 0; int swizzle = swizzleInt(src); - if (src->SrcRegister.Indirect) { - indIdx = storage->addrElement(src->SrcRegisterInd.Index); + if (src->Register.Indirect) { + indIdx = storage->addrElement(src->Indirect.Index); } - val = storage->load((enum tgsi_file_type)src->SrcRegister.File, - src->SrcRegister.Index, swizzle, instr->getIRBuilder(), indIdx); + val = storage->load((enum tgsi_file_type)src->Register.File, + src->Register.Index, swizzle, instr->getIRBuilder(), indIdx); inputs[i] = val; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 91fb4f68e5..c35634c69a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -623,21 +623,21 @@ tgsi_build_full_instruction( size++; *src_register = tgsi_build_src_register( - reg->SrcRegister.File, - reg->SrcRegister.SwizzleX, - reg->SrcRegister.SwizzleY, - reg->SrcRegister.SwizzleZ, - reg->SrcRegister.SwizzleW, - reg->SrcRegister.Negate, - reg->SrcRegister.Absolute, - reg->SrcRegister.Indirect, - reg->SrcRegister.Dimension, - reg->SrcRegister.Index, + reg->Register.File, + reg->Register.SwizzleX, + reg->Register.SwizzleY, + reg->Register.SwizzleZ, + reg->Register.SwizzleW, + reg->Register.Negate, + reg->Register.Absolute, + reg->Register.Indirect, + reg->Register.Dimension, + reg->Register.Index, instruction, header ); prev_token = (struct tgsi_token *) src_register; - if( reg->SrcRegister.Indirect ) { + if( reg->Register.Indirect ) { struct tgsi_src_register *ind; if( maxsize <= size ) @@ -646,24 +646,24 @@ tgsi_build_full_instruction( size++; *ind = tgsi_build_src_register( - reg->SrcRegisterInd.File, - reg->SrcRegisterInd.SwizzleX, - reg->SrcRegisterInd.SwizzleY, - reg->SrcRegisterInd.SwizzleZ, - reg->SrcRegisterInd.SwizzleW, - reg->SrcRegisterInd.Negate, - reg->SrcRegisterInd.Absolute, - reg->SrcRegisterInd.Indirect, - reg->SrcRegisterInd.Dimension, - reg->SrcRegisterInd.Index, + reg->Indirect.File, + reg->Indirect.SwizzleX, + reg->Indirect.SwizzleY, + reg->Indirect.SwizzleZ, + reg->Indirect.SwizzleW, + reg->Indirect.Negate, + reg->Indirect.Absolute, + reg->Indirect.Indirect, + reg->Indirect.Dimension, + reg->Indirect.Index, instruction, header ); } - if( reg->SrcRegister.Dimension ) { + if( reg->Register.Dimension ) { struct tgsi_dimension *dim; - assert( !reg->SrcRegisterDim.Dimension ); + assert( !reg->Dimension.Dimension ); if( maxsize <= size ) return 0; @@ -671,12 +671,12 @@ tgsi_build_full_instruction( size++; *dim = tgsi_build_dimension( - reg->SrcRegisterDim.Indirect, - reg->SrcRegisterDim.Index, + reg->Dimension.Indirect, + reg->Dimension.Index, instruction, header ); - if( reg->SrcRegisterDim.Indirect ) { + if( reg->Dimension.Indirect ) { struct tgsi_src_register *ind; if( maxsize <= size ) @@ -685,16 +685,16 @@ tgsi_build_full_instruction( size++; *ind = tgsi_build_src_register( - reg->SrcRegisterDimInd.File, - reg->SrcRegisterDimInd.SwizzleX, - reg->SrcRegisterDimInd.SwizzleY, - reg->SrcRegisterDimInd.SwizzleZ, - reg->SrcRegisterDimInd.SwizzleW, - reg->SrcRegisterDimInd.Negate, - reg->SrcRegisterDimInd.Absolute, - reg->SrcRegisterDimInd.Indirect, - reg->SrcRegisterDimInd.Dimension, - reg->SrcRegisterDimInd.Index, + reg->DimIndirect.File, + reg->DimIndirect.SwizzleX, + reg->DimIndirect.SwizzleY, + reg->DimIndirect.SwizzleZ, + reg->DimIndirect.SwizzleW, + reg->DimIndirect.Negate, + reg->DimIndirect.Absolute, + reg->DimIndirect.Indirect, + reg->DimIndirect.Dimension, + reg->DimIndirect.Index, instruction, header ); } @@ -894,10 +894,10 @@ tgsi_default_full_src_register( void ) { struct tgsi_full_src_register full_src_register; - full_src_register.SrcRegister = tgsi_default_src_register(); - full_src_register.SrcRegisterInd = tgsi_default_src_register(); - full_src_register.SrcRegisterDim = tgsi_default_dimension(); - full_src_register.SrcRegisterDimInd = tgsi_default_src_register(); + full_src_register.Register = tgsi_default_src_register(); + full_src_register.Indirect = tgsi_default_src_register(); + full_src_register.Dimension = tgsi_default_dimension(); + full_src_register.DimIndirect = tgsi_default_src_register(); return full_src_register; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 6141865f03..da126f3b01 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -386,42 +386,42 @@ iter_instruction( CHR( ',' ); CHR( ' ' ); - if (src->SrcRegister.Negate) + if (src->Register.Negate) TXT( "-(" ); - if (src->SrcRegister.Absolute) + if (src->Register.Absolute) CHR( '|' ); - if (src->SrcRegister.Indirect) { + if (src->Register.Indirect) { _dump_register_ind( ctx, - src->SrcRegister.File, - src->SrcRegister.Index, - src->SrcRegisterInd.File, - src->SrcRegisterInd.Index, - src->SrcRegisterInd.SwizzleX ); + src->Register.File, + src->Register.Index, + src->Indirect.File, + src->Indirect.Index, + src->Indirect.SwizzleX ); } else { _dump_register( ctx, - src->SrcRegister.File, - src->SrcRegister.Index, - src->SrcRegister.Index ); + src->Register.File, + src->Register.Index, + src->Register.Index ); } - if (src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || - src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || - src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || - src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W) { + if (src->Register.SwizzleX != TGSI_SWIZZLE_X || + src->Register.SwizzleY != TGSI_SWIZZLE_Y || + src->Register.SwizzleZ != TGSI_SWIZZLE_Z || + src->Register.SwizzleW != TGSI_SWIZZLE_W) { CHR( '.' ); - ENM( src->SrcRegister.SwizzleX, swizzle_names ); - ENM( src->SrcRegister.SwizzleY, swizzle_names ); - ENM( src->SrcRegister.SwizzleZ, swizzle_names ); - ENM( src->SrcRegister.SwizzleW, swizzle_names ); + ENM( src->Register.SwizzleX, swizzle_names ); + ENM( src->Register.SwizzleY, swizzle_names ); + ENM( src->Register.SwizzleZ, swizzle_names ); + ENM( src->Register.SwizzleW, swizzle_names ); } - if (src->SrcRegister.Absolute) + if (src->Register.Absolute) CHR( '|' ); - if (src->SrcRegister.Negate) + if (src->Register.Negate) CHR( ')' ); first_reg = FALSE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c index 5fae5a225f..77f671e9eb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -392,78 +392,78 @@ dump_instruction_verbose( EOL(); TXT( "\nFile : "); - ENM( src->SrcRegister.File, TGSI_FILES ); - if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) { + ENM( src->Register.File, TGSI_FILES ); + if( deflt || fs->Register.SwizzleX != src->Register.SwizzleX ) { TXT( "\nSwizzleX : " ); - ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES ); + ENM( src->Register.SwizzleX, TGSI_SWIZZLES ); } - if( deflt || fs->SrcRegister.SwizzleY != src->SrcRegister.SwizzleY ) { + if( deflt || fs->Register.SwizzleY != src->Register.SwizzleY ) { TXT( "\nSwizzleY : " ); - ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES ); + ENM( src->Register.SwizzleY, TGSI_SWIZZLES ); } - if( deflt || fs->SrcRegister.SwizzleZ != src->SrcRegister.SwizzleZ ) { + if( deflt || fs->Register.SwizzleZ != src->Register.SwizzleZ ) { TXT( "\nSwizzleZ : " ); - ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES ); + ENM( src->Register.SwizzleZ, TGSI_SWIZZLES ); } - if( deflt || fs->SrcRegister.SwizzleW != src->SrcRegister.SwizzleW ) { + if( deflt || fs->Register.SwizzleW != src->Register.SwizzleW ) { TXT( "\nSwizzleW : " ); - ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES ); + ENM( src->Register.SwizzleW, TGSI_SWIZZLES ); } - if( deflt || fs->SrcRegister.Negate != src->SrcRegister.Negate ) { + if( deflt || fs->Register.Negate != src->Register.Negate ) { TXT( "\nNegate : " ); - UID( src->SrcRegister.Negate ); + UID( src->Register.Negate ); } if( ignored ) { - if( deflt || fs->SrcRegister.Indirect != src->SrcRegister.Indirect ) { + if( deflt || fs->Register.Indirect != src->Register.Indirect ) { TXT( "\nIndirect : " ); - UID( src->SrcRegister.Indirect ); + UID( src->Register.Indirect ); } - if( deflt || fs->SrcRegister.Dimension != src->SrcRegister.Dimension ) { + if( deflt || fs->Register.Dimension != src->Register.Dimension ) { TXT( "\nDimension: " ); - UID( src->SrcRegister.Dimension ); + UID( src->Register.Dimension ); } } - if( deflt || fs->SrcRegister.Index != src->SrcRegister.Index ) { + if( deflt || fs->Register.Index != src->Register.Index ) { TXT( "\nIndex : " ); - SID( src->SrcRegister.Index ); + SID( src->Register.Index ); } if( ignored ) { - if( deflt || fs->SrcRegister.Extended != src->SrcRegister.Extended ) { + if( deflt || fs->Register.Extended != src->Register.Extended ) { TXT( "\nExtended : " ); - UID( src->SrcRegister.Extended ); + UID( src->Register.Extended ); } } - if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) { + if( deflt || tgsi_compare_src_register_ext_mod( src->RegisterExtMod, fs->RegisterExtMod ) ) { EOL(); TXT( "\nType : " ); - ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS ); - if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) { + ENM( src->RegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS ); + if( deflt || fs->RegisterExtMod.Complement != src->RegisterExtMod.Complement ) { TXT( "\nComplement: " ); - UID( src->SrcRegisterExtMod.Complement ); + UID( src->RegisterExtMod.Complement ); } - if( deflt || fs->SrcRegisterExtMod.Bias != src->SrcRegisterExtMod.Bias ) { + if( deflt || fs->RegisterExtMod.Bias != src->RegisterExtMod.Bias ) { TXT( "\nBias : " ); - UID( src->SrcRegisterExtMod.Bias ); + UID( src->RegisterExtMod.Bias ); } - if( deflt || fs->SrcRegisterExtMod.Scale2X != src->SrcRegisterExtMod.Scale2X ) { + if( deflt || fs->RegisterExtMod.Scale2X != src->RegisterExtMod.Scale2X ) { TXT( "\nScale2X : " ); - UID( src->SrcRegisterExtMod.Scale2X ); + UID( src->RegisterExtMod.Scale2X ); } - if( deflt || fs->SrcRegisterExtMod.Absolute != src->SrcRegisterExtMod.Absolute ) { + if( deflt || fs->RegisterExtMod.Absolute != src->RegisterExtMod.Absolute ) { TXT( "\nAbsolute : " ); - UID( src->SrcRegisterExtMod.Absolute ); + UID( src->RegisterExtMod.Absolute ); } - if( deflt || fs->SrcRegisterExtMod.Negate != src->SrcRegisterExtMod.Negate ) { + if( deflt || fs->RegisterExtMod.Negate != src->RegisterExtMod.Negate ) { TXT( "\nNegate : " ); - UID( src->SrcRegisterExtMod.Negate ); + UID( src->RegisterExtMod.Negate ); } if( ignored ) { TXT( "\nPadding : " ); - UIX( src->SrcRegisterExtMod.Padding ); - if( deflt || fs->SrcRegisterExtMod.Extended != src->SrcRegisterExtMod.Extended ) { + UIX( src->RegisterExtMod.Padding ); + if( deflt || fs->RegisterExtMod.Extended != src->RegisterExtMod.Extended ) { TXT( "\nExtended : " ); - UID( src->SrcRegisterExtMod.Extended ); + UID( src->RegisterExtMod.Extended ); } } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index a6bd1a784f..6cd23b37be 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -200,9 +200,9 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) /* loop over src regs */ for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - if ((inst->Src[i].SrcRegister.File == + if ((inst->Src[i].Register.File == inst->Dst[0].Register.File) && - (inst->Src[i].SrcRegister.Index == + (inst->Src[i].Register.Index == inst->Dst[0].Register.Index)) { /* loop over dest channels */ uint channelsWritten = 0x0; @@ -1233,13 +1233,13 @@ fetch_source( * * file[1], * where: - * file = SrcRegister.File - * [1] = SrcRegister.Index + * file = Register.File + * [1] = Register.Index */ index.i[0] = index.i[1] = index.i[2] = - index.i[3] = reg->SrcRegister.Index; + index.i[3] = reg->Register.Index; /* There is an extra source register that indirectly subscripts * a register file. The direct index now becomes an offset @@ -1247,11 +1247,11 @@ fetch_source( * * file[ind[2].x+1], * where: - * ind = SrcRegisterInd.File - * [2] = SrcRegisterInd.Index - * .x = SrcRegisterInd.SwizzleX + * ind = Indirect.File + * [2] = Indirect.Index + * .x = Indirect.SwizzleX */ - if (reg->SrcRegister.Indirect) { + if (reg->Register.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; const uint execmask = mach->ExecMask; @@ -1261,13 +1261,13 @@ fetch_source( index2.i[0] = index2.i[1] = index2.i[2] = - index2.i[3] = reg->SrcRegisterInd.Index; + index2.i[3] = reg->Indirect.Index; /* get current value of address register[swizzle] */ - swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); + swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); fetch_src_file_channel( mach, - reg->SrcRegisterInd.File, + reg->Indirect.File, swizzle, &index2, &indir_index ); @@ -1293,14 +1293,14 @@ fetch_source( * * file[1][3] == file[1*sizeof(file[1])+3], * where: - * [3] = SrcRegisterDim.Index + * [3] = Dimension.Index */ - if (reg->SrcRegister.Dimension) { + if (reg->Register.Dimension) { /* The size of the first-order array depends on the register file type. * We need to multiply the index to the first array to get an effective, * "flat" index that points to the beginning of the second-order array. */ - switch (reg->SrcRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_INPUT: index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; @@ -1317,10 +1317,10 @@ fetch_source( assert( 0 ); } - index.i[0] += reg->SrcRegisterDim.Index; - index.i[1] += reg->SrcRegisterDim.Index; - index.i[2] += reg->SrcRegisterDim.Index; - index.i[3] += reg->SrcRegisterDim.Index; + index.i[0] += reg->Dimension.Index; + index.i[1] += reg->Dimension.Index; + index.i[2] += reg->Dimension.Index; + index.i[3] += reg->Dimension.Index; /* Again, the second subscript index can be addressed indirectly * identically to the first one. @@ -1329,11 +1329,11 @@ fetch_source( * * file[1][ind[4].y+3], * where: - * ind = SrcRegisterDimInd.File - * [4] = SrcRegisterDimInd.Index - * .y = SrcRegisterDimInd.SwizzleX + * ind = DimIndirect.File + * [4] = DimIndirect.Index + * .y = DimIndirect.SwizzleX */ - if (reg->SrcRegisterDim.Indirect) { + if (reg->Dimension.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; const uint execmask = mach->ExecMask; @@ -1342,12 +1342,12 @@ fetch_source( index2.i[0] = index2.i[1] = index2.i[2] = - index2.i[3] = reg->SrcRegisterDimInd.Index; + index2.i[3] = reg->DimIndirect.Index; - swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); + swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); fetch_src_file_channel( mach, - reg->SrcRegisterDimInd.File, + reg->DimIndirect.File, swizzle, &index2, &indir_index ); @@ -1367,7 +1367,7 @@ fetch_source( } /* If by any chance there was a need for a 3D array of register - * files, we would have to check whether SrcRegisterDim is followed + * files, we would have to check whether Dimension is followed * by a dimension register and continue the saga. */ } @@ -1375,7 +1375,7 @@ fetch_source( swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); fetch_src_file_channel( mach, - reg->SrcRegister.File, + reg->Register.File, swizzle, &index, chan ); @@ -1668,7 +1668,7 @@ exec_tex(struct tgsi_exec_machine *mach, boolean biasLod, boolean projected) { - const uint unit = inst->Src[1].SrcRegister.Index; + const uint unit = inst->Src[1].Register.Index; union tgsi_exec_channel r[4]; uint chan_index; float lodBias; @@ -1765,7 +1765,7 @@ static void exec_txd(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { - const uint unit = inst->Src[3].SrcRegister.Index; + const uint unit = inst->Src[3].Register.Index; union tgsi_exec_channel r[4]; uint chan_index; diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index e3a6bc0f54..4b252915c9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -190,34 +190,34 @@ tgsi_parse_token( for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { - next_token( ctx, &inst->Src[i].SrcRegister ); + next_token( ctx, &inst->Src[i].Register ); - if( inst->Src[i].SrcRegister.Indirect ) { - next_token( ctx, &inst->Src[i].SrcRegisterInd ); + if( inst->Src[i].Register.Indirect ) { + next_token( ctx, &inst->Src[i].Indirect ); /* * No support for indirect or multi-dimensional addressing. */ - assert( !inst->Src[i].SrcRegisterInd.Indirect ); - assert( !inst->Src[i].SrcRegisterInd.Dimension ); + assert( !inst->Src[i].Indirect.Indirect ); + assert( !inst->Src[i].Indirect.Dimension ); } - if( inst->Src[i].SrcRegister.Dimension ) { - next_token( ctx, &inst->Src[i].SrcRegisterDim ); + if( inst->Src[i].Register.Dimension ) { + next_token( ctx, &inst->Src[i].Dimension ); /* * No support for multi-dimensional addressing. */ - assert( !inst->Src[i].SrcRegisterDim.Dimension ); + assert( !inst->Src[i].Dimension.Dimension ); - if( inst->Src[i].SrcRegisterDim.Indirect ) { - next_token( ctx, &inst->Src[i].SrcRegisterDimInd ); + if( inst->Src[i].Dimension.Indirect ) { + next_token( ctx, &inst->Src[i].DimIndirect ); /* * No support for indirect or multi-dimensional addressing. */ - assert( !inst->Src[i].SrcRegisterInd.Indirect ); - assert( !inst->Src[i].SrcRegisterInd.Dimension ); + assert( !inst->Src[i].Indirect.Indirect ); + assert( !inst->Src[i].Indirect.Dimension ); } } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h index 331a533dd9..e9efa3fdd9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -53,10 +53,10 @@ struct tgsi_full_dst_register struct tgsi_full_src_register { - struct tgsi_src_register SrcRegister; - struct tgsi_src_register SrcRegisterInd; - struct tgsi_dimension SrcRegisterDim; - struct tgsi_src_register SrcRegisterDimInd; + struct tgsi_src_register Register; + struct tgsi_src_register Indirect; + struct tgsi_dimension Dimension; + struct tgsi_src_register DimIndirect; }; struct tgsi_full_declaration diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index adb16f6ac9..da6ad6da04 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -156,8 +156,8 @@ init_gen_context(struct gen_context *gen, struct ppc_function *func) static boolean is_ppc_vec_temporary(const struct tgsi_full_src_register *reg) { - return (reg->SrcRegister.File == TGSI_FILE_TEMPORARY && - reg->SrcRegister.Index < MAX_PPC_TEMPS); + return (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Index < MAX_PPC_TEMPS); } @@ -291,10 +291,10 @@ emit_fetch(struct gen_context *gen, case TGSI_SWIZZLE_Y: case TGSI_SWIZZLE_Z: case TGSI_SWIZZLE_W: - switch (reg->SrcRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_INPUT: { - int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + int offset = (reg->Register.Index * 4 + swizzle) * 16; int offset_reg = emit_li_offset(gen, offset); dst_vec = ppc_allocate_vec_register(gen->f); ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg); @@ -303,11 +303,11 @@ emit_fetch(struct gen_context *gen, case TGSI_FILE_TEMPORARY: if (is_ppc_vec_temporary(reg)) { /* use PPC vec register */ - dst_vec = gen->temps_map[reg->SrcRegister.Index][swizzle]; + dst_vec = gen->temps_map[reg->Register.Index][swizzle]; } else { /* use memory-based temp register "file" */ - int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + int offset = (reg->Register.Index * 4 + swizzle) * 16; int offset_reg = emit_li_offset(gen, offset); dst_vec = ppc_allocate_vec_register(gen->f); ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg); @@ -315,7 +315,7 @@ emit_fetch(struct gen_context *gen, break; case TGSI_FILE_IMMEDIATE: { - int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; + int offset = (reg->Register.Index * 4 + swizzle) * 4; int offset_reg = emit_li_offset(gen, offset); dst_vec = ppc_allocate_vec_register(gen->f); /* Load 4-byte word into vector register. @@ -331,7 +331,7 @@ emit_fetch(struct gen_context *gen, break; case TGSI_FILE_CONSTANT: { - int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; + int offset = (reg->Register.Index * 4 + swizzle) * 4; int offset_reg = emit_li_offset(gen, offset); dst_vec = ppc_allocate_vec_register(gen->f); /* Load 4-byte word into vector register. @@ -404,9 +404,9 @@ equal_src_locs(const struct tgsi_full_src_register *a, uint chan_a, { int swz_a, swz_b; int sign_a, sign_b; - if (a->SrcRegister.File != b->SrcRegister.File) + if (a->Register.File != b->Register.File) return FALSE; - if (a->SrcRegister.Index != b->SrcRegister.Index) + if (a->Register.Index != b->Register.Index) return FALSE; swz_a = tgsi_util_get_full_src_register_swizzle(a, chan_a); swz_b = tgsi_util_get_full_src_register_swizzle(b, chan_b); diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 7e50e25353..8bd1f31e9c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -220,16 +220,16 @@ iter_instruction( for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { check_register_usage( ctx, - inst->Src[i].SrcRegister.File, - inst->Src[i].SrcRegister.Index, + inst->Src[i].Register.File, + inst->Src[i].Register.Index, "source", - (boolean)inst->Src[i].SrcRegister.Indirect ); - if (inst->Src[i].SrcRegister.Indirect) { + (boolean)inst->Src[i].Register.Indirect ); + if (inst->Src[i].Register.Indirect) { uint file; int index; - file = inst->Src[i].SrcRegisterInd.File; - index = inst->Src[i].SrcRegisterInd.Index; + file = inst->Src[i].Indirect.File; + index = inst->Src[i].Indirect.Index; check_register_usage( ctx, file, @@ -254,8 +254,8 @@ iter_instruction( switch (inst->Instruction.Opcode) { case TGSI_OPCODE_BGNFOR: - if (inst->Src[0].SrcRegister.File != TGSI_FILE_CONSTANT && - inst->Src[0].SrcRegister.File != TGSI_FILE_IMMEDIATE) { + if (inst->Src[0].Register.File != TGSI_FILE_CONSTANT && + inst->Src[0].Register.File != TGSI_FILE_IMMEDIATE) { report_error(ctx, "Source register file must be either CONST or IMM"); } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 90832e71bb..a5d2db04ec 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -97,13 +97,13 @@ tgsi_scan_shader(const struct tgsi_token *tokens, for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *src = &fullinst->Src[i]; - if (src->SrcRegister.File == TGSI_FILE_INPUT) { - const int ind = src->SrcRegister.Index; + if (src->Register.File == TGSI_FILE_INPUT) { + const int ind = src->Register.Index; if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FOG) { - if (src->SrcRegister.SwizzleX == TGSI_SWIZZLE_X) { + if (src->Register.SwizzleX == TGSI_SWIZZLE_X) { info->uses_fogcoord = TRUE; } - else if (src->SrcRegister.SwizzleX == TGSI_SWIZZLE_Y) { + else if (src->Register.SwizzleX == TGSI_SWIZZLE_Y) { info->uses_frontfacing = TRUE; } } @@ -211,17 +211,17 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens) /* Do a whole bunch of checks for a simple move */ if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV || - src->SrcRegister.File != TGSI_FILE_INPUT || + src->Register.File != TGSI_FILE_INPUT || dst->Register.File != TGSI_FILE_OUTPUT || - src->SrcRegister.Index != dst->Register.Index || + src->Register.Index != dst->Register.Index || - src->SrcRegister.Negate || - src->SrcRegister.Absolute || + src->Register.Negate || + src->Register.Absolute || - src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || - src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || - src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || - src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W || + src->Register.SwizzleX != TGSI_SWIZZLE_X || + src->Register.SwizzleY != TGSI_SWIZZLE_Y || + src->Register.SwizzleZ != TGSI_SWIZZLE_Z || + src->Register.SwizzleW != TGSI_SWIZZLE_W || dst->Register.WriteMask != TGSI_WRITEMASK_XYZW) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 785076a520..76051ea0d8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1267,23 +1267,23 @@ emit_fetch( case TGSI_SWIZZLE_Y: case TGSI_SWIZZLE_Z: case TGSI_SWIZZLE_W: - switch (reg->SrcRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_CONSTANT: emit_const( func, xmm, - reg->SrcRegister.Index, + reg->Register.Index, swizzle, - reg->SrcRegister.Indirect, - reg->SrcRegisterInd.File, - reg->SrcRegisterInd.Index ); + reg->Register.Indirect, + reg->Indirect.File, + reg->Indirect.Index ); break; case TGSI_FILE_IMMEDIATE: emit_immediate( func, xmm, - reg->SrcRegister.Index, + reg->Register.Index, swizzle ); break; @@ -1291,7 +1291,7 @@ emit_fetch( emit_inputf( func, xmm, - reg->SrcRegister.Index, + reg->Register.Index, swizzle ); break; @@ -1299,7 +1299,7 @@ emit_fetch( emit_tempf( func, xmm, - reg->SrcRegister.Index, + reg->Register.Index, swizzle ); break; @@ -1459,7 +1459,7 @@ emit_tex( struct x86_function *func, boolean lodbias, boolean projected) { - const uint unit = inst->Src[1].SrcRegister.Index; + const uint unit = inst->Src[1].Register.Index; struct x86_reg args[2]; unsigned count; unsigned i; @@ -1721,8 +1721,8 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst) uint i; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *reg = &inst->Src[i]; - if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY && - reg->SrcRegister.Indirect) + if (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Indirect) return TRUE; } for (i = 0; i < inst->Instruction.NumDstRegs; i++) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 27b90f5ab7..ca2e2bae11 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -565,41 +565,41 @@ parse_src_operand( if (*ctx->cur == '-') { ctx->cur++; eat_opt_white( &ctx->cur ); - src->SrcRegister.Negate = 1; + src->Register.Negate = 1; } if (*ctx->cur == '|') { ctx->cur++; eat_opt_white( &ctx->cur ); - src->SrcRegister.Absolute = 1; + src->Register.Absolute = 1; } if (!parse_register_src(ctx, &file, &index, &ind_file, &ind_index, &ind_comp)) return FALSE; - src->SrcRegister.File = file; - src->SrcRegister.Index = index; + src->Register.File = file; + src->Register.Index = index; if (ind_file != TGSI_FILE_NULL) { - src->SrcRegister.Indirect = 1; - src->SrcRegisterInd.File = ind_file; - src->SrcRegisterInd.Index = ind_index; - src->SrcRegisterInd.SwizzleX = ind_comp; - src->SrcRegisterInd.SwizzleY = ind_comp; - src->SrcRegisterInd.SwizzleZ = ind_comp; - src->SrcRegisterInd.SwizzleW = ind_comp; + src->Register.Indirect = 1; + src->Indirect.File = ind_file; + src->Indirect.Index = ind_index; + src->Indirect.SwizzleX = ind_comp; + src->Indirect.SwizzleY = ind_comp; + src->Indirect.SwizzleZ = ind_comp; + src->Indirect.SwizzleW = ind_comp; } /* Parse optional swizzle. */ if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle )) { if (parsed_swizzle) { - src->SrcRegister.SwizzleX = swizzle[0]; - src->SrcRegister.SwizzleY = swizzle[1]; - src->SrcRegister.SwizzleZ = swizzle[2]; - src->SrcRegister.SwizzleW = swizzle[3]; + src->Register.SwizzleX = swizzle[0]; + src->Register.SwizzleY = swizzle[1]; + src->Register.SwizzleZ = swizzle[2]; + src->Register.SwizzleW = swizzle[3]; } } - if (src->SrcRegister.Absolute) { + if (src->Register.Absolute) { eat_opt_white( &ctx->cur ); if (*ctx->cur != '|') { report_error( ctx, "Expected `|'" ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index 3544011b47..f4ca9e21ed 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -76,7 +76,7 @@ tgsi_util_get_full_src_register_swizzle( unsigned component ) { return tgsi_util_get_src_register_swizzle( - ®->SrcRegister, + ®->Register, component ); } @@ -111,10 +111,10 @@ tgsi_util_get_full_src_register_sign_mode( { unsigned sign_mode; - if( reg->SrcRegister.Absolute ) { + if( reg->Register.Absolute ) { /* Consider only the post-abs negation. */ - if( reg->SrcRegister.Negate ) { + if( reg->Register.Negate ) { sign_mode = TGSI_UTIL_SIGN_SET; } else { @@ -122,7 +122,7 @@ tgsi_util_get_full_src_register_sign_mode( } } else { - if( reg->SrcRegister.Negate ) { + if( reg->Register.Negate ) { sign_mode = TGSI_UTIL_SIGN_TOGGLE; } else { @@ -141,23 +141,23 @@ tgsi_util_set_full_src_register_sign_mode( switch (sign_mode) { case TGSI_UTIL_SIGN_CLEAR: - reg->SrcRegister.Negate = 0; - reg->SrcRegister.Absolute = 1; + reg->Register.Negate = 0; + reg->Register.Absolute = 1; break; case TGSI_UTIL_SIGN_SET: - reg->SrcRegister.Absolute = 1; - reg->SrcRegister.Negate = 1; + reg->Register.Absolute = 1; + reg->Register.Negate = 1; break; case TGSI_UTIL_SIGN_TOGGLE: - reg->SrcRegister.Negate = 1; - reg->SrcRegister.Absolute = 0; + reg->Register.Negate = 1; + reg->Register.Absolute = 0; break; case TGSI_UTIL_SIGN_KEEP: - reg->SrcRegister.Negate = 0; - reg->SrcRegister.Absolute = 0; + reg->Register.Negate = 0; + reg->Register.Absolute = 0; break; default: diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index 4564a6c67f..36a7987099 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -237,9 +237,9 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r) ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -415,9 +415,9 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -620,9 +620,9 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -642,10 +642,10 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.Src[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ diff --git a/src/gallium/auxiliary/vl/vl_shader_build.c b/src/gallium/auxiliary/vl/vl_shader_build.c index 9ebb4a9171..d011ef97bd 100644 --- a/src/gallium/auxiliary/vl/vl_shader_build.c +++ b/src/gallium/auxiliary/vl/vl_shader_build.c @@ -141,8 +141,8 @@ struct tgsi_full_instruction vl_inst2 inst.Dst[0].Register.File = dst_file; inst.Dst[0].Register.Index = dst_index; inst.Instruction.NumSrcRegs = 1; - inst.Src[0].SrcRegister.File = src_file; - inst.Src[0].SrcRegister.Index = src_index; + inst.Src[0].Register.File = src_file; + inst.Src[0].Register.Index = src_index; return inst; } @@ -165,10 +165,10 @@ struct tgsi_full_instruction vl_inst3 inst.Dst[0].Register.File = dst_file; inst.Dst[0].Register.Index = dst_index; inst.Instruction.NumSrcRegs = 2; - inst.Src[0].SrcRegister.File = src1_file; - inst.Src[0].SrcRegister.Index = src1_index; - inst.Src[1].SrcRegister.File = src2_file; - inst.Src[1].SrcRegister.Index = src2_index; + inst.Src[0].Register.File = src1_file; + inst.Src[0].Register.Index = src1_index; + inst.Src[1].Register.File = src2_file; + inst.Src[1].Register.Index = src2_index; return inst; } @@ -193,10 +193,10 @@ struct tgsi_full_instruction vl_tex inst.Instruction.NumSrcRegs = 2; inst.Instruction.Texture = 1; inst.Texture.Texture = tex; - inst.Src[0].SrcRegister.File = src1_file; - inst.Src[0].SrcRegister.Index = src1_index; - inst.Src[1].SrcRegister.File = src2_file; - inst.Src[1].SrcRegister.Index = src2_index; + inst.Src[0].Register.File = src1_file; + inst.Src[0].Register.Index = src1_index; + inst.Src[1].Register.File = src2_file; + inst.Src[1].Register.Index = src2_index; return inst; } @@ -221,12 +221,12 @@ struct tgsi_full_instruction vl_inst4 inst.Dst[0].Register.File = dst_file; inst.Dst[0].Register.Index = dst_index; inst.Instruction.NumSrcRegs = 3; - inst.Src[0].SrcRegister.File = src1_file; - inst.Src[0].SrcRegister.Index = src1_index; - inst.Src[1].SrcRegister.File = src2_file; - inst.Src[1].SrcRegister.Index = src2_index; - inst.Src[2].SrcRegister.File = src3_file; - inst.Src[2].SrcRegister.Index = src3_index; + inst.Src[0].Register.File = src1_file; + inst.Src[0].Register.Index = src1_index; + inst.Src[1].Register.File = src2_file; + inst.Src[1].Register.Index = src2_index; + inst.Src[2].Register.File = src3_file; + inst.Src[2].Register.Index = src3_index; return inst; } diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index f639c62605..4d43f65d29 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -237,8 +237,8 @@ is_register_src(struct codegen *gen, int channel, if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) { return FALSE; } - if (src->SrcRegister.File == TGSI_FILE_TEMPORARY || - src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { + if (src->Register.File == TGSI_FILE_TEMPORARY || + src->Register.File == TGSI_FILE_IMMEDIATE) { return TRUE; } return FALSE; @@ -279,15 +279,15 @@ get_src_reg(struct codegen *gen, assert(swizzle <= TGSI_SWIZZLE_W); { - int index = src->SrcRegister.Index; + int index = src->Register.Index; assert(swizzle < 4); - if (src->SrcRegister.Indirect) { + if (src->Register.Indirect) { /* XXX unfinished */ } - switch (src->SrcRegister.File) { + switch (src->Register.File) { case TGSI_FILE_TEMPORARY: reg = gen->temp_regs[index][swizzle]; break; @@ -1352,7 +1352,7 @@ static boolean emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) { const uint target = inst->InstructionExtTexture.Texture; - const uint unit = inst->Src[1].SrcRegister.Index; + const uint unit = inst->Src[1].Register.Index; uint addr; int ch; int coord_regs[4], d_regs[4]; @@ -1373,7 +1373,7 @@ emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) return FALSE; } - assert(inst->Src[1].SrcRegister.File == TGSI_FILE_SAMPLER); + assert(inst->Src[1].Register.File == TGSI_FILE_SAMPLER); spe_comment(gen->f, -4, "CALL tex:"); diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 1b4792a316..5ed330aa6e 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -431,22 +431,22 @@ fetch_source( index.i[0] = index.i[1] = index.i[2] = - index.i[3] = reg->SrcRegister.Index; + index.i[3] = reg->Register.Index; - if (reg->SrcRegister.Indirect) { + if (reg->Register.Indirect) { union spu_exec_channel index2; union spu_exec_channel indir_index; index2.i[0] = index2.i[1] = index2.i[2] = - index2.i[3] = reg->SrcRegisterInd.Index; + index2.i[3] = reg->Indirect.Index; - swizzle = tgsi_util_get_src_register_swizzle(®->SrcRegisterInd, + swizzle = tgsi_util_get_src_register_swizzle(®->Indirect, CHAN_X); fetch_src_file_channel( mach, - reg->SrcRegisterInd.File, + reg->Indirect.File, swizzle, &index2, &indir_index ); @@ -454,8 +454,8 @@ fetch_source( index.q = si_a(index.q, indir_index.q); } - if( reg->SrcRegister.Dimension ) { - switch( reg->SrcRegister.File ) { + if( reg->Register.Dimension ) { + switch( reg->Register.File ) { case TGSI_FILE_INPUT: index.q = si_mpyi(index.q, 17); break; @@ -466,24 +466,24 @@ fetch_source( ASSERT( 0 ); } - index.i[0] += reg->SrcRegisterDim.Index; - index.i[1] += reg->SrcRegisterDim.Index; - index.i[2] += reg->SrcRegisterDim.Index; - index.i[3] += reg->SrcRegisterDim.Index; + index.i[0] += reg->Dimension.Index; + index.i[1] += reg->Dimension.Index; + index.i[2] += reg->Dimension.Index; + index.i[3] += reg->Dimension.Index; - if (reg->SrcRegisterDim.Indirect) { + if (reg->Dimension.Indirect) { union spu_exec_channel index2; union spu_exec_channel indir_index; index2.i[0] = index2.i[1] = index2.i[2] = - index2.i[3] = reg->SrcRegisterDimInd.Index; + index2.i[3] = reg->DimIndirect.Index; - swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); + swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); fetch_src_file_channel( mach, - reg->SrcRegisterDimInd.File, + reg->DimIndirect.File, swizzle, &index2, &indir_index ); @@ -495,7 +495,7 @@ fetch_source( swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); fetch_src_file_channel( mach, - reg->SrcRegister.File, + reg->Register.File, swizzle, &index, chan ); @@ -517,7 +517,7 @@ fetch_source( break; } - if (reg->SrcRegisterExtMod.Complement) { + if (reg->RegisterExtMod.Complement) { chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q); } } @@ -677,7 +677,7 @@ exec_tex(struct spu_exec_machine *mach, const struct tgsi_full_instruction *inst, boolean biasLod, boolean projected) { - const uint unit = inst->Src[1].SrcRegister.Index; + const uint unit = inst->Src[1].Register.Index; union spu_exec_channel r[8]; uint chan_index; float lodBias; diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c index c2c32b22d5..24057e29e3 100644 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -33,7 +33,7 @@ tgsi_util_get_full_src_register_swizzle( unsigned component ) { return tgsi_util_get_src_register_swizzle( - reg->SrcRegister, + reg->Register, component ); } @@ -45,10 +45,10 @@ tgsi_util_get_full_src_register_sign_mode( { unsigned sign_mode; - if( reg->SrcRegisterExtMod.Absolute ) { + if( reg->RegisterExtMod.Absolute ) { /* Consider only the post-abs negation. */ - if( reg->SrcRegisterExtMod.Negate ) { + if( reg->RegisterExtMod.Negate ) { sign_mode = TGSI_UTIL_SIGN_SET; } else { @@ -60,8 +60,8 @@ tgsi_util_get_full_src_register_sign_mode( unsigned negate; - negate = reg->SrcRegister.Negate; - if( reg->SrcRegisterExtMod.Negate ) { + negate = reg->Register.Negate; + if( reg->RegisterExtMod.Negate ) { negate = !negate; } diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 13c280827a..25c53210be 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -143,12 +143,12 @@ static uint src_vector(struct i915_fp_compile *p, const struct tgsi_full_src_register *source) { - uint index = source->SrcRegister.Index; + uint index = source->Register.Index; uint src = 0, sem_name, sem_ind; - switch (source->SrcRegister.File) { + switch (source->Register.File) { case TGSI_FILE_TEMPORARY: - if (source->SrcRegister.Index >= I915_MAX_TEMPORARY) { + if (source->Register.Index >= I915_MAX_TEMPORARY) { i915_program_error(p, "Exceeded max temporary reg"); return 0; } @@ -215,17 +215,17 @@ src_vector(struct i915_fp_compile *p, } src = swizzle(src, - source->SrcRegister.SwizzleX, - source->SrcRegister.SwizzleY, - source->SrcRegister.SwizzleZ, - source->SrcRegister.SwizzleW); + source->Register.SwizzleX, + source->Register.SwizzleY, + source->Register.SwizzleZ, + source->Register.SwizzleW); /* There's both negate-all-components and per-component negation. * Try to handle both here. */ { - int n = source->SrcRegister.Negate; + int n = source->Register.Negate; src = negate(src, n, n, n, n); } @@ -233,7 +233,7 @@ src_vector(struct i915_fp_compile *p, #if 0 /* XXX assertions disabled to allow arbfplight.c to run */ /* XXX enable these assertions, or fix things */ - assert(!source->SrcRegister.Absolute); + assert(!source->Register.Absolute); #endif return src; } @@ -339,7 +339,7 @@ emit_tex(struct i915_fp_compile *p, uint opcode) { uint texture = inst->Texture.Texture; - uint unit = inst->Src[1].SrcRegister.Index; + uint unit = inst->Src[1].Register.Index; uint tex = translate_tex_src_target( p, texture ); uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); uint coord = src_vector( p, &inst->Src[0]); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index 99266f34ed..fe2db04d8f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -167,9 +167,9 @@ emit_fetch( case TGSI_SWIZZLE_Z: case TGSI_SWIZZLE_W: - switch (reg->SrcRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_CONSTANT: { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0); + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0); LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); res = lp_build_broadcast_scalar(&bld->base, scalar); @@ -177,17 +177,17 @@ emit_fetch( } case TGSI_FILE_IMMEDIATE: - res = bld->immediates[reg->SrcRegister.Index][swizzle]; + res = bld->immediates[reg->Register.Index][swizzle]; assert(res); break; case TGSI_FILE_INPUT: - res = bld->inputs[reg->SrcRegister.Index][swizzle]; + res = bld->inputs[reg->Register.Index][swizzle]; assert(res); break; case TGSI_FILE_TEMPORARY: - res = bld->temps[reg->SrcRegister.Index][swizzle]; + res = bld->temps[reg->Register.Index][swizzle]; if(!res) return bld->base.undef; break; @@ -319,7 +319,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, boolean projected, LLVMValueRef *texel) { - const uint unit = inst->Src[1].SrcRegister.Index; + const uint unit = inst->Src[1].Register.Index; LLVMValueRef lodbias; LLVMValueRef oow; LLVMValueRef coords[3]; @@ -424,8 +424,8 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst) uint i; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *reg = &inst->Src[i]; - if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY && - reg->SrcRegister.Indirect) + if (reg->Register.File == TGSI_FILE_TEMPORARY && + reg->Register.Indirect) return TRUE; } for (i = 0; i < inst->Instruction.NumDstRegs; i++) { diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c index e3bb9f9d7f..9e8aab9754 100644 --- a/src/gallium/drivers/nv20/nv20_vertprog.c +++ b/src/gallium/drivers/nv20/nv20_vertprog.c @@ -253,32 +253,32 @@ static INLINE struct nv20_sreg tgsi_src(struct nv20_vpc *vpc, const struct tgsi_full_src_register *fsrc) { struct nv20_sreg src; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nv20_sr(NV30SR_INPUT, fsrc->SrcRegister.Index); + src = nv20_sr(NV30SR_INPUT, fsrc->Register.Index); break; case TGSI_FILE_CONSTANT: - src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); + src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); break; case TGSI_FILE_IMMEDIATE: - src = vpc->imm[fsrc->SrcRegister.Index]; + src = vpc->imm[fsrc->Register.Index]; break; case TGSI_FILE_TEMPORARY: - if (vpc->high_temp < fsrc->SrcRegister.Index) - vpc->high_temp = fsrc->SrcRegister.Index; - src = nv20_sr(NV30SR_TEMP, fsrc->SrcRegister.Index); + if (vpc->high_temp < fsrc->Register.Index) + vpc->high_temp = fsrc->Register.Index; + src = nv20_sr(NV30SR_TEMP, fsrc->Register.Index); break; default: NOUVEAU_ERR("bad src file\n"); break; } - src.abs = fsrc->SrcRegister.Absolute; - src.negate = fsrc->SrcRegister.Negate; - src.swz[0] = fsrc->SrcRegister.SwizzleX; - src.swz[1] = fsrc->SrcRegister.SwizzleY; - src.swz[2] = fsrc->SrcRegister.SwizzleZ; - src.swz[3] = fsrc->SrcRegister.SwizzleW; + src.abs = fsrc->Register.Absolute; + src.negate = fsrc->Register.Negate; + src.swz[0] = fsrc->Register.SwizzleX; + src.swz[1] = fsrc->Register.SwizzleY; + src.swz[2] = fsrc->Register.SwizzleZ; + src.swz[3] = fsrc->Register.SwizzleW; return src; } @@ -335,7 +335,7 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc, const struct tgsi_full_src_register *fsrc; fsrc = &finst->Src[i]; - if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(vpc, fsrc); } } @@ -344,10 +344,10 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc, const struct tgsi_full_src_register *fsrc; fsrc = &finst->Src[i]; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->SrcRegister.Index) { - ai = fsrc->SrcRegister.Index; + if (ai == -1 || ai == fsrc->Register.Index) { + ai = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); @@ -360,8 +360,8 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc, */ case TGSI_FILE_CONSTANT: case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->SrcRegister.Index) { - ci = fsrc->SrcRegister.Index; + if (ci == -1 || ci == fsrc->Register.Index) { + ci = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 14dc884b3a..40965a9772 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -237,20 +237,20 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) { struct nv30_sreg src; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: src = nv30_sr(NV30SR_INPUT, - fpc->attrib_map[fsrc->SrcRegister.Index]); + fpc->attrib_map[fsrc->Register.Index]); break; case TGSI_FILE_CONSTANT: - src = constant(fpc, fsrc->SrcRegister.Index, NULL); + src = constant(fpc, fsrc->Register.Index, NULL); break; case TGSI_FILE_IMMEDIATE: - assert(fsrc->SrcRegister.Index < fpc->nr_imm); - src = fpc->imm[fsrc->SrcRegister.Index]; + assert(fsrc->Register.Index < fpc->nr_imm); + src = fpc->imm[fsrc->Register.Index]; break; case TGSI_FILE_TEMPORARY: - src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index + 1); + src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index + 1); if (fpc->high_temp < src.index) fpc->high_temp = src.index; break; @@ -258,7 +258,7 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) * Luckily fragprog results are just temp regs.. */ case TGSI_FILE_OUTPUT: - if (fsrc->SrcRegister.Index == fpc->colour_id) + if (fsrc->Register.Index == fpc->colour_id) return nv30_sr(NV30SR_OUTPUT, 0); else return nv30_sr(NV30SR_OUTPUT, 1); @@ -268,12 +268,12 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) break; } - src.abs = fsrc->SrcRegister.Absolute; - src.negate = fsrc->SrcRegister.Negate; - src.swz[0] = fsrc->SrcRegister.SwizzleX; - src.swz[1] = fsrc->SrcRegister.SwizzleY; - src.swz[2] = fsrc->SrcRegister.SwizzleZ; - src.swz[3] = fsrc->SrcRegister.SwizzleW; + src.abs = fsrc->Register.Absolute; + src.negate = fsrc->Register.Negate; + src.swz[0] = fsrc->Register.SwizzleX; + src.swz[1] = fsrc->Register.SwizzleY; + src.swz[2] = fsrc->Register.SwizzleZ; + src.swz[3] = fsrc->Register.SwizzleW; return src; } @@ -364,7 +364,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc; fsrc = &finst->Src[i]; - if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(fpc, fsrc); } } @@ -374,7 +374,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, fsrc = &finst->Src[i]; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: case TGSI_FILE_CONSTANT: case TGSI_FILE_TEMPORARY: @@ -385,14 +385,14 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, break; } - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->SrcRegister.Index) { - ai = fsrc->SrcRegister.Index; + if (ai == -1 || ai == fsrc->Register.Index) { + ai = fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { NOUVEAU_MSG("extra src attr %d\n", - fsrc->SrcRegister.Index); + fsrc->Register.Index); src[i] = temp(fpc); arith(fpc, 0, MOV, src[i], MASK_ALL, tgsi_src(fpc, fsrc), none, none); @@ -400,8 +400,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, break; case TGSI_FILE_CONSTANT: case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->SrcRegister.Index) { - ci = fsrc->SrcRegister.Index; + if (ci == -1 || ci == fsrc->Register.Index) { + ci = fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); @@ -413,7 +413,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, /* handled above */ break; case TGSI_FILE_SAMPLER: - unit = fsrc->SrcRegister.Index; + unit = fsrc->Register.Index; break; case TGSI_FILE_OUTPUT: break; diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 41e4161dda..36ac8299f0 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -253,32 +253,32 @@ static INLINE struct nv30_sreg tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) { struct nv30_sreg src; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nv30_sr(NV30SR_INPUT, fsrc->SrcRegister.Index); + src = nv30_sr(NV30SR_INPUT, fsrc->Register.Index); break; case TGSI_FILE_CONSTANT: - src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); + src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); break; case TGSI_FILE_IMMEDIATE: - src = vpc->imm[fsrc->SrcRegister.Index]; + src = vpc->imm[fsrc->Register.Index]; break; case TGSI_FILE_TEMPORARY: - if (vpc->high_temp < fsrc->SrcRegister.Index) - vpc->high_temp = fsrc->SrcRegister.Index; - src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index); + if (vpc->high_temp < fsrc->Register.Index) + vpc->high_temp = fsrc->Register.Index; + src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index); break; default: NOUVEAU_ERR("bad src file\n"); break; } - src.abs = fsrc->SrcRegister.Absolute; - src.negate = fsrc->SrcRegister.Negate; - src.swz[0] = fsrc->SrcRegister.SwizzleX; - src.swz[1] = fsrc->SrcRegister.SwizzleY; - src.swz[2] = fsrc->SrcRegister.SwizzleZ; - src.swz[3] = fsrc->SrcRegister.SwizzleW; + src.abs = fsrc->Register.Absolute; + src.negate = fsrc->Register.Negate; + src.swz[0] = fsrc->Register.SwizzleX; + src.swz[1] = fsrc->Register.SwizzleY; + src.swz[2] = fsrc->Register.SwizzleZ; + src.swz[3] = fsrc->Register.SwizzleW; return src; } @@ -335,7 +335,7 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc; fsrc = &finst->Src[i]; - if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(vpc, fsrc); } } @@ -344,10 +344,10 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc; fsrc = &finst->Src[i]; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->SrcRegister.Index) { - ai = fsrc->SrcRegister.Index; + if (ai == -1 || ai == fsrc->Register.Index) { + ai = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); @@ -360,8 +360,8 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, */ case TGSI_FILE_CONSTANT: case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->SrcRegister.Index) { - ci = fsrc->SrcRegister.Index; + if (ci == -1 || ci == fsrc->Register.Index) { + ci = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 02c23e92c0..1bf16726d1 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -255,36 +255,36 @@ tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) { struct nv40_sreg src; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: src = nv40_sr(NV40SR_INPUT, - fpc->attrib_map[fsrc->SrcRegister.Index]); + fpc->attrib_map[fsrc->Register.Index]); break; case TGSI_FILE_CONSTANT: - src = constant(fpc, fsrc->SrcRegister.Index, NULL); + src = constant(fpc, fsrc->Register.Index, NULL); break; case TGSI_FILE_IMMEDIATE: - assert(fsrc->SrcRegister.Index < fpc->nr_imm); - src = fpc->imm[fsrc->SrcRegister.Index]; + assert(fsrc->Register.Index < fpc->nr_imm); + src = fpc->imm[fsrc->Register.Index]; break; case TGSI_FILE_TEMPORARY: - src = fpc->r_temp[fsrc->SrcRegister.Index]; + src = fpc->r_temp[fsrc->Register.Index]; break; /* NV40 fragprog result regs are just temps, so this is simple */ case TGSI_FILE_OUTPUT: - src = fpc->r_result[fsrc->SrcRegister.Index]; + src = fpc->r_result[fsrc->Register.Index]; break; default: NOUVEAU_ERR("bad src file\n"); break; } - src.abs = fsrc->SrcRegister.Absolute; - src.negate = fsrc->SrcRegister.Negate; - src.swz[0] = fsrc->SrcRegister.SwizzleX; - src.swz[1] = fsrc->SrcRegister.SwizzleY; - src.swz[2] = fsrc->SrcRegister.SwizzleZ; - src.swz[3] = fsrc->SrcRegister.SwizzleW; + src.abs = fsrc->Register.Absolute; + src.negate = fsrc->Register.Negate; + src.swz[0] = fsrc->Register.SwizzleX; + src.swz[1] = fsrc->Register.SwizzleY; + src.swz[2] = fsrc->Register.SwizzleZ; + src.swz[3] = fsrc->Register.SwizzleW; return src; } @@ -365,7 +365,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc; fsrc = &finst->Src[i]; - if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(fpc, fsrc); } } @@ -375,7 +375,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, fsrc = &finst->Src[i]; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: case TGSI_FILE_CONSTANT: case TGSI_FILE_TEMPORARY: @@ -386,10 +386,10 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, break; } - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->SrcRegister.Index) { - ai = fsrc->SrcRegister.Index; + if (ai == -1 || ai == fsrc->Register.Index) { + ai = fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); @@ -399,8 +399,8 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, break; case TGSI_FILE_CONSTANT: if ((ci == -1 && ii == -1) || - ci == fsrc->SrcRegister.Index) { - ci = fsrc->SrcRegister.Index; + ci == fsrc->Register.Index) { + ci = fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); @@ -410,8 +410,8 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, break; case TGSI_FILE_IMMEDIATE: if ((ci == -1 && ii == -1) || - ii == fsrc->SrcRegister.Index) { - ii = fsrc->SrcRegister.Index; + ii == fsrc->Register.Index) { + ii = fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); @@ -423,7 +423,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, /* handled above */ break; case TGSI_FILE_SAMPLER: - unit = fsrc->SrcRegister.Index; + unit = fsrc->Register.Index; break; case TGSI_FILE_OUTPUT: break; diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index c4f51d622c..55835ee644 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -295,30 +295,30 @@ static INLINE struct nv40_sreg tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { struct nv40_sreg src; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nv40_sr(NV40SR_INPUT, fsrc->SrcRegister.Index); + src = nv40_sr(NV40SR_INPUT, fsrc->Register.Index); break; case TGSI_FILE_CONSTANT: - src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); + src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); break; case TGSI_FILE_IMMEDIATE: - src = vpc->imm[fsrc->SrcRegister.Index]; + src = vpc->imm[fsrc->Register.Index]; break; case TGSI_FILE_TEMPORARY: - src = vpc->r_temp[fsrc->SrcRegister.Index]; + src = vpc->r_temp[fsrc->Register.Index]; break; default: NOUVEAU_ERR("bad src file\n"); break; } - src.abs = fsrc->SrcRegister.Absolute; - src.negate = fsrc->SrcRegister.Negate; - src.swz[0] = fsrc->SrcRegister.SwizzleX; - src.swz[1] = fsrc->SrcRegister.SwizzleY; - src.swz[2] = fsrc->SrcRegister.SwizzleZ; - src.swz[3] = fsrc->SrcRegister.SwizzleW; + src.abs = fsrc->Register.Absolute; + src.negate = fsrc->Register.Negate; + src.swz[0] = fsrc->Register.SwizzleX; + src.swz[1] = fsrc->Register.SwizzleY; + src.swz[2] = fsrc->Register.SwizzleZ; + src.swz[3] = fsrc->Register.SwizzleW; return src; } @@ -406,7 +406,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc; fsrc = &finst->Src[i]; - if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(vpc, fsrc); } } @@ -416,7 +416,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, fsrc = &finst->Src[i]; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: case TGSI_FILE_CONSTANT: case TGSI_FILE_TEMPORARY: @@ -427,10 +427,10 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, break; } - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->SrcRegister.Index) { - ai = fsrc->SrcRegister.Index; + if (ai == -1 || ai == fsrc->Register.Index) { + ai = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); @@ -440,8 +440,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, break; case TGSI_FILE_CONSTANT: if ((ci == -1 && ii == -1) || - ci == fsrc->SrcRegister.Index) { - ci = fsrc->SrcRegister.Index; + ci == fsrc->Register.Index) { + ci = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); @@ -451,8 +451,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, break; case TGSI_FILE_IMMEDIATE: if ((ci == -1 && ii == -1) || - ii == fsrc->SrcRegister.Index) { - ii = fsrc->SrcRegister.Index; + ii == fsrc->Register.Index) { + ii = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 3409edb4c8..1509cecaac 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1535,10 +1535,10 @@ negate_supported(const struct tgsi_full_instruction *insn, int i) for (s = 0; s < insn->Instruction.NumSrcRegs; ++s) { if (s == i) continue; - if ((insn->Src[s].SrcRegister.Index == - insn->Src[i].SrcRegister.Index) && - (insn->Src[s].SrcRegister.File == - insn->Src[i].SrcRegister.File)) + if ((insn->Src[s].Register.Index == + insn->Src[i].Register.Index) && + (insn->Src[s].Register.File == + insn->Src[i].Register.File)) return FALSE; } @@ -1644,8 +1644,8 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, struct nv50_reg *temp; unsigned sgn, c, swz; - if (src->SrcRegister.File != TGSI_FILE_CONSTANT) - assert(!src->SrcRegister.Indirect); + if (src->Register.File != TGSI_FILE_CONSTANT) + assert(!src->Register.Indirect); sgn = tgsi_util_get_full_src_register_sign_mode(src, chan); @@ -1655,16 +1655,16 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, case TGSI_SWIZZLE_Y: case TGSI_SWIZZLE_Z: case TGSI_SWIZZLE_W: - switch (src->SrcRegister.File) { + switch (src->Register.File) { case TGSI_FILE_INPUT: - r = &pc->attr[src->SrcRegister.Index * 4 + c]; + r = &pc->attr[src->Register.Index * 4 + c]; break; case TGSI_FILE_TEMPORARY: - r = &pc->temp[src->SrcRegister.Index * 4 + c]; + r = &pc->temp[src->Register.Index * 4 + c]; break; case TGSI_FILE_CONSTANT: - if (!src->SrcRegister.Indirect) { - r = &pc->param[src->SrcRegister.Index * 4 + c]; + if (!src->Register.Indirect) { + r = &pc->param[src->Register.Index * 4 + c]; break; } /* Indicate indirection by setting r->acc < 0 and @@ -1672,19 +1672,19 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, */ r = MALLOC_STRUCT(nv50_reg); swz = tgsi_util_get_src_register_swizzle( - &src->SrcRegisterInd, 0); + &src->Indirect, 0); ctor_reg(r, P_CONST, - src->SrcRegisterInd.Index * 4 + swz, - src->SrcRegister.Index * 4 + c); + src->Indirect.Index * 4 + swz, + src->Register.Index * 4 + c); r->acc = -1; break; case TGSI_FILE_IMMEDIATE: - r = &pc->immd[src->SrcRegister.Index * 4 + c]; + r = &pc->immd[src->Register.Index * 4 + c]; break; case TGSI_FILE_SAMPLER: break; case TGSI_FILE_ADDRESS: - r = pc->addr[src->SrcRegister.Index * 4 + c]; + r = pc->addr[src->Register.Index * 4 + c]; assert(r); break; default: @@ -1871,8 +1871,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, src_mask = nv50_tgsi_src_mask(inst, i); neg_supp = negate_supported(inst, i); - if (fs->SrcRegister.File == TGSI_FILE_SAMPLER) - unit = fs->SrcRegister.Index; + if (fs->Register.File == TGSI_FILE_SAMPLER) + unit = fs->Register.Index; for (c = 0; c < 4; c++) if (src_mask & (1 << c)) @@ -2284,10 +2284,10 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) for (i = 0; i < insn->Instruction.NumSrcRegs; i++) { src = &insn->Src[i]; - if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) + if (src->Register.File == TGSI_FILE_TEMPORARY) reg = pc->temp; else - if (src->SrcRegister.File == TGSI_FILE_INPUT) + if (src->Register.File == TGSI_FILE_INPUT) reg = pc->attr; else continue; @@ -2299,7 +2299,7 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) continue; k = tgsi_util_get_full_src_register_swizzle(src, c); - reg[src->SrcRegister.Index * 4 + k].acc = pc->insn_nr; + reg[src->Register.Index * 4 + k].acc = pc->insn_nr; } } } @@ -2391,8 +2391,8 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, boolean neg_supp = negate_supported(insn, i); fs = &insn->Src[i]; - if (fs->SrcRegister.File != fd->Register.File || - fs->SrcRegister.Index != fd->Register.Index) + if (fs->Register.File != fd->Register.File || + fs->Register.Index != fd->Register.Index) continue; for (chn = 0; chn < 4; ++chn) { diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 92796d150b..9fb2de2403 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -201,15 +201,15 @@ static void transform_srcreg( struct rc_src_register * dst, struct tgsi_full_src_register * src) { - dst->File = translate_register_file(src->SrcRegister.File); - dst->Index = translate_register_index(ttr, src->SrcRegister.File, src->SrcRegister.Index); - dst->RelAddr = src->SrcRegister.Indirect; + dst->File = translate_register_file(src->Register.File); + dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index); + dst->RelAddr = src->Register.Indirect; dst->Swizzle = tgsi_util_get_full_src_register_swizzle(src, 0); dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 1) << 3; dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 2) << 6; dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 3) << 9; - dst->Abs = src->SrcRegister.Absolute; - dst->Negate = src->SrcRegister.Negate ? RC_MASK_XYZW : 0; + dst->Abs = src->Register.Absolute; + dst->Negate = src->Register.Negate ? RC_MASK_XYZW : 0; } static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_texture src) @@ -261,8 +261,8 @@ static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_inst transform_dstreg(ttr, &dst->U.I.DstReg, &src->Dst[0]); for(i = 0; i < src->Instruction.NumSrcRegs; ++i) { - if (src->Src[i].SrcRegister.File == TGSI_FILE_SAMPLER) - dst->U.I.TexSrcUnit = src->Src[i].SrcRegister.Index; + if (src->Src[i].Register.File == TGSI_FILE_SAMPLER) + dst->U.I.TexSrcUnit = src->Src[i].Register.Index; else transform_srcreg(ttr, &dst->U.I.SrcReg[i], &src->Src[i]); } diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 9ca89f1cdd..1670da8bfa 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -176,33 +176,33 @@ translate_src_register( const struct svga_shader_emitter *emit, { struct src_register src; - switch (reg->SrcRegister.File) { + switch (reg->Register.File) { case TGSI_FILE_INPUT: /* Input registers are referred to by their semantic name rather * than by index. Use the mapping build up from the decls: */ - src = emit->input_map[reg->SrcRegister.Index]; + src = emit->input_map[reg->Register.Index]; break; case TGSI_FILE_IMMEDIATE: /* Immediates are appended after TGSI constants in the D3D * constant buffer. */ - src = src_register( translate_file( reg->SrcRegister.File ), - reg->SrcRegister.Index + + src = src_register( translate_file( reg->Register.File ), + reg->Register.Index + emit->imm_start ); break; default: - src = src_register( translate_file( reg->SrcRegister.File ), - reg->SrcRegister.Index ); + src = src_register( translate_file( reg->Register.File ), + reg->Register.Index ); break; } /* Indirect addressing (for coninstant buffer lookups only) */ - if (reg->SrcRegister.Indirect) + if (reg->Register.Indirect) { /* we shift the offset towards the minimum */ if (svga_arl_needs_adjustment( emit )) { @@ -213,28 +213,28 @@ translate_src_register( const struct svga_shader_emitter *emit, /* Not really sure what should go in the second token: */ src.indirect = src_token( SVGA3DREG_ADDR, - reg->SrcRegisterInd.Index ); + reg->Indirect.Index ); src.indirect.swizzle = SWIZZLE_XXXX; } src = swizzle( src, - reg->SrcRegister.SwizzleX, - reg->SrcRegister.SwizzleY, - reg->SrcRegister.SwizzleZ, - reg->SrcRegister.SwizzleW ); + reg->Register.SwizzleX, + reg->Register.SwizzleY, + reg->Register.SwizzleZ, + reg->Register.SwizzleW ); /* src.mod isn't a bitfield, unfortunately: * See tgsi_util_get_full_src_register_sign_mode for implementation details. */ - if (reg->SrcRegister.Absolute) { - if (reg->SrcRegister.Negate) + if (reg->Register.Absolute) { + if (reg->Register.Negate) src.base.srcMod = SVGA3DSRCMOD_ABSNEG; else src.base.srcMod = SVGA3DSRCMOD_ABS; } else { - if (reg->SrcRegister.Negate) + if (reg->Register.Negate) src.base.srcMod = SVGA3DSRCMOD_NEG; else src.base.srcMod = SVGA3DSRCMOD_NONE; @@ -986,13 +986,13 @@ static boolean emit_kil(struct svga_shader_emitter *emit, inst = inst_token( SVGA3DOP_TEXKILL ); src0 = translate_src_register( emit, reg ); - if (reg->SrcRegister.Absolute || - reg->SrcRegister.Negate || - reg->SrcRegister.Indirect || - reg->SrcRegister.SwizzleX != 0 || - reg->SrcRegister.SwizzleY != 1 || - reg->SrcRegister.SwizzleZ != 2 || - reg->SrcRegister.File != TGSI_FILE_TEMPORARY) + if (reg->Register.Absolute || + reg->Register.Negate || + reg->Register.Indirect || + reg->Register.SwizzleX != 0 || + reg->Register.SwizzleY != 1 || + reg->Register.SwizzleZ != 2 || + reg->Register.File != TGSI_FILE_TEMPORARY) { SVGA3dShaderDestToken temp = get_temp( emit ); @@ -2543,27 +2543,27 @@ pre_parse_instruction( struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn, int current_arl) { - if (insn->Src[0].SrcRegister.Indirect && - insn->Src[0].SrcRegisterInd.File == TGSI_FILE_ADDRESS) { + if (insn->Src[0].Register.Indirect && + insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) { const struct tgsi_full_src_register *reg = &insn->Src[0]; - if (reg->SrcRegister.Index < 0) { - pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl); + if (reg->Register.Index < 0) { + pre_parse_add_indirect(emit, reg->Register.Index, current_arl); } } - if (insn->Src[1].SrcRegister.Indirect && - insn->Src[1].SrcRegisterInd.File == TGSI_FILE_ADDRESS) { + if (insn->Src[1].Register.Indirect && + insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) { const struct tgsi_full_src_register *reg = &insn->Src[1]; - if (reg->SrcRegister.Index < 0) { - pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl); + if (reg->Register.Index < 0) { + pre_parse_add_indirect(emit, reg->Register.Index, current_arl); } } - if (insn->Src[2].SrcRegister.Indirect && - insn->Src[2].SrcRegisterInd.File == TGSI_FILE_ADDRESS) { + if (insn->Src[2].Register.Indirect && + insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) { const struct tgsi_full_src_register *reg = &insn->Src[2]; - if (reg->SrcRegister.Index < 0) { - pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl); + if (reg->Register.Index < 0) { + pre_parse_add_indirect(emit, reg->Register.Index, current_arl); } } -- cgit v1.2.3 From 8bf4e5d6176b0efb93c11bcd14fa5d320088e2e3 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 24 Nov 2009 16:01:01 +0000 Subject: llvmpipe: Update instructions. --- src/gallium/drivers/llvmpipe/README | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 89d08834a3..478e0139c8 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -51,21 +51,18 @@ Requirements - Linux - - udis86, http://udis86.sourceforge.net/ . Use my repository, which decodes - opcodes not yet supported by upstream. + - A x86 or amd64 processor with support for sse2, sse3, and sse4.1 SIMD + instructions. This is necessary because we emit several SSE intrinsics for + convenience. See /proc/cpuinfo to know what your CPU supports. - git clone git://people.freedesktop.org/~jrfonseca/udis86 - cd udis86 - ./configure --with-pic - make - sudo make install + - LLVM 2.5 or greater. LLVM 2.6 is preferred. - - LLVM 2.5. On Debian based distributions do: + On Debian based distributions do: aptitude install llvm-dev - There is a typo in one of the llvm-dev 2.5 headers, that causes compilation - errors in the debug build: + There is a typo in one of the llvm 2.5 headers, that may cause compilation + errors. To fix it apply the change: --- /usr/include/llvm-c/Core.h.orig 2009-08-10 15:38:54.000000000 +0100 +++ /usr/include/llvm-c/Core.h 2009-08-10 15:38:25.000000000 +0100 @@ -79,12 +76,17 @@ Requirements #endif return reinterpret_cast(Vals); - - A x86 or amd64 processor with support for sse2, sse3, and sse4.1 SIMD - instructions. This is necessary because we emit several SSE intrinsics for - convenience. See /proc/cpuinfo to know what your CPU supports. - - - scons + - scons (optional) + - udis86, http://udis86.sourceforge.net/ (optional): + + git clone git://udis86.git.sourceforge.net/gitroot/udis86/udis86 + cd udis86 + ./autogen.sh + ./configure --with-pic + make + sudo make install + Building ======== -- cgit v1.2.3 From c783f5cfd891e6b8e9dc622ad0950e5859b5a0c0 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 26 Nov 2009 12:02:14 +0000 Subject: svga: Remove spurious argument to SVGA_DBG. --- src/gallium/drivers/svga/svga_screen_texture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c index d61d88114c..e7301aba84 100644 --- a/src/gallium/drivers/svga/svga_screen_texture.c +++ b/src/gallium/drivers/svga/svga_screen_texture.c @@ -556,7 +556,7 @@ svga_texture_view_surface(struct pipe_context *pipe, return NULL; } - SVGA_DBG(DEBUG_DMA, "surface_create for texture view\n", handle); + SVGA_DBG(DEBUG_DMA, "surface_create for texture view\n"); handle = svga_screen_surface_create(ss, key); if (!handle) { key->cachable = 0; -- cgit v1.2.3 From b96218c65622a7814ff8154a91874a5e5a9dc773 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 26 Nov 2009 15:25:09 +0000 Subject: svga: hash the whole key, not just the first four bytes --- src/gallium/drivers/svga/svga_screen_cache.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_screen_cache.c b/src/gallium/drivers/svga/svga_screen_cache.c index 65f5c07a72..689981cc6d 100644 --- a/src/gallium/drivers/svga/svga_screen_cache.c +++ b/src/gallium/drivers/svga/svga_screen_cache.c @@ -41,7 +41,7 @@ static INLINE unsigned svga_screen_cache_bucket(const struct svga_host_surface_cache_key *key) { - return util_hash_crc32( key, sizeof key ) % SVGA_HOST_SURFACE_CACHE_BUCKETS; + return util_hash_crc32( key, sizeof *key ) % SVGA_HOST_SURFACE_CACHE_BUCKETS; } @@ -95,8 +95,8 @@ svga_screen_cache_lookup(struct svga_screen *svgascreen, pipe_mutex_unlock(cache->mutex); if (SVGA_DEBUG & DEBUG_DMA) - debug_printf("%s: cache %s after %u tries\n", __FUNCTION__, - handle ? "hit" : "miss", tries); + debug_printf("%s: cache %s after %u tries (bucket %d)\n", __FUNCTION__, + handle ? "hit" : "miss", tries, bucket); return handle; } -- cgit v1.2.3 From da1c40260d8cb49eacbb6c394198dc37e020e75a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 26 Nov 2009 11:15:08 +0000 Subject: llvmpipe: Update/correct CPU requirements. There are no hard requirements at the moment. We don't really emit any sse3 yet. Just some ssse3. Thanks to Roland for spotting these incorrections. --- src/gallium/drivers/llvmpipe/README | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 478e0139c8..0c3f00fd58 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -51,9 +51,13 @@ Requirements - Linux - - A x86 or amd64 processor with support for sse2, sse3, and sse4.1 SIMD - instructions. This is necessary because we emit several SSE intrinsics for - convenience. See /proc/cpuinfo to know what your CPU supports. + - A x86 or amd64 processor. 64bit mode is preferred. + + Support for sse2 is strongly encouraged. Support for ssse3, and sse4.1 will + yield the most efficient code. The less features the CPU has the more + likely is that you ran into underperforming, buggy, or incomplete code. + + See /proc/cpuinfo to know what your CPU supports. - LLVM 2.5 or greater. LLVM 2.6 is preferred. -- cgit v1.2.3 From 953b74d116c88f2b93740b6d1f713bb1b5989e98 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 26 Nov 2009 11:16:19 +0000 Subject: llvmpipe: Fake missing SSSE3 when simulation less capabable machines. SSE3 != SSSE3 and so far we only use the later. --- src/gallium/drivers/llvmpipe/lp_jit.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index c601c79480..bce3baec16 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -154,6 +154,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) #if 0 /* For simulating less capable machines */ util_cpu_caps.has_sse3 = 0; + util_cpu_caps.has_ssse3 = 0; util_cpu_caps.has_sse4_1 = 0; #endif -- cgit v1.2.3 From d509f84543d0979e9bb53c20c195f378dd61e728 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Thu, 26 Nov 2009 22:49:58 +0100 Subject: gallium: fix more statetrackers/drivers for not using texture width/height/depth arrays --- src/gallium/auxiliary/util/u_gen_mipmap.c | 8 +-- src/gallium/drivers/cell/ppu/cell_state_emit.c | 7 +- src/gallium/drivers/cell/ppu/cell_texture.c | 27 ++++--- src/gallium/drivers/llvmpipe/lp_bld_sample.c | 6 +- src/gallium/drivers/llvmpipe/lp_state_sampler.c | 4 +- src/gallium/drivers/llvmpipe/lp_tex_cache.c | 5 +- src/gallium/drivers/llvmpipe/lp_tex_sample_c.c | 30 ++++---- src/gallium/drivers/llvmpipe/lp_texture.c | 31 ++++---- src/gallium/drivers/nv04/nv04_fragtex.c | 4 +- src/gallium/drivers/nv04/nv04_miptree.c | 19 +++-- src/gallium/drivers/nv04/nv04_transfer.c | 7 +- src/gallium/drivers/nv10/nv10_fragtex.c | 8 +-- src/gallium/drivers/nv10/nv10_miptree.c | 18 ++--- src/gallium/drivers/nv10/nv10_transfer.c | 7 +- src/gallium/drivers/nv20/nv20_fragtex.c | 8 +-- src/gallium/drivers/nv20/nv20_miptree.c | 31 ++++---- src/gallium/drivers/nv20/nv20_transfer.c | 7 +- src/gallium/drivers/nv30/nv30_fragtex.c | 10 +-- src/gallium/drivers/nv30/nv30_miptree.c | 36 +++++----- src/gallium/drivers/nv30/nv30_transfer.c | 7 +- src/gallium/drivers/nv40/nv40_fragtex.c | 6 +- src/gallium/drivers/nv40/nv40_miptree.c | 36 +++++----- src/gallium/drivers/nv40/nv40_transfer.c | 7 +- src/gallium/drivers/nv50/nv50_miptree.c | 21 +++--- src/gallium/drivers/nv50/nv50_tex.c | 4 +- src/gallium/drivers/nv50/nv50_transfer.c | 11 +-- src/gallium/drivers/r300/r300_texture.c | 48 ++++++------- src/gallium/state_trackers/dri/dri_drawable.c | 8 +-- src/gallium/state_trackers/egl/egl_surface.c | 6 +- src/gallium/state_trackers/python/p_device.i | 6 +- src/gallium/state_trackers/python/p_texture.i | 12 ++-- .../state_trackers/python/retrace/interpreter.py | 6 +- src/gallium/state_trackers/python/st_device.c | 10 +-- src/gallium/state_trackers/python/st_sample.c | 4 +- src/gallium/state_trackers/vega/api_filters.c | 8 +-- src/gallium/state_trackers/vega/image.c | 16 ++--- src/gallium/state_trackers/vega/mask.c | 12 ++-- src/gallium/state_trackers/vega/paint.c | 10 +-- src/gallium/state_trackers/vega/renderer.c | 46 ++++++------ src/gallium/state_trackers/vega/vg_tracker.c | 6 +- src/gallium/state_trackers/xorg/xorg_composite.c | 4 +- src/gallium/state_trackers/xorg/xorg_crtc.c | 6 +- src/gallium/state_trackers/xorg/xorg_dri2.c | 6 +- src/gallium/state_trackers/xorg/xorg_exa.c | 30 ++++---- src/gallium/state_trackers/xorg/xorg_renderer.c | 82 +++++++++++----------- src/gallium/state_trackers/xorg/xorg_xv.c | 22 +++--- src/gallium/state_trackers/xorg/xvmc/surface.c | 6 +- .../winsys/drm/nouveau/drm/nouveau_drm_api.c | 6 +- src/gallium/winsys/drm/radeon/core/radeon_buffer.c | 6 +- src/mesa/state_tracker/st_cb_fbo.c | 2 +- 50 files changed, 360 insertions(+), 373 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 84db14576e..f67f1e458d 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -1214,12 +1214,12 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, PIPE_TRANSFER_READ, 0, 0, - pt->width[srcLevel], - pt->height[srcLevel]); + u_minify(pt->width0, srcLevel), + u_minify(pt->height0, srcLevel)); dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, PIPE_TRANSFER_WRITE, 0, 0, - pt->width[dstLevel], - pt->height[dstLevel]); + u_minify(pt->width0, dstLevel), + u_minify(pt->height0, dstLevel)); srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 9479c0898f..ac5fafec1a 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -27,6 +27,7 @@ #include "pipe/p_inlines.h" #include "util/u_memory.h" +#include "util/u_math.h" #include "cell_context.h" #include "cell_gen_fragment.h" #include "cell_state.h" @@ -299,9 +300,9 @@ cell_emit_state(struct cell_context *cell) for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { texture->start[level] = (ct->mapped + ct->level_offset[level]); - texture->width[level] = ct->base.width[level]; - texture->height[level] = ct->base.height[level]; - texture->depth[level] = ct->base.depth[level]; + texture->width[level] = u_minify(ct->base.width0, level); + texture->height[level] = u_minify(ct->base.height0, level); + texture->depth[level] = u_minify(ct->base.depth0, level); } texture->target = ct->base.target; } diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index ae4c61efb3..e6b8a87045 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -49,9 +49,9 @@ cell_texture_layout(struct cell_texture *ct) { struct pipe_texture *pt = &ct->base; unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; ct->buffer_size = 0; @@ -65,9 +65,6 @@ cell_texture_layout(struct cell_texture *ct) w_tile = align(width, TILE_SIZE); h_tile = align(height, TILE_SIZE); - pt->width[level] = width; - pt->height[level] = height; - pt->depth[level] = depth; pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w_tile); pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h_tile); @@ -83,9 +80,9 @@ cell_texture_layout(struct cell_texture *ct) ct->buffer_size += size; - width = minify(width); - height = minify(height); - depth = minify(depth); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); } } @@ -276,8 +273,8 @@ cell_get_tex_surface(struct pipe_screen *screen, pipe_reference_init(&ps->reference, 1); pipe_texture_reference(&ps->texture, pt); ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; + ps->width = u_minify(pt->width0, level); + ps->height = u_minify(pt->height0, level); ps->offset = ct->level_offset[level]; /* XXX may need to override usage flags (see sp_texture.c) */ ps->usage = usage; @@ -386,8 +383,8 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer) struct pipe_texture *pt = transfer->texture; struct cell_texture *ct = cell_texture(pt); const uint level = ctrans->base.level; - const uint texWidth = pt->width[level]; - const uint texHeight = pt->height[level]; + const uint texWidth = u_minify(pt->width0, level); + const uint texHeight = u_minify(pt->height0, level); const uint stride = ct->stride[level]; unsigned size; @@ -440,8 +437,8 @@ cell_transfer_unmap(struct pipe_screen *screen, struct pipe_texture *pt = transfer->texture; struct cell_texture *ct = cell_texture(pt); const uint level = ctrans->base.level; - const uint texWidth = pt->width[level]; - const uint texHeight = pt->height[level]; + const uint texWidth = u_minify(pt->width0, level); + const uint texHeight = u_minify(pt->height0, level); const uint stride = ct->stride[level]; if (!ct->mapped) { diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c index 4d272bea87..af70ddc6ab 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c @@ -59,9 +59,9 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, state->format = texture->format; state->target = texture->target; - state->pot_width = util_is_pot(texture->width[0]); - state->pot_height = util_is_pot(texture->height[0]); - state->pot_depth = util_is_pot(texture->depth[0]); + state->pot_width = util_is_pot(texture->width0); + state->pot_height = util_is_pot(texture->height0); + state->pot_depth = util_is_pot(texture->depth0); state->wrap_s = sampler->wrap_s; state->wrap_t = sampler->wrap_t; diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index c69d90c723..8333805a3f 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -102,8 +102,8 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe, if(tex) { struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i]; - jit_tex->width = tex->width[0]; - jit_tex->height = tex->height[0]; + jit_tex->width = tex->width0; + jit_tex->height = tex->height0; jit_tex->stride = lp_tex->stride[0]; if(!lp_tex->dt) jit_tex->data = lp_tex->data; diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c index 773e848242..c7c4143bc6 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c @@ -36,6 +36,7 @@ #include "util/u_memory.h" #include "util/u_tile.h" #include "util/u_format.h" +#include "util/u_math.h" #include "lp_context.h" #include "lp_surface.h" #include "lp_texture.h" @@ -270,8 +271,8 @@ lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc, addr.bits.level, addr.bits.z, PIPE_TRANSFER_READ, 0, 0, - tc->texture->width[addr.bits.level], - tc->texture->height[addr.bits.level]); + u_minify(tc->texture->width0, addr.bits.level), + u_minify(tc->texture->height0, addr.bits.level)); tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans); diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c index a1365a045f..0d01c07fb5 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c @@ -544,7 +544,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler, float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; dsdx = fabsf(dsdx); dsdy = fabsf(dsdy); - rho = MAX2(dsdx, dsdy) * texture->width[0]; + rho = MAX2(dsdx, dsdy) * texture->width0; } if (t) { float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; @@ -552,7 +552,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler, float max; dtdx = fabsf(dtdx); dtdy = fabsf(dtdy); - max = MAX2(dtdx, dtdy) * texture->height[0]; + max = MAX2(dtdx, dtdy) * texture->height0; rho = MAX2(rho, max); } if (p) { @@ -561,7 +561,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler, float max; dpdx = fabsf(dpdx); dpdy = fabsf(dpdy); - max = MAX2(dpdx, dpdy) * texture->depth[0]; + max = MAX2(dpdx, dpdy) * texture->depth0; rho = MAX2(rho, max); } @@ -726,9 +726,9 @@ get_texel(const struct tgsi_sampler *tgsi_sampler, const struct pipe_texture *texture = samp->texture; const struct pipe_sampler_state *sampler = samp->sampler; - if (x < 0 || x >= (int) texture->width[level] || - y < 0 || y >= (int) texture->height[level] || - z < 0 || z >= (int) texture->depth[level]) { + if (x < 0 || x >= (int) u_minify(texture->width0, level) || + y < 0 || y >= (int) u_minify(texture->height0, level) || + z < 0 || z >= (int) u_minify(texture->depth0, level)) { rgba[0][j] = sampler->border_color[0]; rgba[1][j] = sampler->border_color[1]; rgba[2][j] = sampler->border_color[2]; @@ -1093,8 +1093,8 @@ lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, assert(sampler->normalized_coords); - width = texture->width[level0]; - height = texture->height[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); assert(width > 0); @@ -1250,9 +1250,9 @@ lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, assert(sampler->normalized_coords); - width = texture->width[level0]; - height = texture->height[level0]; - depth = texture->depth[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); + depth = u_minify(texture->depth0, level0); assert(width > 0); assert(height > 0); @@ -1394,8 +1394,8 @@ lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler, /* texture RECTS cannot be mipmapped */ assert(level0 == level1); - width = texture->width[level0]; - height = texture->height[level0]; + width = u_minify(texture->width0, level0); + height = u_minify(texture->height0, level0); assert(width > 0); @@ -1513,8 +1513,8 @@ lp_get_samples(struct tgsi_sampler *tgsi_sampler, /* Do this elsewhere: */ - samp->xpot = util_unsigned_logbase2( samp->texture->width[0] ); - samp->ypot = util_unsigned_logbase2( samp->texture->height[0] ); + samp->xpot = util_unsigned_logbase2( samp->texture->width0 ); + samp->ypot = util_unsigned_logbase2( samp->texture->height0 ); /* Try to hook in a faster sampler. Ultimately we'll have to * code-generate these. Luckily most of this looks like it is diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index a00f2495df..0a0f31f8a3 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -57,9 +57,9 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, { struct pipe_texture *pt = &lpt->base; unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; unsigned buffer_size = 0; @@ -68,9 +68,6 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, for (level = 0; level <= pt->last_level; level++) { unsigned nblocksx, nblocksy; - pt->width[level] = width; - pt->height[level] = height; - pt->depth[level] = depth; pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); @@ -87,9 +84,9 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * lpt->stride[level]); - width = minify(width); - height = minify(height); - depth = minify(depth); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); } lpt->data = align_malloc(buffer_size, 16); @@ -104,13 +101,13 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, struct llvmpipe_winsys *winsys = screen->winsys; pf_get_block(lpt->base.format, &lpt->base.block); - lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width[0]); - lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height[0]); + lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width0); + lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height0); lpt->dt = winsys->displaytarget_create(winsys, lpt->base.format, - lpt->base.width[0], - lpt->base.height[0], + lpt->base.width0, + lpt->base.height0, 16, &lpt->stride[0] ); @@ -183,8 +180,8 @@ llvmpipe_texture_blanket(struct pipe_screen * screen, lpt->base = *base; pipe_reference_init(&lpt->base.reference, 1); lpt->base.screen = screen; - lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width[0]); - lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height[0]); + lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width0); + lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height0); lpt->stride[0] = stride[0]; pipe_buffer_reference(&lpt->buffer, buffer); @@ -229,8 +226,8 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen, pipe_reference_init(&ps->reference, 1); pipe_texture_reference(&ps->texture, pt); ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; + ps->width = u_minify(pt->width0, level); + ps->height = u_minify(pt->height0, level); ps->offset = lpt->level_offset[level]; ps->usage = usage; diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c index 21f990fd53..0cce71ad1d 100644 --- a/src/gallium/drivers/nv04/nv04_fragtex.c +++ b/src/gallium/drivers/nv04/nv04_fragtex.c @@ -57,8 +57,8 @@ nv04_fragtex_build(struct nv04_context *nv04, int unit) | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER | nv04_fragtex_format(pt->format) | ( (pt->last_level + 1) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT ) - | ( log2i(pt->width[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT ) - | ( log2i(pt->height[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT ) + | ( log2i(pt->width0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT ) + | ( log2i(pt->height0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT ) | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE ; diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c index 93f752faec..4fd72c82e6 100644 --- a/src/gallium/drivers/nv04/nv04_miptree.c +++ b/src/gallium/drivers/nv04/nv04_miptree.c @@ -1,6 +1,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" #include "nv04_context.h" #include "nv04_screen.h" @@ -9,31 +10,29 @@ static void nv04_miptree_layout(struct nv04_miptree *nv04mt) { struct pipe_texture *pt = &nv04mt->base; - uint width = pt->width[0], height = pt->height[0]; + uint width = pt->width0, height = pt->height0; uint offset = 0; int nr_faces, l; nr_faces = 1; for (l = 0; l <= pt->last_level; l++) { - pt->width[l] = width; - pt->height[l] = height; pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); - nv04mt->level[l].pitch = pt->width[0]; + nv04mt->level[l].pitch = pt->width0; nv04mt->level[l].pitch = (nv04mt->level[l].pitch + 63) & ~63; - width = MAX2(1, width >> 1); - height = MAX2(1, height >> 1); + width = u_minify(width, 1); + height = u_minify(height, 1); } for (l = 0; l <= pt->last_level; l++) { nv04mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); - offset += nv04mt->level[l].pitch * pt->height[l]; + offset += nv04mt->level[l].pitch * u_minify(pt->height0, l); } nv04mt->total_size = offset; @@ -75,7 +74,7 @@ nv04_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, /* Only supports 2D, non-mipmapped textures for the moment */ if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth[0] != 1) + pt->depth0 != 1) return NULL; mt = CALLOC_STRUCT(nv04_miptree); @@ -120,8 +119,8 @@ nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&ns->base.texture, pt); ns->base.format = pt->format; - ns->base.width = pt->width[level]; - ns->base.height = pt->height[level]; + ns->base.width = u_minify(pt->width0, level); + ns->base.height = u_minify(pt->height0, level); ns->base.usage = flags; pipe_reference_init(&ns->base.reference, 1); ns->base.face = face; diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c index 6618660743..e6456429f4 100644 --- a/src/gallium/drivers/nv04/nv04_transfer.c +++ b/src/gallium/drivers/nv04/nv04_transfer.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "nv04_context.h" #include "nv04_screen.h" @@ -20,9 +21,9 @@ nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width[0] = pt->width[level]; - template->height[0] = pt->height[level]; - template->depth[0] = 1; + template->width0 = u_minify(pt->width0, level); + template->height0 = u_minify(pt->height0, level); + template->depth0 = 1; template->block = pt->block; template->nblocksx[0] = pt->nblocksx[level]; template->nblocksy[0] = pt->nblocksx[level]; diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c index 27f2f87584..906fdfeeb9 100644 --- a/src/gallium/drivers/nv10/nv10_fragtex.c +++ b/src/gallium/drivers/nv10/nv10_fragtex.c @@ -62,9 +62,9 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit) txf = tf->format << 8; txf |= (pt->last_level + 1) << 16; - txf |= log2i(pt->width[0]) << 20; - txf |= log2i(pt->height[0]) << 24; - txf |= log2i(pt->depth[0]) << 28; + txf |= log2i(pt->width0) << 20; + txf |= log2i(pt->height0) << 24; + txf |= log2i(pt->depth0) << 28; txf |= 8; switch (pt->target) { @@ -89,7 +89,7 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit) OUT_RING (0x40000000); /* enable */ OUT_RING (txs); OUT_RING (ps->filt | 0x2000 /* magic */); - OUT_RING ((pt->width[0] << 16) | pt->height[0]); + OUT_RING ((pt->width0 << 16) | pt->height0); OUT_RING (ps->bcol); #endif } diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c index 34e3c2ebd7..b2a6c59b74 100644 --- a/src/gallium/drivers/nv10/nv10_miptree.c +++ b/src/gallium/drivers/nv10/nv10_miptree.c @@ -1,6 +1,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" #include "nv10_context.h" #include "nv10_screen.h" @@ -10,7 +11,7 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt) { struct pipe_texture *pt = &nv10mt->base; boolean swizzled = FALSE; - uint width = pt->width[0], height = pt->height[0]; + uint width = pt->width0, height = pt->height0; uint offset = 0; int nr_faces, l, f; @@ -21,8 +22,7 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt) } for (l = 0; l <= pt->last_level; l++) { - pt->width[l] = width; - pt->height[l] = height; + pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); @@ -35,15 +35,15 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt) nv10mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); - width = MAX2(1, width >> 1); - height = MAX2(1, height >> 1); + width = u_minify(width, 1); + height = u_minify(height, 1); } for (f = 0; f < nr_faces; f++) { for (l = 0; l <= pt->last_level; l++) { nv10mt->level[l].image_offset[f] = offset; - offset += nv10mt->level[l].pitch * pt->height[l]; + offset += nv10mt->level[l].pitch * u_minify(pt->height0, l); } } @@ -58,7 +58,7 @@ nv10_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, /* Only supports 2D, non-mipmapped textures for the moment */ if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth[0] != 1) + pt->depth0 != 1) return NULL; mt = CALLOC_STRUCT(nv10_miptree); @@ -133,8 +133,8 @@ nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&ns->base.texture, pt); ns->base.format = pt->format; - ns->base.width = pt->width[level]; - ns->base.height = pt->height[level]; + ns->base.width = u_minify(pt->width0, level); + ns->base.height = u_minify(pt->height0, level); ns->base.usage = flags; pipe_reference_init(&ns->base.reference, 1); ns->base.face = face; diff --git a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c index 8feb85e4bd..ec54297ab0 100644 --- a/src/gallium/drivers/nv10/nv10_transfer.c +++ b/src/gallium/drivers/nv10/nv10_transfer.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "nv10_context.h" #include "nv10_screen.h" @@ -20,9 +21,9 @@ nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width[0] = pt->width[level]; - template->height[0] = pt->height[level]; - template->depth[0] = 1; + template->width0 = u_minify(pt->width0, level); + template->height0 = u_minify(pt->height0, level); + template->depth0 = 1; template->block = pt->block; template->nblocksx[0] = pt->nblocksx[level]; template->nblocksy[0] = pt->nblocksx[level]; diff --git a/src/gallium/drivers/nv20/nv20_fragtex.c b/src/gallium/drivers/nv20/nv20_fragtex.c index 495a7be912..2db4a4015a 100644 --- a/src/gallium/drivers/nv20/nv20_fragtex.c +++ b/src/gallium/drivers/nv20/nv20_fragtex.c @@ -62,9 +62,9 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit) txf = tf->format << 8; txf |= (pt->last_level + 1) << 16; - txf |= log2i(pt->width[0]) << 20; - txf |= log2i(pt->height[0]) << 24; - txf |= log2i(pt->depth[0]) << 28; + txf |= log2i(pt->width0) << 20; + txf |= log2i(pt->height0) << 24; + txf |= log2i(pt->depth0) << 28; txf |= 8; switch (pt->target) { @@ -89,7 +89,7 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit) OUT_RING (0x40000000); /* enable */ OUT_RING (txs); OUT_RING (ps->filt | 0x2000 /* magic */); - OUT_RING ((pt->width[0] << 16) | pt->height[0]); + OUT_RING ((pt->width0 << 16) | pt->height0); OUT_RING (ps->bcol); #endif } diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c index 185fbf53e0..554e28e47d 100644 --- a/src/gallium/drivers/nv20/nv20_miptree.c +++ b/src/gallium/drivers/nv20/nv20_miptree.c @@ -1,6 +1,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" #include "nv20_context.h" #include "nv20_screen.h" @@ -9,7 +10,7 @@ static void nv20_miptree_layout(struct nv20_miptree *nv20mt) { struct pipe_texture *pt = &nv20mt->base; - uint width = pt->width[0], height = pt->height[0]; + uint width = pt->width0, height = pt->height0; uint offset = 0; int nr_faces, l, f; uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | @@ -25,21 +26,19 @@ nv20_miptree_layout(struct nv20_miptree *nv20mt) } for (l = 0; l <= pt->last_level; l++) { - pt->width[l] = width; - pt->height[l] = height; pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) - nv20mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64); + nv20mt->level[l].pitch = align(pt->width0 * pt->block.size, 64); else - nv20mt->level[l].pitch = pt->width[l] * pt->block.size; + nv20mt->level[l].pitch = u_minify(pt->width0, l) * pt->block.size; nv20mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); - width = MAX2(1, width >> 1); - height = MAX2(1, height >> 1); + width = u_minify(width, 1); + height = u_minify(height, 1); } for (f = 0; f < nr_faces; f++) { @@ -47,14 +46,14 @@ nv20_miptree_layout(struct nv20_miptree *nv20mt) nv20mt->level[l].image_offset[f] = offset; if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) && - pt->width[l + 1] > 1 && pt->height[l + 1] > 1) - offset += align(nv20mt->level[l].pitch * pt->height[l], 64); + u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1) + offset += align(nv20mt->level[l].pitch * u_minify(pt->height0, l), 64); else - offset += nv20mt->level[l].pitch * pt->height[l]; + offset += nv20mt->level[l].pitch * u_minify(pt->height0, l); } nv20mt->level[l].image_offset[f] = offset; - offset += nv20mt->level[l].pitch * pt->height[l]; + offset += nv20mt->level[l].pitch * u_minify(pt->height0, l); } nv20mt->total_size = offset; @@ -68,7 +67,7 @@ nv20_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, /* Only supports 2D, non-mipmapped textures for the moment */ if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth[0] != 1) + pt->depth0 != 1) return NULL; mt = CALLOC_STRUCT(nv20_miptree); @@ -100,8 +99,8 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) mt->base.screen = screen; /* Swizzled textures must be POT */ - if (pt->width[0] & (pt->width[0] - 1) || - pt->height[0] & (pt->height[0] - 1)) + if (pt->width0 & (pt->width0 - 1) || + pt->height0 & (pt->height0 - 1)) mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; else if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | @@ -167,8 +166,8 @@ nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&ns->base.texture, pt); ns->base.format = pt->format; - ns->base.width = pt->width[level]; - ns->base.height = pt->height[level]; + ns->base.width = u_minify(pt->width0, level); + ns->base.height = u_minify(pt->height0, level); ns->base.usage = flags; pipe_reference_init(&ns->base.reference, 1); ns->base.face = face; diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c index 81b4f1a917..87b5c14a3c 100644 --- a/src/gallium/drivers/nv20/nv20_transfer.c +++ b/src/gallium/drivers/nv20/nv20_transfer.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "nv20_context.h" #include "nv20_screen.h" @@ -20,9 +21,9 @@ nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width[0] = pt->width[level]; - template->height[0] = pt->height[level]; - template->depth[0] = 1; + template->width0 = u_minify(pt->width0, level); + template->height0 = u_minify(pt->height0, level); + template->depth0 = 1; template->block = pt->block; template->nblocksx[0] = pt->nblocksx[level]; template->nblocksy[0] = pt->nblocksx[level]; diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index dca760cae6..b3293ee700 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -74,9 +74,9 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) txf = tf->format; txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0); - txf |= log2i(pt->width[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT; - txf |= log2i(pt->height[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT; - txf |= log2i(pt->depth[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT; + txf |= log2i(pt->width0) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT; + txf |= log2i(pt->height0) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT; + txf |= log2i(pt->depth0) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT; txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000; switch (pt->target) { @@ -115,8 +115,8 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) so_data (so, NV34TCL_TX_ENABLE_ENABLE | ps->en); so_data (so, txs); so_data (so, ps->filt | 0x2000 /*voodoo*/); - so_data (so, (pt->width[0] << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | - pt->height[0]); + so_data (so, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | + pt->height0); so_data (so, ps->bcol); return so; diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index 280696d450..b4c306d127 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -1,6 +1,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" #include "nv30_context.h" @@ -8,7 +9,7 @@ static void nv30_miptree_layout(struct nv30_miptree *nv30mt) { struct pipe_texture *pt = &nv30mt->base; - uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + uint width = pt->width0, height = pt->height0, depth = pt->depth0; uint offset = 0; int nr_faces, l, f; uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | @@ -21,29 +22,26 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt) nr_faces = 6; } else if (pt->target == PIPE_TEXTURE_3D) { - nr_faces = pt->depth[0]; + nr_faces = pt->depth0; } else { nr_faces = 1; } for (l = 0; l <= pt->last_level; l++) { - pt->width[l] = width; - pt->height[l] = height; - pt->depth[l] = depth; pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) - nv30mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64); + nv30mt->level[l].pitch = align(pt->width0 * pt->block.size, 64); else - nv30mt->level[l].pitch = pt->width[l] * pt->block.size; + nv30mt->level[l].pitch = u_minify(pt->width0, l) * pt->block.size; nv30mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); - width = MAX2(1, width >> 1); - height = MAX2(1, height >> 1); - depth = MAX2(1, depth >> 1); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); } for (f = 0; f < nr_faces; f++) { @@ -51,14 +49,14 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt) nv30mt->level[l].image_offset[f] = offset; if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) && - pt->width[l + 1] > 1 && pt->height[l + 1] > 1) - offset += align(nv30mt->level[l].pitch * pt->height[l], 64); + u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1) + offset += align(nv30mt->level[l].pitch * u_minify(pt->height0, l), 64); else - offset += nv30mt->level[l].pitch * pt->height[l]; + offset += nv30mt->level[l].pitch * u_minify(pt->height0, l); } nv30mt->level[l].image_offset[f] = offset; - offset += nv30mt->level[l].pitch * pt->height[l]; + offset += nv30mt->level[l].pitch * u_minify(pt->height0, l); } nv30mt->total_size = offset; @@ -79,8 +77,8 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) mt->base.screen = pscreen; /* Swizzled textures must be POT */ - if (pt->width[0] & (pt->width[0] - 1) || - pt->height[0] & (pt->height[0] - 1)) + if (pt->width0 & (pt->width0 - 1) || + pt->height0 & (pt->height0 - 1)) mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; else if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | @@ -134,7 +132,7 @@ nv30_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, /* Only supports 2D, non-mipmapped textures for the moment */ if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth[0] != 1) + pt->depth0 != 1) return NULL; mt = CALLOC_STRUCT(nv30_miptree); @@ -182,8 +180,8 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&ns->base.texture, pt); ns->base.format = pt->format; - ns->base.width = pt->width[level]; - ns->base.height = pt->height[level]; + ns->base.width = u_minify(pt->width0, level); + ns->base.height = u_minify(pt->height0, level); ns->base.usage = flags; pipe_reference_init(&ns->base.reference, 1); ns->base.face = face; diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c index 98011decf7..5e429b4d85 100644 --- a/src/gallium/drivers/nv30/nv30_transfer.c +++ b/src/gallium/drivers/nv30/nv30_transfer.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "nv30_context.h" #include "nv30_screen.h" @@ -20,9 +21,9 @@ nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width[0] = pt->width[level]; - template->height[0] = pt->height[level]; - template->depth[0] = 1; + template->width0 = u_minify(pt->width0, level); + template->height0 = u_minify(pt->height0, level); + template->depth0 = 1; template->block = pt->block; template->nblocksx[0] = pt->nblocksx[level]; template->nblocksy[0] = pt->nblocksx[level]; diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index e2ec57564d..44abc84596 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -117,11 +117,11 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) so_data (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en); so_data (so, txs); so_data (so, ps->filt | tf->sign | 0x2000 /*voodoo*/); - so_data (so, (pt->width[0] << NV40TCL_TEX_SIZE0_W_SHIFT) | - pt->height[0]); + so_data (so, (pt->width0 << NV40TCL_TEX_SIZE0_W_SHIFT) | + pt->height0); so_data (so, ps->bcol); so_method(so, nv40->screen->curie, NV40TCL_TEX_SIZE1(unit), 1); - so_data (so, (pt->depth[0] << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); + so_data (so, (pt->depth0 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); return so; } diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index 465dd3b069..f73bedff6d 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -1,6 +1,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" #include "nv40_context.h" @@ -8,7 +9,7 @@ static void nv40_miptree_layout(struct nv40_miptree *mt) { struct pipe_texture *pt = &mt->base; - uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + uint width = pt->width0, height = pt->height0, depth = pt->depth0; uint offset = 0; int nr_faces, l, f; uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | @@ -21,29 +22,26 @@ nv40_miptree_layout(struct nv40_miptree *mt) nr_faces = 6; } else if (pt->target == PIPE_TEXTURE_3D) { - nr_faces = pt->depth[0]; + nr_faces = pt->depth0; } else { nr_faces = 1; } for (l = 0; l <= pt->last_level; l++) { - pt->width[l] = width; - pt->height[l] = height; - pt->depth[l] = depth; pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) - mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64); + mt->level[l].pitch = align(pt->width0 * pt->block.size, 64); else - mt->level[l].pitch = pt->width[l] * pt->block.size; + mt->level[l].pitch = u_minify(pt->width0, l) * pt->block.size; mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); - width = MAX2(1, width >> 1); - height = MAX2(1, height >> 1); - depth = MAX2(1, depth >> 1); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); } for (f = 0; f < nr_faces; f++) { @@ -51,14 +49,14 @@ nv40_miptree_layout(struct nv40_miptree *mt) mt->level[l].image_offset[f] = offset; if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) && - pt->width[l + 1] > 1 && pt->height[l + 1] > 1) - offset += align(mt->level[l].pitch * pt->height[l], 64); + u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1) + offset += align(mt->level[l].pitch * u_minify(pt->height0, l), 64); else - offset += mt->level[l].pitch * pt->height[l]; + offset += mt->level[l].pitch * u_minify(pt->height0, l); } mt->level[l].image_offset[f] = offset; - offset += mt->level[l].pitch * pt->height[l]; + offset += mt->level[l].pitch * u_minify(pt->height0, l); } mt->total_size = offset; @@ -79,8 +77,8 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) mt->base.screen = pscreen; /* Swizzled textures must be POT */ - if (pt->width[0] & (pt->width[0] - 1) || - pt->height[0] & (pt->height[0] - 1)) + if (pt->width0 & (pt->width0 - 1) || + pt->height0 & (pt->height0 - 1)) mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; else if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | @@ -128,7 +126,7 @@ nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, /* Only supports 2D, non-mipmapped textures for the moment */ if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth[0] != 1) + pt->depth0 != 1) return NULL; mt = CALLOC_STRUCT(nv40_miptree); @@ -176,8 +174,8 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&ns->base.texture, pt); ns->base.format = pt->format; - ns->base.width = pt->width[level]; - ns->base.height = pt->height[level]; + ns->base.width = u_minify(pt->width0, level); + ns->base.height = u_minify(pt->height0, level); ns->base.usage = flags; pipe_reference_init(&ns->base.reference, 1); ns->base.face = face; diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c index 92caee6f38..36e253c96f 100644 --- a/src/gallium/drivers/nv40/nv40_transfer.c +++ b/src/gallium/drivers/nv40/nv40_transfer.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "nv40_context.h" #include "nv40_screen.h" @@ -20,9 +21,9 @@ nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width[0] = pt->width[level]; - template->height[0] = pt->height[level]; - template->depth[0] = 1; + template->width0 = u_minify(pt->width0, level); + template->height0 = u_minify(pt->height0, level); + template->depth0 = 1; template->block = pt->block; template->nblocksx[0] = pt->nblocksx[level]; template->nblocksy[0] = pt->nblocksx[level]; diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 9c20c5cc28..3d58746793 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -61,8 +61,8 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) struct nouveau_device *dev = nouveau_screen(pscreen)->device; struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); struct pipe_texture *pt = &mt->base.base; - unsigned width = tmp->width[0], height = tmp->height[0]; - unsigned depth = tmp->depth[0], image_alignment; + unsigned width = tmp->width0, height = tmp->height0; + unsigned depth = tmp->depth0, image_alignment; uint32_t tile_flags; int ret, i, l; @@ -92,9 +92,6 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) for (l = 0; l <= pt->last_level; l++) { struct nv50_miptree_level *lvl = &mt->level[l]; - pt->width[l] = width; - pt->height[l] = height; - pt->depth[l] = depth; pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); @@ -102,9 +99,9 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) lvl->pitch = align(pt->nblocksx[l] * pt->block.size, 64); lvl->tile_mode = get_tile_mode(pt->nblocksy[l], depth); - width = MAX2(1, width >> 1); - height = MAX2(1, height >> 1); - depth = MAX2(1, depth >> 1); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); } image_alignment = get_tile_height(mt->level[0].tile_mode) * 64; @@ -122,7 +119,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) size = lvl->pitch; size *= align(pt->nblocksy[l], tile_h); - size *= align(pt->depth[l], tile_d); + size *= align(u_minify(pt->depth0, l), tile_d); lvl->image_offset[i] = mt->total_size; @@ -151,7 +148,7 @@ nv50_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, /* Only supports 2D, non-mipmapped textures for the moment */ if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth[0] != 1) + pt->depth0 != 1) return NULL; mt = CALLOC_STRUCT(nv50_miptree); @@ -202,8 +199,8 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&ps->texture, pt); ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; + ps->width = u_minify(pt->width0, level); + ps->height = u_minify(pt->height0, level); ps->usage = flags; pipe_reference_init(&ps->reference, 1); ps->face = face; diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index 2813f54477..417d367942 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -131,9 +131,9 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so, NOUVEAU_BO_RD, 0, 0); so_data (so, mode); so_data (so, 0x00300000); - so_data (so, mt->base.base.width[0] | (1 << 31)); + so_data (so, mt->base.base.width0 | (1 << 31)); so_data (so, (mt->base.base.last_level << 28) | - (mt->base.base.depth[0] << 16) | mt->base.base.height[0]); + (mt->base.base.depth0 << 16) | mt->base.base.height0); so_data (so, 0x03000000); so_data (so, mt->base.base.last_level << 4); diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index ea61357aaa..39d65279fc 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -1,6 +1,7 @@ #include "pipe/p_context.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" #include "nv50_context.h" @@ -156,9 +157,9 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->base.block = pt->block; if (!pt->nblocksx[level]) { tx->base.nblocksx = pf_get_nblocksx(&pt->block, - pt->width[level]); + u_minify(pt->width0, level)); tx->base.nblocksy = pf_get_nblocksy(&pt->block, - pt->height[level]); + u_minify(pt->height0, level)); } else { tx->base.nblocksx = pt->nblocksx[level]; tx->base.nblocksy = pt->nblocksy[level]; @@ -167,9 +168,9 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->base.usage = usage; tx->level_pitch = lvl->pitch; - tx->level_width = mt->base.base.width[level]; - tx->level_height = mt->base.base.height[level]; - tx->level_depth = mt->base.base.depth[level]; + tx->level_width = u_minify(mt->base.base.width0, level); + tx->level_height = u_minify(mt->base.base.height0, level); + tx->level_depth = u_minify(mt->base.base.depth0, level); tx->level_offset = lvl->image_offset[image]; tx->level_tiling = lvl->tile_mode; tx->level_x = pf_get_nblocksx(&tx->base.block, x); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index aea25cf71d..f4d148cdc5 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -34,8 +34,8 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) struct r300_texture_state* state = &tex->state; struct pipe_texture *pt = &tex->tex; - state->format0 = R300_TX_WIDTH((pt->width[0] - 1) & 0x7ff) | - R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff); + state->format0 = R300_TX_WIDTH((pt->width0 - 1) & 0x7ff) | + R300_TX_HEIGHT((pt->height0 - 1) & 0x7ff); if (tex->is_npot) { /* rectangles love this */ @@ -43,7 +43,7 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) state->format2 = (tex->pitch[0] - 1) & 0x1fff; } else { /* power of two textures (3D, mipmaps, and no pitch) */ - state->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) | + state->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf) | R300_TX_NUM_LEVELS(pt->last_level & 0xf); } @@ -58,17 +58,17 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) /* large textures on r500 */ if (is_r500) { - if (pt->width[0] > 2048) { + if (pt->width0 > 2048) { state->format2 |= R500_TXWIDTH_BIT11; } - if (pt->height[0] > 2048) { + if (pt->height0 > 2048) { state->format2 |= R500_TXHEIGHT_BIT11; } } - assert(is_r500 || (pt->width[0] <= 2048 && pt->height[0] <= 2048)); + assert(is_r500 || (pt->width0 <= 2048 && pt->height0 <= 2048)); debug_printf("r300: Set texture state (%dx%d, %d levels)\n", - pt->width[0], pt->height[0], pt->last_level); + pt->width0, pt->height0, pt->last_level); } unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, @@ -106,7 +106,7 @@ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) return 0; } - return align(pf_get_stride(&tex->tex.block, tex->tex.width[level]), 32); + return align(pf_get_stride(&tex->tex.block, u_minify(tex->tex.width0, level)), 32); } static void r300_setup_miptree(struct r300_texture* tex) @@ -116,14 +116,8 @@ static void r300_setup_miptree(struct r300_texture* tex) int i; for (i = 0; i <= base->last_level; i++) { - if (i > 0) { - base->width[i] = minify(base->width[i-1]); - base->height[i] = minify(base->height[i-1]); - base->depth[i] = minify(base->depth[i-1]); - } - - base->nblocksx[i] = pf_get_nblocksx(&base->block, base->width[i]); - base->nblocksy[i] = pf_get_nblocksy(&base->block, base->height[i]); + base->nblocksx[i] = pf_get_nblocksx(&base->block, u_minify(base->width0, i)); + base->nblocksy[i] = pf_get_nblocksy(&base->block, u_minify(base->height0, i)); stride = r300_texture_get_stride(tex, i); layer_size = stride * base->nblocksy[i]; @@ -131,7 +125,7 @@ static void r300_setup_miptree(struct r300_texture* tex) if (base->target == PIPE_TEXTURE_CUBE) size = layer_size * 6; else - size = layer_size * base->depth[i]; + size = layer_size * u_minify(base->depth0, i); tex->offset[i] = align(tex->size, 32); tex->size = tex->offset[i] + size; @@ -140,15 +134,15 @@ static void r300_setup_miptree(struct r300_texture* tex) debug_printf("r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes)\n", - i, base->width[i], base->height[i], base->depth[i], - stride); + i, u_minify(base->width0, i), u_minify(base->height0, i), + u_minify(base->depth0, i), stride); } } static void r300_setup_flags(struct r300_texture* tex) { - tex->is_npot = !util_is_power_of_two(tex->tex.width[0]) || - !util_is_power_of_two(tex->tex.height[0]); + tex->is_npot = !util_is_power_of_two(tex->tex.width0) || + !util_is_power_of_two(tex->tex.height0); } /* Create a new texture. */ @@ -208,8 +202,8 @@ static struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, pipe_reference_init(&surface->reference, 1); pipe_texture_reference(&surface->texture, texture); surface->format = texture->format; - surface->width = texture->width[level]; - surface->height = texture->height[level]; + surface->width = u_minify(texture->width0, level); + surface->height = u_minify(texture->height0, level); surface->offset = offset; surface->usage = flags; surface->zslice = zslice; @@ -237,7 +231,7 @@ static struct pipe_texture* /* Support only 2D textures without mipmaps */ if (base->target != PIPE_TEXTURE_2D || - base->depth[0] != 1 || + base->depth0 != 1 || base->last_level != 0) { return NULL; } @@ -287,9 +281,9 @@ r300_video_surface_create(struct pipe_screen *screen, template.target = PIPE_TEXTURE_2D; template.format = PIPE_FORMAT_X8R8G8B8_UNORM; template.last_level = 0; - template.width[0] = util_next_power_of_two(width); - template.height[0] = util_next_power_of_two(height); - template.depth[0] = 1; + template.width0 = util_next_power_of_two(width); + template.height0 = util_next_power_of_two(height); + template.depth0 = 1; pf_get_block(template.format, &template.block); template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET; diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c index 5625ff53cf..45a6059ea8 100644 --- a/src/gallium/state_trackers/dri/dri_drawable.c +++ b/src/gallium/state_trackers/dri/dri_drawable.c @@ -46,7 +46,7 @@ #include "util/u_memory.h" #include "util/u_rect.h" - + static struct pipe_surface * dri_surface_from_handle(struct drm_api *api, struct pipe_screen *screen, @@ -62,10 +62,10 @@ dri_surface_from_handle(struct drm_api *api, templat.tex_usage |= PIPE_TEXTURE_USAGE_RENDER_TARGET; templat.target = PIPE_TEXTURE_2D; templat.last_level = 0; - templat.depth[0] = 1; + templat.depth0 = 1; templat.format = format; - templat.width[0] = width; - templat.height[0] = height; + templat.width0 = width; + templat.height0 = height; pf_get_block(templat.format, &templat.block); texture = api->texture_from_shared_handle(api, screen, &templat, diff --git a/src/gallium/state_trackers/egl/egl_surface.c b/src/gallium/state_trackers/egl/egl_surface.c index 91615abebe..ddd9b04cd4 100644 --- a/src/gallium/state_trackers/egl/egl_surface.c +++ b/src/gallium/state_trackers/egl/egl_surface.c @@ -114,10 +114,10 @@ drm_create_texture(_EGLDisplay *dpy, templat.tex_usage |= PIPE_TEXTURE_USAGE_PRIMARY; templat.target = PIPE_TEXTURE_2D; templat.last_level = 0; - templat.depth[0] = 1; + templat.depth0 = 1; templat.format = PIPE_FORMAT_A8R8G8B8_UNORM; - templat.width[0] = w; - templat.height[0] = h; + templat.width0 = w; + templat.height0 = h; pf_get_block(templat.format, &templat.block); texture = screen->texture_create(dev->screen, diff --git a/src/gallium/state_trackers/python/p_device.i b/src/gallium/state_trackers/python/p_device.i index f16fe5b0ff..a83bcc71a1 100644 --- a/src/gallium/state_trackers/python/p_device.i +++ b/src/gallium/state_trackers/python/p_device.i @@ -113,9 +113,9 @@ struct st_device { memset(&templat, 0, sizeof(templat)); templat.format = format; pf_get_block(templat.format, &templat.block); - templat.width[0] = width; - templat.height[0] = height; - templat.depth[0] = depth; + templat.width0 = width; + templat.height0 = height; + templat.depth0 = depth; templat.last_level = last_level; templat.target = target; templat.tex_usage = tex_usage; diff --git a/src/gallium/state_trackers/python/p_texture.i b/src/gallium/state_trackers/python/p_texture.i index 1d513abf3c..5416b872f5 100644 --- a/src/gallium/state_trackers/python/p_texture.i +++ b/src/gallium/state_trackers/python/p_texture.i @@ -59,15 +59,15 @@ } unsigned get_width(unsigned level=0) { - return $self->width[level]; + return u_minify($self->width0, level); } unsigned get_height(unsigned level=0) { - return $self->height[level]; + return u_minify($self->height0, level); } unsigned get_depth(unsigned level=0) { - return $self->depth[level]; + return u_minify($self->depth0, level); } unsigned get_nblocksx(unsigned level=0) { @@ -88,7 +88,7 @@ SWIG_exception(SWIG_ValueError, "face out of bounds"); if(level > $self->last_level) SWIG_exception(SWIG_ValueError, "level out of bounds"); - if(zslice >= $self->depth[level]) + if(zslice >= u_minify($self->depth0, level)) SWIG_exception(SWIG_ValueError, "zslice out of bounds"); surface = CALLOC_STRUCT(st_surface); @@ -375,13 +375,13 @@ struct st_surface static unsigned st_surface_width_get(struct st_surface *surface) { - return surface->texture->width[surface->level]; + return u_minify(surface->texture->width0, surface->level); } static unsigned st_surface_height_get(struct st_surface *surface) { - return surface->texture->height[surface->level]; + return u_minify(surface->texture->height0, surface->level); } static unsigned diff --git a/src/gallium/state_trackers/python/retrace/interpreter.py b/src/gallium/state_trackers/python/retrace/interpreter.py index 348f2e4368..d0bcb690a9 100755 --- a/src/gallium/state_trackers/python/retrace/interpreter.py +++ b/src/gallium/state_trackers/python/retrace/interpreter.py @@ -279,9 +279,9 @@ class Screen(Object): def texture_create(self, templat): return self.real.texture_create( format = templat.format, - width = templat.width[0], - height = templat.height[0], - depth = templat.depth[0], + width = templat.width0, + height = templat.height0, + depth = templat.depth0, last_level = templat.last_level, target = templat.target, tex_usage = templat.tex_usage, diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c index ea7d18738f..a791113aba 100644 --- a/src/gallium/state_trackers/python/st_device.c +++ b/src/gallium/state_trackers/python/st_device.c @@ -252,9 +252,9 @@ st_context_create(struct st_device *st_dev) templat.block.size = 4; templat.block.width = 1; templat.block.height = 1; - templat.width[0] = 1; - templat.height[0] = 1; - templat.depth[0] = 1; + templat.width0 = 1; + templat.height0 = 1; + templat.depth0 = 1; templat.last_level = 0; st_ctx->default_texture = screen->texture_create( screen, &templat ); @@ -264,8 +264,8 @@ st_context_create(struct st_device *st_dev) 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0, - st_ctx->default_texture->width[0], - st_ctx->default_texture->height[0]); + st_ctx->default_texture->width0, + st_ctx->default_texture->height0); if (transfer) { uint32_t *map; map = (uint32_t *) screen->transfer_map(screen, transfer); diff --git a/src/gallium/state_trackers/python/st_sample.c b/src/gallium/state_trackers/python/st_sample.c index 53a01891e1..6fee90afda 100644 --- a/src/gallium/state_trackers/python/st_sample.c +++ b/src/gallium/state_trackers/python/st_sample.c @@ -528,8 +528,8 @@ st_sample_surface(struct st_surface *surface, float *rgba) { struct pipe_texture *texture = surface->texture; struct pipe_screen *screen = texture->screen; - unsigned width = texture->width[surface->level]; - unsigned height = texture->height[surface->level]; + unsigned width = u_minify(texture->width0, surface->level); + unsigned height = u_minify(texture->height0, surface->level); uint rgba_stride = width * 4; struct pipe_transfer *transfer; void *raw; diff --git a/src/gallium/state_trackers/vega/api_filters.c b/src/gallium/state_trackers/vega/api_filters.c index 862cbb03c4..faf396d087 100644 --- a/src/gallium/state_trackers/vega/api_filters.c +++ b/src/gallium/state_trackers/vega/api_filters.c @@ -68,9 +68,9 @@ static INLINE struct pipe_texture *create_texture_1d(struct vg_context *ctx, templ.target = PIPE_TEXTURE_1D; templ.format = PIPE_FORMAT_A8R8G8B8_UNORM; templ.last_level = 0; - templ.width[0] = color_data_len; - templ.height[0] = 1; - templ.depth[0] = 1; + templ.width0 = color_data_len; + templ.height0 = 1; + templ.depth0 = 1; pf_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &templ.block); templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER; @@ -81,7 +81,7 @@ static INLINE struct pipe_texture *create_texture_1d(struct vg_context *ctx, screen->get_tex_transfer(screen, tex, 0, 0, 0, PIPE_TRANSFER_READ_WRITE , - 0, 0, tex->width[0], tex->height[0]); + 0, 0, tex->width0, tex->height0); void *map = screen->transfer_map(screen, transfer); memcpy(map, color_data, sizeof(VGint)*color_data_len); screen->transfer_unmap(screen, transfer); diff --git a/src/gallium/state_trackers/vega/image.c b/src/gallium/state_trackers/vega/image.c index 9a722980d5..4684a5727d 100644 --- a/src/gallium/state_trackers/vega/image.c +++ b/src/gallium/state_trackers/vega/image.c @@ -93,8 +93,8 @@ static void vg_copy_texture(struct vg_context *ctx, dst_loc[3] = height; dst_bounds[0] = 0.f; dst_bounds[1] = 0.f; - dst_bounds[2] = dst->width[0]; - dst_bounds[3] = dst->height[0]; + dst_bounds[2] = dst->width0; + dst_bounds[3] = dst->height0; src_loc[0] = sx; src_loc[1] = sy; @@ -102,8 +102,8 @@ static void vg_copy_texture(struct vg_context *ctx, src_loc[3] = height; src_bounds[0] = 0.f; src_bounds[1] = 0.f; - src_bounds[2] = src->width[0]; - src_bounds[3] = src->height[0]; + src_bounds[2] = src->width0; + src_bounds[3] = src->height0; vg_bound_rect(src_loc, src_bounds, src_shift); vg_bound_rect(dst_loc, dst_bounds, dst_shift); @@ -272,9 +272,9 @@ struct vg_image * image_create(VGImageFormat format, pt.format = pformat; pf_get_block(pformat, &pt.block); pt.last_level = 0; - pt.width[0] = width; - pt.height[0] = height; - pt.depth[0] = 1; + pt.width0 = width; + pt.height0 = height; + pt.depth0 = 1; pt.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER; newtex = screen->texture_create(screen, &pt); @@ -414,7 +414,7 @@ void image_sub_data(struct vg_image *image, { /* upload color_data */ struct pipe_transfer *transfer = screen->get_tex_transfer( screen, texture, 0, 0, 0, - PIPE_TRANSFER_WRITE, 0, 0, texture->width[0], texture->height[0]); + PIPE_TRANSFER_WRITE, 0, 0, texture->width0, texture->height0); src += (dataStride * yoffset); for (i = 0; i < height; i++) { _vega_unpack_float_span_rgba(ctx, width, xoffset, src, dataFormat, temp); diff --git a/src/gallium/state_trackers/vega/mask.c b/src/gallium/state_trackers/vega/mask.c index 24650a37d5..b84103fdba 100644 --- a/src/gallium/state_trackers/vega/mask.c +++ b/src/gallium/state_trackers/vega/mask.c @@ -426,7 +426,7 @@ static void mask_using_texture(struct pipe_texture *texture, if (!surface) return; if (!intersect_rectangles(surface->width, surface->height, - texture->width[0], texture->height[0], + texture->width0, texture->height0, x, y, width, height, offsets, loc)) return; @@ -493,9 +493,9 @@ struct vg_mask_layer * mask_layer_create(VGint width, VGint height) pt.format = PIPE_FORMAT_A8R8G8B8_UNORM; pf_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &pt.block); pt.last_level = 0; - pt.width[0] = width; - pt.height[0] = height; - pt.depth[0] = 1; + pt.width0 = width; + pt.height0 = height; + pt.depth0 = 1; pt.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER; pt.compressed = 0; @@ -607,8 +607,8 @@ void mask_render_to(struct path *path, struct vg_mask_layer *temp_layer; VGint width, height; - width = fb_buffers->alpha_mask->width[0]; - height = fb_buffers->alpha_mask->width[0]; + width = fb_buffers->alpha_mask->width0; + height = fb_buffers->alpha_mask->width0; temp_layer = mask_layer_create(width, height); diff --git a/src/gallium/state_trackers/vega/paint.c b/src/gallium/state_trackers/vega/paint.c index 04a6ba9cdc..e8ca7d9e89 100644 --- a/src/gallium/state_trackers/vega/paint.c +++ b/src/gallium/state_trackers/vega/paint.c @@ -151,9 +151,9 @@ static INLINE struct pipe_texture *create_gradient_texture(struct vg_paint *p) templ.target = PIPE_TEXTURE_1D; templ.format = PIPE_FORMAT_A8R8G8B8_UNORM; templ.last_level = 0; - templ.width[0] = 1024; - templ.height[0] = 1; - templ.depth[0] = 1; + templ.width0 = 1024; + templ.height0 = 1; + templ.depth0 = 1; pf_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &templ.block); templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER; @@ -328,8 +328,8 @@ static INLINE void paint_pattern_buffer(struct vg_paint *paint, void *buffer) map[4] = 0.f; map[5] = 1.f; - map[6] = paint->pattern.texture->width[0]; - map[7] = paint->pattern.texture->height[0]; + map[6] = paint->pattern.texture->width0; + map[7] = paint->pattern.texture->height0; { struct matrix mat; memcpy(&mat, &ctx->state.vg.fill_paint_to_user_matrix, diff --git a/src/gallium/state_trackers/vega/renderer.c b/src/gallium/state_trackers/vega/renderer.c index 396c88aa3d..9085ed1bfe 100644 --- a/src/gallium/state_trackers/vega/renderer.c +++ b/src/gallium/state_trackers/vega/renderer.c @@ -230,13 +230,13 @@ void renderer_draw_texture(struct renderer *r, struct pipe_buffer *buf; VGfloat s0, t0, s1, t1; - assert(tex->width[0] != 0); - assert(tex->height[0] != 0); + assert(tex->width0 != 0); + assert(tex->height0 != 0); - s0 = x1offset / tex->width[0]; - s1 = x2offset / tex->width[0]; - t0 = y1offset / tex->height[0]; - t1 = y2offset / tex->height[0]; + s0 = x1offset / tex->width0; + s1 = x2offset / tex->width0; + t0 = y1offset / tex->height0; + t1 = y2offset / tex->height0; cso_save_vertex_shader(r->cso); /* shaders */ @@ -276,10 +276,10 @@ void renderer_copy_texture(struct renderer *ctx, struct pipe_framebuffer_state fb; float s0, t0, s1, t1; - assert(src->width[0] != 0); - assert(src->height[0] != 0); - assert(dst->width[0] != 0); - assert(dst->height[0] != 0); + assert(src->width0 != 0); + assert(src->height0 != 0); + assert(dst->width0 != 0); + assert(dst->height0 != 0); #if 0 debug_printf("copy texture [%f, %f, %f, %f], [%f, %f, %f, %f]\n", @@ -287,10 +287,10 @@ void renderer_copy_texture(struct renderer *ctx, #endif #if 1 - s0 = sx1 / src->width[0]; - s1 = sx2 / src->width[0]; - t0 = sy1 / src->height[0]; - t1 = sy2 / src->height[0]; + s0 = sx1 / src->width0; + s1 = sx2 / src->width0; + t0 = sy1 / src->height0; + t1 = sy2 / src->height0; #else s0 = 0; s1 = 1; @@ -445,9 +445,9 @@ void renderer_copy_surface(struct renderer *ctx, texTemp.target = PIPE_TEXTURE_2D; texTemp.format = src->format; texTemp.last_level = 0; - texTemp.width[0] = srcW; - texTemp.height[0] = srcH; - texTemp.depth[0] = 1; + texTemp.width0 = srcW; + texTemp.height0 = srcH; + texTemp.depth0 = 1; pf_get_block(src->format, &texTemp.block); tex = screen->texture_create(screen, &texTemp); @@ -570,13 +570,13 @@ void renderer_texture_quad(struct renderer *r, struct pipe_buffer *buf; VGfloat s0, t0, s1, t1; - assert(tex->width[0] != 0); - assert(tex->height[0] != 0); + assert(tex->width0 != 0); + assert(tex->height0 != 0); - s0 = x1offset / tex->width[0]; - s1 = x2offset / tex->width[0]; - t0 = y1offset / tex->height[0]; - t1 = y2offset / tex->height[0]; + s0 = x1offset / tex->width0; + s1 = x2offset / tex->width0; + t0 = y1offset / tex->height0; + t1 = y2offset / tex->height0; cso_save_vertex_shader(r->cso); /* shaders */ diff --git a/src/gallium/state_trackers/vega/vg_tracker.c b/src/gallium/state_trackers/vega/vg_tracker.c index c4da01e52c..d28463dd1b 100644 --- a/src/gallium/state_trackers/vega/vg_tracker.c +++ b/src/gallium/state_trackers/vega/vg_tracker.c @@ -51,9 +51,9 @@ create_texture(struct pipe_context *pipe, enum pipe_format format, templ.target = PIPE_TEXTURE_2D; pf_get_block(templ.format, &templ.block); - templ.width[0] = width; - templ.height[0] = height; - templ.depth[0] = 1; + templ.width0 = width; + templ.height0 = height; + templ.depth0 = 1; templ.last_level = 0; if (pf_get_component_bits(format, PIPE_FORMAT_COMP_S)) { diff --git a/src/gallium/state_trackers/xorg/xorg_composite.c b/src/gallium/state_trackers/xorg/xorg_composite.c index 733bd53fca..6064648ab0 100644 --- a/src/gallium/state_trackers/xorg/xorg_composite.c +++ b/src/gallium/state_trackers/xorg/xorg_composite.c @@ -436,8 +436,8 @@ setup_fs_constant_buffer(struct exa_context *exa) static void setup_constant_buffers(struct exa_context *exa, struct exa_pixmap_priv *pDst) { - int width = pDst->tex->width[0]; - int height = pDst->tex->height[0]; + int width = pDst->tex->width0; + int height = pDst->tex->height0; setup_vs_constant_buffer(exa, width, height); setup_fs_constant_buffer(exa); diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c index 85b9162d4c..c4751724c9 100644 --- a/src/gallium/state_trackers/xorg/xorg_crtc.c +++ b/src/gallium/state_trackers/xorg/xorg_crtc.c @@ -187,10 +187,10 @@ crtc_load_cursor_argb(xf86CrtcPtr crtc, CARD32 * image) templat.tex_usage |= PIPE_TEXTURE_USAGE_PRIMARY; templat.target = PIPE_TEXTURE_2D; templat.last_level = 0; - templat.depth[0] = 1; + templat.depth0 = 1; templat.format = PIPE_FORMAT_A8R8G8B8_UNORM; - templat.width[0] = 64; - templat.height[0] = 64; + templat.width0 = 64; + templat.height0 = 64; pf_get_block(templat.format, &templat.block); crtcp->cursor_tex = ms->screen->texture_create(ms->screen, diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c index c41a7cd639..e16e79719c 100644 --- a/src/gallium/state_trackers/xorg/xorg_dri2.c +++ b/src/gallium/state_trackers/xorg/xorg_dri2.c @@ -103,9 +103,9 @@ driDoCreateBuffer(DrawablePtr pDraw, DRI2BufferPtr buffer, unsigned int format) template.format = ms->ds_depth_bits_last ? PIPE_FORMAT_S8Z24_UNORM : PIPE_FORMAT_Z24S8_UNORM; pf_get_block(template.format, &template.block); - template.width[0] = pDraw->width; - template.height[0] = pDraw->height; - template.depth[0] = 1; + template.width0 = pDraw->width; + template.height0 = pDraw->height; + template.depth0 = 1; template.last_level = 0; template.tex_usage = PIPE_TEXTURE_USAGE_DEPTH_STENCIL | PIPE_TEXTURE_USAGE_DISPLAY_TARGET; diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c index 6fa274eb0a..534d4da13f 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.c +++ b/src/gallium/state_trackers/xorg/xorg_exa.c @@ -288,7 +288,7 @@ ExaPrepareAccess(PixmapPtr pPix, int index) PIPE_TRANSFER_MAP_DIRECTLY | #endif PIPE_TRANSFER_READ_WRITE, - 0, 0, priv->tex->width[0], priv->tex->height[0]); + 0, 0, priv->tex->width0, priv->tex->height0); if (!priv->map_transfer) #ifdef EXA_MIXED_PIXMAPS return FALSE; @@ -752,8 +752,8 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, /* Deal with screen resize */ if (!priv->tex || - (priv->tex->width[0] != width || - priv->tex->height[0] != height || + (priv->tex->width0 != width || + priv->tex->height0 != height || priv->tex_flags != priv->flags)) { struct pipe_texture *texture = NULL; struct pipe_texture template; @@ -762,9 +762,9 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, template.target = PIPE_TEXTURE_2D; exa_get_pipe_format(depth, &template.format, &bitsPerPixel, &priv->picture_format); pf_get_block(template.format, &template.block); - template.width[0] = width; - template.height[0] = height; - template.depth[0] = 1; + template.width0 = width; + template.height0 = height; + template.depth0 = 1; template.last_level = 0; template.tex_usage = PIPE_TEXTURE_USAGE_RENDER_TARGET | priv->flags; priv->tex_flags = priv->flags; @@ -779,12 +779,12 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, src_surf = xorg_gpu_surface(exa->pipe->screen, priv); if (exa->pipe->surface_copy) { exa->pipe->surface_copy(exa->pipe, dst_surf, 0, 0, src_surf, - 0, 0, min(width, texture->width[0]), - min(height, texture->height[0])); + 0, 0, min(width, texture->width0), + min(height, texture->height0)); } else { util_surface_copy(exa->pipe, FALSE, dst_surf, 0, 0, src_surf, - 0, 0, min(width, texture->width[0]), - min(height, texture->height[0])); + 0, 0, min(width, texture->width0), + min(height, texture->height0)); } exa->scrn->tex_surface_destroy(dst_surf); exa->scrn->tex_surface_destroy(src_surf); @@ -817,8 +817,8 @@ xorg_exa_set_texture(PixmapPtr pPixmap, struct pipe_texture *tex) if (!priv) return FALSE; - if (pPixmap->drawable.width != tex->width[0] || - pPixmap->drawable.height != tex->height[0]) + if (pPixmap->drawable.width != tex->width0 || + pPixmap->drawable.height != tex->height0) return FALSE; pipe_texture_reference(&priv->tex, tex); @@ -841,9 +841,9 @@ xorg_exa_create_root_texture(ScrnInfoPtr pScrn, template.target = PIPE_TEXTURE_2D; exa_get_pipe_format(depth, &template.format, &bitsPerPixel, &dummy); pf_get_block(template.format, &template.block); - template.width[0] = width; - template.height[0] = height; - template.depth[0] = 1; + template.width0 = width; + template.height0 = height; + template.depth0 = 1; template.last_level = 0; template.tex_usage |= PIPE_TEXTURE_USAGE_RENDER_TARGET; template.tex_usage |= PIPE_TEXTURE_USAGE_PRIMARY; diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c index 723605312c..418a8dd88b 100644 --- a/src/gallium/state_trackers/xorg/xorg_renderer.c +++ b/src/gallium/state_trackers/xorg/xorg_renderer.c @@ -167,14 +167,14 @@ add_vertex_data1(struct xorg_renderer *r, map_point(src_matrix, pt3[0], pt3[1], &pt3[0], &pt3[1]); } - s0 = pt0[0] / src->width[0]; - s1 = pt1[0] / src->width[0]; - s2 = pt2[0] / src->width[0]; - s3 = pt3[0] / src->width[0]; - t0 = pt0[1] / src->height[0]; - t1 = pt1[1] / src->height[0]; - t2 = pt2[1] / src->height[0]; - t3 = pt3[1] / src->height[0]; + s0 = pt0[0] / src->width0; + s1 = pt1[0] / src->width0; + s2 = pt2[0] / src->width0; + s3 = pt3[0] / src->width0; + t0 = pt0[1] / src->height0; + t1 = pt1[1] / src->height0; + t2 = pt2[1] / src->height0; + t3 = pt3[1] / src->height0; /* 1st vertex */ add_vertex_1tex(r, dstX, dstY, s0, t0); @@ -262,15 +262,15 @@ add_vertex_data2(struct xorg_renderer *r, map_point(mask_matrix, mpt1[0], mpt1[1], &mpt1[0], &mpt1[1]); } - src_s0 = spt0[0] / src->width[0]; - src_t0 = spt0[1] / src->height[0]; - src_s1 = spt1[0] / src->width[0]; - src_t1 = spt1[1] / src->height[0]; + src_s0 = spt0[0] / src->width0; + src_t0 = spt0[1] / src->height0; + src_s1 = spt1[0] / src->width0; + src_t1 = spt1[1] / src->height0; - mask_s0 = mpt0[0] / mask->width[0]; - mask_t0 = mpt0[1] / mask->height[0]; - mask_s1 = mpt1[0] / mask->width[0]; - mask_t1 = mpt1[1] / mask->height[0]; + mask_s0 = mpt0[0] / mask->width0; + mask_t0 = mpt0[1] / mask->height0; + mask_s1 = mpt1[0] / mask->width0; + mask_t1 = mpt1[1] / mask->height0; /* 1st vertex */ add_vertex_2tex(r, dstX, dstY, @@ -300,10 +300,10 @@ setup_vertex_data_yuv(struct xorg_renderer *r, spt1[0] = srcX + srcW; spt1[1] = srcY + srcH; - s0 = spt0[0] / tex[0]->width[0]; - t0 = spt0[1] / tex[0]->height[0]; - s1 = spt1[0] / tex[0]->width[0]; - t1 = spt1[1] / tex[0]->height[0]; + s0 = spt0[0] / tex[0]->width0; + t0 = spt0[1] / tex[0]->height0; + s1 = spt1[0] / tex[0]->width0; + t1 = spt1[1] / tex[0]->height0; /* 1st vertex */ add_vertex_1tex(r, dstX, dstY, s0, t0); @@ -387,8 +387,8 @@ void renderer_bind_framebuffer(struct xorg_renderer *r, struct pipe_surface *surface = xorg_gpu_surface(r->pipe->screen, priv); memset(&state, 0, sizeof(struct pipe_framebuffer_state)); - state.width = priv->tex->width[0]; - state.height = priv->tex->height[0]; + state.width = priv->tex->width0; + state.height = priv->tex->height0; state.nr_cbufs = 1; state.cbufs[0] = surface; @@ -407,8 +407,8 @@ void renderer_bind_framebuffer(struct xorg_renderer *r, void renderer_bind_viewport(struct xorg_renderer *r, struct exa_pixmap_priv *dst) { - int width = dst->tex->width[0]; - int height = dst->tex->height[0]; + int width = dst->tex->width0; + int height = dst->tex->height0; /*debug_printf("Bind viewport (%d, %d)\n", width, height);*/ @@ -584,16 +584,16 @@ static void renderer_copy_texture(struct xorg_renderer *r, float s0, t0, s1, t1; struct xorg_shader shader; - assert(src->width[0] != 0); - assert(src->height[0] != 0); - assert(dst->width[0] != 0); - assert(dst->height[0] != 0); + assert(src->width0 != 0); + assert(src->height0 != 0); + assert(dst->width0 != 0); + assert(dst->height0 != 0); #if 1 - s0 = sx1 / src->width[0]; - s1 = sx2 / src->width[0]; - t0 = sy1 / src->height[0]; - t1 = sy2 / src->height[0]; + s0 = sx1 / src->width0; + s1 = sx2 / src->width0; + t0 = sy1 / src->height0; + t1 = sy2 / src->height0; #else s0 = 0; s1 = 1; @@ -730,9 +730,9 @@ create_sampler_texture(struct xorg_renderer *r, templ.target = PIPE_TEXTURE_2D; templ.format = format; templ.last_level = 0; - templ.width[0] = src->width[0]; - templ.height[0] = src->height[0]; - templ.depth[0] = 1; + templ.width0 = src->width0; + templ.height0 = src->height0; + templ.depth0 = 1; pf_get_block(format, &templ.block); templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER; @@ -754,13 +754,13 @@ create_sampler_texture(struct xorg_renderer *r, ps_tex, /* dest */ 0, 0, /* destx/y */ ps_read, - 0, 0, src->width[0], src->height[0]); + 0, 0, src->width0, src->height0); } else { util_surface_copy(pipe, FALSE, ps_tex, /* dest */ 0, 0, /* destx/y */ ps_read, - 0, 0, src->width[0], src->height[0]); + 0, 0, src->width0, src->height0); } pipe_surface_reference(&ps_read, NULL); pipe_surface_reference(&ps_tex, NULL); @@ -791,8 +791,8 @@ void renderer_copy_pixmap(struct xorg_renderer *r, dst_loc[3] = height; dst_bounds[0] = 0.f; dst_bounds[1] = 0.f; - dst_bounds[2] = dst->width[0]; - dst_bounds[3] = dst->height[0]; + dst_bounds[2] = dst->width0; + dst_bounds[3] = dst->height0; src_loc[0] = sx; src_loc[1] = sy; @@ -800,8 +800,8 @@ void renderer_copy_pixmap(struct xorg_renderer *r, src_loc[3] = height; src_bounds[0] = 0.f; src_bounds[1] = 0.f; - src_bounds[2] = src->width[0]; - src_bounds[3] = src->height[0]; + src_bounds[2] = src->width0; + src_bounds[3] = src->height0; bound_rect(src_loc, src_bounds, src_shift); bound_rect(dst_loc, dst_bounds, dst_shift); diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c index 2b935c0f73..856599e640 100644 --- a/src/gallium/state_trackers/xorg/xorg_xv.c +++ b/src/gallium/state_trackers/xorg/xorg_xv.c @@ -166,9 +166,9 @@ create_component_texture(struct pipe_context *pipe, templ.target = PIPE_TEXTURE_2D; templ.format = PIPE_FORMAT_L8_UNORM; templ.last_level = 0; - templ.width[0] = width; - templ.height[0] = height; - templ.depth[0] = 1; + templ.width0 = width; + templ.height0 = height; + templ.depth0 = 1; pf_get_block(PIPE_FORMAT_L8_UNORM, &templ.block); templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER; @@ -182,18 +182,18 @@ check_yuv_textures(struct xorg_xv_port_priv *priv, int width, int height) { struct pipe_texture **dst = priv->yuv[priv->current_set]; if (!dst[0] || - dst[0]->width[0] != width || - dst[0]->height[0] != height) { + dst[0]->width0 != width || + dst[0]->height0 != height) { pipe_texture_reference(&dst[0], NULL); } if (!dst[1] || - dst[1]->width[0] != width || - dst[1]->height[0] != height) { + dst[1]->width0 != width || + dst[1]->height0 != height) { pipe_texture_reference(&dst[1], NULL); } if (!dst[2] || - dst[2]->width[0] != width || - dst[2]->height[0] != height) { + dst[2]->width0 != width || + dst[2]->height0 != height) { pipe_texture_reference(&dst[2], NULL); } @@ -320,8 +320,8 @@ copy_packed_data(ScrnInfoPtr pScrn, static void setup_vs_video_constants(struct xorg_renderer *r, struct exa_pixmap_priv *dst) { - int width = dst->tex->width[0]; - int height = dst->tex->height[0]; + int width = dst->tex->width0; + int height = dst->tex->height0; const int param_bytes = 8 * sizeof(float); float vs_consts[8] = { 2.f/width, 2.f/height, 1, 1, diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c index bf9038f356..8cb73f4897 100644 --- a/src/gallium/state_trackers/xorg/xvmc/surface.c +++ b/src/gallium/state_trackers/xorg/xvmc/surface.c @@ -103,9 +103,9 @@ CreateOrResizeBackBuffer(struct pipe_video_context *vpipe, unsigned int width, u /* XXX: Needs to match the drawable's format? */ template.format = PIPE_FORMAT_X8R8G8B8_UNORM; template.last_level = 0; - template.width[0] = width; - template.height[0] = height; - template.depth[0] = 1; + template.width0 = width; + template.height0 = height; + template.depth0 = 1; pf_get_block(template.format, &template.block); template.tex_usage = PIPE_TEXTURE_USAGE_DISPLAY_TARGET; diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c index 317dc44d22..d497861324 100644 --- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c +++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c @@ -24,10 +24,10 @@ dri_surface_from_handle(struct drm_api *api, struct pipe_screen *pscreen, tmpl.tex_usage = PIPE_TEXTURE_USAGE_PRIMARY; tmpl.target = PIPE_TEXTURE_2D; tmpl.last_level = 0; - tmpl.depth[0] = 1; + tmpl.depth0 = 1; tmpl.format = format; - tmpl.width[0] = width; - tmpl.height[0] = height; + tmpl.width0 = width; + tmpl.height0 = height; pf_get_block(tmpl.format, &tmpl.block); pt = api->texture_from_shared_handle(api, pscreen, &tmpl, diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c index 81cd9dc4fb..74afffc9cf 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c @@ -317,9 +317,9 @@ struct pipe_surface *radeon_surface_from_handle(struct radeon_context *radeon_co memset(&tmpl, 0, sizeof(tmpl)); tmpl.tex_usage = PIPE_TEXTURE_USAGE_DISPLAY_TARGET; tmpl.target = PIPE_TEXTURE_2D; - tmpl.width[0] = w; - tmpl.height[0] = h; - tmpl.depth[0] = 1; + tmpl.width0 = w; + tmpl.height0 = h; + tmpl.depth0 = 1; tmpl.format = format; pf_get_block(tmpl.format, &tmpl.block); tmpl.nblocksx[0] = pf_get_nblocksx(&tmpl.block, w); diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 0469fb9978..659a6c9193 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -376,7 +376,7 @@ st_render_texture(GLcontext *ctx, rb->_BaseFormat = texImage->_BaseFormat; /*printf("***** render to texture level %d: %d x %d\n", att->TextureLevel, rb->Width, rb->Height);*/ - /*printf("***** pipe texture %d x %d\n", pt->width[0], pt->height[0]);*/ + /*printf("***** pipe texture %d x %d\n", pt->width0, pt->height0);*/ pipe_texture_reference( &strb->texture, pt ); -- cgit v1.2.3 From 456b5bd5d0dbed172a5d8f88625eeb63fd87c8dd Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 27 Nov 2009 10:11:18 +0100 Subject: svga: Update text shader header. --- src/gallium/drivers/svga/svga_pipe_vs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c index e5ffe668c3..c104c41f5f 100644 --- a/src/gallium/drivers/svga/svga_pipe_vs.c +++ b/src/gallium/drivers/svga/svga_pipe_vs.c @@ -48,7 +48,7 @@ static const struct tgsi_token *substitute_vs( static struct tgsi_token tokens[300]; const char *text = - "VERT1.1\n" + "VERT\n" "DCL IN[0]\n" "DCL IN[1]\n" "DCL IN[2]\n" -- cgit v1.2.3 From b911688b87a011eacf2034bd61562e633952a66b Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 27 Nov 2009 12:18:22 +0000 Subject: svga: add DEBUG_CACHE option --- src/gallium/drivers/svga/svga_debug.h | 1 + src/gallium/drivers/svga/svga_draw.c | 7 +++++++ src/gallium/drivers/svga/svga_pipe_blit.c | 8 +++++++ src/gallium/drivers/svga/svga_pipe_clear.c | 6 ++++++ src/gallium/drivers/svga/svga_pipe_flush.c | 3 +++ src/gallium/drivers/svga/svga_screen.c | 5 +++++ src/gallium/drivers/svga/svga_screen_buffer.c | 2 +- src/gallium/drivers/svga/svga_screen_cache.c | 30 ++++++++++++++++++++------- 8 files changed, 53 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_debug.h b/src/gallium/drivers/svga/svga_debug.h index b7bb5686ed..3a3fcd8fae 100644 --- a/src/gallium/drivers/svga/svga_debug.h +++ b/src/gallium/drivers/svga/svga_debug.h @@ -43,6 +43,7 @@ #define DEBUG_FLUSH 0x1000 /* flush after every draw */ #define DEBUG_SYNC 0x2000 /* sync after every flush */ #define DEBUG_QUERY 0x4000 +#define DEBUG_CACHE 0x8000 #ifdef DEBUG extern int SVGA_DEBUG; diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index 1b371cecc6..8db40d0fd5 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -29,10 +29,13 @@ #include "util/u_memory.h" #include "util/u_math.h" +#include "svga_context.h" #include "svga_draw.h" #include "svga_draw_private.h" +#include "svga_debug.h" #include "svga_screen.h" #include "svga_screen_buffer.h" +#include "svga_screen_texture.h" #include "svga_winsys.h" #include "svga_cmd.h" @@ -160,6 +163,10 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) ib_handle[i] = handle; } + SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n", + svga_surface(svga->curr.framebuffer.cbufs[0])->handle, + hwtnl->cmd.prim_count); + ret = SVGA3D_BeginDrawPrimitives(swc, &vdecl, hwtnl->cmd.vdecl_count, diff --git a/src/gallium/drivers/svga/svga_pipe_blit.c b/src/gallium/drivers/svga/svga_pipe_blit.c index 5a4a8c0f5f..4f575b06e6 100644 --- a/src/gallium/drivers/svga/svga_pipe_blit.c +++ b/src/gallium/drivers/svga/svga_pipe_blit.c @@ -25,6 +25,7 @@ #include "svga_screen_texture.h" #include "svga_context.h" +#include "svga_debug.h" #include "svga_cmd.h" #define FILE_DEBUG_FLAG DEBUG_BLIT @@ -43,6 +44,13 @@ static void svga_surface_copy(struct pipe_context *pipe, svga_hwtnl_flush_retry( svga ); + SVGA_DBG(DEBUG_DMA, "blit to sid %p (%d,%d), from sid %p (%d,%d) sz %dx%d\n", + svga_surface(dest)->handle, + destx, desty, + svga_surface(src)->handle, + srcx, srcy, + width, height); + ret = SVGA3D_BeginSurfaceCopy(svga->swc, src, dest, diff --git a/src/gallium/drivers/svga/svga_pipe_clear.c b/src/gallium/drivers/svga/svga_pipe_clear.c index 8977d26541..6195c3897e 100644 --- a/src/gallium/drivers/svga/svga_pipe_clear.c +++ b/src/gallium/drivers/svga/svga_pipe_clear.c @@ -24,12 +24,14 @@ **********************************************************/ #include "svga_cmd.h" +#include "svga_debug.h" #include "pipe/p_defines.h" #include "util/u_pack_color.h" #include "svga_context.h" #include "svga_state.h" +#include "svga_screen_texture.h" static enum pipe_error @@ -98,6 +100,10 @@ svga_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, { struct svga_context *svga = svga_context( pipe ); int ret; + + if (buffers & PIPE_CLEAR_COLOR) + SVGA_DBG(DEBUG_DMA, "clear sid %p\n", + svga_surface(svga->curr.framebuffer.cbufs[0])->handle); ret = try_clear( svga, buffers, rgba, depth, stencil ); diff --git a/src/gallium/drivers/svga/svga_pipe_flush.c b/src/gallium/drivers/svga/svga_pipe_flush.c index 942366de72..0becb0765a 100644 --- a/src/gallium/drivers/svga/svga_pipe_flush.c +++ b/src/gallium/drivers/svga/svga_pipe_flush.c @@ -59,6 +59,9 @@ static void svga_flush( struct pipe_context *pipe, /* Flush command queue. */ svga_context_flush(svga, fence); + + SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s flags %x fence_ptr %p\n", + __FUNCTION__, flags, fence ? *fence : 0x0); } diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index 3afcaffff5..fc1b3c980e 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -57,6 +57,7 @@ static const struct debug_named_value svga_debug_flags[] = { { "perf", DEBUG_PERF }, { "flush", DEBUG_FLUSH }, { "sync", DEBUG_SYNC }, + { "cache", DEBUG_CACHE }, {NULL, 0} }; #endif @@ -297,6 +298,10 @@ svga_fence_finish(struct pipe_screen *screen, unsigned flag) { struct svga_winsys_screen *sws = svga_screen(screen)->sws; + + SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s fence_ptr %p\n", + __FUNCTION__, fence); + return sws->fence_finish(sws, fence, flag); } diff --git a/src/gallium/drivers/svga/svga_screen_buffer.c b/src/gallium/drivers/svga/svga_screen_buffer.c index c0b0f518bc..1f8a889672 100644 --- a/src/gallium/drivers/svga/svga_screen_buffer.c +++ b/src/gallium/drivers/svga/svga_screen_buffer.c @@ -447,7 +447,7 @@ svga_buffer_map_range( struct pipe_screen *screen, enum pipe_error ret; struct pipe_fence_handle *fence = NULL; - SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "dma from sid %p, bytes %u - %u\n", + SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "dma from sid %p (buffer), bytes %u - %u\n", sbuf->handle, 0, sbuf->base.size); memset(&flags, 0, sizeof flags); diff --git a/src/gallium/drivers/svga/svga_screen_cache.c b/src/gallium/drivers/svga/svga_screen_cache.c index 689981cc6d..8a06383f61 100644 --- a/src/gallium/drivers/svga/svga_screen_cache.c +++ b/src/gallium/drivers/svga/svga_screen_cache.c @@ -134,7 +134,8 @@ svga_screen_cache_add(struct svga_screen *svgascreen, else if(!LIST_IS_EMPTY(&cache->unused)) { /* free the last used buffer and reuse its entry */ entry = LIST_ENTRY(struct svga_host_surface_cache_entry, cache->unused.prev, head); - SVGA_DBG(DEBUG_DMA, "unref sid %p (make space)\n", entry->handle); + SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, + "unref sid %p (make space)\n", entry->handle); sws->surface_reference(sws, &entry->handle, NULL); LIST_DEL(&entry->bucket_head); @@ -146,11 +147,14 @@ svga_screen_cache_add(struct svga_screen *svgascreen, entry->handle = handle; memcpy(&entry->key, key, sizeof entry->key); + SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, + "cache sid %p\n", entry->handle); LIST_ADD(&entry->head, &cache->validated); } else { /* Couldn't cache the buffer -- this really shouldn't happen */ - SVGA_DBG(DEBUG_DMA, "unref sid %p (couldn't find space)\n", handle); + SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, + "unref sid %p (couldn't find space)\n", handle); sws->surface_reference(sws, &handle, NULL); } @@ -209,7 +213,8 @@ svga_screen_cache_cleanup(struct svga_screen *svgascreen) for(i = 0; i < SVGA_HOST_SURFACE_CACHE_SIZE; ++i) { if(cache->entries[i].handle) { - SVGA_DBG(DEBUG_DMA, "unref sid %p (shutdown)\n", cache->entries[i].handle); + SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, + "unref sid %p (shutdown)\n", cache->entries[i].handle); sws->surface_reference(sws, &cache->entries[i].handle, NULL); } @@ -252,7 +257,8 @@ svga_screen_surface_create(struct svga_screen *svgascreen, struct svga_winsys_surface *handle = NULL; boolean cachable = SVGA_SURFACE_CACHE_ENABLED && key->cachable; - SVGA_DBG(DEBUG_DMA, "%s sz %dx%dx%d mips %d faces %d cachable %d\n", + SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, + "%s sz %dx%dx%d mips %d faces %d cachable %d\n", __FUNCTION__, key->size.width, key->size.height, @@ -276,10 +282,12 @@ svga_screen_surface_create(struct svga_screen *svgascreen, handle = svga_screen_cache_lookup(svgascreen, key); if (handle) { if (key->format == SVGA3D_BUFFER) - SVGA_DBG(DEBUG_DMA, " reuse sid %p sz %d (buffer)\n", handle, + SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, + "reuse sid %p sz %d (buffer)\n", handle, key->size.width); else - SVGA_DBG(DEBUG_DMA, " reuse sid %p sz %dx%dx%d mips %d faces %d\n", handle, + SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, + "reuse sid %p sz %dx%dx%d mips %d faces %d\n", handle, key->size.width, key->size.height, key->size.depth, @@ -296,7 +304,12 @@ svga_screen_surface_create(struct svga_screen *svgascreen, key->numFaces, key->numMipLevels); if (handle) - SVGA_DBG(DEBUG_DMA, "create sid %p sz %d\n", handle, key->size); + SVGA_DBG(DEBUG_CACHE|DEBUG_DMA, + " CREATE sid %p sz %dx%dx%d\n", + handle, + key->size.width, + key->size.height, + key->size.depth); } return handle; @@ -318,7 +331,8 @@ svga_screen_surface_destroy(struct svga_screen *svgascreen, svga_screen_cache_add(svgascreen, key, p_handle); } else { - SVGA_DBG(DEBUG_DMA, "unref sid %p (uncachable)\n", *p_handle); + SVGA_DBG(DEBUG_DMA, + "unref sid %p (uncachable)\n", *p_handle); sws->surface_reference(sws, p_handle, NULL); } } -- cgit v1.2.3 From b84b7f19dfdc0ac02175847065b39110db7ad98f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 27 Nov 2009 12:19:28 +0000 Subject: svga: flush our command buffer after the 8th distinct render target This helps improve the surface cache behaviour in the face of the large number of single-use render targets generated by EXA and the xorg state tracker. Without this we can reference hundreds of individual render targets from a command buffer, which leaves little scope for sharing or reuse of those targets. Flushing early means we can start reusing textures much sooner. This shouldn't have much effect on normal 3d rendering as it's pretty rare to have a command buffer with >8 different render targets in that world. --- src/gallium/drivers/svga/svga_context.c | 4 +++- src/gallium/drivers/svga/svga_context.h | 5 +++++ src/gallium/drivers/svga/svga_state_framebuffer.c | 3 +++ 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index 73233957f3..c3de12b4a3 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -230,7 +230,9 @@ void svga_context_flush( struct svga_context *svga, struct pipe_fence_handle **pfence ) { struct svga_screen *svgascreen = svga_screen(svga->pipe.screen); - + + svga->curr.nr_fbs = 0; + /* Unmap upload manager buffers: */ u_upload_flush(svga->upload_vb); diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 9a3e92fd8d..e650a251d1 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -191,6 +191,11 @@ struct svga_state struct pipe_framebuffer_state framebuffer; float depthscale; + /* Hack to limit the number of different render targets between + * flushes. Helps avoid blowing out our surface cache in EXA. + */ + int nr_fbs; + struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; struct pipe_blend_color blend_color; diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c b/src/gallium/drivers/svga/svga_state_framebuffer.c index 7d7f93d8e3..cfdcae4ee4 100644 --- a/src/gallium/drivers/svga/svga_state_framebuffer.c +++ b/src/gallium/drivers/svga/svga_state_framebuffer.c @@ -54,6 +54,9 @@ static int emit_framebuffer( struct svga_context *svga, for(i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) { if (curr->cbufs[i] != hw->cbufs[i]) { + if (svga->curr.nr_fbs++ > 8) + return PIPE_ERROR_OUT_OF_MEMORY; + ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_COLOR0 + i, curr->cbufs[i]); if (ret != PIPE_OK) return ret; -- cgit v1.2.3 From 178407f33c413cbe7434597b2129abde90041b6b Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 24 Nov 2009 14:37:45 +0000 Subject: svga: Use consistent file names for dumping facilities. --- src/gallium/drivers/svga/Makefile | 4 +- src/gallium/drivers/svga/SConscript | 4 +- src/gallium/drivers/svga/svga_tgsi.c | 2 +- src/gallium/drivers/svga/svgadump/st_shader.h | 214 ------- src/gallium/drivers/svga/svgadump/st_shader_dump.c | 649 --------------------- src/gallium/drivers/svga/svgadump/st_shader_dump.h | 42 -- src/gallium/drivers/svga/svgadump/st_shader_op.c | 168 ------ src/gallium/drivers/svga/svgadump/st_shader_op.h | 46 -- src/gallium/drivers/svga/svgadump/svga_dump.c | 2 +- src/gallium/drivers/svga/svgadump/svga_dump.py | 2 +- src/gallium/drivers/svga/svgadump/svga_shader.h | 214 +++++++ .../drivers/svga/svgadump/svga_shader_dump.c | 649 +++++++++++++++++++++ .../drivers/svga/svgadump/svga_shader_dump.h | 42 ++ src/gallium/drivers/svga/svgadump/svga_shader_op.c | 168 ++++++ src/gallium/drivers/svga/svgadump/svga_shader_op.h | 46 ++ 15 files changed, 1126 insertions(+), 1126 deletions(-) delete mode 100644 src/gallium/drivers/svga/svgadump/st_shader.h delete mode 100644 src/gallium/drivers/svga/svgadump/st_shader_dump.c delete mode 100644 src/gallium/drivers/svga/svgadump/st_shader_dump.h delete mode 100644 src/gallium/drivers/svga/svgadump/st_shader_op.c delete mode 100644 src/gallium/drivers/svga/svgadump/st_shader_op.h create mode 100644 src/gallium/drivers/svga/svgadump/svga_shader.h create mode 100644 src/gallium/drivers/svga/svgadump/svga_shader_dump.c create mode 100644 src/gallium/drivers/svga/svgadump/svga_shader_dump.h create mode 100644 src/gallium/drivers/svga/svgadump/svga_shader_op.c create mode 100644 src/gallium/drivers/svga/svgadump/svga_shader_op.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/Makefile b/src/gallium/drivers/svga/Makefile index 8158364d25..f361908187 100644 --- a/src/gallium/drivers/svga/Makefile +++ b/src/gallium/drivers/svga/Makefile @@ -4,8 +4,8 @@ include $(TOP)/configs/current LIBNAME = svga C_SOURCES = \ - svgadump/st_shader_dump.c \ - svgadump/st_shader_op.c \ + svgadump/svga_shader_dump.c \ + svgadump/svga_shader_op.c \ svgadump/svga_dump.c \ svga_cmd.c \ svga_context.c \ diff --git a/src/gallium/drivers/svga/SConscript b/src/gallium/drivers/svga/SConscript index ff9645fc03..737b791ceb 100644 --- a/src/gallium/drivers/svga/SConscript +++ b/src/gallium/drivers/svga/SConscript @@ -60,8 +60,8 @@ sources = [ 'svga_tgsi_insn.c', 'svgadump/svga_dump.c', - 'svgadump/st_shader_dump.c', - 'svgadump/st_shader_op.c', + 'svgadump/svga_shader_dump.c', + 'svgadump/svga_shader_op.c', ] svga = env.ConvenienceLibrary( diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c index 44d0930bc0..81eea1a145 100644 --- a/src/gallium/drivers/svga/svga_tgsi.c +++ b/src/gallium/drivers/svga/svga_tgsi.c @@ -32,7 +32,7 @@ #include "tgsi/tgsi_scan.h" #include "util/u_memory.h" -#include "svgadump/st_shader_dump.h" +#include "svgadump/svga_shader_dump.h" #include "svga_context.h" #include "svga_tgsi.h" diff --git a/src/gallium/drivers/svga/svgadump/st_shader.h b/src/gallium/drivers/svga/svgadump/st_shader.h deleted file mode 100644 index 2fc1796a90..0000000000 --- a/src/gallium/drivers/svga/svgadump/st_shader.h +++ /dev/null @@ -1,214 +0,0 @@ -/********************************************************** - * Copyright 2007-2009 VMware, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **********************************************************/ - -/** - * @file - * SVGA Shader Token Definitions - * - * @author Michal Krol - */ - -#ifndef ST_SHADER_SVGA_H -#define ST_SHADER_SVGA_H - -#include "pipe/p_compiler.h" - -struct sh_op -{ - unsigned opcode:16; - unsigned control:8; - unsigned length:4; - unsigned predicated:1; - unsigned unused:1; - unsigned coissue:1; - unsigned is_reg:1; -}; - -struct sh_reg -{ - unsigned number:11; - unsigned type_hi:2; - unsigned relative:1; - unsigned unused:14; - unsigned type_lo:3; - unsigned is_reg:1; -}; - -static INLINE unsigned -sh_reg_type( struct sh_reg reg ) -{ - return reg.type_lo | (reg.type_hi << 3); -} - -struct sh_cdata -{ - float xyzw[4]; -}; - -struct sh_def -{ - struct sh_op op; - struct sh_reg reg; - struct sh_cdata cdata; -}; - -struct sh_defb -{ - struct sh_op op; - struct sh_reg reg; - uint data; -}; - -struct sh_idata -{ - int xyzw[4]; -}; - -struct sh_defi -{ - struct sh_op op; - struct sh_reg reg; - struct sh_idata idata; -}; - -#define PS_TEXTURETYPE_UNKNOWN SVGA3DSAMP_UNKNOWN -#define PS_TEXTURETYPE_2D SVGA3DSAMP_2D -#define PS_TEXTURETYPE_CUBE SVGA3DSAMP_CUBE -#define PS_TEXTURETYPE_VOLUME SVGA3DSAMP_VOLUME - -struct ps_sampleinfo -{ - unsigned unused:27; - unsigned texture_type:4; - unsigned is_reg:1; -}; - -struct vs_semantic -{ - unsigned usage:5; - unsigned unused1:11; - unsigned usage_index:4; - unsigned unused2:12; -}; - -struct sh_dstreg -{ - unsigned number:11; - unsigned type_hi:2; - unsigned relative:1; - unsigned unused:2; - unsigned write_mask:4; - unsigned modifier:4; - unsigned shift_scale:4; - unsigned type_lo:3; - unsigned is_reg:1; -}; - -static INLINE unsigned -sh_dstreg_type( struct sh_dstreg reg ) -{ - return reg.type_lo | (reg.type_hi << 3); -} - -struct sh_dcl -{ - struct sh_op op; - union { - struct { - struct ps_sampleinfo sampleinfo; - } ps; - struct { - struct vs_semantic semantic; - } vs; - } u; - struct sh_dstreg reg; -}; - - -struct sh_srcreg -{ - unsigned number:11; - unsigned type_hi:2; - unsigned relative:1; - unsigned unused:2; - unsigned swizzle_x:2; - unsigned swizzle_y:2; - unsigned swizzle_z:2; - unsigned swizzle_w:2; - unsigned modifier:4; - unsigned type_lo:3; - unsigned is_reg:1; -}; - -static INLINE unsigned -sh_srcreg_type( struct sh_srcreg reg ) -{ - return reg.type_lo | (reg.type_hi << 3); -} - -struct sh_dstop -{ - struct sh_op op; - struct sh_dstreg dst; -}; - -struct sh_srcop -{ - struct sh_op op; - struct sh_srcreg src; -}; - -struct sh_src2op -{ - struct sh_op op; - struct sh_srcreg src0; - struct sh_srcreg src1; -}; - -struct sh_unaryop -{ - struct sh_op op; - struct sh_dstreg dst; - struct sh_srcreg src; -}; - -struct sh_binaryop -{ - struct sh_op op; - struct sh_dstreg dst; - struct sh_srcreg src0; - struct sh_srcreg src1; -}; - -struct sh_trinaryop -{ - struct sh_op op; - struct sh_dstreg dst; - struct sh_srcreg src0; - struct sh_srcreg src1; - struct sh_srcreg src2; -}; - -#endif /* ST_SHADER_SVGA_H */ diff --git a/src/gallium/drivers/svga/svgadump/st_shader_dump.c b/src/gallium/drivers/svga/svgadump/st_shader_dump.c deleted file mode 100644 index d65cc93bfd..0000000000 --- a/src/gallium/drivers/svga/svgadump/st_shader_dump.c +++ /dev/null @@ -1,649 +0,0 @@ -/********************************************************** - * Copyright 2008-2009 VMware, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **********************************************************/ - -/** - * @file - * SVGA Shader Dump Facilities - * - * @author Michal Krol - */ - -#include "st_shader.h" -#include "st_shader_dump.h" -#include "st_shader_op.h" -#include "util/u_debug.h" - -#include "../svga_hw_reg.h" -#include "svga3d_shaderdefs.h" - -struct dump_info -{ - SVGA3dShaderVersion version; - boolean is_ps; -}; - -static void dump_op( struct sh_op op, const char *mnemonic ) -{ - assert( op.predicated == 0 ); - assert( op.is_reg == 0 ); - - if (op.coissue) - debug_printf( "+" ); - debug_printf( "%s", mnemonic ); - switch (op.control) { - case 0: - break; - case SVGA3DOPCONT_PROJECT: - debug_printf( "p" ); - break; - case SVGA3DOPCONT_BIAS: - debug_printf( "b" ); - break; - default: - assert( 0 ); - } -} - - -static void dump_comp_op( struct sh_op op, const char *mnemonic ) -{ - assert( op.is_reg == 0 ); - - if (op.coissue) - debug_printf( "+" ); - debug_printf( "%s", mnemonic ); - switch (op.control) { - case SVGA3DOPCOMP_RESERVED0: - break; - case SVGA3DOPCOMP_GT: - debug_printf("_gt"); - break; - case SVGA3DOPCOMP_EQ: - debug_printf("_eq"); - break; - case SVGA3DOPCOMP_GE: - debug_printf("_ge"); - break; - case SVGA3DOPCOMP_LT: - debug_printf("_lt"); - break; - case SVGA3DOPCOMPC_NE: - debug_printf("_ne"); - break; - case SVGA3DOPCOMP_LE: - debug_printf("_le"); - break; - case SVGA3DOPCOMP_RESERVED1: - default: - assert( 0 ); - } -} - - -static void dump_reg( struct sh_reg reg, struct sh_srcreg *indreg, const struct dump_info *di ) -{ - assert( sh_reg_type( reg ) == SVGA3DREG_CONST || reg.relative == 0 ); - assert( reg.is_reg == 1 ); - - switch (sh_reg_type( reg )) { - case SVGA3DREG_TEMP: - debug_printf( "r%u", reg.number ); - break; - - case SVGA3DREG_INPUT: - debug_printf( "v%u", reg.number ); - break; - - case SVGA3DREG_CONST: - if (reg.relative) { - if (sh_srcreg_type( *indreg ) == SVGA3DREG_LOOP) - debug_printf( "c[aL+%u]", reg.number ); - else - debug_printf( "c[a%u.x+%u]", indreg->number, reg.number ); - } - else - debug_printf( "c%u", reg.number ); - break; - - case SVGA3DREG_ADDR: /* VS */ - /* SVGA3DREG_TEXTURE */ /* PS */ - if (di->is_ps) - debug_printf( "t%u", reg.number ); - else - debug_printf( "a%u", reg.number ); - break; - - case SVGA3DREG_RASTOUT: - switch (reg.number) { - case 0 /*POSITION*/: - debug_printf( "oPos" ); - break; - case 1 /*FOG*/: - debug_printf( "oFog" ); - break; - case 2 /*POINT_SIZE*/: - debug_printf( "oPts" ); - break; - default: - assert( 0 ); - debug_printf( "???" ); - } - break; - - case SVGA3DREG_ATTROUT: - assert( reg.number < 2 ); - debug_printf( "oD%u", reg.number ); - break; - - case SVGA3DREG_TEXCRDOUT: - /* SVGA3DREG_OUTPUT */ - debug_printf( "oT%u", reg.number ); - break; - - case SVGA3DREG_COLOROUT: - debug_printf( "oC%u", reg.number ); - break; - - case SVGA3DREG_DEPTHOUT: - debug_printf( "oD%u", reg.number ); - break; - - case SVGA3DREG_SAMPLER: - debug_printf( "s%u", reg.number ); - break; - - case SVGA3DREG_CONSTBOOL: - assert( !reg.relative ); - debug_printf( "b%u", reg.number ); - break; - - case SVGA3DREG_CONSTINT: - assert( !reg.relative ); - debug_printf( "i%u", reg.number ); - break; - - case SVGA3DREG_LOOP: - assert( reg.number == 0 ); - debug_printf( "aL" ); - break; - - case SVGA3DREG_MISCTYPE: - switch (reg.number) { - case SVGA3DMISCREG_POSITION: - debug_printf( "vPos" ); - break; - case SVGA3DMISCREG_FACE: - debug_printf( "vFace" ); - break; - default: - assert(0); - break; - } - break; - - case SVGA3DREG_LABEL: - debug_printf( "l%u", reg.number ); - break; - - case SVGA3DREG_PREDICATE: - debug_printf( "p%u", reg.number ); - break; - - - default: - assert( 0 ); - debug_printf( "???" ); - } -} - -static void dump_cdata( struct sh_cdata cdata ) -{ - debug_printf( "%f, %f, %f, %f", cdata.xyzw[0], cdata.xyzw[1], cdata.xyzw[2], cdata.xyzw[3] ); -} - -static void dump_idata( struct sh_idata idata ) -{ - debug_printf( "%d, %d, %d, %d", idata.xyzw[0], idata.xyzw[1], idata.xyzw[2], idata.xyzw[3] ); -} - -static void dump_bdata( boolean bdata ) -{ - debug_printf( bdata ? "TRUE" : "FALSE" ); -} - -static void dump_sampleinfo( struct ps_sampleinfo sampleinfo ) -{ - switch (sampleinfo.texture_type) { - case SVGA3DSAMP_2D: - debug_printf( "_2d" ); - break; - case SVGA3DSAMP_CUBE: - debug_printf( "_cube" ); - break; - case SVGA3DSAMP_VOLUME: - debug_printf( "_volume" ); - break; - default: - assert( 0 ); - } -} - - -static void dump_usageinfo( struct vs_semantic semantic ) -{ - switch (semantic.usage) { - case SVGA3D_DECLUSAGE_POSITION: - debug_printf("_position" ); - break; - case SVGA3D_DECLUSAGE_BLENDWEIGHT: - debug_printf("_blendweight" ); - break; - case SVGA3D_DECLUSAGE_BLENDINDICES: - debug_printf("_blendindices" ); - break; - case SVGA3D_DECLUSAGE_NORMAL: - debug_printf("_normal" ); - break; - case SVGA3D_DECLUSAGE_PSIZE: - debug_printf("_psize" ); - break; - case SVGA3D_DECLUSAGE_TEXCOORD: - debug_printf("_texcoord"); - break; - case SVGA3D_DECLUSAGE_TANGENT: - debug_printf("_tangent" ); - break; - case SVGA3D_DECLUSAGE_BINORMAL: - debug_printf("_binormal" ); - break; - case SVGA3D_DECLUSAGE_TESSFACTOR: - debug_printf("_tessfactor" ); - break; - case SVGA3D_DECLUSAGE_POSITIONT: - debug_printf("_positiont" ); - break; - case SVGA3D_DECLUSAGE_COLOR: - debug_printf("_color" ); - break; - case SVGA3D_DECLUSAGE_FOG: - debug_printf("_fog" ); - break; - case SVGA3D_DECLUSAGE_DEPTH: - debug_printf("_depth" ); - break; - case SVGA3D_DECLUSAGE_SAMPLE: - debug_printf("_sample"); - break; - default: - assert( 0 ); - return; - } - - if (semantic.usage_index != 0) { - debug_printf("%d", semantic.usage_index ); - } -} - -static void dump_dstreg( struct sh_dstreg dstreg, const struct dump_info *di ) -{ - union { - struct sh_reg reg; - struct sh_dstreg dstreg; - } u; - - assert( (dstreg.modifier & (SVGA3DDSTMOD_SATURATE | SVGA3DDSTMOD_PARTIALPRECISION)) == dstreg.modifier ); - - if (dstreg.modifier & SVGA3DDSTMOD_SATURATE) - debug_printf( "_sat" ); - if (dstreg.modifier & SVGA3DDSTMOD_PARTIALPRECISION) - debug_printf( "_pp" ); - switch (dstreg.shift_scale) { - case 0: - break; - case 1: - debug_printf( "_x2" ); - break; - case 2: - debug_printf( "_x4" ); - break; - case 3: - debug_printf( "_x8" ); - break; - case 13: - debug_printf( "_d8" ); - break; - case 14: - debug_printf( "_d4" ); - break; - case 15: - debug_printf( "_d2" ); - break; - default: - assert( 0 ); - } - debug_printf( " " ); - - u.dstreg = dstreg; - dump_reg( u.reg, NULL, di ); - if (dstreg.write_mask != SVGA3DWRITEMASK_ALL) { - debug_printf( "." ); - if (dstreg.write_mask & SVGA3DWRITEMASK_0) - debug_printf( "x" ); - if (dstreg.write_mask & SVGA3DWRITEMASK_1) - debug_printf( "y" ); - if (dstreg.write_mask & SVGA3DWRITEMASK_2) - debug_printf( "z" ); - if (dstreg.write_mask & SVGA3DWRITEMASK_3) - debug_printf( "w" ); - } -} - -static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, const struct dump_info *di ) -{ - union { - struct sh_reg reg; - struct sh_srcreg srcreg; - } u; - - switch (srcreg.modifier) { - case SVGA3DSRCMOD_NEG: - case SVGA3DSRCMOD_BIASNEG: - case SVGA3DSRCMOD_SIGNNEG: - case SVGA3DSRCMOD_X2NEG: - debug_printf( "-" ); - break; - case SVGA3DSRCMOD_ABS: - debug_printf( "|" ); - break; - case SVGA3DSRCMOD_ABSNEG: - debug_printf( "-|" ); - break; - case SVGA3DSRCMOD_COMP: - debug_printf( "1-" ); - break; - case SVGA3DSRCMOD_NOT: - debug_printf( "!" ); - } - - u.srcreg = srcreg; - dump_reg( u.reg, indreg, di ); - switch (srcreg.modifier) { - case SVGA3DSRCMOD_NONE: - case SVGA3DSRCMOD_NEG: - case SVGA3DSRCMOD_COMP: - case SVGA3DSRCMOD_NOT: - break; - case SVGA3DSRCMOD_ABS: - case SVGA3DSRCMOD_ABSNEG: - debug_printf( "|" ); - break; - case SVGA3DSRCMOD_BIAS: - case SVGA3DSRCMOD_BIASNEG: - debug_printf( "_bias" ); - break; - case SVGA3DSRCMOD_SIGN: - case SVGA3DSRCMOD_SIGNNEG: - debug_printf( "_bx2" ); - break; - case SVGA3DSRCMOD_X2: - case SVGA3DSRCMOD_X2NEG: - debug_printf( "_x2" ); - break; - case SVGA3DSRCMOD_DZ: - debug_printf( "_dz" ); - break; - case SVGA3DSRCMOD_DW: - debug_printf( "_dw" ); - break; - default: - assert( 0 ); - } - if (srcreg.swizzle_x != 0 || srcreg.swizzle_y != 1 || srcreg.swizzle_z != 2 || srcreg.swizzle_w != 3) { - debug_printf( "." ); - if (srcreg.swizzle_x == srcreg.swizzle_y && srcreg.swizzle_y == srcreg.swizzle_z && srcreg.swizzle_z == srcreg.swizzle_w) { - debug_printf( "%c", "xyzw"[srcreg.swizzle_x] ); - } - else { - debug_printf( "%c", "xyzw"[srcreg.swizzle_x] ); - debug_printf( "%c", "xyzw"[srcreg.swizzle_y] ); - debug_printf( "%c", "xyzw"[srcreg.swizzle_z] ); - debug_printf( "%c", "xyzw"[srcreg.swizzle_w] ); - } - } -} - -void -sh_svga_dump( - const unsigned *assem, - unsigned dwords, - unsigned do_binary ) -{ - const unsigned *start = assem; - boolean finished = FALSE; - struct dump_info di; - unsigned i; - - if (do_binary) { - for (i = 0; i < dwords; i++) - debug_printf(" 0x%08x,\n", assem[i]); - - debug_printf("\n\n"); - } - - di.version.value = *assem++; - di.is_ps = (di.version.type == SVGA3D_PS_TYPE); - - debug_printf( - "%s_%u_%u\n", - di.is_ps ? "ps" : "vs", - di.version.major, - di.version.minor ); - - while (!finished) { - struct sh_op op = *(struct sh_op *) assem; - - if (assem - start >= dwords) { - debug_printf("... ran off end of buffer\n"); - assert(0); - return; - } - - switch (op.opcode) { - case SVGA3DOP_DCL: - { - struct sh_dcl dcl = *(struct sh_dcl *) assem; - - debug_printf( "dcl" ); - if (sh_dstreg_type( dcl.reg ) == SVGA3DREG_SAMPLER) - dump_sampleinfo( dcl.u.ps.sampleinfo ); - else if (di.is_ps) { - if (di.version.major == 3 && - sh_dstreg_type( dcl.reg ) != SVGA3DREG_MISCTYPE) - dump_usageinfo( dcl.u.vs.semantic ); - } - else - dump_usageinfo( dcl.u.vs.semantic ); - dump_dstreg( dcl.reg, &di ); - debug_printf( "\n" ); - assem += sizeof( struct sh_dcl ) / sizeof( unsigned ); - } - break; - - case SVGA3DOP_DEFB: - { - struct sh_defb defb = *(struct sh_defb *) assem; - - debug_printf( "defb " ); - dump_reg( defb.reg, NULL, &di ); - debug_printf( ", " ); - dump_bdata( defb.data ); - debug_printf( "\n" ); - assem += sizeof( struct sh_defb ) / sizeof( unsigned ); - } - break; - - case SVGA3DOP_DEFI: - { - struct sh_defi defi = *(struct sh_defi *) assem; - - debug_printf( "defi " ); - dump_reg( defi.reg, NULL, &di ); - debug_printf( ", " ); - dump_idata( defi.idata ); - debug_printf( "\n" ); - assem += sizeof( struct sh_defi ) / sizeof( unsigned ); - } - break; - - case SVGA3DOP_TEXCOORD: - assert( di.is_ps ); - dump_op( op, "texcoord" ); - if (0) { - struct sh_dstop dstop = *(struct sh_dstop *) assem; - dump_dstreg( dstop.dst, &di ); - assem += sizeof( struct sh_dstop ) / sizeof( unsigned ); - } - else { - struct sh_unaryop unaryop = *(struct sh_unaryop *) assem; - dump_dstreg( unaryop.dst, &di ); - debug_printf( ", " ); - dump_srcreg( unaryop.src, NULL, &di ); - assem += sizeof( struct sh_unaryop ) / sizeof( unsigned ); - } - debug_printf( "\n" ); - break; - - case SVGA3DOP_TEX: - assert( di.is_ps ); - if (0) { - dump_op( op, "tex" ); - if (0) { - struct sh_dstop dstop = *(struct sh_dstop *) assem; - - dump_dstreg( dstop.dst, &di ); - assem += sizeof( struct sh_dstop ) / sizeof( unsigned ); - } - else { - struct sh_unaryop unaryop = *(struct sh_unaryop *) assem; - - dump_dstreg( unaryop.dst, &di ); - debug_printf( ", " ); - dump_srcreg( unaryop.src, NULL, &di ); - assem += sizeof( struct sh_unaryop ) / sizeof( unsigned ); - } - } - else { - struct sh_binaryop binaryop = *(struct sh_binaryop *) assem; - - dump_op( op, "texld" ); - dump_dstreg( binaryop.dst, &di ); - debug_printf( ", " ); - dump_srcreg( binaryop.src0, NULL, &di ); - debug_printf( ", " ); - dump_srcreg( binaryop.src1, NULL, &di ); - assem += sizeof( struct sh_binaryop ) / sizeof( unsigned ); - } - debug_printf( "\n" ); - break; - - case SVGA3DOP_DEF: - { - struct sh_def def = *(struct sh_def *) assem; - - debug_printf( "def " ); - dump_reg( def.reg, NULL, &di ); - debug_printf( ", " ); - dump_cdata( def.cdata ); - debug_printf( "\n" ); - assem += sizeof( struct sh_def ) / sizeof( unsigned ); - } - break; - - case SVGA3DOP_PHASE: - debug_printf( "phase\n" ); - assem += sizeof( struct sh_op ) / sizeof( unsigned ); - break; - - case SVGA3DOP_COMMENT: - assert( 0 ); - break; - - case SVGA3DOP_RET: - debug_printf( "ret\n" ); - assem += sizeof( struct sh_op ) / sizeof( unsigned ); - break; - - case SVGA3DOP_END: - debug_printf( "end\n" ); - finished = TRUE; - break; - - default: - { - const struct sh_opcode_info *info = sh_svga_opcode_info( op.opcode ); - uint i; - uint num_src = info->num_src + op.predicated; - boolean not_first_arg = FALSE; - - assert( info->num_dst <= 1 ); - - if (op.opcode == SVGA3DOP_SINCOS && di.version.major < 3) - num_src += 2; - - dump_comp_op( op, info->mnemonic ); - assem += sizeof( struct sh_op ) / sizeof( unsigned ); - - if (info->num_dst > 0) { - struct sh_dstreg dstreg = *(struct sh_dstreg *) assem; - - dump_dstreg( dstreg, &di ); - assem += sizeof( struct sh_dstreg ) / sizeof( unsigned ); - not_first_arg = TRUE; - } - - for (i = 0; i < num_src; i++) { - struct sh_srcreg srcreg; - struct sh_srcreg indreg; - - srcreg = *(struct sh_srcreg *) assem; - assem += sizeof( struct sh_srcreg ) / sizeof( unsigned ); - if (srcreg.relative && !di.is_ps && di.version.major >= 2) { - indreg = *(struct sh_srcreg *) assem; - assem += sizeof( struct sh_srcreg ) / sizeof( unsigned ); - } - - if (not_first_arg) - debug_printf( ", " ); - else - debug_printf( " " ); - dump_srcreg( srcreg, &indreg, &di ); - not_first_arg = TRUE; - } - - debug_printf( "\n" ); - } - } - } -} diff --git a/src/gallium/drivers/svga/svgadump/st_shader_dump.h b/src/gallium/drivers/svga/svgadump/st_shader_dump.h deleted file mode 100644 index af5549cdba..0000000000 --- a/src/gallium/drivers/svga/svgadump/st_shader_dump.h +++ /dev/null @@ -1,42 +0,0 @@ -/********************************************************** - * Copyright 2008-2009 VMware, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **********************************************************/ - -/** - * @file - * SVGA Shader Dump Facilities - * - * @author Michal Krol - */ - -#ifndef ST_SHADER_SVGA_DUMP_H -#define ST_SHADER_SVGA_DUMP_H - -void -sh_svga_dump( - const unsigned *assem, - unsigned dwords, - unsigned do_binary ); - -#endif /* ST_SHADER_SVGA_DUMP_H */ diff --git a/src/gallium/drivers/svga/svgadump/st_shader_op.c b/src/gallium/drivers/svga/svgadump/st_shader_op.c deleted file mode 100644 index 2c05382ab9..0000000000 --- a/src/gallium/drivers/svga/svgadump/st_shader_op.c +++ /dev/null @@ -1,168 +0,0 @@ -/********************************************************** - * Copyright 2008-2009 VMware, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **********************************************************/ - -/** - * @file - * SVGA Shader Token Opcode Info - * - * @author Michal Krol - */ - -#include "util/u_debug.h" -#include "st_shader_op.h" - -#include "../svga_hw_reg.h" -#include "svga3d_shaderdefs.h" - -#define SVGA3DOP_INVALID SVGA3DOP_END -#define TGSI_OPCODE_INVALID TGSI_OPCODE_LAST - -static struct sh_opcode_info opcode_info[] = -{ - { "nop", 0, 0, SVGA3DOP_NOP }, - { "mov", 1, 1, SVGA3DOP_MOV, }, - { "add", 1, 2, SVGA3DOP_ADD, }, - { "sub", 1, 2, SVGA3DOP_SUB, }, - { "mad", 1, 3, SVGA3DOP_MAD, }, - { "mul", 1, 2, SVGA3DOP_MUL, }, - { "rcp", 1, 1, SVGA3DOP_RCP, }, - { "rsq", 1, 1, SVGA3DOP_RSQ, }, - { "dp3", 1, 2, SVGA3DOP_DP3, }, - { "dp4", 1, 2, SVGA3DOP_DP4, }, - { "min", 1, 2, SVGA3DOP_MIN, }, - { "max", 1, 2, SVGA3DOP_MAX, }, - { "slt", 1, 2, SVGA3DOP_SLT, }, - { "sge", 1, 2, SVGA3DOP_SGE, }, - { "exp", 1, 1, SVGA3DOP_EXP, }, - { "log", 1, 1, SVGA3DOP_LOG, }, - { "lit", 1, 1, SVGA3DOP_LIT, }, - { "dst", 1, 2, SVGA3DOP_DST, }, - { "lrp", 1, 3, SVGA3DOP_LRP, }, - { "frc", 1, 1, SVGA3DOP_FRC, }, - { "m4x4", 1, 2, SVGA3DOP_M4x4, }, - { "m4x3", 1, 2, SVGA3DOP_M4x3, }, - { "m3x4", 1, 2, SVGA3DOP_M3x4, }, - { "m3x3", 1, 2, SVGA3DOP_M3x3, }, - { "m3x2", 1, 2, SVGA3DOP_M3x2, }, - { "call", 0, 1, SVGA3DOP_CALL, }, - { "callnz", 0, 2, SVGA3DOP_CALLNZ, }, - { "loop", 0, 2, SVGA3DOP_LOOP, }, - { "ret", 0, 0, SVGA3DOP_RET, }, - { "endloop", 0, 0, SVGA3DOP_ENDLOOP, }, - { "label", 0, 1, SVGA3DOP_LABEL, }, - { "dcl", 0, 0, SVGA3DOP_DCL, }, - { "pow", 1, 2, SVGA3DOP_POW, }, - { "crs", 1, 2, SVGA3DOP_CRS, }, - { "sgn", 1, 3, SVGA3DOP_SGN, }, - { "abs", 1, 1, SVGA3DOP_ABS, }, - { "nrm", 1, 1, SVGA3DOP_NRM, }, /* 3-componenet normalization */ - { "sincos", 1, 1, SVGA3DOP_SINCOS, }, - { "rep", 0, 1, SVGA3DOP_REP, }, - { "endrep", 0, 0, SVGA3DOP_ENDREP, }, - { "if", 0, 1, SVGA3DOP_IF, }, - { "ifc", 0, 2, SVGA3DOP_IFC, }, - { "else", 0, 0, SVGA3DOP_ELSE, }, - { "endif", 0, 0, SVGA3DOP_ENDIF, }, - { "break", 0, 0, SVGA3DOP_BREAK, }, - { "breakc", 0, 0, SVGA3DOP_BREAKC, }, - { "mova", 1, 1, SVGA3DOP_MOVA, }, - { "defb", 0, 0, SVGA3DOP_DEFB, }, - { "defi", 0, 0, SVGA3DOP_DEFI, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "???", 0, 0, SVGA3DOP_INVALID, }, - { "texcoord", 0, 0, SVGA3DOP_TEXCOORD, }, - { "texkill", 1, 0, SVGA3DOP_TEXKILL, }, - { "tex", 0, 0, SVGA3DOP_TEX, }, - { "texbem", 1, 1, SVGA3DOP_TEXBEM, }, - { "texbeml", 1, 1, SVGA3DOP_TEXBEML, }, - { "texreg2ar", 1, 1, SVGA3DOP_TEXREG2AR, }, - { "texreg2gb", 1, 1, SVGA3DOP_TEXREG2GB, }, - { "texm3x2pad", 1, 1, SVGA3DOP_TEXM3x2PAD, }, - { "texm3x2tex", 1, 1, SVGA3DOP_TEXM3x2TEX, }, - { "texm3x3pad", 1, 1, SVGA3DOP_TEXM3x3PAD, }, - { "texm3x3tex", 1, 1, SVGA3DOP_TEXM3x3TEX, }, - { "reserved0", 0, 0, SVGA3DOP_RESERVED0, }, - { "texm3x3spec", 1, 2, SVGA3DOP_TEXM3x3SPEC, }, - { "texm3x3vspec", 1, 1, SVGA3DOP_TEXM3x3VSPEC,}, - { "expp", 1, 1, SVGA3DOP_EXPP, }, - { "logp", 1, 1, SVGA3DOP_LOGP, }, - { "cnd", 1, 3, SVGA3DOP_CND, }, - { "def", 0, 0, SVGA3DOP_DEF, }, - { "texreg2rgb", 1, 1, SVGA3DOP_TEXREG2RGB, }, - { "texdp3tex", 1, 1, SVGA3DOP_TEXDP3TEX, }, - { "texm3x2depth", 1, 1, SVGA3DOP_TEXM3x2DEPTH,}, - { "texdp3", 1, 1, SVGA3DOP_TEXDP3, }, - { "texm3x3", 1, 1, SVGA3DOP_TEXM3x3, }, - { "texdepth", 1, 0, SVGA3DOP_TEXDEPTH, }, - { "cmp", 1, 3, SVGA3DOP_CMP, }, - { "bem", 1, 2, SVGA3DOP_BEM, }, - { "dp2add", 1, 3, SVGA3DOP_DP2ADD, }, - { "dsx", 1, 1, SVGA3DOP_INVALID, }, - { "dsy", 1, 1, SVGA3DOP_INVALID, }, - { "texldd", 1, 1, SVGA3DOP_INVALID, }, - { "setp", 1, 2, SVGA3DOP_SETP, }, - { "texldl", 1, 1, SVGA3DOP_INVALID, }, - { "breakp", 1, 1, SVGA3DOP_INVALID, }, -}; - -const struct sh_opcode_info *sh_svga_opcode_info( uint op ) -{ - struct sh_opcode_info *info; - - if (op >= sizeof( opcode_info ) / sizeof( opcode_info[0] )) { - /* The opcode is either PHASE, COMMENT, END or out of range. - */ - assert( 0 ); - return NULL; - } - - info = &opcode_info[op]; - - if (info->svga_opcode == SVGA3DOP_INVALID) { - /* No valid information. Please provide number of dst/src registers. - */ - assert( 0 ); - return NULL; - } - - /* Sanity check. - */ - assert( op == info->svga_opcode ); - - return info; -} diff --git a/src/gallium/drivers/svga/svgadump/st_shader_op.h b/src/gallium/drivers/svga/svgadump/st_shader_op.h deleted file mode 100644 index 01d39dca84..0000000000 --- a/src/gallium/drivers/svga/svgadump/st_shader_op.h +++ /dev/null @@ -1,46 +0,0 @@ -/********************************************************** - * Copyright 2008-2009 VMware, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **********************************************************/ - -/** - * @file - * SVGA Shader Token Opcode Info - * - * @author Michal Krol - */ - -#ifndef ST_SHADER_SVGA_OP_H -#define ST_SHADER_SVGA_OP_H - -struct sh_opcode_info -{ - const char *mnemonic; - unsigned num_dst:8; - unsigned num_src:8; - unsigned svga_opcode:16; -}; - -const struct sh_opcode_info *sh_svga_opcode_info( unsigned op ); - -#endif /* ST_SHADER_SVGA_OP_H */ diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c index 180dde8dc1..c6c353f58e 100644 --- a/src/gallium/drivers/svga/svgadump/svga_dump.c +++ b/src/gallium/drivers/svga/svgadump/svga_dump.c @@ -31,7 +31,7 @@ */ #include "svga_types.h" -#include "st_shader_dump.h" +#include "svga_shader_dump.h" #include "svga3d_reg.h" #include "util/u_debug.h" diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.py b/src/gallium/drivers/svga/svgadump/svga_dump.py index 3cb29c395b..288e753296 100755 --- a/src/gallium/drivers/svga/svgadump/svga_dump.py +++ b/src/gallium/drivers/svga/svgadump/svga_dump.py @@ -291,7 +291,7 @@ def main(): print ' */' print print '#include "svga_types.h"' - print '#include "shader_dump/st_shader_dump.h"' + print '#include "svga_shader_dump.h"' print '#include "svga3d_reg.h"' print print '#include "pipe/p_debug.h"' diff --git a/src/gallium/drivers/svga/svgadump/svga_shader.h b/src/gallium/drivers/svga/svgadump/svga_shader.h new file mode 100644 index 0000000000..2fc1796a90 --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/svga_shader.h @@ -0,0 +1,214 @@ +/********************************************************** + * Copyright 2007-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * SVGA Shader Token Definitions + * + * @author Michal Krol + */ + +#ifndef ST_SHADER_SVGA_H +#define ST_SHADER_SVGA_H + +#include "pipe/p_compiler.h" + +struct sh_op +{ + unsigned opcode:16; + unsigned control:8; + unsigned length:4; + unsigned predicated:1; + unsigned unused:1; + unsigned coissue:1; + unsigned is_reg:1; +}; + +struct sh_reg +{ + unsigned number:11; + unsigned type_hi:2; + unsigned relative:1; + unsigned unused:14; + unsigned type_lo:3; + unsigned is_reg:1; +}; + +static INLINE unsigned +sh_reg_type( struct sh_reg reg ) +{ + return reg.type_lo | (reg.type_hi << 3); +} + +struct sh_cdata +{ + float xyzw[4]; +}; + +struct sh_def +{ + struct sh_op op; + struct sh_reg reg; + struct sh_cdata cdata; +}; + +struct sh_defb +{ + struct sh_op op; + struct sh_reg reg; + uint data; +}; + +struct sh_idata +{ + int xyzw[4]; +}; + +struct sh_defi +{ + struct sh_op op; + struct sh_reg reg; + struct sh_idata idata; +}; + +#define PS_TEXTURETYPE_UNKNOWN SVGA3DSAMP_UNKNOWN +#define PS_TEXTURETYPE_2D SVGA3DSAMP_2D +#define PS_TEXTURETYPE_CUBE SVGA3DSAMP_CUBE +#define PS_TEXTURETYPE_VOLUME SVGA3DSAMP_VOLUME + +struct ps_sampleinfo +{ + unsigned unused:27; + unsigned texture_type:4; + unsigned is_reg:1; +}; + +struct vs_semantic +{ + unsigned usage:5; + unsigned unused1:11; + unsigned usage_index:4; + unsigned unused2:12; +}; + +struct sh_dstreg +{ + unsigned number:11; + unsigned type_hi:2; + unsigned relative:1; + unsigned unused:2; + unsigned write_mask:4; + unsigned modifier:4; + unsigned shift_scale:4; + unsigned type_lo:3; + unsigned is_reg:1; +}; + +static INLINE unsigned +sh_dstreg_type( struct sh_dstreg reg ) +{ + return reg.type_lo | (reg.type_hi << 3); +} + +struct sh_dcl +{ + struct sh_op op; + union { + struct { + struct ps_sampleinfo sampleinfo; + } ps; + struct { + struct vs_semantic semantic; + } vs; + } u; + struct sh_dstreg reg; +}; + + +struct sh_srcreg +{ + unsigned number:11; + unsigned type_hi:2; + unsigned relative:1; + unsigned unused:2; + unsigned swizzle_x:2; + unsigned swizzle_y:2; + unsigned swizzle_z:2; + unsigned swizzle_w:2; + unsigned modifier:4; + unsigned type_lo:3; + unsigned is_reg:1; +}; + +static INLINE unsigned +sh_srcreg_type( struct sh_srcreg reg ) +{ + return reg.type_lo | (reg.type_hi << 3); +} + +struct sh_dstop +{ + struct sh_op op; + struct sh_dstreg dst; +}; + +struct sh_srcop +{ + struct sh_op op; + struct sh_srcreg src; +}; + +struct sh_src2op +{ + struct sh_op op; + struct sh_srcreg src0; + struct sh_srcreg src1; +}; + +struct sh_unaryop +{ + struct sh_op op; + struct sh_dstreg dst; + struct sh_srcreg src; +}; + +struct sh_binaryop +{ + struct sh_op op; + struct sh_dstreg dst; + struct sh_srcreg src0; + struct sh_srcreg src1; +}; + +struct sh_trinaryop +{ + struct sh_op op; + struct sh_dstreg dst; + struct sh_srcreg src0; + struct sh_srcreg src1; + struct sh_srcreg src2; +}; + +#endif /* ST_SHADER_SVGA_H */ diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c new file mode 100644 index 0000000000..c654126d3a --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c @@ -0,0 +1,649 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * SVGA Shader Dump Facilities + * + * @author Michal Krol + */ + +#include "svga_shader.h" +#include "svga_shader_dump.h" +#include "svga_shader_op.h" +#include "util/u_debug.h" + +#include "../svga_hw_reg.h" +#include "svga3d_shaderdefs.h" + +struct dump_info +{ + SVGA3dShaderVersion version; + boolean is_ps; +}; + +static void dump_op( struct sh_op op, const char *mnemonic ) +{ + assert( op.predicated == 0 ); + assert( op.is_reg == 0 ); + + if (op.coissue) + debug_printf( "+" ); + debug_printf( "%s", mnemonic ); + switch (op.control) { + case 0: + break; + case SVGA3DOPCONT_PROJECT: + debug_printf( "p" ); + break; + case SVGA3DOPCONT_BIAS: + debug_printf( "b" ); + break; + default: + assert( 0 ); + } +} + + +static void dump_comp_op( struct sh_op op, const char *mnemonic ) +{ + assert( op.is_reg == 0 ); + + if (op.coissue) + debug_printf( "+" ); + debug_printf( "%s", mnemonic ); + switch (op.control) { + case SVGA3DOPCOMP_RESERVED0: + break; + case SVGA3DOPCOMP_GT: + debug_printf("_gt"); + break; + case SVGA3DOPCOMP_EQ: + debug_printf("_eq"); + break; + case SVGA3DOPCOMP_GE: + debug_printf("_ge"); + break; + case SVGA3DOPCOMP_LT: + debug_printf("_lt"); + break; + case SVGA3DOPCOMPC_NE: + debug_printf("_ne"); + break; + case SVGA3DOPCOMP_LE: + debug_printf("_le"); + break; + case SVGA3DOPCOMP_RESERVED1: + default: + assert( 0 ); + } +} + + +static void dump_reg( struct sh_reg reg, struct sh_srcreg *indreg, const struct dump_info *di ) +{ + assert( sh_reg_type( reg ) == SVGA3DREG_CONST || reg.relative == 0 ); + assert( reg.is_reg == 1 ); + + switch (sh_reg_type( reg )) { + case SVGA3DREG_TEMP: + debug_printf( "r%u", reg.number ); + break; + + case SVGA3DREG_INPUT: + debug_printf( "v%u", reg.number ); + break; + + case SVGA3DREG_CONST: + if (reg.relative) { + if (sh_srcreg_type( *indreg ) == SVGA3DREG_LOOP) + debug_printf( "c[aL+%u]", reg.number ); + else + debug_printf( "c[a%u.x+%u]", indreg->number, reg.number ); + } + else + debug_printf( "c%u", reg.number ); + break; + + case SVGA3DREG_ADDR: /* VS */ + /* SVGA3DREG_TEXTURE */ /* PS */ + if (di->is_ps) + debug_printf( "t%u", reg.number ); + else + debug_printf( "a%u", reg.number ); + break; + + case SVGA3DREG_RASTOUT: + switch (reg.number) { + case 0 /*POSITION*/: + debug_printf( "oPos" ); + break; + case 1 /*FOG*/: + debug_printf( "oFog" ); + break; + case 2 /*POINT_SIZE*/: + debug_printf( "oPts" ); + break; + default: + assert( 0 ); + debug_printf( "???" ); + } + break; + + case SVGA3DREG_ATTROUT: + assert( reg.number < 2 ); + debug_printf( "oD%u", reg.number ); + break; + + case SVGA3DREG_TEXCRDOUT: + /* SVGA3DREG_OUTPUT */ + debug_printf( "oT%u", reg.number ); + break; + + case SVGA3DREG_COLOROUT: + debug_printf( "oC%u", reg.number ); + break; + + case SVGA3DREG_DEPTHOUT: + debug_printf( "oD%u", reg.number ); + break; + + case SVGA3DREG_SAMPLER: + debug_printf( "s%u", reg.number ); + break; + + case SVGA3DREG_CONSTBOOL: + assert( !reg.relative ); + debug_printf( "b%u", reg.number ); + break; + + case SVGA3DREG_CONSTINT: + assert( !reg.relative ); + debug_printf( "i%u", reg.number ); + break; + + case SVGA3DREG_LOOP: + assert( reg.number == 0 ); + debug_printf( "aL" ); + break; + + case SVGA3DREG_MISCTYPE: + switch (reg.number) { + case SVGA3DMISCREG_POSITION: + debug_printf( "vPos" ); + break; + case SVGA3DMISCREG_FACE: + debug_printf( "vFace" ); + break; + default: + assert(0); + break; + } + break; + + case SVGA3DREG_LABEL: + debug_printf( "l%u", reg.number ); + break; + + case SVGA3DREG_PREDICATE: + debug_printf( "p%u", reg.number ); + break; + + + default: + assert( 0 ); + debug_printf( "???" ); + } +} + +static void dump_cdata( struct sh_cdata cdata ) +{ + debug_printf( "%f, %f, %f, %f", cdata.xyzw[0], cdata.xyzw[1], cdata.xyzw[2], cdata.xyzw[3] ); +} + +static void dump_idata( struct sh_idata idata ) +{ + debug_printf( "%d, %d, %d, %d", idata.xyzw[0], idata.xyzw[1], idata.xyzw[2], idata.xyzw[3] ); +} + +static void dump_bdata( boolean bdata ) +{ + debug_printf( bdata ? "TRUE" : "FALSE" ); +} + +static void dump_sampleinfo( struct ps_sampleinfo sampleinfo ) +{ + switch (sampleinfo.texture_type) { + case SVGA3DSAMP_2D: + debug_printf( "_2d" ); + break; + case SVGA3DSAMP_CUBE: + debug_printf( "_cube" ); + break; + case SVGA3DSAMP_VOLUME: + debug_printf( "_volume" ); + break; + default: + assert( 0 ); + } +} + + +static void dump_usageinfo( struct vs_semantic semantic ) +{ + switch (semantic.usage) { + case SVGA3D_DECLUSAGE_POSITION: + debug_printf("_position" ); + break; + case SVGA3D_DECLUSAGE_BLENDWEIGHT: + debug_printf("_blendweight" ); + break; + case SVGA3D_DECLUSAGE_BLENDINDICES: + debug_printf("_blendindices" ); + break; + case SVGA3D_DECLUSAGE_NORMAL: + debug_printf("_normal" ); + break; + case SVGA3D_DECLUSAGE_PSIZE: + debug_printf("_psize" ); + break; + case SVGA3D_DECLUSAGE_TEXCOORD: + debug_printf("_texcoord"); + break; + case SVGA3D_DECLUSAGE_TANGENT: + debug_printf("_tangent" ); + break; + case SVGA3D_DECLUSAGE_BINORMAL: + debug_printf("_binormal" ); + break; + case SVGA3D_DECLUSAGE_TESSFACTOR: + debug_printf("_tessfactor" ); + break; + case SVGA3D_DECLUSAGE_POSITIONT: + debug_printf("_positiont" ); + break; + case SVGA3D_DECLUSAGE_COLOR: + debug_printf("_color" ); + break; + case SVGA3D_DECLUSAGE_FOG: + debug_printf("_fog" ); + break; + case SVGA3D_DECLUSAGE_DEPTH: + debug_printf("_depth" ); + break; + case SVGA3D_DECLUSAGE_SAMPLE: + debug_printf("_sample"); + break; + default: + assert( 0 ); + return; + } + + if (semantic.usage_index != 0) { + debug_printf("%d", semantic.usage_index ); + } +} + +static void dump_dstreg( struct sh_dstreg dstreg, const struct dump_info *di ) +{ + union { + struct sh_reg reg; + struct sh_dstreg dstreg; + } u; + + assert( (dstreg.modifier & (SVGA3DDSTMOD_SATURATE | SVGA3DDSTMOD_PARTIALPRECISION)) == dstreg.modifier ); + + if (dstreg.modifier & SVGA3DDSTMOD_SATURATE) + debug_printf( "_sat" ); + if (dstreg.modifier & SVGA3DDSTMOD_PARTIALPRECISION) + debug_printf( "_pp" ); + switch (dstreg.shift_scale) { + case 0: + break; + case 1: + debug_printf( "_x2" ); + break; + case 2: + debug_printf( "_x4" ); + break; + case 3: + debug_printf( "_x8" ); + break; + case 13: + debug_printf( "_d8" ); + break; + case 14: + debug_printf( "_d4" ); + break; + case 15: + debug_printf( "_d2" ); + break; + default: + assert( 0 ); + } + debug_printf( " " ); + + u.dstreg = dstreg; + dump_reg( u.reg, NULL, di ); + if (dstreg.write_mask != SVGA3DWRITEMASK_ALL) { + debug_printf( "." ); + if (dstreg.write_mask & SVGA3DWRITEMASK_0) + debug_printf( "x" ); + if (dstreg.write_mask & SVGA3DWRITEMASK_1) + debug_printf( "y" ); + if (dstreg.write_mask & SVGA3DWRITEMASK_2) + debug_printf( "z" ); + if (dstreg.write_mask & SVGA3DWRITEMASK_3) + debug_printf( "w" ); + } +} + +static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, const struct dump_info *di ) +{ + union { + struct sh_reg reg; + struct sh_srcreg srcreg; + } u; + + switch (srcreg.modifier) { + case SVGA3DSRCMOD_NEG: + case SVGA3DSRCMOD_BIASNEG: + case SVGA3DSRCMOD_SIGNNEG: + case SVGA3DSRCMOD_X2NEG: + debug_printf( "-" ); + break; + case SVGA3DSRCMOD_ABS: + debug_printf( "|" ); + break; + case SVGA3DSRCMOD_ABSNEG: + debug_printf( "-|" ); + break; + case SVGA3DSRCMOD_COMP: + debug_printf( "1-" ); + break; + case SVGA3DSRCMOD_NOT: + debug_printf( "!" ); + } + + u.srcreg = srcreg; + dump_reg( u.reg, indreg, di ); + switch (srcreg.modifier) { + case SVGA3DSRCMOD_NONE: + case SVGA3DSRCMOD_NEG: + case SVGA3DSRCMOD_COMP: + case SVGA3DSRCMOD_NOT: + break; + case SVGA3DSRCMOD_ABS: + case SVGA3DSRCMOD_ABSNEG: + debug_printf( "|" ); + break; + case SVGA3DSRCMOD_BIAS: + case SVGA3DSRCMOD_BIASNEG: + debug_printf( "_bias" ); + break; + case SVGA3DSRCMOD_SIGN: + case SVGA3DSRCMOD_SIGNNEG: + debug_printf( "_bx2" ); + break; + case SVGA3DSRCMOD_X2: + case SVGA3DSRCMOD_X2NEG: + debug_printf( "_x2" ); + break; + case SVGA3DSRCMOD_DZ: + debug_printf( "_dz" ); + break; + case SVGA3DSRCMOD_DW: + debug_printf( "_dw" ); + break; + default: + assert( 0 ); + } + if (srcreg.swizzle_x != 0 || srcreg.swizzle_y != 1 || srcreg.swizzle_z != 2 || srcreg.swizzle_w != 3) { + debug_printf( "." ); + if (srcreg.swizzle_x == srcreg.swizzle_y && srcreg.swizzle_y == srcreg.swizzle_z && srcreg.swizzle_z == srcreg.swizzle_w) { + debug_printf( "%c", "xyzw"[srcreg.swizzle_x] ); + } + else { + debug_printf( "%c", "xyzw"[srcreg.swizzle_x] ); + debug_printf( "%c", "xyzw"[srcreg.swizzle_y] ); + debug_printf( "%c", "xyzw"[srcreg.swizzle_z] ); + debug_printf( "%c", "xyzw"[srcreg.swizzle_w] ); + } + } +} + +void +sh_svga_dump( + const unsigned *assem, + unsigned dwords, + unsigned do_binary ) +{ + const unsigned *start = assem; + boolean finished = FALSE; + struct dump_info di; + unsigned i; + + if (do_binary) { + for (i = 0; i < dwords; i++) + debug_printf(" 0x%08x,\n", assem[i]); + + debug_printf("\n\n"); + } + + di.version.value = *assem++; + di.is_ps = (di.version.type == SVGA3D_PS_TYPE); + + debug_printf( + "%s_%u_%u\n", + di.is_ps ? "ps" : "vs", + di.version.major, + di.version.minor ); + + while (!finished) { + struct sh_op op = *(struct sh_op *) assem; + + if (assem - start >= dwords) { + debug_printf("... ran off end of buffer\n"); + assert(0); + return; + } + + switch (op.opcode) { + case SVGA3DOP_DCL: + { + struct sh_dcl dcl = *(struct sh_dcl *) assem; + + debug_printf( "dcl" ); + if (sh_dstreg_type( dcl.reg ) == SVGA3DREG_SAMPLER) + dump_sampleinfo( dcl.u.ps.sampleinfo ); + else if (di.is_ps) { + if (di.version.major == 3 && + sh_dstreg_type( dcl.reg ) != SVGA3DREG_MISCTYPE) + dump_usageinfo( dcl.u.vs.semantic ); + } + else + dump_usageinfo( dcl.u.vs.semantic ); + dump_dstreg( dcl.reg, &di ); + debug_printf( "\n" ); + assem += sizeof( struct sh_dcl ) / sizeof( unsigned ); + } + break; + + case SVGA3DOP_DEFB: + { + struct sh_defb defb = *(struct sh_defb *) assem; + + debug_printf( "defb " ); + dump_reg( defb.reg, NULL, &di ); + debug_printf( ", " ); + dump_bdata( defb.data ); + debug_printf( "\n" ); + assem += sizeof( struct sh_defb ) / sizeof( unsigned ); + } + break; + + case SVGA3DOP_DEFI: + { + struct sh_defi defi = *(struct sh_defi *) assem; + + debug_printf( "defi " ); + dump_reg( defi.reg, NULL, &di ); + debug_printf( ", " ); + dump_idata( defi.idata ); + debug_printf( "\n" ); + assem += sizeof( struct sh_defi ) / sizeof( unsigned ); + } + break; + + case SVGA3DOP_TEXCOORD: + assert( di.is_ps ); + dump_op( op, "texcoord" ); + if (0) { + struct sh_dstop dstop = *(struct sh_dstop *) assem; + dump_dstreg( dstop.dst, &di ); + assem += sizeof( struct sh_dstop ) / sizeof( unsigned ); + } + else { + struct sh_unaryop unaryop = *(struct sh_unaryop *) assem; + dump_dstreg( unaryop.dst, &di ); + debug_printf( ", " ); + dump_srcreg( unaryop.src, NULL, &di ); + assem += sizeof( struct sh_unaryop ) / sizeof( unsigned ); + } + debug_printf( "\n" ); + break; + + case SVGA3DOP_TEX: + assert( di.is_ps ); + if (0) { + dump_op( op, "tex" ); + if (0) { + struct sh_dstop dstop = *(struct sh_dstop *) assem; + + dump_dstreg( dstop.dst, &di ); + assem += sizeof( struct sh_dstop ) / sizeof( unsigned ); + } + else { + struct sh_unaryop unaryop = *(struct sh_unaryop *) assem; + + dump_dstreg( unaryop.dst, &di ); + debug_printf( ", " ); + dump_srcreg( unaryop.src, NULL, &di ); + assem += sizeof( struct sh_unaryop ) / sizeof( unsigned ); + } + } + else { + struct sh_binaryop binaryop = *(struct sh_binaryop *) assem; + + dump_op( op, "texld" ); + dump_dstreg( binaryop.dst, &di ); + debug_printf( ", " ); + dump_srcreg( binaryop.src0, NULL, &di ); + debug_printf( ", " ); + dump_srcreg( binaryop.src1, NULL, &di ); + assem += sizeof( struct sh_binaryop ) / sizeof( unsigned ); + } + debug_printf( "\n" ); + break; + + case SVGA3DOP_DEF: + { + struct sh_def def = *(struct sh_def *) assem; + + debug_printf( "def " ); + dump_reg( def.reg, NULL, &di ); + debug_printf( ", " ); + dump_cdata( def.cdata ); + debug_printf( "\n" ); + assem += sizeof( struct sh_def ) / sizeof( unsigned ); + } + break; + + case SVGA3DOP_PHASE: + debug_printf( "phase\n" ); + assem += sizeof( struct sh_op ) / sizeof( unsigned ); + break; + + case SVGA3DOP_COMMENT: + assert( 0 ); + break; + + case SVGA3DOP_RET: + debug_printf( "ret\n" ); + assem += sizeof( struct sh_op ) / sizeof( unsigned ); + break; + + case SVGA3DOP_END: + debug_printf( "end\n" ); + finished = TRUE; + break; + + default: + { + const struct sh_opcode_info *info = sh_svga_opcode_info( op.opcode ); + uint i; + uint num_src = info->num_src + op.predicated; + boolean not_first_arg = FALSE; + + assert( info->num_dst <= 1 ); + + if (op.opcode == SVGA3DOP_SINCOS && di.version.major < 3) + num_src += 2; + + dump_comp_op( op, info->mnemonic ); + assem += sizeof( struct sh_op ) / sizeof( unsigned ); + + if (info->num_dst > 0) { + struct sh_dstreg dstreg = *(struct sh_dstreg *) assem; + + dump_dstreg( dstreg, &di ); + assem += sizeof( struct sh_dstreg ) / sizeof( unsigned ); + not_first_arg = TRUE; + } + + for (i = 0; i < num_src; i++) { + struct sh_srcreg srcreg; + struct sh_srcreg indreg; + + srcreg = *(struct sh_srcreg *) assem; + assem += sizeof( struct sh_srcreg ) / sizeof( unsigned ); + if (srcreg.relative && !di.is_ps && di.version.major >= 2) { + indreg = *(struct sh_srcreg *) assem; + assem += sizeof( struct sh_srcreg ) / sizeof( unsigned ); + } + + if (not_first_arg) + debug_printf( ", " ); + else + debug_printf( " " ); + dump_srcreg( srcreg, &indreg, &di ); + not_first_arg = TRUE; + } + + debug_printf( "\n" ); + } + } + } +} diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.h b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h new file mode 100644 index 0000000000..af5549cdba --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h @@ -0,0 +1,42 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * SVGA Shader Dump Facilities + * + * @author Michal Krol + */ + +#ifndef ST_SHADER_SVGA_DUMP_H +#define ST_SHADER_SVGA_DUMP_H + +void +sh_svga_dump( + const unsigned *assem, + unsigned dwords, + unsigned do_binary ); + +#endif /* ST_SHADER_SVGA_DUMP_H */ diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.c b/src/gallium/drivers/svga/svgadump/svga_shader_op.c new file mode 100644 index 0000000000..cecc22106b --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.c @@ -0,0 +1,168 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * SVGA Shader Token Opcode Info + * + * @author Michal Krol + */ + +#include "util/u_debug.h" +#include "svga_shader_op.h" + +#include "../svga_hw_reg.h" +#include "svga3d_shaderdefs.h" + +#define SVGA3DOP_INVALID SVGA3DOP_END +#define TGSI_OPCODE_INVALID TGSI_OPCODE_LAST + +static struct sh_opcode_info opcode_info[] = +{ + { "nop", 0, 0, SVGA3DOP_NOP }, + { "mov", 1, 1, SVGA3DOP_MOV, }, + { "add", 1, 2, SVGA3DOP_ADD, }, + { "sub", 1, 2, SVGA3DOP_SUB, }, + { "mad", 1, 3, SVGA3DOP_MAD, }, + { "mul", 1, 2, SVGA3DOP_MUL, }, + { "rcp", 1, 1, SVGA3DOP_RCP, }, + { "rsq", 1, 1, SVGA3DOP_RSQ, }, + { "dp3", 1, 2, SVGA3DOP_DP3, }, + { "dp4", 1, 2, SVGA3DOP_DP4, }, + { "min", 1, 2, SVGA3DOP_MIN, }, + { "max", 1, 2, SVGA3DOP_MAX, }, + { "slt", 1, 2, SVGA3DOP_SLT, }, + { "sge", 1, 2, SVGA3DOP_SGE, }, + { "exp", 1, 1, SVGA3DOP_EXP, }, + { "log", 1, 1, SVGA3DOP_LOG, }, + { "lit", 1, 1, SVGA3DOP_LIT, }, + { "dst", 1, 2, SVGA3DOP_DST, }, + { "lrp", 1, 3, SVGA3DOP_LRP, }, + { "frc", 1, 1, SVGA3DOP_FRC, }, + { "m4x4", 1, 2, SVGA3DOP_M4x4, }, + { "m4x3", 1, 2, SVGA3DOP_M4x3, }, + { "m3x4", 1, 2, SVGA3DOP_M3x4, }, + { "m3x3", 1, 2, SVGA3DOP_M3x3, }, + { "m3x2", 1, 2, SVGA3DOP_M3x2, }, + { "call", 0, 1, SVGA3DOP_CALL, }, + { "callnz", 0, 2, SVGA3DOP_CALLNZ, }, + { "loop", 0, 2, SVGA3DOP_LOOP, }, + { "ret", 0, 0, SVGA3DOP_RET, }, + { "endloop", 0, 0, SVGA3DOP_ENDLOOP, }, + { "label", 0, 1, SVGA3DOP_LABEL, }, + { "dcl", 0, 0, SVGA3DOP_DCL, }, + { "pow", 1, 2, SVGA3DOP_POW, }, + { "crs", 1, 2, SVGA3DOP_CRS, }, + { "sgn", 1, 3, SVGA3DOP_SGN, }, + { "abs", 1, 1, SVGA3DOP_ABS, }, + { "nrm", 1, 1, SVGA3DOP_NRM, }, /* 3-componenet normalization */ + { "sincos", 1, 1, SVGA3DOP_SINCOS, }, + { "rep", 0, 1, SVGA3DOP_REP, }, + { "endrep", 0, 0, SVGA3DOP_ENDREP, }, + { "if", 0, 1, SVGA3DOP_IF, }, + { "ifc", 0, 2, SVGA3DOP_IFC, }, + { "else", 0, 0, SVGA3DOP_ELSE, }, + { "endif", 0, 0, SVGA3DOP_ENDIF, }, + { "break", 0, 0, SVGA3DOP_BREAK, }, + { "breakc", 0, 0, SVGA3DOP_BREAKC, }, + { "mova", 1, 1, SVGA3DOP_MOVA, }, + { "defb", 0, 0, SVGA3DOP_DEFB, }, + { "defi", 0, 0, SVGA3DOP_DEFI, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "???", 0, 0, SVGA3DOP_INVALID, }, + { "texcoord", 0, 0, SVGA3DOP_TEXCOORD, }, + { "texkill", 1, 0, SVGA3DOP_TEXKILL, }, + { "tex", 0, 0, SVGA3DOP_TEX, }, + { "texbem", 1, 1, SVGA3DOP_TEXBEM, }, + { "texbeml", 1, 1, SVGA3DOP_TEXBEML, }, + { "texreg2ar", 1, 1, SVGA3DOP_TEXREG2AR, }, + { "texreg2gb", 1, 1, SVGA3DOP_TEXREG2GB, }, + { "texm3x2pad", 1, 1, SVGA3DOP_TEXM3x2PAD, }, + { "texm3x2tex", 1, 1, SVGA3DOP_TEXM3x2TEX, }, + { "texm3x3pad", 1, 1, SVGA3DOP_TEXM3x3PAD, }, + { "texm3x3tex", 1, 1, SVGA3DOP_TEXM3x3TEX, }, + { "reserved0", 0, 0, SVGA3DOP_RESERVED0, }, + { "texm3x3spec", 1, 2, SVGA3DOP_TEXM3x3SPEC, }, + { "texm3x3vspec", 1, 1, SVGA3DOP_TEXM3x3VSPEC,}, + { "expp", 1, 1, SVGA3DOP_EXPP, }, + { "logp", 1, 1, SVGA3DOP_LOGP, }, + { "cnd", 1, 3, SVGA3DOP_CND, }, + { "def", 0, 0, SVGA3DOP_DEF, }, + { "texreg2rgb", 1, 1, SVGA3DOP_TEXREG2RGB, }, + { "texdp3tex", 1, 1, SVGA3DOP_TEXDP3TEX, }, + { "texm3x2depth", 1, 1, SVGA3DOP_TEXM3x2DEPTH,}, + { "texdp3", 1, 1, SVGA3DOP_TEXDP3, }, + { "texm3x3", 1, 1, SVGA3DOP_TEXM3x3, }, + { "texdepth", 1, 0, SVGA3DOP_TEXDEPTH, }, + { "cmp", 1, 3, SVGA3DOP_CMP, }, + { "bem", 1, 2, SVGA3DOP_BEM, }, + { "dp2add", 1, 3, SVGA3DOP_DP2ADD, }, + { "dsx", 1, 1, SVGA3DOP_INVALID, }, + { "dsy", 1, 1, SVGA3DOP_INVALID, }, + { "texldd", 1, 1, SVGA3DOP_INVALID, }, + { "setp", 1, 2, SVGA3DOP_SETP, }, + { "texldl", 1, 1, SVGA3DOP_INVALID, }, + { "breakp", 1, 1, SVGA3DOP_INVALID, }, +}; + +const struct sh_opcode_info *sh_svga_opcode_info( uint op ) +{ + struct sh_opcode_info *info; + + if (op >= sizeof( opcode_info ) / sizeof( opcode_info[0] )) { + /* The opcode is either PHASE, COMMENT, END or out of range. + */ + assert( 0 ); + return NULL; + } + + info = &opcode_info[op]; + + if (info->svga_opcode == SVGA3DOP_INVALID) { + /* No valid information. Please provide number of dst/src registers. + */ + assert( 0 ); + return NULL; + } + + /* Sanity check. + */ + assert( op == info->svga_opcode ); + + return info; +} diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.h b/src/gallium/drivers/svga/svgadump/svga_shader_op.h new file mode 100644 index 0000000000..01d39dca84 --- /dev/null +++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.h @@ -0,0 +1,46 @@ +/********************************************************** + * Copyright 2008-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file + * SVGA Shader Token Opcode Info + * + * @author Michal Krol + */ + +#ifndef ST_SHADER_SVGA_OP_H +#define ST_SHADER_SVGA_OP_H + +struct sh_opcode_info +{ + const char *mnemonic; + unsigned num_dst:8; + unsigned num_src:8; + unsigned svga_opcode:16; +}; + +const struct sh_opcode_info *sh_svga_opcode_info( unsigned op ); + +#endif /* ST_SHADER_SVGA_OP_H */ -- cgit v1.2.3 From d3f26a84204d589e69e82627395771ed7273315d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 24 Nov 2009 14:43:30 +0000 Subject: svga: Use consistent names for public symbol names of shader dumping facilities. --- src/gallium/drivers/svga/svgadump/svga_dump.c | 2 +- src/gallium/drivers/svga/svgadump/svga_shader_dump.c | 4 ++-- src/gallium/drivers/svga/svgadump/svga_shader_dump.h | 8 ++++---- src/gallium/drivers/svga/svgadump/svga_shader_op.c | 2 +- src/gallium/drivers/svga/svgadump/svga_shader_op.h | 8 ++++---- 5 files changed, 12 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c index c6c353f58e..910afa2528 100644 --- a/src/gallium/drivers/svga/svgadump/svga_dump.c +++ b/src/gallium/drivers/svga/svgadump/svga_dump.c @@ -1627,7 +1627,7 @@ svga_dump_commands(const void *commands, uint32_t size) const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body; dump_SVGA3dCmdDefineShader(cmd); body = (const uint8_t *)&cmd[1]; - sh_svga_dump((const uint32_t *)body, + svga_shader_dump((const uint32_t *)body, (unsigned)(next - body)/sizeof(uint32_t), FALSE ); body = next; diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c index c654126d3a..7718bdf757 100644 --- a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c +++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c @@ -435,7 +435,7 @@ static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, cons } void -sh_svga_dump( +svga_shader_dump( const unsigned *assem, unsigned dwords, unsigned do_binary ) @@ -602,7 +602,7 @@ sh_svga_dump( default: { - const struct sh_opcode_info *info = sh_svga_opcode_info( op.opcode ); + const struct sh_opcode_info *info = svga_opcode_info( op.opcode ); uint i; uint num_src = info->num_src + op.predicated; boolean not_first_arg = FALSE; diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.h b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h index af5549cdba..a2657acb2f 100644 --- a/src/gallium/drivers/svga/svgadump/svga_shader_dump.h +++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h @@ -30,13 +30,13 @@ * @author Michal Krol */ -#ifndef ST_SHADER_SVGA_DUMP_H -#define ST_SHADER_SVGA_DUMP_H +#ifndef SVGA_SHADER_DUMP_H +#define SVGA_SHADER_DUMP_H void -sh_svga_dump( +svga_shader_dump( const unsigned *assem, unsigned dwords, unsigned do_binary ); -#endif /* ST_SHADER_SVGA_DUMP_H */ +#endif /* SVGA_SHADER_DUMP_H */ diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.c b/src/gallium/drivers/svga/svgadump/svga_shader_op.c index cecc22106b..8343bfdaab 100644 --- a/src/gallium/drivers/svga/svgadump/svga_shader_op.c +++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.c @@ -140,7 +140,7 @@ static struct sh_opcode_info opcode_info[] = { "breakp", 1, 1, SVGA3DOP_INVALID, }, }; -const struct sh_opcode_info *sh_svga_opcode_info( uint op ) +const struct sh_opcode_info *svga_opcode_info( uint op ) { struct sh_opcode_info *info; diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.h b/src/gallium/drivers/svga/svgadump/svga_shader_op.h index 01d39dca84..e558de02c5 100644 --- a/src/gallium/drivers/svga/svgadump/svga_shader_op.h +++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.h @@ -30,8 +30,8 @@ * @author Michal Krol */ -#ifndef ST_SHADER_SVGA_OP_H -#define ST_SHADER_SVGA_OP_H +#ifndef SVGA_SHADER_OP_H +#define SVGA_SHADER_OP_H struct sh_opcode_info { @@ -41,6 +41,6 @@ struct sh_opcode_info unsigned svga_opcode:16; }; -const struct sh_opcode_info *sh_svga_opcode_info( unsigned op ); +const struct sh_opcode_info *svga_opcode_info( unsigned op ); -#endif /* ST_SHADER_SVGA_OP_H */ +#endif /* SVGA_SHADER_OP_H */ -- cgit v1.2.3 From 135d7e12991312d7aff637565fbe67f666e4e39f Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 15 Nov 2009 12:14:03 -0800 Subject: svga: Handle comment tokens when dumping. --- src/gallium/drivers/svga/svgadump/svga_shader.h | 6 ++++++ src/gallium/drivers/svga/svgadump/svga_shader_dump.c | 7 ++++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svgadump/svga_shader.h b/src/gallium/drivers/svga/svgadump/svga_shader.h index 2fc1796a90..9217af2dd9 100644 --- a/src/gallium/drivers/svga/svgadump/svga_shader.h +++ b/src/gallium/drivers/svga/svgadump/svga_shader.h @@ -211,4 +211,10 @@ struct sh_trinaryop struct sh_srcreg src2; }; +struct sh_comment +{ + unsigned opcode:16; + unsigned size:16; +}; + #endif /* ST_SHADER_SVGA_H */ diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c index 7718bdf757..b0e7fdf378 100644 --- a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c +++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c @@ -587,7 +587,12 @@ svga_shader_dump( break; case SVGA3DOP_COMMENT: - assert( 0 ); + { + struct sh_comment comment = *(struct sh_comment *)assem; + + /* Ignore comment contents. */ + assem += sizeof(struct sh_comment) / sizeof(unsigned) + comment.size; + } break; case SVGA3DOP_RET: -- cgit v1.2.3 From 6dd9676a8fc43062a7017f2951e0f032889fac9e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 27 Nov 2009 13:59:37 +0000 Subject: svga: Re-add shader dumping. --- src/gallium/drivers/svga/svga_tgsi.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c index 81eea1a145..b8ef137c01 100644 --- a/src/gallium/drivers/svga/svga_tgsi.c +++ b/src/gallium/drivers/svga/svga_tgsi.c @@ -222,6 +222,20 @@ svga_tgsi_translate( const struct svga_shader *shader, result->nr_tokens = (emit.ptr - emit.buf) / sizeof(unsigned); memcpy(&result->key, &key, sizeof key); + if (SVGA_DEBUG & DEBUG_TGSI) + { + debug_printf( "#####################################\n" ); + debug_printf( "Shader %u below\n", shader->id ); + tgsi_dump( shader->tokens, 0 ); + if (SVGA_DEBUG & DEBUG_TGSI) { + debug_printf( "Shader %u compiled below\n", shader->id ); + svga_shader_dump( result->tokens, + result->nr_tokens , + FALSE ); + } + debug_printf( "#####################################\n" ); + } + return result; fail: -- cgit v1.2.3 From b748a9f574361273df6b05c06c647ac4fd9b3e41 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Fri, 27 Nov 2009 17:40:24 +0100 Subject: r300g,llvmpipe: fix some more merge problems --- src/gallium/drivers/llvmpipe/lp_texture.c | 2 +- src/gallium/drivers/r300/r300_emit.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 0a0f31f8a3..65d62fd072 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -169,7 +169,7 @@ llvmpipe_texture_blanket(struct pipe_screen * screen, /* Only supports one type */ if (base->target != PIPE_TEXTURE_2D || base->last_level != 0 || - base->depth[0] != 1) { + base->depth0 != 1) { return NULL; } diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index e6ab8e4af1..98a39390bf 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -145,8 +145,8 @@ static const float * get_shader_constant( * normalized coords. Should only show up on non-r500. */ case RC_STATE_R300_TEXRECT_FACTOR: tex = &r300->textures[constant->u.State[1]]->tex; - vec[0] = 1.0 / tex->width[0]; - vec[1] = 1.0 / tex->height[0]; + vec[0] = 1.0 / tex->width0; + vec[1] = 1.0 / tex->height0; break; default: -- cgit v1.2.3 From 7fa1bcc05a237365e5ea09512453f29a91c7a141 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Fri, 27 Nov 2009 17:41:42 +0100 Subject: svga: fix for not using texture width/height/depth arrays --- src/gallium/drivers/svga/svga_screen_texture.c | 61 ++++++++++++------------- src/gallium/drivers/svga/svga_state_constants.c | 4 +- 2 files changed, 32 insertions(+), 33 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c index 8472dea04d..fb11b80dcf 100644 --- a/src/gallium/drivers/svga/svga_screen_texture.c +++ b/src/gallium/drivers/svga/svga_screen_texture.c @@ -287,23 +287,20 @@ svga_texture_create(struct pipe_screen *screen, if(templat->last_level >= SVGA_MAX_TEXTURE_LEVELS) goto error2; - width = templat->width[0]; - height = templat->height[0]; - depth = templat->depth[0]; + width = templat->width0; + height = templat->height0; + depth = templat->depth0; for(level = 0; level <= templat->last_level; ++level) { - tex->base.width[level] = width; - tex->base.height[level] = height; - tex->base.depth[level] = depth; tex->base.nblocksx[level] = pf_get_nblocksx(&tex->base.block, width); tex->base.nblocksy[level] = pf_get_nblocksy(&tex->base.block, height); - width = minify(width); - height = minify(height); - depth = minify(depth); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); } - size.width = templat->width[0]; - size.height = templat->height[0]; - size.depth = templat->depth[0]; + size.width = templat->width0; + size.height = templat->height0; + size.depth = templat->depth0; if(templat->target == PIPE_TEXTURE_CUBE) { flags |= SVGA3D_SURFACE_CUBEMAP; @@ -367,7 +364,7 @@ svga_texture_blanket(struct pipe_screen * screen, /* Only supports one type */ if (base->target != PIPE_TEXTURE_2D || base->last_level != 0 || - base->depth[0] != 1) { + base->depth0 != 1) { return NULL; } @@ -534,9 +531,9 @@ svga_texture_view_surface(struct pipe_context *pipe, "svga: Create surface view: face %d zslice %d mips %d..%d\n", face_pick, zslice_pick, start_mip, start_mip+num_mip-1); - size.width = tex->base.width[start_mip]; - size.height = tex->base.height[start_mip]; - size.depth = zslice_pick < 0 ? tex->base.depth[start_mip] : 1; + size.width = u_minify(tex->base.width0, start_mip); + size.height = u_minify(tex->base.height0, start_mip); + size.depth = zslice_pick < 0 ? u_minify(tex->base.depth0, start_mip) : 1; assert(size.depth == 1); if(tex->base.target == PIPE_TEXTURE_CUBE && face_pick < 0) { @@ -565,11 +562,12 @@ svga_texture_view_surface(struct pipe_context *pipe, for (i = 0; i < num_mip; i++) { for (j = 0; j < numFaces; j++) { if(tex->defined[j + face_pick][i + start_mip]) { - unsigned depth = zslice_pick < 0 ? tex->base.depth[i + start_mip] : 1; + unsigned depth = zslice_pick < 0 ? u_minify(tex->base.depth0, i + start_mip) : 1; svga_texture_copy_handle(svga_context(pipe), ss, tex->handle, 0, 0, z_offset, i + start_mip, j + face_pick, handle, 0, 0, 0, i, j, - tex->base.width[i + start_mip], tex->base.height[i + start_mip], depth); + u_minify(tex->base.width0, i + start_mip), + u_minify(tex->base.height0, i + start_mip), depth); } } } @@ -599,8 +597,8 @@ svga_get_tex_surface(struct pipe_screen *screen, pipe_reference_init(&ps->reference, 1); pipe_texture_reference(&ps->texture, pt); ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; + ps->width = u_minify(pt->width0, level); + ps->height = u_minify(pt->height0, level); ps->usage = flags; ps->level = level; ps->face = face; @@ -723,7 +721,8 @@ svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf) svga_texture_copy_handle(svga_context(pipe), ss, s->handle, 0, 0, 0, s->real_level, s->real_face, tex->handle, 0, 0, surf->zslice, surf->level, surf->face, - tex->base.width[surf->level], tex->base.height[surf->level], 1); + u_minify(tex->base.width0, surf->level), + u_minify(tex->base.height0, surf->level), 1); tex->defined[surf->face][surf->level] = TRUE; } } @@ -953,9 +952,9 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt, "svga: Sampler view: no %p, mips %u..%u, nr %u, size (%ux%ux%u), last %u\n", pt, min_lod, max_lod, max_lod - min_lod + 1, - pt->width[0], - pt->height[0], - pt->depth[0], + pt->width0, + pt->height0, + pt->depth0, pt->last_level); sws->surface_reference(sws, &sv->handle, tex->handle); return sv; @@ -965,9 +964,9 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt, "svga: Sampler view: yes %p, mips %u..%u, nr %u, size (%ux%ux%u), last %u\n", pt, min_lod, max_lod, max_lod - min_lod + 1, - pt->width[0], - pt->height[0], - pt->depth[0], + pt->width0, + pt->height0, + pt->depth0, pt->last_level); sv->age = tex->age; @@ -1015,9 +1014,9 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view * svga_texture_copy_handle(svga, NULL, tex->handle, 0, 0, 0, i, k, v->handle, 0, 0, 0, i - v->min_lod, k, - tex->base.width[i], - tex->base.height[i], - tex->base.depth[i]); + u_minify(tex->base.width0, i), + u_minify(tex->base.height0, i), + u_minify(tex->base.depth0, i)); } } @@ -1047,7 +1046,7 @@ svga_screen_buffer_from_texture(struct pipe_texture *texture, svga_translate_format(texture->format), stex->handle); - *stride = pf_get_nblocksx(&texture->block, texture->width[0]) * + *stride = pf_get_nblocksx(&texture->block, texture->width0) * texture->block.size; return *buffer != NULL; diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c index 18cce7dde1..209ed28245 100644 --- a/src/gallium/drivers/svga/svga_state_constants.c +++ b/src/gallium/drivers/svga/svga_state_constants.c @@ -140,8 +140,8 @@ static int emit_fs_consts( struct svga_context *svga, struct pipe_texture *tex = svga->curr.texture[i]; float data[4]; - data[0] = 1.0 / (float)tex->width[0]; - data[1] = 1.0 / (float)tex->height[0]; + data[0] = 1.0 / (float)tex->width0; + data[1] = 1.0 / (float)tex->height0; data[2] = 1.0; data[3] = 1.0; -- cgit v1.2.3 From 510fd280b54fa33ed229ef297a1a77c78811c592 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 26 Nov 2009 16:59:39 +0100 Subject: nv50: bswap32 the polygon stipple pattern The hardware wants the pattern the same way it is passed to glPolygonStipple. --- src/gallium/drivers/nv50/nv50_state_validate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 799d2758fe..19b8ef07b5 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -285,7 +285,7 @@ nv50_state_validate(struct nv50_context *nv50) so = so_new(33, 0); so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32); for (i = 0; i < 32; i++) - so_data(so, nv50->stipple.stipple[i]); + so_data(so, util_bswap32(nv50->stipple.stipple[i])); so_ref(so, &nv50->state.stipple); so_ref(NULL, &so); } -- cgit v1.2.3 From cad14c2542698de144bb5434cefa02d7a00aaa74 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 27 Nov 2009 21:29:38 +0100 Subject: nv50: do conversion of last insn to 64 bit format first Simplifies things since the second to last one will then be converted in the subsequent pass that ensures alignment automatically. --- src/gallium/drivers/nv50/nv50_program.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index bf50982dd1..855079f293 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -2914,7 +2914,7 @@ nv50_fp_move_results(struct nv50_pc *pc) static void nv50_program_fixup_insns(struct nv50_pc *pc) { - struct nv50_program_exec *e, *prev = NULL, **bra_list; + struct nv50_program_exec *e, **bra_list; unsigned i, n, pos; bra_list = CALLOC(pc->p->exec_size, sizeof(struct nv50_program_exec *)); @@ -2926,6 +2926,16 @@ nv50_program_fixup_insns(struct nv50_pc *pc) if (e->param.index >= 0 && !e->param.mask) bra_list[n++] = e; + /* last instruction must be long so it can have the exit bit set */ + if (!is_long(pc->p->exec_tail)) + convert_to_long(pc, pc->p->exec_tail); + /* set exit bit */ + pc->p->exec_tail->inst[1] |= 1; + + /* !immd on exit insn simultaneously means !join */ + assert(!is_immd(pc->p->exec_head)); + assert(!is_immd(pc->p->exec_tail)); + /* Make sure we don't have any single 32 bit instructions. */ for (e = pc->p->exec_head, pos = 0; e; e = e->next) { pos += is_long(e) ? 2 : 1; @@ -2937,23 +2947,8 @@ nv50_program_fixup_insns(struct nv50_pc *pc) convert_to_long(pc, e); ++pos; } - if (e->next) - prev = e; } - assert(!is_immd(pc->p->exec_head)); - assert(!is_immd(pc->p->exec_tail)); - - /* last instruction must be long so it can have the end bit set */ - if (!is_long(pc->p->exec_tail)) { - convert_to_long(pc, pc->p->exec_tail); - if (prev) - convert_to_long(pc, prev); - } - assert(!(pc->p->exec_tail->inst[1] & 2)); - /* set the end-bit */ - pc->p->exec_tail->inst[1] |= 1; - FREE(bra_list); } -- cgit v1.2.3 From 3f471c7948425a9c8ae23a563e0e816954a7589a Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 26 Nov 2009 17:03:00 +0100 Subject: nv50: don't permanently negate src in emit_ddy --- src/gallium/drivers/nv50/nv50_program.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 855079f293..f93c864c2a 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1440,19 +1440,25 @@ emit_ddx(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) static void emit_ddy(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { + struct nv50_reg *r = src; struct nv50_program_exec *e = exec(pc); assert(src->type == P_TEMP); - if (!(src->mod & NV50_MOD_NEG)) /* ! double negation */ - emit_neg(pc, src, src); + if (!(src->mod & NV50_MOD_NEG)) { /* ! double negation */ + r = alloc_temp(pc, NULL); + emit_neg(pc, r, src); + } e->inst[0] = 0xc0150000; e->inst[1] = 0x8a400000; set_long(pc, e); set_dst(pc, dst, e); - set_src_0(pc, src, e); - set_src_2(pc, src, e); + set_src_0(pc, r, e); + set_src_2(pc, r, e); + + if (r != src) + free_temp(pc, r); emit(pc, e); } -- cgit v1.2.3 From 7494b829052a87d7a8c56c68300a110b40e401e8 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 29 Nov 2009 13:33:16 +0100 Subject: nv50: match VP outputs to FP inputs ourselves For each FP input, don't assume that the VP output will be at the same position, but scan the semantics instead, then put the correct output reg indices into VP_RESULT_MAP. Position is still assumed to be the first output/input. See 07fafc7c9346aa260829603bf3188596481e9e62, which renders previous assumptions incorrect. --- src/gallium/drivers/nv50/nv50_program.c | 70 ++++++++++++++++++--------------- src/gallium/drivers/nv50/nv50_program.h | 3 +- 2 files changed, 40 insertions(+), 33 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index f93c864c2a..bad0ace7e5 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -2643,7 +2643,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) for (i = 0, rid = 0; i < pc->result_nr; ++i) { p->cfg.io[i].hw = rid; - p->cfg.io[i].id_vp = i; + p->cfg.io[i].id = i; for (c = 0; c < 4; ++c) { int n = i * 4 + c; @@ -2675,14 +2675,12 @@ nv50_program_tx_prep(struct nv50_pc *pc) * the lower hardware IDs, so sort them: */ for (i = 0; i < pc->attr_nr; i++) { - if (pc->interp_mode[i] == INTERP_FLAT) { - p->cfg.io[m].id_vp = i + base; - p->cfg.io[m++].id_fp = i; - } else { + if (pc->interp_mode[i] == INTERP_FLAT) + p->cfg.io[m++].id = i; + else { if (!(pc->interp_mode[i] & INTERP_PERSPECTIVE)) p->cfg.io[n].linear = TRUE; - p->cfg.io[n].id_vp = i + base; - p->cfg.io[n++].id_fp = i; + p->cfg.io[n++].id = i; } } @@ -2694,7 +2692,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) for (n = 0; n < pc->attr_nr; ++n) { p->cfg.io[n].hw = rid = aid; - i = p->cfg.io[n].id_fp; + i = p->cfg.io[n].id; if (p->info.input_semantic_name[n] == TGSI_SEMANTIC_FACE) { @@ -2734,8 +2732,8 @@ nv50_program_tx_prep(struct nv50_pc *pc) for (i = 0; i < pc->attr_nr; i++) { ubyte si, sn; - sn = p->info.input_semantic_name[p->cfg.io[i].id_fp]; - si = p->info.input_semantic_index[p->cfg.io[i].id_fp]; + sn = p->info.input_semantic_name[p->cfg.io[i].id]; + si = p->info.input_semantic_index[p->cfg.io[i].id]; if (sn == TGSI_SEMANTIC_COLOR) { p->cfg.two_side[si] = p->cfg.io[i]; @@ -3237,15 +3235,15 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) struct nv50_program *vp = nv50->vertprog; unsigned i, c, m = base; - /* XXX: This can't work correctly in all cases yet, we either - * have to create TGSI_SEMANTIC_PNTC or sprite_coord_mode has - * to be per FP input instead of per VP output + /* XXX: this might not work correctly in all cases yet - we'll + * just assume that an FP generic input that is not written in + * the VP is PointCoord. */ memset(pntc, 0, 8 * sizeof(uint32_t)); for (i = 0; i < fp->cfg.io_nr; i++) { uint8_t sn, si; - uint8_t j = fp->cfg.io[i].id_vp, k = fp->cfg.io[i].id_fp; + uint8_t j, k = fp->cfg.io[i].id; unsigned n = popcnt4(fp->cfg.io[i].mask); if (fp->info.input_semantic_name[k] != TGSI_SEMANTIC_GENERIC) { @@ -3253,10 +3251,16 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) continue; } - sn = vp->info.input_semantic_name[j]; - si = vp->info.input_semantic_index[j]; + for (j = 0; j < vp->info.num_outputs; ++j) { + sn = vp->info.output_semantic_name[j]; + si = vp->info.output_semantic_index[j]; - if (j < fp->cfg.io_nr && sn == TGSI_SEMANTIC_GENERIC) { + if (sn == fp->info.input_semantic_name[k] && + si == fp->info.input_semantic_index[k]) + break; + } + + if (j < vp->info.num_outputs) { ubyte mode = nv50->rasterizer->pipe.sprite_coord_mode[si]; @@ -3344,20 +3348,24 @@ nv50_linkage_validate(struct nv50_context *nv50) reg[0] += m - 4; /* adjust FFC0 id */ reg[4] |= m << 8; /* set mid where 'normal' FP inputs start */ - i = 0; - if (fp->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) - i = 1; - for (; i < fp->cfg.io_nr; i++) { - ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id_fp]; - ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id_fp]; - - n = fp->cfg.io[i].id_vp; - if (n >= vp->cfg.io_nr || - vp->info.output_semantic_name[n] != sn || - vp->info.output_semantic_index[n] != si) - vpo = &dummy; - else - vpo = &vp->cfg.io[n]; + for (i = 0; i < fp->cfg.io_nr; i++) { + ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id]; + ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id]; + + /* position must be mapped first */ + assert(i == 0 || sn != TGSI_SEMANTIC_POSITION); + + /* maybe even remove these from cfg.io */ + if (sn == TGSI_SEMANTIC_POSITION || sn == TGSI_SEMANTIC_FACE) + continue; + + /* VP outputs and vp->cfg.io are in the same order */ + for (n = 0; n < vp->info.num_outputs; ++n) { + if (vp->info.output_semantic_name[n] == sn && + vp->info.output_semantic_index[n] == si) + break; + } + vpo = (n < vp->info.num_outputs) ? &vp->cfg.io[n] : &dummy; m = nv50_sreg4_map(map, m, lin, &fp->cfg.io[i], vpo); } diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index d78dee083f..255c7c737e 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -17,8 +17,7 @@ struct nv50_program_exec { struct nv50_sreg4 { uint8_t hw; - uint8_t id_vp; - uint8_t id_fp; + uint8_t id; /* tgsi index, nv50 needs them sorted: flat ones last */ uint8_t mask; boolean linear; -- cgit v1.2.3 From c332525ad3cf8e946e60c3f9b96af525ca4cb71c Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 28 Nov 2009 13:57:38 +0100 Subject: nv50: update linkage on rasterizer change We need to update VP_RESULT_MAP and/or COORD_REPLACE_MAP when light_twoside and/or point_sprite are changed. --- src/gallium/drivers/nv50/nv50_state_validate.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 19b8ef07b5..c871acaab8 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -201,7 +201,8 @@ nv50_state_emit(struct nv50_context *nv50) so_emit(chan, nv50->state.vertprog); if (nv50->state.dirty & NV50_NEW_FRAGPROG) so_emit(chan, nv50->state.fragprog); - if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG)) + if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | + NV50_NEW_RASTERIZER)) so_emit(chan, nv50->state.programs); if (nv50->state.dirty & NV50_NEW_RASTERIZER) so_emit(chan, nv50->state.rast); @@ -264,7 +265,8 @@ nv50_state_validate(struct nv50_context *nv50) if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB)) nv50_fragprog_validate(nv50); - if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG)) + if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | + NV50_NEW_RASTERIZER)) nv50_linkage_validate(nv50); if (nv50->dirty & NV50_NEW_RASTERIZER) -- cgit v1.2.3 From ac400ffce62be47fc77e8d10cabcd39b92b6c627 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Mon, 30 Nov 2009 20:29:18 +0100 Subject: gallium: interface cleanups, remove nblocksx/y from pipe_texture and more This patch removes nblocksx, nblocksy arrays from pipe_texture (can be recalculated if needed). Furthermore, pipe_format_block struct is gone completely (again, contains just derived state). nblocksx, nblocksy, block are also removed from pipe_transfer, together with the format enum (can be obtained from the texture associated with the transfer). --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 1 - src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 1 - src/gallium/auxiliary/util/u_blit.c | 1 - src/gallium/auxiliary/util/u_debug.c | 8 +- src/gallium/auxiliary/util/u_format.h | 2 +- src/gallium/auxiliary/util/u_gen_mipmap.c | 12 +-- src/gallium/auxiliary/util/u_linear.c | 2 +- src/gallium/auxiliary/util/u_linear.h | 19 +++- src/gallium/auxiliary/util/u_rect.c | 71 +++++++------ src/gallium/auxiliary/util/u_rect.h | 4 +- src/gallium/auxiliary/util/u_surface.c | 1 - src/gallium/auxiliary/util/u_tile.c | 29 +++--- src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 1 - src/gallium/drivers/softpipe/sp_texture.c | 33 +++--- src/gallium/drivers/softpipe/sp_tile_cache.c | 10 +- src/gallium/include/pipe/p_format.h | 123 ++++++++++------------- src/gallium/include/pipe/p_state.h | 8 -- src/mesa/state_tracker/st_cb_drawpixels.c | 8 +- src/mesa/state_tracker/st_cb_fbo.c | 7 +- src/mesa/state_tracker/st_cb_readpixels.c | 14 +-- src/mesa/state_tracker/st_cb_texture.c | 33 +++--- src/mesa/state_tracker/st_gen_mipmap.c | 4 +- src/mesa/state_tracker/st_texture.c | 4 +- 23 files changed, 191 insertions(+), 205 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 31de84b272..8c631a01af 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -401,7 +401,6 @@ aaline_create_texture(struct aaline_stage *aaline) texTemp.width0 = 1 << MAX_TEXTURE_LEVEL; texTemp.height0 = 1 << MAX_TEXTURE_LEVEL; texTemp.depth0 = 1; - pf_get_block(texTemp.format, &texTemp.block); aaline->texture = screen->texture_create(screen, &texTemp); if (!aaline->texture) diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 27d89721b1..7803946baa 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -430,7 +430,6 @@ pstip_create_texture(struct pstip_stage *pstip) texTemp.width0 = 32; texTemp.height0 = 32; texTemp.depth0 = 1; - pf_get_block(texTemp.format, &texTemp.block); pstip->texture = screen->texture_create(screen, &texTemp); if (pstip->texture == NULL) diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 5372df5735..abe1de3302 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -357,7 +357,6 @@ util_blit_pixels_writemask(struct blit_state *ctx, texTemp.width0 = srcW; texTemp.height0 = srcH; texTemp.depth0 = 1; - pf_get_block(src->format, &texTemp.block); tex = screen->texture_create(screen, &texTemp); if (!tex) diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 96d400c839..40633574b0 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -669,10 +669,10 @@ void debug_dump_surface(const char *prefix, goto error; debug_dump_image(prefix, - transfer->format, - transfer->block.size, - transfer->nblocksx, - transfer->nblocksy, + texture->format, + pf_get_blocksize(texture->format), + pf_get_nblocksx(texture->format, transfer->width), + pf_get_nblocksy(texture->format, transfer->height), transfer->stride, data); diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index 7b5b7fcda5..6740683a61 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -50,7 +50,7 @@ struct util_format_block /** Block height in pixels */ unsigned height; - /** Block size in bytes */ + /** Block size in bits */ unsigned bits; }; diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index f67f1e458d..83263d9fe6 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -996,7 +996,7 @@ reduce_2d(enum pipe_format pformat, { enum dtype datatype; uint comps; - const int bpt = pf_get_size(pformat); + const int bpt = pf_get_blocksize(pformat); const ubyte *srcA, *srcB; ubyte *dst; int row; @@ -1035,7 +1035,7 @@ reduce_3d(enum pipe_format pformat, int dstWidth, int dstHeight, int dstDepth, int dstRowStride, ubyte *dstPtr) { - const int bpt = pf_get_size(pformat); + const int bpt = pf_get_blocksize(pformat); const int border = 0; int img, row; int bytesPerSrcImage, bytesPerDstImage; @@ -1159,8 +1159,8 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, const uint zslice = 0; uint dstLevel; - assert(pt->block.width == 1); - assert(pt->block.height == 1); + assert(pf_get_blockwidth(pt->format) == 1); + assert(pf_get_blockheight(pt->format) == 1); for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { const uint srcLevel = dstLevel - 1; @@ -1204,8 +1204,8 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, struct pipe_screen *screen = pipe->screen; uint dstLevel, zslice = 0; - assert(pt->block.width == 1); - assert(pt->block.height == 1); + assert(pf_get_blockwidth(pt->format) == 1); + assert(pf_get_blockheight(pt->format) == 1); for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { const uint srcLevel = dstLevel - 1; diff --git a/src/gallium/auxiliary/util/u_linear.c b/src/gallium/auxiliary/util/u_linear.c index a1dce3f5cf..f1aef21677 100644 --- a/src/gallium/auxiliary/util/u_linear.c +++ b/src/gallium/auxiliary/util/u_linear.c @@ -82,7 +82,7 @@ void pipe_linear_from_tile(struct pipe_tile_info *t, const void *src_ptr, void pipe_linear_fill_info(struct pipe_tile_info *t, - const struct pipe_format_block *block, + const struct u_linear_format_block *block, unsigned tile_width, unsigned tile_height, unsigned tiles_x, unsigned tiles_y) { diff --git a/src/gallium/auxiliary/util/u_linear.h b/src/gallium/auxiliary/util/u_linear.h index b74308ffa3..42c40b2aa7 100644 --- a/src/gallium/auxiliary/util/u_linear.h +++ b/src/gallium/auxiliary/util/u_linear.h @@ -35,6 +35,19 @@ #include "pipe/p_format.h" +struct u_linear_format_block +{ + /** Block size in bytes */ + unsigned size; + + /** Block width in pixels */ + unsigned width; + + /** Block height in pixels */ + unsigned height; +}; + + struct pipe_tile_info { unsigned size; @@ -49,10 +62,10 @@ struct pipe_tile_info unsigned rows; /* Describe the tile in pixels */ - struct pipe_format_block tile; + struct u_linear_format_block tile; /* Describe each block within the tile */ - struct pipe_format_block block; + struct u_linear_format_block block; }; void pipe_linear_to_tile(size_t src_stride, const void *src_ptr, @@ -71,7 +84,7 @@ void pipe_linear_from_tile(struct pipe_tile_info *t, const void *src_ptr, * @tiles_y number of tiles in y axis */ void pipe_linear_fill_info(struct pipe_tile_info *t, - const struct pipe_format_block *block, + const struct u_linear_format_block *block, unsigned tile_width, unsigned tile_height, unsigned tiles_x, unsigned tiles_y); diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c index 9866b6fc8a..72725b59d2 100644 --- a/src/gallium/auxiliary/util/u_rect.c +++ b/src/gallium/auxiliary/util/u_rect.c @@ -44,7 +44,7 @@ */ void util_copy_rect(ubyte * dst, - const struct pipe_format_block *block, + enum pipe_format format, unsigned dst_stride, unsigned dst_x, unsigned dst_y, @@ -57,27 +57,30 @@ util_copy_rect(ubyte * dst, { unsigned i; int src_stride_pos = src_stride < 0 ? -src_stride : src_stride; + int blocksize = pf_get_blocksize(format); + int blockwidth = pf_get_blockwidth(format); + int blockheight = pf_get_blockheight(format); - assert(block->size > 0); - assert(block->width > 0); - assert(block->height > 0); + assert(blocksize > 0); + assert(blockwidth > 0); + assert(blockheight > 0); assert(src_x >= 0); assert(src_y >= 0); assert(dst_x >= 0); assert(dst_y >= 0); - dst_x /= block->width; - dst_y /= block->height; - width = (width + block->width - 1)/block->width; - height = (height + block->height - 1)/block->height; - src_x /= block->width; - src_y /= block->height; + dst_x /= blockwidth; + dst_y /= blockheight; + width = (width + blockwidth - 1)/blockwidth; + height = (height + blockheight - 1)/blockheight; + src_x /= blockwidth; + src_y /= blockheight; - dst += dst_x * block->size; - src += src_x * block->size; + dst += dst_x * blocksize; + src += src_x * blocksize; dst += dst_y * dst_stride; src += src_y * src_stride_pos; - width *= block->size; + width *= blocksize; if (width == dst_stride && width == src_stride) memcpy(dst, src, height * width); @@ -92,7 +95,7 @@ util_copy_rect(ubyte * dst, void util_fill_rect(ubyte * dst, - const struct pipe_format_block *block, + enum pipe_format format, unsigned dst_stride, unsigned dst_x, unsigned dst_y, @@ -102,23 +105,26 @@ util_fill_rect(ubyte * dst, { unsigned i, j; unsigned width_size; + int blocksize = pf_get_blocksize(format); + int blockwidth = pf_get_blockwidth(format); + int blockheight = pf_get_blockheight(format); - assert(block->size > 0); - assert(block->width > 0); - assert(block->height > 0); + assert(blocksize > 0); + assert(blockwidth > 0); + assert(blockheight > 0); assert(dst_x >= 0); assert(dst_y >= 0); - dst_x /= block->width; - dst_y /= block->height; - width = (width + block->width - 1)/block->width; - height = (height + block->height - 1)/block->height; + dst_x /= blockwidth; + dst_y /= blockheight; + width = (width + blockwidth - 1)/blockwidth; + height = (height + blockheight - 1)/blockheight; - dst += dst_x * block->size; + dst += dst_x * blocksize; dst += dst_y * dst_stride; - width_size = width * block->size; + width_size = width * blocksize; - switch (block->size) { + switch (blocksize) { case 1: if(dst_stride == width_size) memset(dst, (ubyte) value, height * width_size); @@ -172,10 +178,15 @@ util_surface_copy(struct pipe_context *pipe, struct pipe_transfer *src_trans, *dst_trans; void *dst_map; const void *src_map; + enum pipe_format src_format, dst_format; assert(src->texture && dst->texture); if (!src->texture || !dst->texture) return; + + src_format = src->texture->format; + dst_format = dst->texture->format; + src_trans = screen->get_tex_transfer(screen, src->texture, src->face, @@ -192,9 +203,9 @@ util_surface_copy(struct pipe_context *pipe, PIPE_TRANSFER_WRITE, dst_x, dst_y, w, h); - assert(dst_trans->block.size == src_trans->block.size); - assert(dst_trans->block.width == src_trans->block.width); - assert(dst_trans->block.height == src_trans->block.height); + assert(pf_get_blocksize(dst_format) == pf_get_blocksize(src_format)); + assert(pf_get_blockwidth(dst_format) == pf_get_blockwidth(src_format)); + assert(pf_get_blockheight(dst_format) == pf_get_blockheight(src_format)); src_map = pipe->screen->transfer_map(screen, src_trans); dst_map = pipe->screen->transfer_map(screen, dst_trans); @@ -205,7 +216,7 @@ util_surface_copy(struct pipe_context *pipe, if (src_map && dst_map) { /* If do_flip, invert src_y position and pass negative src stride */ util_copy_rect(dst_map, - &dst_trans->block, + dst_format, dst_trans->stride, 0, 0, w, h, @@ -259,11 +270,11 @@ util_surface_fill(struct pipe_context *pipe, if (dst_map) { assert(dst_trans->stride > 0); - switch (dst_trans->block.size) { + switch (pf_get_blocksize(dst_trans->texture->format)) { case 1: case 2: case 4: - util_fill_rect(dst_map, &dst_trans->block, dst_trans->stride, + util_fill_rect(dst_map, dst_trans->texture->format, dst_trans->stride, 0, 0, width, height, value); break; case 8: diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h index daa50834d3..5e444ffae2 100644 --- a/src/gallium/auxiliary/util/u_rect.h +++ b/src/gallium/auxiliary/util/u_rect.h @@ -42,13 +42,13 @@ struct pipe_surface; extern void -util_copy_rect(ubyte * dst, const struct pipe_format_block *block, +util_copy_rect(ubyte * dst, enum pipe_format format, unsigned dst_stride, unsigned dst_x, unsigned dst_y, unsigned width, unsigned height, const ubyte * src, int src_stride, unsigned src_x, int src_y); extern void -util_fill_rect(ubyte * dst, const struct pipe_format_block *block, +util_fill_rect(ubyte * dst, enum pipe_format format, unsigned dst_stride, unsigned dst_x, unsigned dst_y, unsigned width, unsigned height, uint32_t value); diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index de8c266db8..f828908f0b 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -82,7 +82,6 @@ util_create_rgba_surface(struct pipe_screen *screen, templ.width0 = width; templ.height0 = height; templ.depth0 = 1; - pf_get_block(format, &templ.block); templ.tex_usage = usage; *textureOut = screen->texture_create(screen, &templ); diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 8a22f584be..4f34f8a1a6 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -52,7 +52,7 @@ pipe_get_tile_raw(struct pipe_transfer *pt, const void *src; if (dst_stride == 0) - dst_stride = pf_get_nblocksx(&pt->block, w) * pt->block.size; + dst_stride = pf_get_stride(pt->texture->format, w); if (pipe_clip_tile(x, y, &w, &h, pt)) return; @@ -62,7 +62,7 @@ pipe_get_tile_raw(struct pipe_transfer *pt, if(!src) return; - util_copy_rect(dst, &pt->block, dst_stride, 0, 0, w, h, src, pt->stride, x, y); + util_copy_rect(dst, pt->texture->format, dst_stride, 0, 0, w, h, src, pt->stride, x, y); screen->transfer_unmap(screen, pt); } @@ -78,9 +78,10 @@ pipe_put_tile_raw(struct pipe_transfer *pt, { struct pipe_screen *screen = pt->texture->screen; void *dst; + enum pipe_format format = pt->texture->format; if (src_stride == 0) - src_stride = pf_get_nblocksx(&pt->block, w) * pt->block.size; + src_stride = pf_get_stride(format, w); if (pipe_clip_tile(x, y, &w, &h, pt)) return; @@ -90,7 +91,7 @@ pipe_put_tile_raw(struct pipe_transfer *pt, if(!dst) return; - util_copy_rect(dst, &pt->block, pt->stride, x, y, w, h, src, src_stride, 0, 0); + util_copy_rect(dst, format, pt->stride, x, y, w, h, src, src_stride, 0, 0); screen->transfer_unmap(screen, pt); } @@ -1219,21 +1220,22 @@ pipe_get_tile_rgba(struct pipe_transfer *pt, { unsigned dst_stride = w * 4; void *packed; + enum pipe_format format = pt->texture->format; if (pipe_clip_tile(x, y, &w, &h, pt)) return; - packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size); + packed = MALLOC(pf_get_nblocks(format, w, h) * pf_get_blocksize(format)); if (!packed) return; - if(pt->format == PIPE_FORMAT_YCBCR || pt->format == PIPE_FORMAT_YCBCR_REV) + if(format == PIPE_FORMAT_YCBCR || format == PIPE_FORMAT_YCBCR_REV) assert((x & 1) == 0); pipe_get_tile_raw(pt, x, y, w, h, packed, 0); - pipe_tile_raw_to_rgba(pt->format, packed, w, h, p, dst_stride); + pipe_tile_raw_to_rgba(format, packed, w, h, p, dst_stride); FREE(packed); } @@ -1246,16 +1248,17 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, { unsigned src_stride = w * 4; void *packed; + enum pipe_format format = pt->texture->format; if (pipe_clip_tile(x, y, &w, &h, pt)) return; - packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size); + packed = MALLOC(pf_get_nblocks(format, w, h) * pf_get_blocksize(format)); if (!packed) return; - switch (pt->format) { + switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); break; @@ -1322,7 +1325,7 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ break; default: - debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(pt->format)); + debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(format)); } pipe_put_tile_raw(pt, x, y, w, h, packed, 0); @@ -1344,6 +1347,7 @@ pipe_get_tile_z(struct pipe_transfer *pt, ubyte *map; uint *pDest = z; uint i, j; + enum pipe_format format = pt->texture->format; if (pipe_clip_tile(x, y, &w, &h, pt)) return; @@ -1354,7 +1358,7 @@ pipe_get_tile_z(struct pipe_transfer *pt, return; } - switch (pt->format) { + switch (format) { case PIPE_FORMAT_Z32_UNORM: { const uint *ptrc @@ -1428,6 +1432,7 @@ pipe_put_tile_z(struct pipe_transfer *pt, const uint *ptrc = zSrc; ubyte *map; uint i, j; + enum pipe_format format = pt->texture->format; if (pipe_clip_tile(x, y, &w, &h, pt)) return; @@ -1438,7 +1443,7 @@ pipe_put_tile_z(struct pipe_transfer *pt, return; } - switch (pt->format) { + switch (format) { case PIPE_FORMAT_Z32_UNORM: { uint *pDest = (uint *) (map + y * pt->stride + x*4); diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index 1934965995..8b4c0dc3a2 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -840,7 +840,6 @@ init_buffers(struct vl_mpeg12_mc_renderer *r) template.height0 = r->pot_buffers ? util_next_power_of_two(r->picture_height) : r->picture_height; template.depth0 = 1; - pf_get_block(template.format, &template.block); template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC; r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template); diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index ac5f61e46f..bd653216c0 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -63,13 +63,11 @@ softpipe_texture_layout(struct pipe_screen *screen, pt->depth0 = depth; for (level = 0; level <= pt->last_level; level++) { - pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); - spt->stride[level] = pt->nblocksx[level]*pt->block.size; + spt->stride[level] = pf_get_stride(pt->format, width); spt->level_offset[level] = buffer_size; - buffer_size += (pt->nblocksy[level] * + buffer_size += (pf_get_nblocksy(pt->format, u_minify(height, level)) * ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * spt->stride[level]); @@ -97,9 +95,6 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, PIPE_BUFFER_USAGE_GPU_READ_WRITE); unsigned tex_usage = spt->base.tex_usage; - spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width0); - spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height0); - spt->buffer = screen->surface_buffer_create( screen, spt->base.width0, spt->base.height0, @@ -175,8 +170,6 @@ softpipe_texture_blanket(struct pipe_screen * screen, spt->base = *base; pipe_reference_init(&spt->base.reference, 1); spt->base.screen = screen; - spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width0); - spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height0); spt->stride[0] = stride[0]; pipe_buffer_reference(&spt->buffer, buffer); @@ -244,10 +237,12 @@ softpipe_get_tex_surface(struct pipe_screen *screen, ps->zslice = zslice; if (pt->target == PIPE_TEXTURE_CUBE) { - ps->offset += face * pt->nblocksy[level] * spt->stride[level]; + ps->offset += face * pf_get_nblocksy(pt->format, u_minify(pt->height0, level)) * + spt->stride[level]; } else if (pt->target == PIPE_TEXTURE_3D) { - ps->offset += zslice * pt->nblocksy[level] * spt->stride[level]; + ps->offset += zslice * pf_get_nblocksy(pt->format, u_minify(pt->height0, level)) * + spt->stride[level]; } else { assert(face == 0); @@ -302,15 +297,12 @@ softpipe_get_tex_transfer(struct pipe_screen *screen, spt = CALLOC_STRUCT(softpipe_transfer); if (spt) { struct pipe_transfer *pt = &spt->base; + int nblocksy = pf_get_nblocksy(texture->format, u_minify(texture->height0, level)); pipe_texture_reference(&pt->texture, texture); - pt->format = texture->format; - pt->block = texture->block; pt->x = x; pt->y = y; pt->width = w; pt->height = h; - pt->nblocksx = texture->nblocksx[level]; - pt->nblocksy = texture->nblocksy[level]; pt->stride = sptex->stride[level]; pt->usage = usage; pt->face = face; @@ -320,10 +312,10 @@ softpipe_get_tex_transfer(struct pipe_screen *screen, spt->offset = sptex->level_offset[level]; if (texture->target == PIPE_TEXTURE_CUBE) { - spt->offset += face * pt->nblocksy * pt->stride; + spt->offset += face * nblocksy * pt->stride; } else if (texture->target == PIPE_TEXTURE_3D) { - spt->offset += zslice * pt->nblocksy * pt->stride; + spt->offset += zslice * nblocksy * pt->stride; } else { assert(face == 0); @@ -361,9 +353,11 @@ softpipe_transfer_map( struct pipe_screen *screen, { ubyte *map, *xfer_map; struct softpipe_texture *spt; + enum pipe_format format; assert(transfer->texture); spt = softpipe_texture(transfer->texture); + format = transfer->texture->format; map = pipe_buffer_map(screen, spt->buffer, pipe_transfer_buffer_flags(transfer)); if (map == NULL) @@ -380,8 +374,8 @@ softpipe_transfer_map( struct pipe_screen *screen, } xfer_map = map + softpipe_transfer(transfer)->offset + - transfer->y / transfer->block.height * transfer->stride + - transfer->x / transfer->block.width * transfer->block.size; + transfer->y / pf_get_blockheight(format) * transfer->stride + + transfer->x / pf_get_blockwidth(format) * pf_get_blocksize(format); /*printf("map = %p xfer map = %p\n", map, xfer_map);*/ return xfer_map; } @@ -438,7 +432,6 @@ softpipe_video_surface_create(struct pipe_screen *screen, template.width0 = util_next_power_of_two(width); template.height0 = util_next_power_of_two(height); template.depth0 = 1; - pf_get_block(template.format, &template.block); template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET; sp_vsfc->tex = screen->texture_create(screen, &template); diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 65872cecc4..04f61d16c4 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -238,7 +238,7 @@ clear_tile(struct softpipe_cached_tile *tile, { uint i, j; - switch (pf_get_size(format)) { + switch (pf_get_blocksize(format)) { case 1: memset(tile->data.any, clear_value, TILE_SIZE * TILE_SIZE); break; @@ -284,8 +284,9 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc) uint x, y; uint numCleared = 0; + assert(pt->texture); /* clear the scratch tile to the clear value */ - clear_tile(&tc->tile, pt->format, tc->clear_val); + clear_tile(&tc->tile, pt->texture->format, tc->clear_val); /* push the tile to all positions marked as clear */ for (y = 0; y < h; y += TILE_SIZE) { @@ -372,6 +373,7 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc, if (addr.value != tile->addr.value) { + assert(pt->texture); if (tile->addr.bits.invalid == 0) { /* put dirty tile back in framebuffer */ if (tc->depth_stencil) { @@ -395,10 +397,10 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc, if (is_clear_flag_set(tc->clear_flags, addr)) { /* don't get tile from framebuffer, just clear it */ if (tc->depth_stencil) { - clear_tile(tile, pt->format, tc->clear_val); + clear_tile(tile, pt->texture->format, tc->clear_val); } else { - clear_tile_rgba(tile, pt->format, tc->clear_color); + clear_tile_rgba(tile, pt->texture->format, tc->clear_color); } clear_clear_flag(tc->clear_flags, addr); } diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index af23080920..e6bba777d3 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -422,10 +422,11 @@ static INLINE uint pf_get_component_bits( enum pipe_format format, uint comp ) return size << (pf_mixed_scale8( format ) * 3); } + /** - * Return total bits needed for the pixel format. + * Return total bits needed for the pixel format per block. */ -static INLINE uint pf_get_bits( enum pipe_format format ) +static INLINE uint pf_get_blocksizebits( enum pipe_format format ) { switch (pf_layout(format)) { case PIPE_FORMAT_LAYOUT_RGBAZS: @@ -441,8 +442,24 @@ static INLINE uint pf_get_bits( enum pipe_format format ) pf_get_component_bits( format, PIPE_FORMAT_COMP_S ); case PIPE_FORMAT_LAYOUT_YCBCR: assert( format == PIPE_FORMAT_YCBCR || format == PIPE_FORMAT_YCBCR_REV ); - /* return effective bits per pixel */ - return 16; + return 32; + case PIPE_FORMAT_LAYOUT_DXT: + switch(format) { + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_SRGBA: + case PIPE_FORMAT_DXT1_SRGB: + return 64; + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_DXT3_SRGBA: + case PIPE_FORMAT_DXT5_SRGBA: + return 128; + default: + assert( 0 ); + return 0; + } + default: assert( 0 ); return 0; @@ -450,102 +467,66 @@ static INLINE uint pf_get_bits( enum pipe_format format ) } /** - * Return bytes per pixel for the given format. + * Return bytes per element for the given format. */ -static INLINE uint pf_get_size( enum pipe_format format ) +static INLINE uint pf_get_blocksize( enum pipe_format format ) { - assert(pf_get_bits(format) % 8 == 0); - return pf_get_bits(format) / 8; + assert(pf_get_blocksizebits(format) % 8 == 0); + return pf_get_blocksizebits(format) / 8; } -/** - * Describe accurately the pixel format. - * - * The chars-per-pixel concept falls apart with compressed and yuv images, where - * more than one pixel are coded in a single data block. This structure - * describes that block. - * - * Simple pixel formats are effectively a 1x1xcpp block. - */ -struct pipe_format_block +static INLINE uint pf_get_blockwidth( enum pipe_format format ) { - /** Block size in bytes */ - unsigned size; - - /** Block width in pixels */ - unsigned width; - - /** Block height in pixels */ - unsigned height; -}; + switch (pf_layout(format)) { + case PIPE_FORMAT_LAYOUT_YCBCR: + return 2; + case PIPE_FORMAT_LAYOUT_DXT: + return 4; + default: + return 1; + } +} -/** - * Describe pixel format's block. - * - * @sa http://msdn2.microsoft.com/en-us/library/ms796147.aspx - */ -static INLINE void -pf_get_block(enum pipe_format format, struct pipe_format_block *block) +static INLINE uint pf_get_blockheight( enum pipe_format format ) { - switch(format) { - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_SRGBA: - case PIPE_FORMAT_DXT1_SRGB: - block->size = 8; - block->width = 4; - block->height = 4; - break; - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - case PIPE_FORMAT_DXT3_SRGBA: - case PIPE_FORMAT_DXT5_SRGBA: - block->size = 16; - block->width = 4; - block->height = 4; - break; - case PIPE_FORMAT_YCBCR: - case PIPE_FORMAT_YCBCR_REV: - block->size = 4; /* 2*cpp */ - block->width = 2; - block->height = 1; - break; + switch (pf_layout(format)) { + case PIPE_FORMAT_LAYOUT_DXT: + return 4; default: - block->size = pf_get_size(format); - block->width = 1; - block->height = 1; - break; + return 1; } } static INLINE unsigned -pf_get_nblocksx(const struct pipe_format_block *block, unsigned x) +pf_get_nblocksx(enum pipe_format format, unsigned x) { - return (x + block->width - 1)/block->width; + unsigned blockwidth = pf_get_blockwidth(format); + return (x + blockwidth - 1) / blockwidth; } static INLINE unsigned -pf_get_nblocksy(const struct pipe_format_block *block, unsigned y) +pf_get_nblocksy(enum pipe_format format, unsigned y) { - return (y + block->height - 1)/block->height; + unsigned blockheight = pf_get_blockheight(format); + return (y + blockheight - 1) / blockheight; } static INLINE unsigned -pf_get_nblocks(const struct pipe_format_block *block, unsigned width, unsigned height) +pf_get_nblocks(enum pipe_format format, unsigned width, unsigned height) { - return pf_get_nblocksx(block, width)*pf_get_nblocksy(block, height); + return pf_get_nblocksx(format, width) * pf_get_nblocksy(format, height); } static INLINE size_t -pf_get_stride(const struct pipe_format_block *block, unsigned width) +pf_get_stride(enum pipe_format format, unsigned width) { - return pf_get_nblocksx(block, width)*block->size; + return pf_get_nblocksx(format, width) * pf_get_blocksize(format); } static INLINE size_t -pf_get_2d_size(const struct pipe_format_block *block, size_t stride, unsigned height) +pf_get_2d_size(enum pipe_format format, size_t stride, unsigned height) { - return pf_get_nblocksy(block, height)*stride; + return pf_get_nblocksy(format, height) * stride; } static INLINE boolean diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 9766e86620..db83c8e157 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -315,14 +315,10 @@ struct pipe_surface */ struct pipe_transfer { - enum pipe_format format; /**< PIPE_FORMAT_x */ unsigned x; /**< x offset from start of texture image */ unsigned y; /**< y offset from start of texture image */ unsigned width; /**< logical width in pixels */ unsigned height; /**< logical height in pixels */ - struct pipe_format_block block; - unsigned nblocksx; /**< allocated width in blocks */ - unsigned nblocksy; /**< allocated height in blocks */ unsigned stride; /**< stride in bytes between rows of blocks */ enum pipe_transfer_usage usage; /**< PIPE_TRANSFER_* */ @@ -347,10 +343,6 @@ struct pipe_texture unsigned height0; unsigned depth0; - struct pipe_format_block block; - unsigned nblocksx[PIPE_MAX_TEXTURE_LEVELS]; /**< allocated width in blocks */ - unsigned nblocksy[PIPE_MAX_TEXTURE_LEVELS]; /**< allocated height in blocks */ - unsigned last_level:8; /**< Index of last mipmap level present/defined */ unsigned nr_samples:8; /**< for multisampled surfaces, nr of samples */ diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index a68a29e126..a15043da78 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -701,7 +701,7 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y, } /* now pack the stencil (and Z) values in the dest format */ - switch (pt->format) { + switch (pt->texture->format) { case PIPE_FORMAT_S8_UNORM: { ubyte *dest = stmap + spanY * pt->stride + spanX; @@ -856,8 +856,8 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy, usage, dstx, dsty, width, height); - assert(ptDraw->block.width == 1); - assert(ptDraw->block.height == 1); + assert(pf_get_blockwidth(ptDraw->texture->format) == 1); + assert(pf_get_blockheight(ptDraw->texture->format) == 1); /* map the stencil buffer */ drawMap = screen->transfer_map(screen, ptDraw); @@ -878,7 +878,7 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy, dst = drawMap + y * ptDraw->stride; src = buffer + i * width; - switch (ptDraw->format) { + switch (ptDraw->texture->format) { case PIPE_FORMAT_S8Z24_UNORM: { uint *dst4 = (uint *) dst; diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 659a6c9193..ead8e22888 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -98,16 +98,14 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, strb->defined = GL_FALSE; /* undefined contents now */ if(strb->software) { - struct pipe_format_block block; size_t size; _mesa_free(strb->data); assert(strb->format != PIPE_FORMAT_NONE); - pf_get_block(strb->format, &block); - strb->stride = pf_get_stride(&block, width); - size = pf_get_2d_size(&block, strb->stride, height); + strb->stride = pf_get_stride(strb->format, width); + size = pf_get_2d_size(strb->format, strb->stride, height); strb->data = _mesa_malloc(size); @@ -127,7 +125,6 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, memset(&template, 0, sizeof(template)); template.target = PIPE_TEXTURE_2D; template.format = format; - pf_get_block(format, &template.block); template.width0 = width; template.height0 = height; template.depth0 = 1; diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 103861d6f9..6fa7bb64f2 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -103,7 +103,7 @@ st_read_stencil_pixels(GLcontext *ctx, GLint x, GLint y, } /* get stencil (and Z) values */ - switch (pt->format) { + switch (pt->texture->format) { case PIPE_FORMAT_S8_UNORM: { const ubyte *src = stmap + srcY * pt->stride; @@ -431,8 +431,8 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, const GLint dstStride = _mesa_image_row_stride(&clippedPacking, width, format, type); - if (trans->format == PIPE_FORMAT_S8Z24_UNORM || - trans->format == PIPE_FORMAT_X8Z24_UNORM) { + if (trans->texture->format == PIPE_FORMAT_S8Z24_UNORM || + trans->texture->format == PIPE_FORMAT_X8Z24_UNORM) { if (format == GL_DEPTH_COMPONENT) { for (i = 0; i < height; i++) { GLuint ztemp[MAX_WIDTH]; @@ -463,8 +463,8 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, } } } - else if (trans->format == PIPE_FORMAT_Z24S8_UNORM || - trans->format == PIPE_FORMAT_Z24X8_UNORM) { + else if (trans->texture->format == PIPE_FORMAT_Z24S8_UNORM || + trans->texture->format == PIPE_FORMAT_Z24X8_UNORM) { if (format == GL_DEPTH_COMPONENT) { for (i = 0; i < height; i++) { GLuint ztemp[MAX_WIDTH]; @@ -490,7 +490,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, } } } - else if (trans->format == PIPE_FORMAT_Z16_UNORM) { + else if (trans->texture->format == PIPE_FORMAT_Z16_UNORM) { for (i = 0; i < height; i++) { GLushort ztemp[MAX_WIDTH]; GLfloat zfloat[MAX_WIDTH]; @@ -505,7 +505,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, dst += dstStride; } } - else if (trans->format == PIPE_FORMAT_Z32_UNORM) { + else if (trans->texture->format == PIPE_FORMAT_Z32_UNORM) { for (i = 0; i < height; i++) { GLuint ztemp[MAX_WIDTH]; GLfloat zfloat[MAX_WIDTH]; diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 3a2337802f..6d136f5abf 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -405,7 +405,6 @@ compress_with_blit(GLcontext * ctx, memset(&templ, 0, sizeof(templ)); templ.target = PIPE_TEXTURE_2D; templ.format = st_mesa_format_to_pipe_format(mesa_format); - pf_get_block(templ.format, &templ.block); templ.width0 = width; templ.height0 = height; templ.depth0 = 1; @@ -833,7 +832,7 @@ decompress_with_blit(GLcontext * ctx, GLenum target, GLint level, /* copy/pack data into user buffer */ if (st_equal_formats(stImage->pt->format, format, type)) { /* memcpy */ - const uint bytesPerRow = width * pf_get_size(stImage->pt->format); + const uint bytesPerRow = width * pf_get_blocksize(stImage->pt->format); ubyte *map = screen->transfer_map(screen, tex_xfer); GLuint row; for (row = 0; row < height; row++) { @@ -915,7 +914,7 @@ st_get_tex_image(GLcontext * ctx, GLenum target, GLint level, PIPE_TRANSFER_READ, 0, 0, stImage->base.Width, stImage->base.Height); - texImage->RowStride = stImage->transfer->stride / stImage->pt->block.size; + texImage->RowStride = stImage->transfer->stride / pf_get_blocksize(stImage->pt->format); } else { /* Otherwise, the image should actually be stored in @@ -1163,10 +1162,10 @@ st_CompressedTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, struct gl_texture_image *texImage) { struct st_texture_image *stImage = st_texture_image(texImage); - struct pipe_format_block block; int srcBlockStride; int dstBlockStride; int y; + enum pipe_format pformat= stImage->pt->format; if (stImage->pt) { unsigned face = _mesa_tex_target_to_face(target); @@ -1178,8 +1177,7 @@ st_CompressedTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, xoffset, yoffset, width, height); - block = stImage->pt->block; - srcBlockStride = pf_get_stride(&block, width); + srcBlockStride = pf_get_stride(pformat, width); dstBlockStride = stImage->transfer->stride; } else { assert(stImage->pt); @@ -1193,16 +1191,16 @@ st_CompressedTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, return; } - assert(xoffset % block.width == 0); - assert(yoffset % block.height == 0); - assert(width % block.width == 0); - assert(height % block.height == 0); + assert(xoffset % pf_get_blockwidth(pformat) == 0); + assert(yoffset % pf_get_blockheight(pformat) == 0); + assert(width % pf_get_blockwidth(pformat) == 0); + assert(height % pf_get_blockheight(pformat) == 0); - for (y = 0; y < height; y += block.height) { + for (y = 0; y < height; y += pf_get_blockheight(pformat)) { /* don't need to adjust for xoffset and yoffset as st_texture_image_map does that */ - const char *src = (const char*)data + srcBlockStride * pf_get_nblocksy(&block, y); - char *dst = (char*)texImage->Data + dstBlockStride * pf_get_nblocksy(&block, y); - memcpy(dst, src, pf_get_stride(&block, width)); + const char *src = (const char*)data + srcBlockStride * pf_get_nblocksy(pformat, y); + char *dst = (char*)texImage->Data + dstBlockStride * pf_get_nblocksy(pformat, y); + memcpy(dst, src, pf_get_stride(pformat, width)); } if (stImage->pt) { @@ -1692,10 +1690,10 @@ copy_image_data_to_texture(struct st_context *st, dstLevel, stImage->base.Data, stImage->base.RowStride * - stObj->pt->block.size, + pf_get_blocksize(stObj->pt->format), stImage->base.RowStride * stImage->base.Height * - stObj->pt->block.size); + pf_get_blocksize(stObj->pt->format)); _mesa_align_free(stImage->base.Data); stImage->base.Data = NULL; } @@ -1763,8 +1761,7 @@ st_finalize_texture(GLcontext *ctx, stObj->pt->last_level < stObj->lastLevel || stObj->pt->width0 != firstImage->base.Width2 || stObj->pt->height0 != firstImage->base.Height2 || - stObj->pt->depth0 != firstImage->base.Depth2 || - stObj->pt->block.size != blockSize) + stObj->pt->depth0 != firstImage->base.Depth2) { pipe_texture_reference(&stObj->pt, NULL); ctx->st->dirty.st |= ST_NEW_FRAMEBUFFER; diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index f8068fa12b..7700551830 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -146,8 +146,8 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, srcData = (ubyte *) screen->transfer_map(screen, srcTrans); dstData = (ubyte *) screen->transfer_map(screen, dstTrans); - srcStride = srcTrans->stride / srcTrans->block.size; - dstStride = dstTrans->stride / dstTrans->block.size; + srcStride = srcTrans->stride / pf_get_blocksize(srcTrans->texture->format); + dstStride = dstTrans->stride / pf_get_blocksize(dstTrans->texture->format); _mesa_generate_mipmap_level(target, datatype, comps, 0 /*border*/, diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index dbccee86c1..3035d78b61 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -104,7 +104,6 @@ st_texture_create(struct st_context *st, pt.width0 = width0; pt.height0 = height0; pt.depth0 = depth0; - pf_get_block(format, &pt.block); pt.tex_usage = usage; newtex = screen->texture_create(screen, &pt); @@ -242,8 +241,9 @@ st_surface_data(struct pipe_context *pipe, struct pipe_screen *screen = pipe->screen; void *map = screen->transfer_map(screen, dst); + assert(dst->texture); util_copy_rect(map, - &dst->block, + dst->texture->format, dst->stride, dstx, dsty, width, height, -- cgit v1.2.3 From decf6ed810eae473d043a4a399a5a84f1378a725 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Mon, 30 Nov 2009 23:02:49 +0100 Subject: fixups for interface changes (mostly state trackers) --- src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 4 +-- src/gallium/drivers/trace/tr_dump_state.c | 24 --------------- src/gallium/drivers/trace/tr_dump_state.h | 3 -- src/gallium/drivers/trace/tr_rbug.c | 13 +++++--- src/gallium/drivers/trace/tr_screen.c | 3 +- src/gallium/state_trackers/dri/dri_drawable.c | 1 - src/gallium/state_trackers/egl/egl_surface.c | 1 - src/gallium/state_trackers/python/gallium.i | 1 - src/gallium/state_trackers/python/p_device.i | 1 - src/gallium/state_trackers/python/p_format.i | 8 ----- src/gallium/state_trackers/python/p_texture.i | 32 ++++---------------- .../state_trackers/python/retrace/interpreter.py | 3 +- src/gallium/state_trackers/python/st_device.c | 3 -- src/gallium/state_trackers/python/st_sample.c | 35 ++++++++++++---------- src/gallium/state_trackers/python/st_sample.h | 1 - .../state_trackers/python/st_softpipe_winsys.c | 19 ++---------- .../state_trackers/python/tests/surface_copy.py | 7 +++-- .../python/tests/texture_transfer.py | 5 ++-- src/gallium/state_trackers/vega/api_filters.c | 1 - src/gallium/state_trackers/vega/image.c | 1 - src/gallium/state_trackers/vega/mask.c | 1 - src/gallium/state_trackers/vega/paint.c | 1 - src/gallium/state_trackers/vega/renderer.c | 1 - src/gallium/state_trackers/vega/vg_tracker.c | 1 - src/gallium/state_trackers/xorg/xorg_crtc.c | 3 +- src/gallium/state_trackers/xorg/xorg_dri2.c | 1 - src/gallium/state_trackers/xorg/xorg_exa.c | 6 ++-- src/gallium/state_trackers/xorg/xorg_renderer.c | 1 - src/gallium/state_trackers/xorg/xorg_xv.c | 1 - .../winsys/drm/nouveau/drm/nouveau_drm_api.c | 3 +- src/gallium/winsys/drm/radeon/core/radeon_buffer.c | 17 ++++------- src/gallium/winsys/egl_xlib/sw_winsys.c | 19 ++---------- src/gallium/winsys/g3dvl/xlib/xsp_winsys.c | 9 ++---- src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c | 17 ++--------- src/gallium/winsys/gdi/gdi_softpipe_winsys.c | 23 ++++---------- src/gallium/winsys/xlib/xlib_cell.c | 20 +++---------- src/gallium/winsys/xlib/xlib_llvmpipe.c | 15 ++++------ src/gallium/winsys/xlib/xlib_softpipe.c | 15 ++++------ 38 files changed, 87 insertions(+), 233 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index 8b4c0dc3a2..bffc018848 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -1449,7 +1449,7 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby, assert(r); assert(blocks); - tex_pitch = r->tex_transfer[0]->stride / r->tex_transfer[0]->block.size; + tex_pitch = r->tex_transfer[0]->stride / pf_get_blocksize(r->tex_transfer[0]->texture->format); texels = r->texels[0] + mbpy * tex_pitch + mbpx; for (y = 0; y < 2; ++y) { @@ -1488,7 +1488,7 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby, mbpy /= 2; for (tb = 0; tb < 2; ++tb) { - tex_pitch = r->tex_transfer[tb + 1]->stride / r->tex_transfer[tb + 1]->block.size; + tex_pitch = r->tex_transfer[tb + 1]->stride / pf_get_blocksize(r->tex_transfer[tb + 1]->texture->format); texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx; if ((cbp >> (1 - tb)) & 1) { diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 6d58209294..0102cc1876 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -43,19 +43,6 @@ void trace_dump_format(enum pipe_format format) } -void trace_dump_block(const struct pipe_format_block *block) -{ - if (!trace_dumping_enabled_locked()) - return; - - trace_dump_struct_begin("pipe_format_block"); - trace_dump_member(uint, block, size); - trace_dump_member(uint, block, width); - trace_dump_member(uint, block, height); - trace_dump_struct_end(); -} - - static void trace_dump_reference(const struct pipe_reference *reference) { if (!trace_dumping_enabled_locked()) @@ -94,10 +81,6 @@ void trace_dump_template(const struct pipe_texture *templat) trace_dump_uint(templat->depth0); trace_dump_member_end(); - trace_dump_member_begin("block"); - trace_dump_block(&templat->block); - trace_dump_member_end(); - trace_dump_member(uint, templat, last_level); trace_dump_member(uint, templat, tex_usage); @@ -483,16 +466,9 @@ void trace_dump_transfer(const struct pipe_transfer *state) trace_dump_struct_begin("pipe_transfer"); - trace_dump_member(format, state, format); trace_dump_member(uint, state, width); trace_dump_member(uint, state, height); - trace_dump_member_begin("block"); - trace_dump_block(&state->block); - trace_dump_member_end(); - - trace_dump_member(uint, state, nblocksx); - trace_dump_member(uint, state, nblocksy); trace_dump_member(uint, state, stride); trace_dump_member(uint, state, usage); diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h index 05b821adb6..07ad6fbb20 100644 --- a/src/gallium/drivers/trace/tr_dump_state.h +++ b/src/gallium/drivers/trace/tr_dump_state.h @@ -35,11 +35,8 @@ void trace_dump_format(enum pipe_format format); -void trace_dump_block(const struct pipe_format_block *block); - void trace_dump_template(const struct pipe_texture *templat); - void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state); void trace_dump_poly_stipple(const struct pipe_poly_stipple *state); diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c index b59458c0e3..af1d7f3224 100644 --- a/src/gallium/drivers/trace/tr_rbug.c +++ b/src/gallium/drivers/trace/tr_rbug.c @@ -203,7 +203,9 @@ trace_rbug_texture_info(struct trace_rbug *tr_rbug, struct rbug_header *header, &t->width0, 1, &t->height0, 1, &t->depth0, 1, - t->block.width, t->block.height, t->block.size, + pf_get_blockwidth(t->format), + pf_get_blockheight(t->format), + pf_get_blocksize(t->format), t->last_level, t->nr_samples, t->tex_usage, @@ -251,9 +253,12 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header, map = screen->transfer_map(screen, t); rbug_send_texture_read_reply(tr_rbug->con, serial, - t->format, - t->block.width, t->block.height, t->block.size, - (uint8_t*)map, t->stride * t->nblocksy, + t->texture->format, + pf_get_blockwidth(t->texture->format), + pf_get_blockheight(t->texture->format), + pf_get_blocksize(t->texture->format), + (uint8_t*)map, + t->stride * pf_get_nblocksy(t->texture->format, t->height), t->stride, NULL); diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index 7da9bd3866..f69f7da000 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -35,6 +35,7 @@ #include "tr_screen.h" #include "pipe/p_inlines.h" +#include "pipe/p_format.h" static boolean trace = FALSE; @@ -424,7 +425,7 @@ trace_screen_transfer_unmap(struct pipe_screen *_screen, struct pipe_transfer *transfer = tr_trans->transfer; if(tr_trans->map) { - size_t size = transfer->nblocksy * transfer->stride; + size_t size = pf_get_nblocksy(transfer->texture->format, transfer->width) * transfer->stride; trace_dump_call_begin("pipe_screen", "transfer_write"); diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c index 45a6059ea8..099cf1e064 100644 --- a/src/gallium/state_trackers/dri/dri_drawable.c +++ b/src/gallium/state_trackers/dri/dri_drawable.c @@ -66,7 +66,6 @@ dri_surface_from_handle(struct drm_api *api, templat.format = format; templat.width0 = width; templat.height0 = height; - pf_get_block(templat.format, &templat.block); texture = api->texture_from_shared_handle(api, screen, &templat, "dri2 buffer", pitch, handle); diff --git a/src/gallium/state_trackers/egl/egl_surface.c b/src/gallium/state_trackers/egl/egl_surface.c index ddd9b04cd4..737bdfdd34 100644 --- a/src/gallium/state_trackers/egl/egl_surface.c +++ b/src/gallium/state_trackers/egl/egl_surface.c @@ -118,7 +118,6 @@ drm_create_texture(_EGLDisplay *dpy, templat.format = PIPE_FORMAT_A8R8G8B8_UNORM; templat.width0 = w; templat.height0 = h; - pf_get_block(templat.format, &templat.block); texture = screen->texture_create(dev->screen, &templat); diff --git a/src/gallium/state_trackers/python/gallium.i b/src/gallium/state_trackers/python/gallium.i index 3f79cc1a3d..8e323f4896 100644 --- a/src/gallium/state_trackers/python/gallium.i +++ b/src/gallium/state_trackers/python/gallium.i @@ -80,7 +80,6 @@ %rename(Stencil) pipe_stencil_state; %rename(Alpha) pipe_alpha_state; %rename(DepthStencilAlpha) pipe_depth_stencil_alpha_state; -%rename(FormatBlock) pipe_format_block; %rename(Framebuffer) pipe_framebuffer_state; %rename(PolyStipple) pipe_poly_stipple; %rename(Rasterizer) pipe_rasterizer_state; diff --git a/src/gallium/state_trackers/python/p_device.i b/src/gallium/state_trackers/python/p_device.i index a83bcc71a1..2dc995adb0 100644 --- a/src/gallium/state_trackers/python/p_device.i +++ b/src/gallium/state_trackers/python/p_device.i @@ -112,7 +112,6 @@ struct st_device { struct pipe_texture templat; memset(&templat, 0, sizeof(templat)); templat.format = format; - pf_get_block(templat.format, &templat.block); templat.width0 = width; templat.height0 = height; templat.depth0 = depth; diff --git a/src/gallium/state_trackers/python/p_format.i b/src/gallium/state_trackers/python/p_format.i index 26fb12b387..68df009331 100644 --- a/src/gallium/state_trackers/python/p_format.i +++ b/src/gallium/state_trackers/python/p_format.i @@ -152,11 +152,3 @@ enum pipe_format { PIPE_FORMAT_DXT5_SRGBA, }; - -struct pipe_format_block -{ - unsigned size; - unsigned width; - unsigned height; -}; - diff --git a/src/gallium/state_trackers/python/p_texture.i b/src/gallium/state_trackers/python/p_texture.i index 5416b872f5..1de7f86a3c 100644 --- a/src/gallium/state_trackers/python/p_texture.i +++ b/src/gallium/state_trackers/python/p_texture.i @@ -69,15 +69,7 @@ unsigned get_depth(unsigned level=0) { return u_minify($self->depth0, level); } - - unsigned get_nblocksx(unsigned level=0) { - return $self->nblocksx[level]; - } - - unsigned get_nblocksy(unsigned level=0) { - return $self->nblocksy[level]; - } - + /** Get a surface which is a "view" into a texture */ struct st_surface * get_surface(unsigned face=0, unsigned level=0, unsigned zslice=0) @@ -126,8 +118,6 @@ struct st_surface unsigned format; unsigned width; unsigned height; - unsigned nblocksx; - unsigned nblocksy; ~st_surface() { pipe_texture_reference(&$self->texture, NULL); @@ -142,8 +132,8 @@ struct st_surface struct pipe_transfer *transfer; unsigned stride; - stride = pf_get_nblocksx(&texture->block, w) * texture->block.size; - *LENGTH = pf_get_nblocksy(&texture->block, h) * stride; + stride = pf_get_stride(texture->format, w); + *LENGTH = pf_get_nblocksy(texture->format, h) * stride; *STRING = (char *) malloc(*LENGTH); if(!*STRING) return; @@ -169,9 +159,9 @@ struct st_surface struct pipe_transfer *transfer; if(stride == 0) - stride = pf_get_nblocksx(&texture->block, w) * texture->block.size; + stride = pf_get_stride(texture->format, w); - if(LENGTH < pf_get_nblocksy(&texture->block, h) * stride) + if(LENGTH < pf_get_nblocksy(texture->format, h) * stride) SWIG_exception(SWIG_ValueError, "offset must be smaller than buffer size"); transfer = screen->get_tex_transfer(screen, @@ -383,18 +373,6 @@ struct st_surface { return u_minify(surface->texture->height0, surface->level); } - - static unsigned - st_surface_nblocksx_get(struct st_surface *surface) - { - return surface->texture->nblocksx[surface->level]; - } - - static unsigned - st_surface_nblocksy_get(struct st_surface *surface) - { - return surface->texture->nblocksy[surface->level]; - } %} /* Avoid naming conflict with p_inlines.h's pipe_buffer_read/write */ diff --git a/src/gallium/state_trackers/python/retrace/interpreter.py b/src/gallium/state_trackers/python/retrace/interpreter.py index d0bcb690a9..3251046c79 100755 --- a/src/gallium/state_trackers/python/retrace/interpreter.py +++ b/src/gallium/state_trackers/python/retrace/interpreter.py @@ -99,7 +99,6 @@ struct_factories = { "pipe_stencil_state": gallium.Stencil, "pipe_alpha_state": gallium.Alpha, "pipe_depth_stencil_alpha_state": gallium.DepthStencilAlpha, - "pipe_format_block": gallium.FormatBlock, #"pipe_framebuffer_state": gallium.Framebuffer, "pipe_poly_stipple": gallium.PolyStipple, "pipe_rasterizer_state": gallium.Rasterizer, @@ -307,7 +306,7 @@ class Screen(Object): def surface_write(self, surface, data, stride, size): if surface is None: return - assert surface.nblocksy * stride == size +# assert surface.nblocksy * stride == size surface.put_tile_raw(0, 0, surface.width, surface.height, data, stride) def get_tex_transfer(self, texture, face, level, zslice, usage, x, y, w, h): diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c index a791113aba..2966b24cdc 100644 --- a/src/gallium/state_trackers/python/st_device.c +++ b/src/gallium/state_trackers/python/st_device.c @@ -249,9 +249,6 @@ st_context_create(struct st_device *st_dev) memset( &templat, 0, sizeof( templat ) ); templat.target = PIPE_TEXTURE_2D; templat.format = PIPE_FORMAT_A8R8G8B8_UNORM; - templat.block.size = 4; - templat.block.width = 1; - templat.block.height = 1; templat.width0 = 1; templat.height0 = 1; templat.depth0 = 1; diff --git a/src/gallium/state_trackers/python/st_sample.c b/src/gallium/state_trackers/python/st_sample.c index 6fee90afda..97ca2afc54 100644 --- a/src/gallium/state_trackers/python/st_sample.c +++ b/src/gallium/state_trackers/python/st_sample.c @@ -423,7 +423,6 @@ dxt5_rgba_data[] = { static INLINE void st_sample_dxt_pixel_block(enum pipe_format format, - const struct pipe_format_block *block, uint8_t *raw, float *rgba, unsigned rgba_stride, unsigned w, unsigned h) @@ -462,21 +461,21 @@ st_sample_dxt_pixel_block(enum pipe_format format, for(ch = 0; ch < 4; ++ch) rgba[y*rgba_stride + x*4 + ch] = (float)(data[i].rgba[y*4*4 + x*4 + ch])/255.0f; - memcpy(raw, data[i].raw, block->size); + memcpy(raw, data[i].raw, pf_get_blocksize(format)); } static INLINE void st_sample_generic_pixel_block(enum pipe_format format, - const struct pipe_format_block *block, uint8_t *raw, float *rgba, unsigned rgba_stride, unsigned w, unsigned h) { unsigned i; unsigned x, y, ch; + int blocksize = pf_get_blocksize(format); - for(i = 0; i < block->size; ++i) + for(i = 0; i < blocksize; ++i) raw[i] = (uint8_t)st_random(); @@ -503,7 +502,6 @@ st_sample_generic_pixel_block(enum pipe_format format, */ void st_sample_pixel_block(enum pipe_format format, - const struct pipe_format_block *block, void *raw, float *rgba, unsigned rgba_stride, unsigned w, unsigned h) @@ -513,11 +511,11 @@ st_sample_pixel_block(enum pipe_format format, case PIPE_FORMAT_DXT1_RGBA: case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: - st_sample_dxt_pixel_block(format, block, raw, rgba, rgba_stride, w, h); + st_sample_dxt_pixel_block(format, raw, rgba, rgba_stride, w, h); break; default: - st_sample_generic_pixel_block(format, block, raw, rgba, rgba_stride, w, h); + st_sample_generic_pixel_block(format, raw, rgba, rgba_stride, w, h); break; } } @@ -548,18 +546,23 @@ st_sample_surface(struct st_surface *surface, float *rgba) raw = screen->transfer_map(screen, transfer); if (raw) { - const struct pipe_format_block *block = &texture->block; + enum pipe_format format = texture->format; uint x, y; + int nblocksx = pf_get_nblocksx(format, width); + int nblocksy = pf_get_nblocksy(format, height); + int blockwidth = pf_get_blockwidth(format); + int blockheight = pf_get_blockheight(format); + int blocksize = pf_get_blocksize(format); - for (y = 0; y < transfer->nblocksy; ++y) { - for (x = 0; x < transfer->nblocksx; ++x) { - st_sample_pixel_block(texture->format, - block, - (uint8_t *) raw + y * transfer->stride + x * block->size, - rgba + y * block->height * rgba_stride + x * block->width * 4, + + for (y = 0; y < nblocksy; ++y) { + for (x = 0; x < nblocksx; ++x) { + st_sample_pixel_block(format, + (uint8_t *) raw + y * transfer->stride + x * blocksize, + rgba + y * blockheight * rgba_stride + x * blockwidth * 4, rgba_stride, - MIN2(block->width, width - x*block->width), - MIN2(block->height, height - y*block->height)); + MIN2(blockwidth, width - x*blockwidth), + MIN2(blockheight, height - y*blockheight)); } } diff --git a/src/gallium/state_trackers/python/st_sample.h b/src/gallium/state_trackers/python/st_sample.h index 0a27083549..888114d302 100644 --- a/src/gallium/state_trackers/python/st_sample.h +++ b/src/gallium/state_trackers/python/st_sample.h @@ -35,7 +35,6 @@ void st_sample_pixel_block(enum pipe_format format, - const struct pipe_format_block *block, void *raw, float *rgba, unsigned rgba_stride, unsigned w, unsigned h); diff --git a/src/gallium/state_trackers/python/st_softpipe_winsys.c b/src/gallium/state_trackers/python/st_softpipe_winsys.c index f0abd12e3d..43c61af1ff 100644 --- a/src/gallium/state_trackers/python/st_softpipe_winsys.c +++ b/src/gallium/state_trackers/python/st_softpipe_winsys.c @@ -157,16 +157,6 @@ st_softpipe_user_buffer_create(struct pipe_winsys *winsys, } -/** - * Round n up to next multiple. - */ -static INLINE unsigned -round_up(unsigned n, unsigned multiple) -{ - return (n + multiple - 1) & ~(multiple - 1); -} - - static struct pipe_buffer * st_softpipe_surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, @@ -176,13 +166,10 @@ st_softpipe_surface_buffer_create(struct pipe_winsys *winsys, unsigned *stride) { const unsigned alignment = 64; - struct pipe_format_block block; - unsigned nblocksx, nblocksy; + unsigned nblocksy; - pf_get_block(format, &block); - nblocksx = pf_get_nblocksx(&block, width); - nblocksy = pf_get_nblocksy(&block, height); - *stride = round_up(nblocksx * block.size, alignment); + nblocksy = pf_get_nblocksy(format, height); + *stride = align(pf_get_stride(format, width), alignment); return winsys->buffer_create(winsys, alignment, usage, diff --git a/src/gallium/state_trackers/python/tests/surface_copy.py b/src/gallium/state_trackers/python/tests/surface_copy.py index 3ceecbbd3a..df5babb78a 100755 --- a/src/gallium/state_trackers/python/tests/surface_copy.py +++ b/src/gallium/state_trackers/python/tests/surface_copy.py @@ -98,9 +98,10 @@ class TextureTest(TestCase): y = 0 w = dst_surface.width h = dst_surface.height - - stride = dst_surface.nblocksx * dst_texture.block.size - size = dst_surface.nblocksy * stride + + # ??? + stride = pf_get_stride(texture->format, w) + size = pf_get_nblocksy(texture->format) * stride src_raw = os.urandom(size) src_surface.put_tile_raw(0, 0, w, h, src_raw, stride) diff --git a/src/gallium/state_trackers/python/tests/texture_transfer.py b/src/gallium/state_trackers/python/tests/texture_transfer.py index e65b425adf..35daca9e49 100755 --- a/src/gallium/state_trackers/python/tests/texture_transfer.py +++ b/src/gallium/state_trackers/python/tests/texture_transfer.py @@ -86,8 +86,9 @@ class TextureTest(TestCase): surface = texture.get_surface(face, level, zslice) - stride = surface.nblocksx * texture.block.size - size = surface.nblocksy * stride + # ??? + stride = pf_get_stride(texture->format, w) + size = pf_get_nblocksy(texture->format) * stride in_raw = os.urandom(size) diff --git a/src/gallium/state_trackers/vega/api_filters.c b/src/gallium/state_trackers/vega/api_filters.c index faf396d087..eb135c1ff4 100644 --- a/src/gallium/state_trackers/vega/api_filters.c +++ b/src/gallium/state_trackers/vega/api_filters.c @@ -71,7 +71,6 @@ static INLINE struct pipe_texture *create_texture_1d(struct vg_context *ctx, templ.width0 = color_data_len; templ.height0 = 1; templ.depth0 = 1; - pf_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &templ.block); templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER; tex = screen->texture_create(screen, &templ); diff --git a/src/gallium/state_trackers/vega/image.c b/src/gallium/state_trackers/vega/image.c index 4684a5727d..172311851e 100644 --- a/src/gallium/state_trackers/vega/image.c +++ b/src/gallium/state_trackers/vega/image.c @@ -270,7 +270,6 @@ struct vg_image * image_create(VGImageFormat format, memset(&pt, 0, sizeof(pt)); pt.target = PIPE_TEXTURE_2D; pt.format = pformat; - pf_get_block(pformat, &pt.block); pt.last_level = 0; pt.width0 = width; pt.height0 = height; diff --git a/src/gallium/state_trackers/vega/mask.c b/src/gallium/state_trackers/vega/mask.c index b84103fdba..868c28239a 100644 --- a/src/gallium/state_trackers/vega/mask.c +++ b/src/gallium/state_trackers/vega/mask.c @@ -491,7 +491,6 @@ struct vg_mask_layer * mask_layer_create(VGint width, VGint height) memset(&pt, 0, sizeof(pt)); pt.target = PIPE_TEXTURE_2D; pt.format = PIPE_FORMAT_A8R8G8B8_UNORM; - pf_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &pt.block); pt.last_level = 0; pt.width0 = width; pt.height0 = height; diff --git a/src/gallium/state_trackers/vega/paint.c b/src/gallium/state_trackers/vega/paint.c index e8ca7d9e89..785c982943 100644 --- a/src/gallium/state_trackers/vega/paint.c +++ b/src/gallium/state_trackers/vega/paint.c @@ -154,7 +154,6 @@ static INLINE struct pipe_texture *create_gradient_texture(struct vg_paint *p) templ.width0 = 1024; templ.height0 = 1; templ.depth0 = 1; - pf_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &templ.block); templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER; tex = screen->texture_create(screen, &templ); diff --git a/src/gallium/state_trackers/vega/renderer.c b/src/gallium/state_trackers/vega/renderer.c index 9085ed1bfe..c85dae0282 100644 --- a/src/gallium/state_trackers/vega/renderer.c +++ b/src/gallium/state_trackers/vega/renderer.c @@ -448,7 +448,6 @@ void renderer_copy_surface(struct renderer *ctx, texTemp.width0 = srcW; texTemp.height0 = srcH; texTemp.depth0 = 1; - pf_get_block(src->format, &texTemp.block); tex = screen->texture_create(screen, &texTemp); if (!tex) diff --git a/src/gallium/state_trackers/vega/vg_tracker.c b/src/gallium/state_trackers/vega/vg_tracker.c index d28463dd1b..ed18dd6075 100644 --- a/src/gallium/state_trackers/vega/vg_tracker.c +++ b/src/gallium/state_trackers/vega/vg_tracker.c @@ -50,7 +50,6 @@ create_texture(struct pipe_context *pipe, enum pipe_format format, } templ.target = PIPE_TEXTURE_2D; - pf_get_block(templ.format, &templ.block); templ.width0 = width; templ.height0 = height; templ.depth0 = 1; diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c index c4751724c9..0d1844b53c 100644 --- a/src/gallium/state_trackers/xorg/xorg_crtc.c +++ b/src/gallium/state_trackers/xorg/xorg_crtc.c @@ -191,7 +191,6 @@ crtc_load_cursor_argb(xf86CrtcPtr crtc, CARD32 * image) templat.format = PIPE_FORMAT_A8R8G8B8_UNORM; templat.width0 = 64; templat.height0 = 64; - pf_get_block(templat.format, &templat.block); crtcp->cursor_tex = ms->screen->texture_create(ms->screen, &templat); @@ -207,7 +206,7 @@ crtc_load_cursor_argb(xf86CrtcPtr crtc, CARD32 * image) PIPE_TRANSFER_WRITE, 0, 0, 64, 64); ptr = ms->screen->transfer_map(ms->screen, transfer); - util_copy_rect(ptr, &crtcp->cursor_tex->block, + util_copy_rect(ptr, crtcp->cursor_tex->format, transfer->stride, 0, 0, 64, 64, (void*)image, 64 * 4, 0, 0); ms->screen->transfer_unmap(ms->screen, transfer); diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c index 406e0afff4..d3bb381333 100644 --- a/src/gallium/state_trackers/xorg/xorg_dri2.c +++ b/src/gallium/state_trackers/xorg/xorg_dri2.c @@ -108,7 +108,6 @@ driDoCreateBuffer(DrawablePtr pDraw, DRI2BufferPtr buffer, unsigned int format) else template.format = ms->ds_depth_bits_last ? PIPE_FORMAT_S8Z24_UNORM : PIPE_FORMAT_Z24S8_UNORM; - pf_get_block(template.format, &template.block); template.width0 = pDraw->width; template.height0 = pDraw->height; template.depth0 = 1; diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c index a68a626fa4..c02ed39ca1 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.c +++ b/src/gallium/state_trackers/xorg/xorg_exa.c @@ -206,7 +206,7 @@ ExaDownloadFromScreen(PixmapPtr pPix, int x, int y, int w, int h, char *dst, x, y, w, h, dst_pitch); #endif - util_copy_rect((unsigned char*)dst, &priv->tex->block, dst_pitch, 0, 0, + util_copy_rect((unsigned char*)dst, priv->tex->format, dst_pitch, 0, 0, w, h, exa->scrn->transfer_map(exa->scrn, transfer), transfer->stride, 0, 0); @@ -246,7 +246,7 @@ ExaUploadToScreen(PixmapPtr pPix, int x, int y, int w, int h, char *src, #endif util_copy_rect(exa->scrn->transfer_map(exa->scrn, transfer), - &priv->tex->block, transfer->stride, 0, 0, w, h, + priv->tex->format, transfer->stride, 0, 0, w, h, (unsigned char*)src, src_pitch, 0, 0); exa->scrn->transfer_unmap(exa->scrn, transfer); @@ -761,7 +761,6 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, memset(&template, 0, sizeof(template)); template.target = PIPE_TEXTURE_2D; exa_get_pipe_format(depth, &template.format, &bitsPerPixel, &priv->picture_format); - pf_get_block(template.format, &template.block); template.width0 = width; template.height0 = height; template.depth0 = 1; @@ -840,7 +839,6 @@ xorg_exa_create_root_texture(ScrnInfoPtr pScrn, memset(&template, 0, sizeof(template)); template.target = PIPE_TEXTURE_2D; exa_get_pipe_format(depth, &template.format, &bitsPerPixel, &dummy); - pf_get_block(template.format, &template.block); template.width0 = width; template.height0 = height; template.depth0 = 1; diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c index 418a8dd88b..5c34e71b1b 100644 --- a/src/gallium/state_trackers/xorg/xorg_renderer.c +++ b/src/gallium/state_trackers/xorg/xorg_renderer.c @@ -733,7 +733,6 @@ create_sampler_texture(struct xorg_renderer *r, templ.width0 = src->width0; templ.height0 = src->height0; templ.depth0 = 1; - pf_get_block(format, &templ.block); templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER; pt = screen->texture_create(screen, &templ); diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c index bb515a0f49..459ab3c64e 100644 --- a/src/gallium/state_trackers/xorg/xorg_xv.c +++ b/src/gallium/state_trackers/xorg/xorg_xv.c @@ -170,7 +170,6 @@ create_component_texture(struct pipe_context *pipe, templ.width0 = width; templ.height0 = height; templ.depth0 = 1; - pf_get_block(PIPE_FORMAT_L8_UNORM, &templ.block); templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER; tex = screen->texture_create(screen, &templ); diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c index d497861324..8d95826c9a 100644 --- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c +++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c @@ -28,7 +28,6 @@ dri_surface_from_handle(struct drm_api *api, struct pipe_screen *pscreen, tmpl.format = format; tmpl.width0 = width; tmpl.height0 = height; - pf_get_block(tmpl.format, &tmpl.block); pt = api->texture_from_shared_handle(api, pscreen, &tmpl, "front buffer", pitch, handle); @@ -247,7 +246,7 @@ nouveau_drm_handle_from_pt(struct drm_api *api, struct pipe_screen *pscreen, return false; *handle = mt->bo->handle; - *stride = mt->base.nblocksx[0] * mt->base.block.size; + *stride = pf_get_stride(mt->base.format, mt->base.width0); return true; } diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c index 74afffc9cf..65f7babff2 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c @@ -113,17 +113,13 @@ static struct pipe_buffer *radeon_surface_buffer_create(struct pipe_winsys *ws, unsigned tex_usage, unsigned *stride) { - struct pipe_format_block block; - unsigned nblocksx, nblocksy, size; - - pf_get_block(format, &block); - - nblocksx = pf_get_nblocksx(&block, width); - nblocksy = pf_get_nblocksy(&block, height); - /* Radeons enjoy things in multiples of 32. */ /* XXX this can be 32 when POT */ - *stride = (nblocksx * block.size + 63) & ~63; + const unsigned alignment = 64; + unsigned nblocksy, size; + + nblocksy = pf_get_nblocksy(format, height); + *stride = align(pf_get_stride(format, width), alignment); size = *stride * nblocksy; return radeon_buffer_create(ws, 64, usage, size); @@ -321,9 +317,6 @@ struct pipe_surface *radeon_surface_from_handle(struct radeon_context *radeon_co tmpl.height0 = h; tmpl.depth0 = 1; tmpl.format = format; - pf_get_block(tmpl.format, &tmpl.block); - tmpl.nblocksx[0] = pf_get_nblocksx(&tmpl.block, w); - tmpl.nblocksy[0] = pf_get_nblocksy(&tmpl.block, h); pt = pipe_screen->texture_blanket(pipe_screen, &tmpl, &pitch, pb); if (pt == NULL) { diff --git a/src/gallium/winsys/egl_xlib/sw_winsys.c b/src/gallium/winsys/egl_xlib/sw_winsys.c index 79ff2cc985..d5644c161f 100644 --- a/src/gallium/winsys/egl_xlib/sw_winsys.c +++ b/src/gallium/winsys/egl_xlib/sw_winsys.c @@ -71,16 +71,6 @@ sw_pipe_buffer(struct pipe_buffer *b) } -/** - * Round n up to next multiple. - */ -static INLINE unsigned -round_up(unsigned n, unsigned multiple) -{ - return (n + multiple - 1) & ~(multiple - 1); -} - - static const char * get_name(struct pipe_winsys *pws) { @@ -170,13 +160,10 @@ surface_buffer_create(struct pipe_winsys *winsys, unsigned *stride) { const unsigned alignment = 64; - struct pipe_format_block block; - unsigned nblocksx, nblocksy; + unsigned nblocksy; - pf_get_block(format, &block); - nblocksx = pf_get_nblocksx(&block, width); - nblocksy = pf_get_nblocksy(&block, height); - *stride = round_up(nblocksx * block.size, alignment); + nblocksy = pf_get_nblocksy(format, height); + *stride = align(pf_get_stride(format, width), alignment); return winsys->buffer_create(winsys, alignment, usage, diff --git a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c index 08067aad64..b8c8502d7b 100644 --- a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c +++ b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c @@ -138,13 +138,10 @@ static struct pipe_buffer* xsp_surface_buffer_create ) { const unsigned int ALIGNMENT = 1; - struct pipe_format_block block; - unsigned nblocksx, nblocksy; + unsigned nblocksy; - pf_get_block(format, &block); - nblocksx = pf_get_nblocksx(&block, width); - nblocksy = pf_get_nblocksy(&block, height); - *stride = align(nblocksx * block.size, ALIGNMENT); + nblocksy = pf_get_nblocksy(format, height); + *stride = align(pf_get_stride(format, width), ALIGNMENT); return pws->buffer_create(pws, ALIGNMENT, usage, *stride * nblocksy); diff --git a/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c b/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c index e8bc0f55ac..81c46c0a96 100644 --- a/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c +++ b/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c @@ -49,7 +49,6 @@ struct gdi_llvmpipe_displaytarget { enum pipe_format format; - struct pipe_format_block block; unsigned width; unsigned height; unsigned stride; @@ -118,16 +117,6 @@ gdi_llvmpipe_displaytarget_destroy(struct llvmpipe_winsys *winsys, } -/** - * Round n up to next multiple. - */ -static INLINE unsigned -round_up(unsigned n, unsigned multiple) -{ - return (n + multiple - 1) & ~(multiple - 1); -} - - static struct llvmpipe_displaytarget * gdi_llvmpipe_displaytarget_create(struct llvmpipe_winsys *winsys, enum pipe_format format, @@ -147,10 +136,10 @@ gdi_llvmpipe_displaytarget_create(struct llvmpipe_winsys *winsys, gdt->width = width; gdt->height = height; - bpp = pf_get_bits(format); - cpp = pf_get_size(format); + bpp = pf_get_blocksizebits(format); + cpp = pf_get_blocksize(format); - gdt->stride = round_up(width * cpp, alignment); + gdt->stride = align(width * cpp, alignment); gdt->size = gdt->stride * height; gdt->data = align_malloc(gdt->size, alignment); diff --git a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c index 5e0ccf32f4..173fa5b6fe 100644 --- a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c +++ b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c @@ -151,16 +151,6 @@ gdi_softpipe_user_buffer_create(struct pipe_winsys *winsys, } -/** - * Round n up to next multiple. - */ -static INLINE unsigned -round_up(unsigned n, unsigned multiple) -{ - return (n + multiple - 1) & ~(multiple - 1); -} - - static struct pipe_buffer * gdi_softpipe_surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, @@ -170,13 +160,10 @@ gdi_softpipe_surface_buffer_create(struct pipe_winsys *winsys, unsigned *stride) { const unsigned alignment = 64; - struct pipe_format_block block; - unsigned nblocksx, nblocksy; + unsigned nblocksy; - pf_get_block(format, &block); - nblocksx = pf_get_nblocksx(&block, width); - nblocksy = pf_get_nblocksy(&block, height); - *stride = round_up(nblocksx * block.size, alignment); + nblocksy = pf_get_nblocksy(format, height); + *stride = align(pf_get_stride(format, width), alignment); return winsys->buffer_create(winsys, alignment, usage, @@ -283,10 +270,10 @@ gdi_softpipe_present(struct pipe_screen *screen, memset(&bmi, 0, sizeof(BITMAPINFO)); bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER); - bmi.bmiHeader.biWidth = texture->stride[surface->level] / pf_get_size(surface->format); + bmi.bmiHeader.biWidth = texture->stride[surface->level] / pf_get_blocksize(surface->format); bmi.bmiHeader.biHeight= -(long)surface->height; bmi.bmiHeader.biPlanes = 1; - bmi.bmiHeader.biBitCount = pf_get_bits(surface->format); + bmi.bmiHeader.biBitCount = pf_get_blocksizebits(surface->format); bmi.bmiHeader.biCompression = BI_RGB; bmi.bmiHeader.biSizeImage = 0; bmi.bmiHeader.biXPelsPerMeter = 0; diff --git a/src/gallium/winsys/xlib/xlib_cell.c b/src/gallium/winsys/xlib/xlib_cell.c index 13e609f58f..6e984ebe3c 100644 --- a/src/gallium/winsys/xlib/xlib_cell.c +++ b/src/gallium/winsys/xlib/xlib_cell.c @@ -277,15 +277,6 @@ xm_user_buffer_create(struct pipe_winsys *pws, void *ptr, unsigned bytes) -/** - * Round n up to next multiple. - */ -static INLINE unsigned -round_up(unsigned n, unsigned multiple) -{ - return (n + multiple - 1) & ~(multiple - 1); -} - static struct pipe_buffer * xm_surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, @@ -294,18 +285,15 @@ xm_surface_buffer_create(struct pipe_winsys *winsys, unsigned *stride) { const unsigned alignment = 64; - struct pipe_format_block block; - unsigned nblocksx, nblocksy; + unsigned nblocksy; - pf_get_block(format, &block); - nblocksx = pf_get_nblocksx(&block, width); - nblocksy = pf_get_nblocksy(&block, height); - *stride = round_up(nblocksx * block.size, alignment); + nblocksy = pf_get_nblocksy(format, height); + *stride = align(pf_get_stride(format, width), alignment); return winsys->buffer_create(winsys, alignment, usage, /* XXX a bit of a hack */ - *stride * round_up(nblocksy, TILE_SIZE)); + *stride * align(nblocksy, TILE_SIZE)); } diff --git a/src/gallium/winsys/xlib/xlib_llvmpipe.c b/src/gallium/winsys/xlib/xlib_llvmpipe.c index 3dd15e099b..41f3e248e8 100644 --- a/src/gallium/winsys/xlib/xlib_llvmpipe.c +++ b/src/gallium/winsys/xlib/xlib_llvmpipe.c @@ -58,7 +58,6 @@ struct xm_displaytarget { enum pipe_format format; - struct pipe_format_block block; unsigned width; unsigned height; unsigned stride; @@ -262,10 +261,10 @@ xm_llvmpipe_display(struct xmesa_buffer *xm_buffer, { if (xm_dt->tempImage == NULL) { - assert(xm_dt->block.width == 1); - assert(xm_dt->block.height == 1); + assert(pf_get_blockwidth(xm_dt->format) == 1); + assert(pf_get_blockheight(xm_dt->format) == 1); alloc_shm_ximage(xm_dt, xm_buffer, - xm_dt->stride / xm_dt->block.size, + xm_dt->stride / pf_get_blocksize(xm_dt->format), xm_dt->height); } @@ -321,7 +320,7 @@ xm_displaytarget_create(struct llvmpipe_winsys *winsys, unsigned *stride) { struct xm_displaytarget *xm_dt = CALLOC_STRUCT(xm_displaytarget); - unsigned nblocksx, nblocksy, size; + unsigned nblocksy, size; xm_dt = CALLOC_STRUCT(xm_displaytarget); if(!xm_dt) @@ -331,10 +330,8 @@ xm_displaytarget_create(struct llvmpipe_winsys *winsys, xm_dt->width = width; xm_dt->height = height; - pf_get_block(format, &xm_dt->block); - nblocksx = pf_get_nblocksx(&xm_dt->block, width); - nblocksy = pf_get_nblocksy(&xm_dt->block, height); - xm_dt->stride = align(nblocksx * xm_dt->block.size, alignment); + nblocksy = pf_get_nblocksy(format, height); + xm_dt->stride = align(pf_get_stride(format, width), alignment); size = xm_dt->stride * nblocksy; #ifdef USE_XSHM diff --git a/src/gallium/winsys/xlib/xlib_softpipe.c b/src/gallium/winsys/xlib/xlib_softpipe.c index 260b39e2a0..69a5dcc2b7 100644 --- a/src/gallium/winsys/xlib/xlib_softpipe.c +++ b/src/gallium/winsys/xlib/xlib_softpipe.c @@ -254,10 +254,10 @@ xlib_softpipe_display_surface(struct xmesa_buffer *b, { if (xm_buf->tempImage == NULL) { - assert(surf->texture->block.width == 1); - assert(surf->texture->block.height == 1); + assert(pf_get_blockwidth(surf->texture->format) == 1); + assert(pf_get_blockheight(surf->texture->format) == 1); alloc_shm_ximage(xm_buf, b, spt->stride[surf->level] / - surf->texture->block.size, surf->height); + pf_get_blocksize(surf->texture->format), surf->height); } ximage = xm_buf->tempImage; @@ -360,13 +360,10 @@ xm_surface_buffer_create(struct pipe_winsys *winsys, unsigned *stride) { const unsigned alignment = 64; - struct pipe_format_block block; - unsigned nblocksx, nblocksy, size; + unsigned nblocksy, size; - pf_get_block(format, &block); - nblocksx = pf_get_nblocksx(&block, width); - nblocksy = pf_get_nblocksy(&block, height); - *stride = align(nblocksx * block.size, alignment); + nblocksy = pf_get_nblocksy(format, height); + *stride = align(pf_get_stride(format, width), alignment); size = *stride * nblocksy; #ifdef USE_XSHM -- cgit v1.2.3 From cd3409ce059e46b4b675d2ad6f1f3b75939aa2ab Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 1 Dec 2009 08:38:32 +0100 Subject: cell: Update for renamed sampler/texture state setters. --- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index ccd0fef6e8..c18a5d0635 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -383,10 +383,10 @@ cell_init_state_functions(struct cell_context *cell) cell->pipe.delete_blend_state = cell_delete_blend_state; cell->pipe.create_sampler_state = cell_create_sampler_state; - cell->pipe.bind_sampler_states = cell_bind_sampler_states; + cell->pipe.bind_fragment_sampler_states = cell_bind_sampler_states; cell->pipe.delete_sampler_state = cell_delete_sampler_state; - cell->pipe.set_sampler_textures = cell_set_sampler_textures; + cell->pipe.set_fragment_sampler_textures = cell_set_sampler_textures; cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state; cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state; -- cgit v1.2.3 From e04324b8f93919d75f224644a160a32405740860 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 1 Dec 2009 08:39:07 +0100 Subject: fo: Update for renamed sampler/texture state setters. --- src/gallium/drivers/failover/fo_state.c | 16 ++++++++-------- src/gallium/drivers/failover/fo_state_emit.c | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index c8eb926299..fca6caa227 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -339,10 +339,10 @@ failover_bind_sampler_states(struct pipe_context *pipe, } failover->dirty |= FO_NEW_SAMPLER; failover->num_samplers = num; - failover->sw->bind_sampler_states(failover->sw, num, - failover->sw_sampler_state); - failover->hw->bind_sampler_states(failover->hw, num, - failover->hw_sampler_state); + failover->sw->bind_fragment_sampler_states(failover->sw, num, + failover->sw_sampler_state); + failover->hw->bind_fragment_sampler_states(failover->hw, num, + failover->hw_sampler_state); } static void @@ -381,8 +381,8 @@ failover_set_sampler_textures(struct pipe_context *pipe, NULL); failover->dirty |= FO_NEW_TEXTURE; failover->num_textures = num; - failover->sw->set_sampler_textures( failover->sw, num, texture ); - failover->hw->set_sampler_textures( failover->hw, num, texture ); + failover->sw->set_fragment_sampler_textures( failover->sw, num, texture ); + failover->hw->set_fragment_sampler_textures( failover->hw, num, texture ); } @@ -453,7 +453,7 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.bind_blend_state = failover_bind_blend_state; failover->pipe.delete_blend_state = failover_delete_blend_state; failover->pipe.create_sampler_state = failover_create_sampler_state; - failover->pipe.bind_sampler_states = failover_bind_sampler_states; + failover->pipe.bind_fragment_sampler_states = failover_bind_sampler_states; failover->pipe.delete_sampler_state = failover_delete_sampler_state; failover->pipe.create_depth_stencil_alpha_state = failover_create_depth_stencil_state; failover->pipe.bind_depth_stencil_alpha_state = failover_bind_depth_stencil_state; @@ -473,7 +473,7 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.set_framebuffer_state = failover_set_framebuffer_state; failover->pipe.set_polygon_stipple = failover_set_polygon_stipple; failover->pipe.set_scissor_state = failover_set_scissor_state; - failover->pipe.set_sampler_textures = failover_set_sampler_textures; + failover->pipe.set_fragment_sampler_textures = failover_set_sampler_textures; failover->pipe.set_viewport_state = failover_set_viewport_state; failover->pipe.set_vertex_buffers = failover_set_vertex_buffers; failover->pipe.set_vertex_elements = failover_set_vertex_elements; diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c index bd4fce9d20..b4b1067924 100644 --- a/src/gallium/drivers/failover/fo_state_emit.c +++ b/src/gallium/drivers/failover/fo_state_emit.c @@ -92,13 +92,13 @@ failover_state_emit( struct failover_context *failover ) failover->sw->set_viewport_state( failover->sw, &failover->viewport ); if (failover->dirty & FO_NEW_SAMPLER) { - failover->sw->bind_sampler_states( failover->sw, failover->num_samplers, - failover->sw_sampler_state ); + failover->sw->bind_fragment_sampler_states( failover->sw, failover->num_samplers, + failover->sw_sampler_state ); } if (failover->dirty & FO_NEW_TEXTURE) { - failover->sw->set_sampler_textures( failover->sw, failover->num_textures, - failover->texture ); + failover->sw->set_fragment_sampler_textures( failover->sw, failover->num_textures, + failover->texture ); } if (failover->dirty & FO_NEW_VERTEX_BUFFER) { -- cgit v1.2.3 From 25bb04a1ee9b3f28bfa6e60d7ce71ff23726c5b6 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 1 Dec 2009 08:39:19 +0100 Subject: i915: Update for renamed sampler/texture state setters. --- src/gallium/drivers/i915/i915_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index 71f00bc346..9103847f1c 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -767,7 +767,7 @@ i915_init_state_functions( struct i915_context *i915 ) i915->base.delete_blend_state = i915_delete_blend_state; i915->base.create_sampler_state = i915_create_sampler_state; - i915->base.bind_sampler_states = i915_bind_sampler_states; + i915->base.bind_fragment_sampler_states = i915_bind_sampler_states; i915->base.delete_sampler_state = i915_delete_sampler_state; i915->base.create_depth_stencil_alpha_state = i915_create_depth_stencil_state; @@ -791,7 +791,7 @@ i915_init_state_functions( struct i915_context *i915 ) i915->base.set_polygon_stipple = i915_set_polygon_stipple; i915->base.set_scissor_state = i915_set_scissor_state; - i915->base.set_sampler_textures = i915_set_sampler_textures; + i915->base.set_fragment_sampler_textures = i915_set_sampler_textures; i915->base.set_viewport_state = i915_set_viewport_state; i915->base.set_vertex_buffers = i915_set_vertex_buffers; i915->base.set_vertex_elements = i915_set_vertex_elements; -- cgit v1.2.3 From f0d3abf3834d3ae6107e66b61d8660e6c09a0a99 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 1 Dec 2009 08:39:49 +0100 Subject: id: Update for renamed sampler/texture state setters. --- src/gallium/drivers/identity/id_context.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index 4e700089e3..2f5b38ea15 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -228,9 +228,9 @@ identity_bind_sampler_states(struct pipe_context *_pipe, struct identity_context *id_pipe = identity_context(_pipe); struct pipe_context *pipe = id_pipe->pipe; - pipe->bind_sampler_states(pipe, - num, - samplers); + pipe->bind_fragment_sampler_states(pipe, + num, + samplers); } static void @@ -499,9 +499,9 @@ identity_set_sampler_textures(struct pipe_context *_pipe, textures = unwrapped_textures; } - pipe->set_sampler_textures(pipe, - num_textures, - textures); + pipe->set_fragment_sampler_textures(pipe, + num_textures, + textures); } static void @@ -682,7 +682,7 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.bind_blend_state = identity_bind_blend_state; id_pipe->base.delete_blend_state = identity_delete_blend_state; id_pipe->base.create_sampler_state = identity_create_sampler_state; - id_pipe->base.bind_sampler_states = identity_bind_sampler_states; + id_pipe->base.bind_fragment_sampler_states = identity_bind_sampler_states; id_pipe->base.delete_sampler_state = identity_delete_sampler_state; id_pipe->base.create_rasterizer_state = identity_create_rasterizer_state; id_pipe->base.bind_rasterizer_state = identity_bind_rasterizer_state; @@ -703,7 +703,7 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.set_polygon_stipple = identity_set_polygon_stipple; id_pipe->base.set_scissor_state = identity_set_scissor_state; id_pipe->base.set_viewport_state = identity_set_viewport_state; - id_pipe->base.set_sampler_textures = identity_set_sampler_textures; + id_pipe->base.set_fragment_sampler_textures = identity_set_sampler_textures; id_pipe->base.set_vertex_buffers = identity_set_vertex_buffers; id_pipe->base.set_vertex_elements = identity_set_vertex_elements; id_pipe->base.surface_copy = identity_surface_copy; -- cgit v1.2.3 From 551b2db82b5e5093dc19bde130785aceb92868a6 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 1 Dec 2009 08:40:04 +0100 Subject: lp: Update for renamed sampler/texture state setters. --- src/gallium/drivers/llvmpipe/lp_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 57e71f3e98..c081f6de03 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -180,7 +180,7 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->pipe.delete_blend_state = llvmpipe_delete_blend_state; llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state; - llvmpipe->pipe.bind_sampler_states = llvmpipe_bind_sampler_states; + llvmpipe->pipe.bind_fragment_sampler_states = llvmpipe_bind_sampler_states; llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state; llvmpipe->pipe.create_depth_stencil_alpha_state = llvmpipe_create_depth_stencil_state; @@ -205,7 +205,7 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state; llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple; llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state; - llvmpipe->pipe.set_sampler_textures = llvmpipe_set_sampler_textures; + llvmpipe->pipe.set_fragment_sampler_textures = llvmpipe_set_sampler_textures; llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state; llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; -- cgit v1.2.3 From d15bb1cba3fd2d36c48e33e14cc3c548cf40d555 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 1 Dec 2009 08:40:21 +0100 Subject: nv: Update for renamed sampler/texture state setters. --- src/gallium/drivers/nv04/nv04_state.c | 4 ++-- src/gallium/drivers/nv10/nv10_state.c | 4 ++-- src/gallium/drivers/nv20/nv20_state.c | 4 ++-- src/gallium/drivers/nv30/nv30_state.c | 4 ++-- src/gallium/drivers/nv40/nv40_state.c | 4 ++-- src/gallium/drivers/nv50/nv50_state.c | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c index d356ebd8b3..ef3005db5f 100644 --- a/src/gallium/drivers/nv04/nv04_state.c +++ b/src/gallium/drivers/nv04/nv04_state.c @@ -425,9 +425,9 @@ nv04_init_state_functions(struct nv04_context *nv04) nv04->pipe.delete_blend_state = nv04_blend_state_delete; nv04->pipe.create_sampler_state = nv04_sampler_state_create; - nv04->pipe.bind_sampler_states = nv04_sampler_state_bind; + nv04->pipe.bind_fragment_sampler_states = nv04_sampler_state_bind; nv04->pipe.delete_sampler_state = nv04_sampler_state_delete; - nv04->pipe.set_sampler_textures = nv04_set_sampler_texture; + nv04->pipe.set_fragment_sampler_textures = nv04_set_sampler_texture; nv04->pipe.create_rasterizer_state = nv04_rasterizer_state_create; nv04->pipe.bind_rasterizer_state = nv04_rasterizer_state_bind; diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index 9b38219b99..ffc6be3c40 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -553,9 +553,9 @@ nv10_init_state_functions(struct nv10_context *nv10) nv10->pipe.delete_blend_state = nv10_blend_state_delete; nv10->pipe.create_sampler_state = nv10_sampler_state_create; - nv10->pipe.bind_sampler_states = nv10_sampler_state_bind; + nv10->pipe.bind_fragment_sampler_states = nv10_sampler_state_bind; nv10->pipe.delete_sampler_state = nv10_sampler_state_delete; - nv10->pipe.set_sampler_textures = nv10_set_sampler_texture; + nv10->pipe.set_fragment_sampler_textures = nv10_set_sampler_texture; nv10->pipe.create_rasterizer_state = nv10_rasterizer_state_create; nv10->pipe.bind_rasterizer_state = nv10_rasterizer_state_bind; diff --git a/src/gallium/drivers/nv20/nv20_state.c b/src/gallium/drivers/nv20/nv20_state.c index ed4084980f..3a82e63423 100644 --- a/src/gallium/drivers/nv20/nv20_state.c +++ b/src/gallium/drivers/nv20/nv20_state.c @@ -546,9 +546,9 @@ nv20_init_state_functions(struct nv20_context *nv20) nv20->pipe.delete_blend_state = nv20_blend_state_delete; nv20->pipe.create_sampler_state = nv20_sampler_state_create; - nv20->pipe.bind_sampler_states = nv20_sampler_state_bind; + nv20->pipe.bind_fragment_sampler_states = nv20_sampler_state_bind; nv20->pipe.delete_sampler_state = nv20_sampler_state_delete; - nv20->pipe.set_sampler_textures = nv20_set_sampler_texture; + nv20->pipe.set_fragment_sampler_textures = nv20_set_sampler_texture; nv20->pipe.create_rasterizer_state = nv20_rasterizer_state_create; nv20->pipe.bind_rasterizer_state = nv20_rasterizer_state_bind; diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index b91e972c12..3f802d9241 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -690,9 +690,9 @@ nv30_init_state_functions(struct nv30_context *nv30) nv30->pipe.delete_blend_state = nv30_blend_state_delete; nv30->pipe.create_sampler_state = nv30_sampler_state_create; - nv30->pipe.bind_sampler_states = nv30_sampler_state_bind; + nv30->pipe.bind_fragment_sampler_states = nv30_sampler_state_bind; nv30->pipe.delete_sampler_state = nv30_sampler_state_delete; - nv30->pipe.set_sampler_textures = nv30_set_sampler_texture; + nv30->pipe.set_fragment_sampler_textures = nv30_set_sampler_texture; nv30->pipe.create_rasterizer_state = nv30_rasterizer_state_create; nv30->pipe.bind_rasterizer_state = nv30_rasterizer_state_bind; diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index c3ee4d2345..bc34e32a4b 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -705,9 +705,9 @@ nv40_init_state_functions(struct nv40_context *nv40) nv40->pipe.delete_blend_state = nv40_blend_state_delete; nv40->pipe.create_sampler_state = nv40_sampler_state_create; - nv40->pipe.bind_sampler_states = nv40_sampler_state_bind; + nv40->pipe.bind_fragment_sampler_states = nv40_sampler_state_bind; nv40->pipe.delete_sampler_state = nv40_sampler_state_delete; - nv40->pipe.set_sampler_textures = nv40_set_sampler_texture; + nv40->pipe.set_fragment_sampler_textures = nv40_set_sampler_texture; nv40->pipe.create_rasterizer_state = nv40_rasterizer_state_create; nv40->pipe.bind_rasterizer_state = nv40_rasterizer_state_bind; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index ffaa5e29d1..07318f2394 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -648,9 +648,9 @@ nv50_init_state_functions(struct nv50_context *nv50) nv50->pipe.delete_blend_state = nv50_blend_state_delete; nv50->pipe.create_sampler_state = nv50_sampler_state_create; - nv50->pipe.bind_sampler_states = nv50_sampler_state_bind; + nv50->pipe.bind_fragment_sampler_states = nv50_sampler_state_bind; nv50->pipe.delete_sampler_state = nv50_sampler_state_delete; - nv50->pipe.set_sampler_textures = nv50_set_sampler_texture; + nv50->pipe.set_fragment_sampler_textures = nv50_set_sampler_texture; nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create; nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind; -- cgit v1.2.3 From c1bcedc4ce48031c9e5d2a2430d27c7a9aaa8b37 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 1 Dec 2009 08:40:31 +0100 Subject: r300: Update for renamed sampler/texture state setters. --- src/gallium/drivers/r300/r300_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index a88d66db24..7505353953 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -822,10 +822,10 @@ void r300_init_state_functions(struct r300_context* r300) r300->context.delete_rasterizer_state = r300_delete_rs_state; r300->context.create_sampler_state = r300_create_sampler_state; - r300->context.bind_sampler_states = r300_bind_sampler_states; + r300->context.bind_fragment_sampler_states = r300_bind_sampler_states; r300->context.delete_sampler_state = r300_delete_sampler_state; - r300->context.set_sampler_textures = r300_set_sampler_textures; + r300->context.set_fragment_sampler_textures = r300_set_sampler_textures; r300->context.set_scissor_state = r300_set_scissor_state; -- cgit v1.2.3 From 8eecd3bafb759df3f1853490cf149d053c8fcbce Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 1 Dec 2009 08:40:53 +0100 Subject: svga: Update for renamed sampler/texture state setters. --- src/gallium/drivers/svga/svga_pipe_sampler.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c index 3eeca6b784..b4e57c5d15 100644 --- a/src/gallium/drivers/svga/svga_pipe_sampler.c +++ b/src/gallium/drivers/svga/svga_pipe_sampler.c @@ -234,9 +234,9 @@ static void svga_set_sampler_textures(struct pipe_context *pipe, void svga_init_sampler_functions( struct svga_context *svga ) { svga->pipe.create_sampler_state = svga_create_sampler_state; - svga->pipe.bind_sampler_states = svga_bind_sampler_states; + svga->pipe.bind_fragment_sampler_states = svga_bind_sampler_states; svga->pipe.delete_sampler_state = svga_delete_sampler_state; - svga->pipe.set_sampler_textures = svga_set_sampler_textures; + svga->pipe.set_fragment_sampler_textures = svga_set_sampler_textures; } -- cgit v1.2.3 From eeb8dd12b48c6ad3f466cf0ea88472fca576ebd4 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 1 Dec 2009 08:43:51 +0100 Subject: trace: Update for renamed sampler/texture state setters. --- src/gallium/drivers/trace/tr_context.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index bf470b46ae..26c01c9b84 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -486,13 +486,13 @@ trace_context_bind_sampler_states(struct pipe_context *_pipe, struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin("pipe_context", "bind_sampler_states"); + trace_dump_call_begin("pipe_context", "bind_fragment_sampler_states"); trace_dump_arg(ptr, pipe); trace_dump_arg(uint, num_states); trace_dump_arg_array(ptr, states, num_states); - pipe->bind_sampler_states(pipe, num_states, states);; + pipe->bind_fragment_sampler_states(pipe, num_states, states); trace_dump_call_end(); } @@ -959,13 +959,13 @@ trace_context_set_sampler_textures(struct pipe_context *_pipe, } textures = unwrapped_textures; - trace_dump_call_begin("pipe_context", "set_sampler_textures"); + trace_dump_call_begin("pipe_context", "set_fragment_sampler_textures"); trace_dump_arg(ptr, pipe); trace_dump_arg(uint, num_textures); trace_dump_arg_array(ptr, textures, num_textures); - pipe->set_sampler_textures(pipe, num_textures, textures);; + pipe->set_fragment_sampler_textures(pipe, num_textures, textures); trace_dump_call_end(); } @@ -1253,7 +1253,7 @@ trace_context_create(struct pipe_screen *_screen, tr_ctx->base.bind_blend_state = trace_context_bind_blend_state; tr_ctx->base.delete_blend_state = trace_context_delete_blend_state; tr_ctx->base.create_sampler_state = trace_context_create_sampler_state; - tr_ctx->base.bind_sampler_states = trace_context_bind_sampler_states; + tr_ctx->base.bind_fragment_sampler_states = trace_context_bind_sampler_states; tr_ctx->base.delete_sampler_state = trace_context_delete_sampler_state; tr_ctx->base.create_rasterizer_state = trace_context_create_rasterizer_state; tr_ctx->base.bind_rasterizer_state = trace_context_bind_rasterizer_state; @@ -1274,7 +1274,7 @@ trace_context_create(struct pipe_screen *_screen, tr_ctx->base.set_polygon_stipple = trace_context_set_polygon_stipple; tr_ctx->base.set_scissor_state = trace_context_set_scissor_state; tr_ctx->base.set_viewport_state = trace_context_set_viewport_state; - tr_ctx->base.set_sampler_textures = trace_context_set_sampler_textures; + tr_ctx->base.set_fragment_sampler_textures = trace_context_set_sampler_textures; tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers; tr_ctx->base.set_vertex_elements = trace_context_set_vertex_elements; if (pipe->surface_copy) -- cgit v1.2.3 From 8a619e62bffa6f21330df747940e322909937806 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 1 Dec 2009 08:51:20 +0100 Subject: sp: Implement separate vertex sampler state. --- src/gallium/drivers/softpipe/sp_context.c | 22 ++++++-- src/gallium/drivers/softpipe/sp_context.h | 7 ++- src/gallium/drivers/softpipe/sp_flush.c | 3 ++ src/gallium/drivers/softpipe/sp_screen.c | 4 +- src/gallium/drivers/softpipe/sp_state.h | 9 ++++ src/gallium/drivers/softpipe/sp_state_derived.c | 13 +++++ src/gallium/drivers/softpipe/sp_state_sampler.c | 69 +++++++++++++++++++++++-- 7 files changed, 116 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index bdbb7fa9b9..f8bf3e9974 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -107,6 +107,11 @@ softpipe_destroy( struct pipe_context *pipe ) pipe_texture_reference(&softpipe->texture[i], NULL); } + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + sp_destroy_tex_tile_cache(softpipe->vertex_tex_cache[i]); + pipe_texture_reference(&softpipe->vertex_textures[i], NULL); + } + for (i = 0; i < Elements(softpipe->constants); i++) { if (softpipe->constants[i].buffer) { pipe_buffer_reference(&softpipe->constants[i].buffer, NULL); @@ -153,6 +158,11 @@ softpipe_is_texture_referenced( struct pipe_context *pipe, softpipe->tex_cache[i]->texture == texture) return PIPE_REFERENCED_FOR_READ; } + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + if (softpipe->vertex_tex_cache[i] && + softpipe->vertex_tex_cache[i]->texture == texture) + return PIPE_REFERENCED_FOR_READ; + } return PIPE_UNREFERENCED; } @@ -192,7 +202,8 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.delete_blend_state = softpipe_delete_blend_state; softpipe->pipe.create_sampler_state = softpipe_create_sampler_state; - softpipe->pipe.bind_sampler_states = softpipe_bind_sampler_states; + softpipe->pipe.bind_fragment_sampler_states = softpipe_bind_sampler_states; + softpipe->pipe.bind_vertex_sampler_states = softpipe_bind_vertex_sampler_states; softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state; softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state; @@ -217,7 +228,8 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state; softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple; softpipe->pipe.set_scissor_state = softpipe_set_scissor_state; - softpipe->pipe.set_sampler_textures = softpipe_set_sampler_textures; + softpipe->pipe.set_fragment_sampler_textures = softpipe_set_sampler_textures; + softpipe->pipe.set_vertex_sampler_textures = softpipe_set_vertex_sampler_textures; softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers; @@ -247,7 +259,9 @@ softpipe_create( struct pipe_screen *screen ) for (i = 0; i < PIPE_MAX_SAMPLERS; i++) softpipe->tex_cache[i] = sp_create_tex_tile_cache( screen ); - + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + softpipe->vertex_tex_cache[i] = sp_create_tex_tile_cache(screen); + } /* setup quad rendering stages */ softpipe->quad.shade = sp_quad_shade_stage(softpipe); @@ -263,7 +277,7 @@ softpipe_create( struct pipe_screen *screen ) goto fail; draw_texture_samplers(softpipe->draw, - PIPE_MAX_SAMPLERS, + PIPE_MAX_VERTEX_SAMPLERS, (struct tgsi_sampler **) softpipe->tgsi.vert_samplers_list); diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index a735573d6f..8ce20c5744 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -53,6 +53,7 @@ struct softpipe_context { /** Constant state objects */ struct pipe_blend_state *blend; struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_depth_stencil_alpha_state *depth_stencil; struct pipe_rasterizer_state *rasterizer; struct sp_fragment_shader *fs; @@ -66,12 +67,15 @@ struct softpipe_context { struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; unsigned num_samplers; unsigned num_textures; + unsigned num_vertex_samplers; + unsigned num_vertex_textures; unsigned num_vertex_elements; unsigned num_vertex_buffers; @@ -121,7 +125,7 @@ struct softpipe_context { /** TGSI exec things */ struct { - struct sp_sampler_varient *vert_samplers_list[PIPE_MAX_SAMPLERS]; + struct sp_sampler_varient *vert_samplers_list[PIPE_MAX_VERTEX_SAMPLERS]; struct sp_sampler_varient *frag_samplers_list[PIPE_MAX_SAMPLERS]; } tgsi; @@ -139,6 +143,7 @@ struct softpipe_context { unsigned tex_timestamp; struct softpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; + struct softpipe_tex_tile_cache *vertex_tex_cache[PIPE_MAX_VERTEX_SAMPLERS]; unsigned use_sse : 1; unsigned dump_fs : 1; diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index e38b767cf2..75dac810a1 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -55,6 +55,9 @@ softpipe_flush( struct pipe_context *pipe, for (i = 0; i < softpipe->num_textures; i++) { sp_flush_tex_tile_cache(softpipe->tex_cache[i]); } + for (i = 0; i < softpipe->num_vertex_textures; i++) { + sp_flush_tex_tile_cache(softpipe->vertex_tex_cache[i]); + } } if (flags & PIPE_FLUSH_SWAPBUFFERS) { diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 81fb7aa20c..68e4ef4137 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -58,7 +58,9 @@ softpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: return PIPE_MAX_SAMPLERS; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return PIPE_MAX_SAMPLERS; + return PIPE_MAX_VERTEX_SAMPLERS; + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS; case PIPE_CAP_NPOT_TEXTURES: return 1; case PIPE_CAP_TWO_SIDED_STENCIL: diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 77ee3c1136..d488fb8710 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -104,6 +104,10 @@ void * softpipe_create_sampler_state(struct pipe_context *, const struct pipe_sampler_state *); void softpipe_bind_sampler_states(struct pipe_context *, unsigned, void **); +void +softpipe_bind_vertex_sampler_states(struct pipe_context *, + unsigned num_samplers, + void **samplers); void softpipe_delete_sampler_state(struct pipe_context *, void *); void * @@ -150,6 +154,11 @@ void softpipe_set_sampler_textures( struct pipe_context *, unsigned num, struct pipe_texture ** ); +void +softpipe_set_vertex_sampler_textures(struct pipe_context *, + unsigned num_textures, + struct pipe_texture **); + void softpipe_set_viewport_state( struct pipe_context *, const struct pipe_viewport_state * ); diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 3bc96b9538..c24a737d07 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -213,6 +213,19 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) } } } + + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + struct softpipe_tex_tile_cache *tc = softpipe->vertex_tex_cache[i]; + + if (tc->texture) { + struct softpipe_texture *spt = softpipe_texture(tc->texture); + + if (spt->timestamp != tc->timestamp) { + sp_tex_tile_cache_validate_texture(tc); + tc->timestamp = spt->timestamp; + } + } + } } diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index db0b8ab76b..ceb4e338f1 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -93,6 +93,34 @@ softpipe_bind_sampler_states(struct pipe_context *pipe, } +void +softpipe_bind_vertex_sampler_states(struct pipe_context *pipe, + unsigned num_samplers, + void **samplers) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + unsigned i; + + assert(num_samplers <= PIPE_MAX_VERTEX_SAMPLERS); + + /* Check for no-op */ + if (num_samplers == softpipe->num_vertex_samplers && + !memcmp(softpipe->vertex_samplers, samplers, num_samplers * sizeof(void *))) + return; + + draw_flush(softpipe->draw); + + for (i = 0; i < num_samplers; ++i) + softpipe->vertex_samplers[i] = samplers[i]; + for (i = num_samplers; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) + softpipe->vertex_samplers[i] = NULL; + + softpipe->num_vertex_samplers = num_samplers; + + softpipe->dirty |= SP_NEW_SAMPLER; +} + + void softpipe_set_sampler_textures(struct pipe_context *pipe, unsigned num, struct pipe_texture **texture) @@ -122,6 +150,37 @@ softpipe_set_sampler_textures(struct pipe_context *pipe, } +void +softpipe_set_vertex_sampler_textures(struct pipe_context *pipe, + unsigned num_textures, + struct pipe_texture **textures) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint i; + + assert(num_textures <= PIPE_MAX_VERTEX_SAMPLERS); + + /* Check for no-op */ + if (num_textures == softpipe->num_vertex_textures && + !memcmp(softpipe->vertex_textures, textures, num_textures * sizeof(struct pipe_texture *))) { + return; + } + + draw_flush(softpipe->draw); + + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + struct pipe_texture *tex = i < num_textures ? textures[i] : NULL; + + pipe_texture_reference(&softpipe->vertex_textures[i], tex); + sp_tex_tile_cache_set_texture(softpipe->vertex_tex_cache[i], tex); + } + + softpipe->num_vertex_textures = num_textures; + + softpipe->dirty |= SP_NEW_TEXTURE; +} + + /** * Find/create an sp_sampler_varient object for sampling the given texture, * sampler and tex unit. @@ -185,16 +244,16 @@ softpipe_reset_sampler_varients(struct softpipe_context *softpipe) * fragment programs. */ for (i = 0; i <= softpipe->vs->max_sampler; i++) { - if (softpipe->sampler[i]) { + if (softpipe->vertex_samplers[i]) { softpipe->tgsi.vert_samplers_list[i] = get_sampler_varient( i, - sp_sampler(softpipe->sampler[i]), - softpipe->texture[i], + sp_sampler(softpipe->vertex_samplers[i]), + softpipe->vertex_textures[i], TGSI_PROCESSOR_VERTEX ); sp_sampler_varient_bind_texture( softpipe->tgsi.vert_samplers_list[i], - softpipe->tex_cache[i], - softpipe->texture[i] ); + softpipe->vertex_tex_cache[i], + softpipe->vertex_textures[i] ); } } -- cgit v1.2.3 From 0f884ed993500171ad91fc9f2552574face9ee17 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 1 Dec 2009 08:52:37 +0100 Subject: sp: Do not falsely advertise support for some SNORM formats. --- src/gallium/drivers/softpipe/sp_screen.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 68e4ef4137..6bf3df8e6a 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -145,6 +145,10 @@ softpipe_is_format_supported( struct pipe_screen *screen, case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_B6UG5SR5S_NORM: + case PIPE_FORMAT_X8UB8UG8SR8S_NORM: + case PIPE_FORMAT_A8B8G8R8_SNORM: return FALSE; default: return TRUE; -- cgit v1.2.3 From 3f900c33ae6ede1c6f309628b1369a1b968a115d Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 1 Dec 2009 08:54:30 +0100 Subject: trace: Reduce double semicolons to single ones. --- src/gallium/drivers/trace/tr_context.c | 60 +++++++++++++++++----------------- 1 file changed, 30 insertions(+), 30 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 26c01c9b84..696b787c74 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -107,7 +107,7 @@ trace_context_set_edgeflags(struct pipe_context *_pipe, /* FIXME: we don't know how big this array is */ trace_dump_arg(ptr, bitfield); - pipe->set_edgeflags(pipe, bitfield);; + pipe->set_edgeflags(pipe, bitfield); trace_dump_call_end(); } @@ -192,7 +192,7 @@ trace_context_draw_arrays(struct pipe_context *_pipe, trace_dump_arg(uint, start); trace_dump_arg(uint, count); - result = pipe->draw_arrays(pipe, mode, start, count);; + result = pipe->draw_arrays(pipe, mode, start, count); trace_dump_ret(bool, result); @@ -232,7 +232,7 @@ trace_context_draw_elements(struct pipe_context *_pipe, trace_dump_arg(uint, start); trace_dump_arg(uint, count); - result = pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count);; + result = pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count); trace_dump_ret(bool, result); @@ -306,7 +306,7 @@ trace_context_create_query(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(uint, query_type); - result = pipe->create_query(pipe, query_type);; + result = pipe->create_query(pipe, query_type); trace_dump_ret(ptr, result); @@ -328,7 +328,7 @@ trace_context_destroy_query(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, query); - pipe->destroy_query(pipe, query);; + pipe->destroy_query(pipe, query); trace_dump_call_end(); } @@ -346,7 +346,7 @@ trace_context_begin_query(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, query); - pipe->begin_query(pipe, query);; + pipe->begin_query(pipe, query); trace_dump_call_end(); } @@ -385,7 +385,7 @@ trace_context_get_query_result(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); - _result = pipe->get_query_result(pipe, query, wait, presult);; + _result = pipe->get_query_result(pipe, query, wait, presult); result = *presult; trace_dump_arg(uint, result); @@ -410,7 +410,7 @@ trace_context_create_blend_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(blend_state, state); - result = pipe->create_blend_state(pipe, state);; + result = pipe->create_blend_state(pipe, state); trace_dump_ret(ptr, result); @@ -432,7 +432,7 @@ trace_context_bind_blend_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->bind_blend_state(pipe, state);; + pipe->bind_blend_state(pipe, state); trace_dump_call_end(); } @@ -450,7 +450,7 @@ trace_context_delete_blend_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->delete_blend_state(pipe, state);; + pipe->delete_blend_state(pipe, state); trace_dump_call_end(); } @@ -469,7 +469,7 @@ trace_context_create_sampler_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(sampler_state, state); - result = pipe->create_sampler_state(pipe, state);; + result = pipe->create_sampler_state(pipe, state); trace_dump_ret(ptr, result); @@ -510,7 +510,7 @@ trace_context_delete_sampler_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->delete_sampler_state(pipe, state);; + pipe->delete_sampler_state(pipe, state); trace_dump_call_end(); } @@ -529,7 +529,7 @@ trace_context_create_rasterizer_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(rasterizer_state, state); - result = pipe->create_rasterizer_state(pipe, state);; + result = pipe->create_rasterizer_state(pipe, state); trace_dump_ret(ptr, result); @@ -551,7 +551,7 @@ trace_context_bind_rasterizer_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->bind_rasterizer_state(pipe, state);; + pipe->bind_rasterizer_state(pipe, state); trace_dump_call_end(); } @@ -569,7 +569,7 @@ trace_context_delete_rasterizer_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->delete_rasterizer_state(pipe, state);; + pipe->delete_rasterizer_state(pipe, state); trace_dump_call_end(); } @@ -585,7 +585,7 @@ trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe, trace_dump_call_begin("pipe_context", "create_depth_stencil_alpha_state"); - result = pipe->create_depth_stencil_alpha_state(pipe, state);; + result = pipe->create_depth_stencil_alpha_state(pipe, state); trace_dump_arg(ptr, pipe); trace_dump_arg(depth_stencil_alpha_state, state); @@ -610,7 +610,7 @@ trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->bind_depth_stencil_alpha_state(pipe, state);; + pipe->bind_depth_stencil_alpha_state(pipe, state); trace_dump_call_end(); } @@ -628,7 +628,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->delete_depth_stencil_alpha_state(pipe, state);; + pipe->delete_depth_stencil_alpha_state(pipe, state); trace_dump_call_end(); } @@ -647,7 +647,7 @@ trace_context_create_fs_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(shader_state, state); - result = pipe->create_fs_state(pipe, state);; + result = pipe->create_fs_state(pipe, state); trace_dump_ret(ptr, result); @@ -750,7 +750,7 @@ trace_context_bind_vs_state(struct pipe_context *_pipe, if (tr_shdr && tr_shdr->replaced) state = tr_shdr->replaced; - pipe->bind_vs_state(pipe, state);; + pipe->bind_vs_state(pipe, state); trace_dump_call_end(); } @@ -770,7 +770,7 @@ trace_context_delete_vs_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->delete_vs_state(pipe, state);; + pipe->delete_vs_state(pipe, state); trace_dump_call_end(); @@ -790,7 +790,7 @@ trace_context_set_blend_color(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(blend_color, state); - pipe->set_blend_color(pipe, state);; + pipe->set_blend_color(pipe, state); trace_dump_call_end(); } @@ -808,7 +808,7 @@ trace_context_set_clip_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(clip_state, state); - pipe->set_clip_state(pipe, state);; + pipe->set_clip_state(pipe, state); trace_dump_call_end(); } @@ -880,7 +880,7 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(framebuffer_state, state); - pipe->set_framebuffer_state(pipe, state);; + pipe->set_framebuffer_state(pipe, state); trace_dump_call_end(); } @@ -898,7 +898,7 @@ trace_context_set_polygon_stipple(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(poly_stipple, state); - pipe->set_polygon_stipple(pipe, state);; + pipe->set_polygon_stipple(pipe, state); trace_dump_call_end(); } @@ -916,7 +916,7 @@ trace_context_set_scissor_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(scissor_state, state); - pipe->set_scissor_state(pipe, state);; + pipe->set_scissor_state(pipe, state); trace_dump_call_end(); } @@ -934,7 +934,7 @@ trace_context_set_viewport_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(viewport_state, state); - pipe->set_viewport_state(pipe, state);; + pipe->set_viewport_state(pipe, state); trace_dump_call_end(); } @@ -1024,7 +1024,7 @@ trace_context_set_vertex_elements(struct pipe_context *_pipe, trace_dump_struct_array(vertex_element, elements, num_elements); trace_dump_arg_end(); - pipe->set_vertex_elements(pipe, num_elements, elements);; + pipe->set_vertex_elements(pipe, num_elements, elements); trace_dump_call_end(); } @@ -1085,7 +1085,7 @@ trace_context_surface_fill(struct pipe_context *_pipe, trace_dump_arg(uint, width); trace_dump_arg(uint, height); - pipe->surface_fill(pipe, dst, dstx, dsty, width, height, value);; + pipe->surface_fill(pipe, dst, dstx, dsty, width, height, value); trace_dump_call_end(); } @@ -1128,7 +1128,7 @@ trace_context_flush(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(uint, flags); - pipe->flush(pipe, flags, fence);; + pipe->flush(pipe, flags, fence); if(fence) trace_dump_ret(ptr, *fence); -- cgit v1.2.3 From f2f7bd855af49752b1c77746542c62f1c529e953 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 1 Dec 2009 09:01:27 +0100 Subject: id: Implement separate vertex sampler state. --- src/gallium/drivers/identity/id_context.c | 58 ++++++++++++++++++++++++++----- 1 file changed, 49 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index 2f5b38ea15..4509c7b1e5 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -221,18 +221,31 @@ identity_create_sampler_state(struct pipe_context *_pipe, } static void -identity_bind_sampler_states(struct pipe_context *_pipe, - unsigned num, - void **samplers) +identity_bind_fragment_sampler_states(struct pipe_context *_pipe, + unsigned num_samplers, + void **samplers) { struct identity_context *id_pipe = identity_context(_pipe); struct pipe_context *pipe = id_pipe->pipe; pipe->bind_fragment_sampler_states(pipe, - num, + num_samplers, samplers); } +static void +identity_bind_vertex_sampler_states(struct pipe_context *_pipe, + unsigned num_samplers, + void **samplers) +{ + struct identity_context *id_pipe = identity_context(_pipe); + struct pipe_context *pipe = id_pipe->pipe; + + pipe->bind_vertex_sampler_states(pipe, + num_samplers, + samplers); +} + static void identity_delete_sampler_state(struct pipe_context *_pipe, void *sampler) @@ -480,9 +493,9 @@ identity_set_viewport_state(struct pipe_context *_pipe, } static void -identity_set_sampler_textures(struct pipe_context *_pipe, - unsigned num_textures, - struct pipe_texture **_textures) +identity_set_fragment_sampler_textures(struct pipe_context *_pipe, + unsigned num_textures, + struct pipe_texture **_textures) { struct identity_context *id_pipe = identity_context(_pipe); struct pipe_context *pipe = id_pipe->pipe; @@ -504,6 +517,31 @@ identity_set_sampler_textures(struct pipe_context *_pipe, textures); } +static void +identity_set_vertex_sampler_textures(struct pipe_context *_pipe, + unsigned num_textures, + struct pipe_texture **_textures) +{ + struct identity_context *id_pipe = identity_context(_pipe); + struct pipe_context *pipe = id_pipe->pipe; + struct pipe_texture *unwrapped_textures[PIPE_MAX_VERTEX_SAMPLERS]; + struct pipe_texture **textures = NULL; + unsigned i; + + if (_textures) { + for (i = 0; i < num_textures; i++) + unwrapped_textures[i] = identity_texture_unwrap(_textures[i]); + for (; i < PIPE_MAX_VERTEX_SAMPLERS; i++) + unwrapped_textures[i] = NULL; + + textures = unwrapped_textures; + } + + pipe->set_vertex_sampler_textures(pipe, + num_textures, + textures); +} + static void identity_set_vertex_buffers(struct pipe_context *_pipe, unsigned num_buffers, @@ -682,7 +720,8 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.bind_blend_state = identity_bind_blend_state; id_pipe->base.delete_blend_state = identity_delete_blend_state; id_pipe->base.create_sampler_state = identity_create_sampler_state; - id_pipe->base.bind_fragment_sampler_states = identity_bind_sampler_states; + id_pipe->base.bind_fragment_sampler_states = identity_bind_fragment_sampler_states; + id_pipe->base.bind_vertex_sampler_states = identity_bind_vertex_sampler_states; id_pipe->base.delete_sampler_state = identity_delete_sampler_state; id_pipe->base.create_rasterizer_state = identity_create_rasterizer_state; id_pipe->base.bind_rasterizer_state = identity_bind_rasterizer_state; @@ -703,7 +742,8 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.set_polygon_stipple = identity_set_polygon_stipple; id_pipe->base.set_scissor_state = identity_set_scissor_state; id_pipe->base.set_viewport_state = identity_set_viewport_state; - id_pipe->base.set_fragment_sampler_textures = identity_set_sampler_textures; + id_pipe->base.set_fragment_sampler_textures = identity_set_vertex_sampler_textures; + id_pipe->base.set_vertex_sampler_textures = identity_set_vertex_sampler_textures; id_pipe->base.set_vertex_buffers = identity_set_vertex_buffers; id_pipe->base.set_vertex_elements = identity_set_vertex_elements; id_pipe->base.surface_copy = identity_surface_copy; -- cgit v1.2.3 From f8969db2f8410fd3b653734948251ada4284a3c6 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 1 Dec 2009 09:39:08 +0100 Subject: fo: Implement separate vertex sampler state. --- src/gallium/drivers/failover/fo_context.h | 6 +++ src/gallium/drivers/failover/fo_state.c | 77 +++++++++++++++++++++++++--- src/gallium/drivers/failover/fo_state_emit.c | 6 +++ 3 files changed, 82 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h index 9ba86ba866..149393712a 100644 --- a/src/gallium/drivers/failover/fo_context.h +++ b/src/gallium/drivers/failover/fo_context.h @@ -72,6 +72,7 @@ struct failover_context { */ const struct fo_state *blend; const struct fo_state *sampler[PIPE_MAX_SAMPLERS]; + const struct fo_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; const struct fo_state *depth_stencil; const struct fo_state *rasterizer; const struct fo_state *fragment_shader; @@ -83,6 +84,7 @@ struct failover_context { struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS]; @@ -92,11 +94,15 @@ struct failover_context { void *sw_sampler_state[PIPE_MAX_SAMPLERS]; void *hw_sampler_state[PIPE_MAX_SAMPLERS]; + void *sw_vertex_sampler_state[PIPE_MAX_VERTEX_SAMPLERS]; + void *hw_vertex_sampler_state[PIPE_MAX_VERTEX_SAMPLERS]; unsigned dirty; unsigned num_samplers; + unsigned num_vertex_samplers; unsigned num_textures; + unsigned num_vertex_textures; unsigned mode; struct pipe_context *hw; diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index fca6caa227..3f5f556032 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -322,8 +322,9 @@ failover_create_sampler_state(struct pipe_context *pipe, } static void -failover_bind_sampler_states(struct pipe_context *pipe, - unsigned num, void **sampler) +failover_bind_fragment_sampler_states(struct pipe_context *pipe, + unsigned num, + void **sampler) { struct failover_context *failover = failover_context(pipe); struct fo_state *state = (struct fo_state*)sampler; @@ -345,6 +346,36 @@ failover_bind_sampler_states(struct pipe_context *pipe, failover->hw_sampler_state); } +static void +failover_bind_vertex_sampler_states(struct pipe_context *pipe, + unsigned num_samplers, + void **samplers) +{ + struct failover_context *failover = failover_context(pipe); + struct fo_state *state = (struct fo_state*)samplers; + uint i; + + assert(num_samplers <= PIPE_MAX_VERTEX_SAMPLERS); + + /* Check for no-op */ + if (num_samplers == failover->num_vertex_samplers && + !memcmp(failover->vertex_samplers, samplers, num_samplers * sizeof(void *))) { + return; + } + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + failover->sw_vertex_sampler_state[i] = i < num_samplers ? state[i].sw_state : NULL; + failover->hw_vertex_sampler_state[i] = i < num_samplers ? state[i].hw_state : NULL; + } + failover->dirty |= FO_NEW_SAMPLER; + failover->num_vertex_samplers = num_samplers; + failover->sw->bind_vertex_sampler_states(failover->sw, + num_samplers, + failover->sw_vertex_sampler_state); + failover->hw->bind_vertex_sampler_states(failover->hw, + num_samplers, + failover->hw_vertex_sampler_state); +} + static void failover_delete_sampler_state(struct pipe_context *pipe, void *sampler) { @@ -360,9 +391,9 @@ failover_delete_sampler_state(struct pipe_context *pipe, void *sampler) static void -failover_set_sampler_textures(struct pipe_context *pipe, - unsigned num, - struct pipe_texture **texture) +failover_set_fragment_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) { struct failover_context *failover = failover_context(pipe); uint i; @@ -386,6 +417,36 @@ failover_set_sampler_textures(struct pipe_context *pipe, } +static void +failover_set_vertex_sampler_textures(struct pipe_context *pipe, + unsigned num_textures, + struct pipe_texture **textures) +{ + struct failover_context *failover = failover_context(pipe); + uint i; + + assert(num_textures <= PIPE_MAX_VERTEX_SAMPLERS); + + /* Check for no-op */ + if (num_textures == failover->num_vertex_textures && + !memcmp(failover->vertex_textures, textures, num_textures * sizeof(struct pipe_texture *))) { + return; + } + for (i = 0; i < num_textures; i++) { + pipe_texture_reference((struct pipe_texture **)&failover->vertex_textures[i], + textures[i]); + } + for (i = num_textures; i < failover->num_vertex_textures; i++) { + pipe_texture_reference((struct pipe_texture **)&failover->vertex_textures[i], + NULL); + } + failover->dirty |= FO_NEW_TEXTURE; + failover->num_vertex_textures = num_textures; + failover->sw->set_vertex_sampler_textures(failover->sw, num_textures, textures); + failover->hw->set_vertex_sampler_textures(failover->hw, num_textures, textures); +} + + static void failover_set_viewport_state( struct pipe_context *pipe, const struct pipe_viewport_state *viewport ) @@ -453,7 +514,8 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.bind_blend_state = failover_bind_blend_state; failover->pipe.delete_blend_state = failover_delete_blend_state; failover->pipe.create_sampler_state = failover_create_sampler_state; - failover->pipe.bind_fragment_sampler_states = failover_bind_sampler_states; + failover->pipe.bind_fragment_sampler_states = failover_bind_fragment_sampler_states; + failover->pipe.bind_vertex_sampler_states = failover_bind_vertex_sampler_states; failover->pipe.delete_sampler_state = failover_delete_sampler_state; failover->pipe.create_depth_stencil_alpha_state = failover_create_depth_stencil_state; failover->pipe.bind_depth_stencil_alpha_state = failover_bind_depth_stencil_state; @@ -473,7 +535,8 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.set_framebuffer_state = failover_set_framebuffer_state; failover->pipe.set_polygon_stipple = failover_set_polygon_stipple; failover->pipe.set_scissor_state = failover_set_scissor_state; - failover->pipe.set_fragment_sampler_textures = failover_set_sampler_textures; + failover->pipe.set_fragment_sampler_textures = failover_set_fragment_sampler_textures; + failover->pipe.set_vertex_sampler_textures = failover_set_vertex_sampler_textures; failover->pipe.set_viewport_state = failover_set_viewport_state; failover->pipe.set_vertex_buffers = failover_set_vertex_buffers; failover->pipe.set_vertex_elements = failover_set_vertex_elements; diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c index b4b1067924..a3341e33f8 100644 --- a/src/gallium/drivers/failover/fo_state_emit.c +++ b/src/gallium/drivers/failover/fo_state_emit.c @@ -94,11 +94,17 @@ failover_state_emit( struct failover_context *failover ) if (failover->dirty & FO_NEW_SAMPLER) { failover->sw->bind_fragment_sampler_states( failover->sw, failover->num_samplers, failover->sw_sampler_state ); + failover->sw->bind_vertex_sampler_states(failover->sw, + failover->num_vertex_samplers, + failover->sw_vertex_sampler_state); } if (failover->dirty & FO_NEW_TEXTURE) { failover->sw->set_fragment_sampler_textures( failover->sw, failover->num_textures, failover->texture ); + failover->sw->set_vertex_sampler_textures(failover->sw, + failover->num_vertex_textures, + failover->vertex_textures); } if (failover->dirty & FO_NEW_VERTEX_BUFFER) { -- cgit v1.2.3 From 57ed791305ded187c455b07e6c6a5b916f37a293 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 1 Dec 2009 09:50:38 +0100 Subject: trace: Implement separate vertex sampler state. --- src/gallium/drivers/trace/tr_context.c | 76 ++++++++++++++++++++++++++++++---- src/gallium/drivers/trace/tr_context.h | 3 ++ 2 files changed, 71 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 696b787c74..2f0f063d2d 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -143,10 +143,16 @@ trace_context_draw_block(struct trace_context *tr_ctx, int flag) for (k = 0; k < tr_ctx->curr.nr_cbufs; k++) if (tr_ctx->draw_rule.surf == tr_ctx->curr.cbufs[k]) block = TRUE; - if (tr_ctx->draw_rule.tex) + if (tr_ctx->draw_rule.tex) { for (k = 0; k < tr_ctx->curr.num_texs; k++) if (tr_ctx->draw_rule.tex == tr_ctx->curr.tex[k]) block = TRUE; + for (k = 0; k < tr_ctx->curr.num_vert_texs; k++) { + if (tr_ctx->draw_rule.tex == tr_ctx->curr.vert_tex[k]) { + block = TRUE; + } + } + } if (block) tr_ctx->draw_blocked |= (flag | 4); @@ -480,8 +486,9 @@ trace_context_create_sampler_state(struct pipe_context *_pipe, static INLINE void -trace_context_bind_sampler_states(struct pipe_context *_pipe, - unsigned num_states, void **states) +trace_context_bind_fragment_sampler_states(struct pipe_context *_pipe, + unsigned num_states, + void **states) { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; @@ -498,6 +505,26 @@ trace_context_bind_sampler_states(struct pipe_context *_pipe, } +static INLINE void +trace_context_bind_vertex_sampler_states(struct pipe_context *_pipe, + unsigned num_states, + void **states) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "bind_vertex_sampler_states"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, num_states); + trace_dump_arg_array(ptr, states, num_states); + + pipe->bind_vertex_sampler_states(pipe, num_states, states); + + trace_dump_call_end(); +} + + static INLINE void trace_context_delete_sampler_state(struct pipe_context *_pipe, void *state) @@ -941,9 +968,9 @@ trace_context_set_viewport_state(struct pipe_context *_pipe, static INLINE void -trace_context_set_sampler_textures(struct pipe_context *_pipe, - unsigned num_textures, - struct pipe_texture **textures) +trace_context_set_fragment_sampler_textures(struct pipe_context *_pipe, + unsigned num_textures, + struct pipe_texture **textures) { struct trace_context *tr_ctx = trace_context(_pipe); struct trace_texture *tr_tex; @@ -971,6 +998,37 @@ trace_context_set_sampler_textures(struct pipe_context *_pipe, } +static INLINE void +trace_context_set_vertex_sampler_textures(struct pipe_context *_pipe, + unsigned num_textures, + struct pipe_texture **textures) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_texture *tr_tex; + struct pipe_context *pipe = tr_ctx->pipe; + struct pipe_texture *unwrapped_textures[PIPE_MAX_VERTEX_SAMPLERS]; + unsigned i; + + tr_ctx->curr.num_vert_texs = num_textures; + for(i = 0; i < num_textures; ++i) { + tr_tex = trace_texture(textures[i]); + tr_ctx->curr.vert_tex[i] = tr_tex; + unwrapped_textures[i] = tr_tex ? tr_tex->texture : NULL; + } + textures = unwrapped_textures; + + trace_dump_call_begin("pipe_context", "set_vertex_sampler_textures"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, num_textures); + trace_dump_arg_array(ptr, textures, num_textures); + + pipe->set_vertex_sampler_textures(pipe, num_textures, textures); + + trace_dump_call_end(); +} + + static INLINE void trace_context_set_vertex_buffers(struct pipe_context *_pipe, unsigned num_buffers, @@ -1253,7 +1311,8 @@ trace_context_create(struct pipe_screen *_screen, tr_ctx->base.bind_blend_state = trace_context_bind_blend_state; tr_ctx->base.delete_blend_state = trace_context_delete_blend_state; tr_ctx->base.create_sampler_state = trace_context_create_sampler_state; - tr_ctx->base.bind_fragment_sampler_states = trace_context_bind_sampler_states; + tr_ctx->base.bind_fragment_sampler_states = trace_context_bind_fragment_sampler_states; + tr_ctx->base.bind_vertex_sampler_states = trace_context_bind_vertex_sampler_states; tr_ctx->base.delete_sampler_state = trace_context_delete_sampler_state; tr_ctx->base.create_rasterizer_state = trace_context_create_rasterizer_state; tr_ctx->base.bind_rasterizer_state = trace_context_bind_rasterizer_state; @@ -1274,7 +1333,8 @@ trace_context_create(struct pipe_screen *_screen, tr_ctx->base.set_polygon_stipple = trace_context_set_polygon_stipple; tr_ctx->base.set_scissor_state = trace_context_set_scissor_state; tr_ctx->base.set_viewport_state = trace_context_set_viewport_state; - tr_ctx->base.set_fragment_sampler_textures = trace_context_set_sampler_textures; + tr_ctx->base.set_fragment_sampler_textures = trace_context_set_fragment_sampler_textures; + tr_ctx->base.set_vertex_sampler_textures = trace_context_set_vertex_sampler_textures; tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers; tr_ctx->base.set_vertex_elements = trace_context_set_vertex_elements; if (pipe->surface_copy) diff --git a/src/gallium/drivers/trace/tr_context.h b/src/gallium/drivers/trace/tr_context.h index 6febe4b411..852b480765 100644 --- a/src/gallium/drivers/trace/tr_context.h +++ b/src/gallium/drivers/trace/tr_context.h @@ -54,6 +54,9 @@ struct trace_context struct trace_texture *tex[PIPE_MAX_SAMPLERS]; unsigned num_texs; + struct trace_texture *vert_tex[PIPE_MAX_VERTEX_SAMPLERS]; + unsigned num_vert_texs; + unsigned nr_cbufs; struct trace_texture *cbufs[PIPE_MAX_COLOR_BUFS]; struct trace_texture *zsbuf; -- cgit v1.2.3 From 759604e32bb5b00d7b70fbab7bd8125e135d7a68 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 25 Nov 2009 00:24:28 +0100 Subject: r300g: add R300 prefix in reg definitions --- src/gallium/drivers/r300/r300_reg.h | 82 ++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 41 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 3a419b24b0..85b1ea568a 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -661,20 +661,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_GB_SUPER_TILE_B (1 << 15) # define R300_GB_SUBPIXEL_1_12 (0 << 16) # define R300_GB_SUBPIXEL_1_16 (1 << 16) -# define GB_TILE_CONFIG_QUADS_PER_RAS_4 (0 << 17) -# define GB_TILE_CONFIG_QUADS_PER_RAS_8 (1 << 17) -# define GB_TILE_CONFIG_QUADS_PER_RAS_16 (2 << 17) -# define GB_TILE_CONFIG_QUADS_PER_RAS_32 (3 << 17) -# define GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19) -# define GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19) -# define GB_TILE_CONFIG_ALT_SCAN_EN_LR (0 << 20) -# define GB_TILE_CONFIG_ALT_SCAN_EN_LRL (1 << 20) -# define GB_TILE_CONFIG_ALT_OFFSET (0 << 21) -# define GB_TILE_CONFIG_SUBPRECISION (0 << 22) -# define GB_TILE_CONFIG_ALT_TILING_DEF (0 << 23) -# define GB_TILE_CONFIG_ALT_TILING_3_2 (1 << 23) -# define GB_TILE_CONFIG_Z_EXTENDED_24_1 (0 << 24) -# define GB_TILE_CONFIG_Z_EXTENDED_S25_1 (1 << 24) +# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_4 (0 << 17) +# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_8 (1 << 17) +# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_16 (2 << 17) +# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_32 (3 << 17) +# define R300_GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19) +# define R300_GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19) +# define R300_GB_TILE_CONFIG_ALT_SCAN_EN_LR (0 << 20) +# define R300_GB_TILE_CONFIG_ALT_SCAN_EN_LRL (1 << 20) +# define R300_GB_TILE_CONFIG_ALT_OFFSET (0 << 21) +# define R300_GB_TILE_CONFIG_SUBPRECISION (0 << 22) +# define R300_GB_TILE_CONFIG_ALT_TILING_DEF (0 << 23) +# define R300_GB_TILE_CONFIG_ALT_TILING_3_2 (1 << 23) +# define R300_GB_TILE_CONFIG_Z_EXTENDED_24_1 (0 << 24) +# define R300_GB_TILE_CONFIG_Z_EXTENDED_S25_1 (1 << 24) /* Specifies the sizes of the various FIFO`s in the sc/rs/us. This register must be the first one written */ #define R300_GB_FIFO_SIZE 0x4024 @@ -700,9 +700,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_OFIFO_HIGHWATER_SHIFT 22 /* two bits only */ # define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT 24 -#define GB_Z_PEQ_CONFIG 0x4028 -# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4 (0 << 0) -# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8 (1 << 0) +#define R300_GB_Z_PEQ_CONFIG 0x4028 +# define R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4 (0 << 0) +# define R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8 (1 << 0) /* Specifies various polygon specific selects (fog, depth, perspective). */ #define R300_GB_SELECT 0x401c @@ -725,39 +725,39 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Specifies the graphics pipeline configuration for antialiasing. */ #define R300_GB_AA_CONFIG 0x4020 -# define GB_AA_CONFIG_AA_DISABLE (0 << 0) -# define GB_AA_CONFIG_AA_ENABLE (1 << 0) -# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2 (0 << 1) -# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3 (1 << 1) -# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4 (2 << 1) -# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6 (3 << 1) +# define R300_GB_AA_CONFIG_AA_DISABLE (0 << 0) +# define R300_GB_AA_CONFIG_AA_ENABLE (1 << 0) +# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2 (0 << 1) +# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3 (1 << 1) +# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4 (2 << 1) +# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6 (3 << 1) /* Selects which of 4 pipes are active. */ -#define GB_PIPE_SELECT 0x402c -# define GB_PIPE_SELECT_PIPE0_ID_SHIFT 0 -# define GB_PIPE_SELECT_PIPE1_ID_SHIFT 2 -# define GB_PIPE_SELECT_PIPE2_ID_SHIFT 4 -# define GB_PIPE_SELECT_PIPE3_ID_SHIFT 6 -# define GB_PIPE_SELECT_PIPE_MASK_SHIFT 8 -# define GB_PIPE_SELECT_MAX_PIPE 12 -# define GB_PIPE_SELECT_BAD_PIPES 14 -# define GB_PIPE_SELECT_CONFIG_PIPES 18 +#define R300_GB_PIPE_SELECT 0x402c +# define R300_GB_PIPE_SELECT_PIPE0_ID_SHIFT 0 +# define R300_GB_PIPE_SELECT_PIPE1_ID_SHIFT 2 +# define R300_GB_PIPE_SELECT_PIPE2_ID_SHIFT 4 +# define R300_GB_PIPE_SELECT_PIPE3_ID_SHIFT 6 +# define R300_GB_PIPE_SELECT_PIPE_MASK_SHIFT 8 +# define R300_GB_PIPE_SELECT_MAX_PIPE 12 +# define R300_GB_PIPE_SELECT_BAD_PIPES 14 +# define R300_GB_PIPE_SELECT_CONFIG_PIPES 18 /* Specifies the sizes of the various FIFO`s in the sc/rs. */ -#define GB_FIFO_SIZE1 0x4070 +#define R300_GB_FIFO_SIZE1 0x4070 /* High water mark for SC input fifo */ -# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0 -# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK 0x0000003f +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK 0x0000003f /* High water mark for SC input fifo (B) */ -# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6 -# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK 0x00000fc0 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK 0x00000fc0 /* High water mark for RS colors' fifo */ -# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT 12 -# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK 0x0003f000 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT 12 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK 0x0003f000 /* High water mark for RS textures' fifo */ -# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT 18 -# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK 0x00fc0000 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT 18 +# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK 0x00fc0000 /* This table specifies the source location and format for up to 16 texture * addresses (i[0]:i[15]) and four colors (c[0]:c[3]) -- cgit v1.2.3 From 6f05eba0204a9f1371bb1ae5cdb0f71bb819eb28 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 26 Nov 2009 13:49:41 +0100 Subject: r300g: VS->FS attribute routing rework Now it always correctly pairs up VS and FS even if the semantics and indices of VS outputs and FS inputs don't match. --- src/gallium/drivers/r300/r300_state_derived.c | 562 ++++++++++++++++++-------- 1 file changed, 392 insertions(+), 170 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 6fb780cb29..03cdba0538 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson + * Copyright 2009 Marek Olšák * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -35,6 +36,25 @@ /* r300_state_derived: Various bits of state which are dependent upon * currently bound CSO data. */ +#define ATTR_UNUSED (-1) +#define ATTR_COLOR_COUNT 2 +#define ATTR_GENERIC_COUNT 16 + +/* This structure contains information about what attributes are written by VS + * or read by FS. (but not both) It's much easier to work with than + * tgsi_shader_info. + * + * The variables basically means used/unused and may optionally contain + * indices to tgsi_shader_info semantics which we need to know for Draw. */ +struct r300_shader_info { + int pos; + int psize; + int color[ATTR_COLOR_COUNT]; + int bcolor[ATTR_COLOR_COUNT]; + int generic[ATTR_GENERIC_COUNT]; + int fog; +}; + struct r300_shader_key { struct r300_vertex_shader* vs; struct r300_fragment_shader* fs; @@ -61,51 +81,184 @@ int r300_shader_key_compare(void* key1, void* key2) { (shader_key1->fs == shader_key2->fs); } -/* Set up the vs_output_tab and routes. */ -static void r300_vs_output_tab_routes(struct r300_context* r300, - int* vs_output_tab) +static void r300_draw_emit_attrib(struct r300_context* r300, + enum attrib_emit emit, + enum interp_mode interp, + int index) { - struct vertex_info* vinfo = &r300->vertex_info->vinfo; - boolean pos = FALSE, psize = FALSE, fog = FALSE; - int i, texs = 0, cols = 0; - struct tgsi_shader_info* info = &r300->fs->info; + struct tgsi_shader_info* info = &r300->vs->info; + int output; - /* XXX One day we should figure out how to handle a different number of - * VS outputs and FS inputs, as well as a different number of vertex streams - * and VS inputs. It's definitely one of the sources of hardlocks. */ + if (r300->draw) { + output = draw_find_vs_output(r300->draw, + info->output_semantic_name[index], + info->output_semantic_index[index]); + draw_emit_vertex_attr(&r300->vertex_info->vinfo, emit, interp, output); + } +} - for (i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { +static void r300_shader_info_reset(struct r300_shader_info* info) +{ + int i; + + info->pos = ATTR_UNUSED; + info->psize = ATTR_UNUSED; + info->fog = ATTR_UNUSED; + + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + info->color[i] = ATTR_UNUSED; + info->bcolor[i] = ATTR_UNUSED; + } + + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + info->generic[i] = ATTR_UNUSED; + } +} + +/* Convert info about VS output semantics to r300_shader_info. */ +static void r300_shader_read_vs_outputs(struct tgsi_shader_info* info, + struct r300_shader_info* vs_outputs) +{ + int i; + unsigned index; + + r300_shader_info_reset(vs_outputs); + + for (i = 0; i < info->num_outputs; i++) { + index = info->output_semantic_index[i]; + + switch (info->output_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: - pos = TRUE; - vs_output_tab[i] = 0; + assert(index == 0); + vs_outputs->pos = i; + break; + + case TGSI_SEMANTIC_PSIZE: + assert(index == 0); + vs_outputs->psize = i; break; + case TGSI_SEMANTIC_COLOR: - vs_output_tab[i] = 2 + cols; - cols++; + assert(index <= ATTR_COLOR_COUNT); + vs_outputs->color[index] = i; break; - case TGSI_SEMANTIC_PSIZE: - assert(psize == FALSE); - psize = TRUE; - vs_output_tab[i] = 15; + + case TGSI_SEMANTIC_BCOLOR: + assert(index <= ATTR_COLOR_COUNT); + vs_outputs->bcolor[index] = i; break; - case TGSI_SEMANTIC_FOG: - assert(fog == FALSE); - fog = TRUE; - /* Fall through */ + case TGSI_SEMANTIC_GENERIC: - vs_output_tab[i] = 6 + texs; - texs++; + assert(index <= ATTR_GENERIC_COUNT); + vs_outputs->generic[index] = i; break; - default: - debug_printf("r300: Unknown vertex input %d\n", - info->input_semantic_name[i]); + + case TGSI_SEMANTIC_FOG: + assert(index == 0); + vs_outputs->fog = i; break; + + default: + assert(0); } } +} + +/* Set VS output stream locations for SWTCL. */ +static void r300_stream_locations_swtcl(struct r300_shader_info* vs_outputs, + int* vs_output_tab) +{ + int i, tabi = 0, gen_count; + + /* XXX Check whether the numbers (0, 1, 2+i, etc.) are correct. + * These should go to VAP_PROG_STREAM_CNTL/DST_VEC_LOC. */ + + /* Position. */ + vs_output_tab[tabi++] = 0; + + /* Point size. */ + if (vs_outputs->psize != ATTR_UNUSED) { + vs_output_tab[tabi++] = 1; + } + + /* Colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->color[i] != ATTR_UNUSED) { + vs_output_tab[tabi++] = 2 + i; + } + } + + /* Back-face colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->bcolor[i] != ATTR_UNUSED) { + vs_output_tab[tabi++] = 4 + i; + } + } + + /* Texture coordinates. */ + gen_count = 0; + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (vs_outputs->bcolor[i] != ATTR_UNUSED) { + assert(tabi < 16); + vs_output_tab[tabi++] = 6 + gen_count; + gen_count++; + } + } + + /* Fog coordinates. */ + if (vs_outputs->fog != ATTR_UNUSED) { + assert(tabi < 16); + vs_output_tab[tabi++] = 6 + gen_count; + gen_count++; + } /* XXX magic */ - assert(texs <= 8); + assert(gen_count <= 8); + + for (; tabi < 16;) { + vs_output_tab[tabi++] = -1; + } +} + +/* Convert info about FS input semantics to r300_shader_info. */ +static void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, + struct r300_shader_info* fs_inputs) +{ + int i; + unsigned index; + + r300_shader_info_reset(fs_inputs); + + for (i = 0; i < info->num_inputs; i++) { + index = info->input_semantic_index[i]; + + switch (info->input_semantic_name[i]) { + case TGSI_SEMANTIC_COLOR: + assert(index <= ATTR_COLOR_COUNT); + fs_inputs->color[index] = i; + break; + + case TGSI_SEMANTIC_GENERIC: + assert(index <= ATTR_GENERIC_COUNT); + fs_inputs->generic[index] = i; + break; + + case TGSI_SEMANTIC_FOG: + assert(index == 0); + fs_inputs->fog = i; + break; + + default: + assert(0); + } + } +} + +static void r300_update_vap_output_fmt(struct r300_context* r300, + struct r300_shader_info* vs_outputs) +{ + struct vertex_info* vinfo = &r300->vertex_info->vinfo; + int i, gen_count; /* Do the actual vertex_info setup. * @@ -117,69 +270,59 @@ static void r300_vs_output_tab_routes(struct r300_context* r300, vinfo->hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */ - /* We need to add vertex position attribute only for SW TCL case, - * for HW TCL case it could be generated by vertex shader */ - if (!pos) { - /* Make room for the position attribute - * at the beginning of the vs_output_tab. */ - for (i = 15; i > 0; i--) { - vs_output_tab[i] = vs_output_tab[i-1]; - } - vs_output_tab[0] = 0; - } - /* Position. */ - if (r300->draw) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_POSITION, 0)); + if (vs_outputs->pos != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, + vs_outputs->pos); + vinfo->hwfmt[1] |= R300_INPUT_CNTL_POS; + vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + } else { + assert(0); } - vinfo->hwfmt[1] |= R300_INPUT_CNTL_POS; - vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; /* Point size. */ - if (psize) { - if (r300->draw) { - draw_emit_vertex_attr(vinfo, EMIT_1F_PSIZE, INTERP_POS, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_PSIZE, 0)); - } + if (vs_outputs->psize != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS, + vs_outputs->psize); vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; } /* Colors. */ - for (i = 0; i < cols; i++) { - if (r300->draw) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_COLOR, i)); + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->color[i] != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR, + vs_outputs->color[i]); + vinfo->hwfmt[1] |= R300_INPUT_CNTL_COLOR; + vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; } - vinfo->hwfmt[1] |= R300_INPUT_CNTL_COLOR; - vinfo->hwfmt[2] |= (R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i); } - /* Init i right here, increment it if fog is enabled. - * This gets around a double-increment problem. */ - i = 0; - - /* Fog. This is a special-cased texcoord. */ - if (fog) { - i++; - if (r300->draw) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_FOG, 0)); + /* XXX Back-face colors. */ + + /* Texture coordinates. */ + gen_count = 0; + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (vs_outputs->generic[i] != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, + vs_outputs->generic[i]); + vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count); + vinfo->hwfmt[3] |= (4 << (3 * gen_count)); + gen_count++; } - vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i); - vinfo->hwfmt[3] |= (4 << (3 * i)); } - /* Texcoords. */ - for (; i < texs; i++) { - if (r300->draw) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i)); - } - vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i); - vinfo->hwfmt[3] |= (4 << (3 * i)); + /* Fog coordinates. */ + if (vs_outputs->fog != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, + vs_outputs->fog); + vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count); + vinfo->hwfmt[3] |= (4 << (3 * gen_count)); + gen_count++; } + /* XXX magic */ + assert(gen_count <= 8); + draw_compute_vertex_size(vinfo); } @@ -279,109 +422,191 @@ static void r300_swtcl_vertex_psc(struct r300_context* r300, (R300_LAST_VEC << (i & 1 ? 16 : 0)); } -/* Set up the RS block. This is the part of the chipset that actually does - * the rasterization of vertices into fragments. This is also the part of the - * chipset that locks up if any part of it is even slightly wrong. */ -static void r300_update_rs_block(struct r300_context* r300) +static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr, + boolean swizzle_0001) { - struct r300_rs_block* rs = r300->rs_block; - struct tgsi_shader_info* info = &r300->fs->info; - int col_count = 0, fp_offset = 0, i, tex_count = 0; - int rs_tex_comp = 0; + rs->ip[id] |= R300_RS_COL_PTR(ptr); + if (swizzle_0001) { + rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001); + } else { + rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); + } + rs->inst[id] |= R300_RS_INST_COL_ID(id); +} - if (r300_screen(r300->context.screen)->caps->is_r500) { - for (i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { - case TGSI_SEMANTIC_COLOR: - rs->ip[col_count] |= - R500_RS_COL_PTR(col_count) | - R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); - col_count++; - break; - case TGSI_SEMANTIC_GENERIC: - rs->ip[tex_count] |= - R500_RS_SEL_S(rs_tex_comp) | - R500_RS_SEL_T(rs_tex_comp + 1) | - R500_RS_SEL_R(rs_tex_comp + 2) | - R500_RS_SEL_Q(rs_tex_comp + 3); - tex_count++; - rs_tex_comp += 4; - break; - default: - break; - } - } +static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset) +{ + rs->inst[id] |= R300_RS_INST_COL_CN_WRITE | + R300_RS_INST_COL_ADDR(fp_offset); +} - /* Rasterize at least one color, or bad things happen. */ - if ((col_count == 0) && (tex_count == 0)) { - rs->ip[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001); - col_count++; - } +static void r300_rs_tex(struct r300_rs_block* rs, int id, int ptr, + boolean swizzle_X001) +{ + if (swizzle_X001) { + rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) | + R300_RS_SEL_S(R300_RS_SEL_C0) | + R300_RS_SEL_T(R300_RS_SEL_K0) | + R300_RS_SEL_R(R300_RS_SEL_K0) | + R300_RS_SEL_Q(R300_RS_SEL_K1); + } else { + rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) | + R300_RS_SEL_S(R300_RS_SEL_C0) | + R300_RS_SEL_T(R300_RS_SEL_C1) | + R300_RS_SEL_R(R300_RS_SEL_C2) | + R300_RS_SEL_Q(R300_RS_SEL_C3); + } + rs->inst[id] |= R300_RS_INST_TEX_ID(id); +} - for (i = 0; i < col_count; i++) { - rs->inst[i] |= R500_RS_INST_COL_ID(i) | - R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_offset); - fp_offset++; - } +static void r300_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset) +{ + rs->inst[id] |= R300_RS_INST_TEX_CN_WRITE | + R300_RS_INST_TEX_ADDR(fp_offset); +} - for (i = 0; i < tex_count; i++) { - rs->inst[i] |= R500_RS_INST_TEX_ID(i) | - R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_offset); - fp_offset++; - } +static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr, + boolean swizzle_0001) +{ + rs->ip[id] |= R500_RS_COL_PTR(ptr); + if (swizzle_0001) { + rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001); + } else { + rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); + } + rs->inst[id] |= R500_RS_INST_COL_ID(id); +} + +static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset) +{ + rs->inst[id] |= R500_RS_INST_COL_CN_WRITE | + R500_RS_INST_COL_ADDR(fp_offset); +} +static void r500_rs_tex(struct r300_rs_block* rs, int id, int ptr, + boolean swizzle_X001) +{ + int rs_tex_comp = ptr*4; + + if (swizzle_X001) { + rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) | + R500_RS_SEL_T(R500_RS_IP_PTR_K0) | + R500_RS_SEL_R(R500_RS_IP_PTR_K0) | + R500_RS_SEL_Q(R500_RS_IP_PTR_K1); } else { - for (i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { - case TGSI_SEMANTIC_COLOR: - rs->ip[col_count] |= - R300_RS_COL_PTR(col_count) | - R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); - col_count++; - break; - case TGSI_SEMANTIC_GENERIC: - rs->ip[tex_count] |= - R300_RS_TEX_PTR(rs_tex_comp) | - R300_RS_SEL_S(R300_RS_SEL_C0) | - R300_RS_SEL_T(R300_RS_SEL_C1) | - R300_RS_SEL_R(R300_RS_SEL_C2) | - R300_RS_SEL_Q(R300_RS_SEL_C3); - tex_count++; - rs_tex_comp+=4; - break; - default: - break; - } - } + rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) | + R500_RS_SEL_T(rs_tex_comp + 1) | + R500_RS_SEL_R(rs_tex_comp + 2) | + R500_RS_SEL_Q(rs_tex_comp + 3); + } + rs->inst[id] |= R500_RS_INST_TEX_ID(id); +} + +static void r500_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset) +{ + rs->inst[id] |= R500_RS_INST_TEX_CN_WRITE | + R500_RS_INST_TEX_ADDR(fp_offset); +} + +/* Set up the RS block. + * + * This is the part of the chipset that actually does the rasterization + * of vertices into fragments. This is also the part of the chipset that + * locks up if any part of it is even slightly wrong. */ +static void r300_update_rs_block(struct r300_context* r300, + struct r300_shader_info* vs_outputs, + struct r300_shader_info* fs_inputs) +{ + struct r300_rs_block* rs = r300->rs_block; + int i, col_count = 0, tex_count = 0, fp_offset = 0; + void (*rX00_rs_col)(struct r300_rs_block*, int, int, boolean); + void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); + void (*rX00_rs_tex)(struct r300_rs_block*, int, int, boolean); + void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int); - /* Rasterize at least one color, or bad things happen. */ - if (col_count == 0) { - rs->ip[0] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001); + if (r300_screen(r300->context.screen)->caps->is_r500) { + rX00_rs_col = r500_rs_col; + rX00_rs_col_write = r500_rs_col_write; + rX00_rs_tex = r500_rs_tex; + rX00_rs_tex_write = r500_rs_tex_write; + } else { + rX00_rs_col = r300_rs_col; + rX00_rs_col_write = r300_rs_col_write; + rX00_rs_tex = r300_rs_tex; + rX00_rs_tex_write = r300_rs_tex_write; + } + + /* Rasterize colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->color[i] != ATTR_UNUSED) { + /* Always rasterize if it's written by the VS, + * otherwise it locks up. */ + rX00_rs_col(rs, col_count, i, FALSE); + + /* Write it to the FS input register if it's used by the FS. */ + if (fs_inputs->color[i] != ATTR_UNUSED) { + rX00_rs_col_write(rs, col_count, fp_offset); + fp_offset++; + } col_count++; + } else { + /* Skip the FS input register, leave it uninitialized. */ + /* If we try to set it to (0,0,0,1), it will lock up. */ + if (fs_inputs->color[i] != ATTR_UNUSED) { + fp_offset++; + } } + } - if (tex_count == 0) { - rs->ip[0] |= - R300_RS_SEL_S(R300_RS_SEL_K0) | - R300_RS_SEL_T(R300_RS_SEL_K0) | - R300_RS_SEL_R(R300_RS_SEL_K0) | - R300_RS_SEL_Q(R300_RS_SEL_K1); + /* Rasterize texture coordinates. */ + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (vs_outputs->generic[i] != ATTR_UNUSED) { + /* Always rasterize if it's written by the VS, + * otherwise it locks up. */ + rX00_rs_tex(rs, tex_count, tex_count, FALSE); + + /* Write it to the FS input register if it's used by the FS. */ + if (fs_inputs->generic[i] != ATTR_UNUSED) { + rX00_rs_tex_write(rs, tex_count, fp_offset); + fp_offset++; + } + tex_count++; + } else { + /* Skip the FS input register, leave it uninitialized. */ + /* If we try to set it to (0,0,0,1), it will lock up. */ + if (fs_inputs->generic[i] != ATTR_UNUSED) { + fp_offset++; + } } + } - for (i = 0; i < col_count; i++) { - rs->inst[i] |= R300_RS_INST_COL_ID(i) | - R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_offset); + /* Rasterize fog coordinates. */ + if (vs_outputs->fog != ATTR_UNUSED) { + /* Always rasterize if it's written by the VS, + * otherwise it locks up. */ + rX00_rs_tex(rs, tex_count, tex_count, TRUE); + + /* Write it to the FS input register if it's used by the FS. */ + if (fs_inputs->fog != ATTR_UNUSED) { + rX00_rs_tex_write(rs, tex_count, fp_offset); fp_offset++; } - - for (i = 0; i < tex_count; i++) { - rs->inst[i] |= R300_RS_INST_TEX_ID(i) | - R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_offset); + tex_count++; + } else { + /* Skip the FS input register, leave it uninitialized. */ + /* If we try to set it to (0,0,0,1), it will lock up. */ + if (fs_inputs->fog != ATTR_UNUSED) { fp_offset++; } } - rs->count = (rs_tex_comp) | (col_count << R300_IC_COUNT_SHIFT) | + /* Rasterize at least one color, or bad things happen. */ + if (col_count == 0 && tex_count == 0) { + rX00_rs_col(rs, 0, 0, TRUE); + col_count++; + } + + rs->count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; rs->inst_count = MAX3(col_count - 1, tex_count - 1, 0); @@ -391,9 +616,8 @@ static void r300_update_rs_block(struct r300_context* r300) static void r300_update_derived_shader_state(struct r300_context* r300) { struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct r300_shader_info vs_outputs, fs_inputs; int vs_output_tab[16]; - int i; - /* struct r300_shader_key* key; @@ -425,21 +649,19 @@ static void r300_update_derived_shader_state(struct r300_context* r300) memset(r300->rs_block, 0, sizeof(struct r300_rs_block)); memset(r300->vertex_info, 0, sizeof(struct r300_vertex_info)); - for (i = 0; i < 16; i++) { - vs_output_tab[i] = -1; - } + r300_shader_read_vs_outputs(&r300->vs->info, &vs_outputs); + r300_shader_read_fs_inputs(&r300->fs->info, &fs_inputs); - /* Update states */ - r300_vs_output_tab_routes(r300, vs_output_tab); + r300_update_vap_output_fmt(r300, &vs_outputs); + r300_update_rs_block(r300, &vs_outputs, &fs_inputs); if (r300screen->caps->has_tcl) { r300_vertex_psc(r300); } else { + r300_stream_locations_swtcl(&vs_outputs, vs_output_tab); r300_swtcl_vertex_psc(r300, vs_output_tab); } - r300_update_rs_block(r300); - r300->dirty_state |= R300_NEW_RS_BLOCK; } -- cgit v1.2.3 From cb90235135ef7c657053657f3bdfbda7ca70d708 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 26 Nov 2009 19:37:58 +0100 Subject: r300g: clean up derived states The state setups which aren't derived anymore have been moved to the VS and FS objects. --- src/gallium/drivers/r300/r300_fs.c | 42 +++- src/gallium/drivers/r300/r300_fs.h | 6 +- src/gallium/drivers/r300/r300_shader_semantics.h | 64 ++++++ src/gallium/drivers/r300/r300_state_derived.c | 235 ++--------------------- src/gallium/drivers/r300/r300_vs.c | 182 +++++++++++++++++- src/gallium/drivers/r300/r300_vs.h | 11 +- 6 files changed, 311 insertions(+), 229 deletions(-) create mode 100644 src/gallium/drivers/r300/r300_shader_semantics.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 29ddc84c41..9cc833e606 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -1,6 +1,7 @@ /* * Copyright 2008 Corbin Simpson * Joakim Sindholt + * Copyright 2009 Marek Olšák * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -31,6 +32,40 @@ #include "radeon_code.h" #include "radeon_compiler.h" +/* Convert info about FS input semantics to r300_shader_semantics. */ +static void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, + struct r300_shader_semantics* fs_inputs) +{ + int i; + unsigned index; + + r300_shader_semantics_reset(fs_inputs); + + for (i = 0; i < info->num_inputs; i++) { + index = info->input_semantic_index[i]; + + switch (info->input_semantic_name[i]) { + case TGSI_SEMANTIC_COLOR: + assert(index <= ATTR_COLOR_COUNT); + fs_inputs->color[index] = i; + break; + + case TGSI_SEMANTIC_GENERIC: + assert(index <= ATTR_GENERIC_COUNT); + fs_inputs->generic[index] = i; + break; + + case TGSI_SEMANTIC_FOG: + assert(index == 0); + fs_inputs->fog = i; + break; + + default: + assert(0); + } + } +} + static void find_output_registers(struct r300_fragment_program_compiler * compiler, struct r300_fragment_shader * fs) { @@ -98,6 +133,10 @@ void r300_translate_fragment_shader(struct r300_context* r300, struct r300_fragment_program_compiler compiler; struct tgsi_to_rc ttr; + /* Initialize. */ + r300_shader_read_fs_inputs(&fs->info, &fs->inputs); + + /* Setup the compiler. */ memset(&compiler, 0, sizeof(compiler)); rc_init(&compiler.Base); compiler.Base.Debug = DBG_ON(r300, DBG_FP); @@ -107,7 +146,7 @@ void r300_translate_fragment_shader(struct r300_context* r300, compiler.AllocateHwInputs = &allocate_hardware_inputs; compiler.UserData = fs; - /* TODO: Program compilation depends on texture compare modes, + /* XXX: Program compilation depends on texture compare modes, * which are sampler state. Therefore, programs need to be recompiled * depending on this state as in the classic Mesa driver. * @@ -133,6 +172,7 @@ void r300_translate_fragment_shader(struct r300_context* r300, /* XXX failover maybe? */ DBG(r300, DBG_FP, "r300: Error compiling fragment program: %s\n", compiler.Base.ErrorMsg); + assert(0); } /* And, finally... */ diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h index e831c30301..630e2d0c8a 100644 --- a/src/gallium/drivers/r300/r300_fs.h +++ b/src/gallium/drivers/r300/r300_fs.h @@ -1,6 +1,7 @@ /* * Copyright 2008 Corbin Simpson * Joakim Sindholt + * Copyright 2009 Marek Olšák * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -25,15 +26,16 @@ #define R300_FS_H #include "pipe/p_state.h" - #include "tgsi/tgsi_scan.h" - #include "radeon_code.h" +#include "r300_shader_semantics.h" struct r300_fragment_shader { /* Parent class */ struct pipe_shader_state state; + struct tgsi_shader_info info; + struct r300_shader_semantics inputs; /* Has this shader been translated yet? */ boolean translated; diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h new file mode 100644 index 0000000000..85184e2cfd --- /dev/null +++ b/src/gallium/drivers/r300/r300_shader_semantics.h @@ -0,0 +1,64 @@ +/* + * Copyright 2009 Marek Olšák + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_SHADER_SEMANTICS_H +#define R300_SHADER_SEMANTICS_H + +#define ATTR_UNUSED (-1) +#define ATTR_COLOR_COUNT 2 +#define ATTR_GENERIC_COUNT 16 + +/* This structure contains information about what attributes are written by VS + * or read by FS. (but not both) It's much easier to work with than + * tgsi_shader_info. + * + * The variables contain indices to tgsi_shader_info semantics and those + * indices are nothing else than input/output register numbers. */ +struct r300_shader_semantics { + int pos; + int psize; + int color[ATTR_COLOR_COUNT]; + int bcolor[ATTR_COLOR_COUNT]; + int generic[ATTR_GENERIC_COUNT]; + int fog; +}; + +static INLINE void r300_shader_semantics_reset( + struct r300_shader_semantics* info) +{ + int i; + + info->pos = ATTR_UNUSED; + info->psize = ATTR_UNUSED; + info->fog = ATTR_UNUSED; + + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + info->color[i] = ATTR_UNUSED; + info->bcolor[i] = ATTR_UNUSED; + } + + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + info->generic[i] = ATTR_UNUSED; + } +} + +#endif diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 03cdba0538..cd969d633b 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -29,6 +29,7 @@ #include "r300_context.h" #include "r300_fs.h" #include "r300_screen.h" +#include "r300_shader_semantics.h" #include "r300_state_derived.h" #include "r300_state_inlines.h" #include "r300_vs.h" @@ -36,25 +37,6 @@ /* r300_state_derived: Various bits of state which are dependent upon * currently bound CSO data. */ -#define ATTR_UNUSED (-1) -#define ATTR_COLOR_COUNT 2 -#define ATTR_GENERIC_COUNT 16 - -/* This structure contains information about what attributes are written by VS - * or read by FS. (but not both) It's much easier to work with than - * tgsi_shader_info. - * - * The variables basically means used/unused and may optionally contain - * indices to tgsi_shader_info semantics which we need to know for Draw. */ -struct r300_shader_info { - int pos; - int psize; - int color[ATTR_COLOR_COUNT]; - int bcolor[ATTR_COLOR_COUNT]; - int generic[ATTR_GENERIC_COUNT]; - int fog; -}; - struct r300_shader_key { struct r300_vertex_shader* vs; struct r300_fragment_shader* fs; @@ -89,193 +71,21 @@ static void r300_draw_emit_attrib(struct r300_context* r300, struct tgsi_shader_info* info = &r300->vs->info; int output; - if (r300->draw) { - output = draw_find_vs_output(r300->draw, - info->output_semantic_name[index], - info->output_semantic_index[index]); - draw_emit_vertex_attr(&r300->vertex_info->vinfo, emit, interp, output); - } + output = draw_find_vs_output(r300->draw, + info->output_semantic_name[index], + info->output_semantic_index[index]); + draw_emit_vertex_attr(&r300->vertex_info->vinfo, emit, interp, output); } -static void r300_shader_info_reset(struct r300_shader_info* info) +static void r300_draw_emit_all_attribs(struct r300_context* r300) { - int i; - - info->pos = ATTR_UNUSED; - info->psize = ATTR_UNUSED; - info->fog = ATTR_UNUSED; - - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - info->color[i] = ATTR_UNUSED; - info->bcolor[i] = ATTR_UNUSED; - } - - for (i = 0; i < ATTR_GENERIC_COUNT; i++) { - info->generic[i] = ATTR_UNUSED; - } -} - -/* Convert info about VS output semantics to r300_shader_info. */ -static void r300_shader_read_vs_outputs(struct tgsi_shader_info* info, - struct r300_shader_info* vs_outputs) -{ - int i; - unsigned index; - - r300_shader_info_reset(vs_outputs); - - for (i = 0; i < info->num_outputs; i++) { - index = info->output_semantic_index[i]; - - switch (info->output_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - assert(index == 0); - vs_outputs->pos = i; - break; - - case TGSI_SEMANTIC_PSIZE: - assert(index == 0); - vs_outputs->psize = i; - break; - - case TGSI_SEMANTIC_COLOR: - assert(index <= ATTR_COLOR_COUNT); - vs_outputs->color[index] = i; - break; - - case TGSI_SEMANTIC_BCOLOR: - assert(index <= ATTR_COLOR_COUNT); - vs_outputs->bcolor[index] = i; - break; - - case TGSI_SEMANTIC_GENERIC: - assert(index <= ATTR_GENERIC_COUNT); - vs_outputs->generic[index] = i; - break; - - case TGSI_SEMANTIC_FOG: - assert(index == 0); - vs_outputs->fog = i; - break; - - default: - assert(0); - } - } -} - -/* Set VS output stream locations for SWTCL. */ -static void r300_stream_locations_swtcl(struct r300_shader_info* vs_outputs, - int* vs_output_tab) -{ - int i, tabi = 0, gen_count; - - /* XXX Check whether the numbers (0, 1, 2+i, etc.) are correct. - * These should go to VAP_PROG_STREAM_CNTL/DST_VEC_LOC. */ - - /* Position. */ - vs_output_tab[tabi++] = 0; - - /* Point size. */ - if (vs_outputs->psize != ATTR_UNUSED) { - vs_output_tab[tabi++] = 1; - } - - /* Colors. */ - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED) { - vs_output_tab[tabi++] = 2 + i; - } - } - - /* Back-face colors. */ - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->bcolor[i] != ATTR_UNUSED) { - vs_output_tab[tabi++] = 4 + i; - } - } - - /* Texture coordinates. */ - gen_count = 0; - for (i = 0; i < ATTR_GENERIC_COUNT; i++) { - if (vs_outputs->bcolor[i] != ATTR_UNUSED) { - assert(tabi < 16); - vs_output_tab[tabi++] = 6 + gen_count; - gen_count++; - } - } - - /* Fog coordinates. */ - if (vs_outputs->fog != ATTR_UNUSED) { - assert(tabi < 16); - vs_output_tab[tabi++] = 6 + gen_count; - gen_count++; - } - - /* XXX magic */ - assert(gen_count <= 8); - - for (; tabi < 16;) { - vs_output_tab[tabi++] = -1; - } -} - -/* Convert info about FS input semantics to r300_shader_info. */ -static void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, - struct r300_shader_info* fs_inputs) -{ - int i; - unsigned index; - - r300_shader_info_reset(fs_inputs); - - for (i = 0; i < info->num_inputs; i++) { - index = info->input_semantic_index[i]; - - switch (info->input_semantic_name[i]) { - case TGSI_SEMANTIC_COLOR: - assert(index <= ATTR_COLOR_COUNT); - fs_inputs->color[index] = i; - break; - - case TGSI_SEMANTIC_GENERIC: - assert(index <= ATTR_GENERIC_COUNT); - fs_inputs->generic[index] = i; - break; - - case TGSI_SEMANTIC_FOG: - assert(index == 0); - fs_inputs->fog = i; - break; - - default: - assert(0); - } - } -} - -static void r300_update_vap_output_fmt(struct r300_context* r300, - struct r300_shader_info* vs_outputs) -{ - struct vertex_info* vinfo = &r300->vertex_info->vinfo; + struct r300_shader_semantics* vs_outputs = &r300->vs->outputs; int i, gen_count; - /* Do the actual vertex_info setup. - * - * vertex_info has four uints of hardware-specific data in it. - * vinfo.hwfmt[0] is R300_VAP_VTX_STATE_CNTL - * vinfo.hwfmt[1] is R300_VAP_VSM_VTX_ASSM - * vinfo.hwfmt[2] is R300_VAP_OUTPUT_VTX_FMT_0 - * vinfo.hwfmt[3] is R300_VAP_OUTPUT_VTX_FMT_1 */ - - vinfo->hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */ - /* Position. */ if (vs_outputs->pos != ATTR_UNUSED) { r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, vs_outputs->pos); - vinfo->hwfmt[1] |= R300_INPUT_CNTL_POS; - vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; } else { assert(0); } @@ -284,7 +94,6 @@ static void r300_update_vap_output_fmt(struct r300_context* r300, if (vs_outputs->psize != ATTR_UNUSED) { r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS, vs_outputs->psize); - vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; } /* Colors. */ @@ -292,8 +101,6 @@ static void r300_update_vap_output_fmt(struct r300_context* r300, if (vs_outputs->color[i] != ATTR_UNUSED) { r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR, vs_outputs->color[i]); - vinfo->hwfmt[1] |= R300_INPUT_CNTL_COLOR; - vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; } } @@ -305,8 +112,6 @@ static void r300_update_vap_output_fmt(struct r300_context* r300, if (vs_outputs->generic[i] != ATTR_UNUSED) { r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, vs_outputs->generic[i]); - vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count); - vinfo->hwfmt[3] |= (4 << (3 * gen_count)); gen_count++; } } @@ -315,15 +120,11 @@ static void r300_update_vap_output_fmt(struct r300_context* r300, if (vs_outputs->fog != ATTR_UNUSED) { r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, vs_outputs->fog); - vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count); - vinfo->hwfmt[3] |= (4 << (3 * gen_count)); gen_count++; } /* XXX magic */ assert(gen_count <= 8); - - draw_compute_vertex_size(vinfo); } /* Update the PSC tables. */ @@ -370,14 +171,14 @@ static void r300_vertex_psc(struct r300_context* r300) } /* Update the PSC tables for SW TCL, using Draw. */ -static void r300_swtcl_vertex_psc(struct r300_context* r300, - int* vs_output_tab) +static void r300_swtcl_vertex_psc(struct r300_context* r300) { struct r300_vertex_info *vformat = r300->vertex_info; struct vertex_info* vinfo = &vformat->vinfo; uint16_t type, swizzle; enum pipe_format format; unsigned i, attrib_count; + int* vs_output_tab = r300->vs->output_stream_loc_swtcl; /* For each Draw attribute, route it to the fragment shader according * to the vs_output_tab. */ @@ -514,8 +315,8 @@ static void r500_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset) * of vertices into fragments. This is also the part of the chipset that * locks up if any part of it is even slightly wrong. */ static void r300_update_rs_block(struct r300_context* r300, - struct r300_shader_info* vs_outputs, - struct r300_shader_info* fs_inputs) + struct r300_shader_semantics* vs_outputs, + struct r300_shader_semantics* fs_inputs) { struct r300_rs_block* rs = r300->rs_block; int i, col_count = 0, tex_count = 0, fp_offset = 0; @@ -616,8 +417,6 @@ static void r300_update_rs_block(struct r300_context* r300, static void r300_update_derived_shader_state(struct r300_context* r300) { struct r300_screen* r300screen = r300_screen(r300->context.screen); - struct r300_shader_info vs_outputs, fs_inputs; - int vs_output_tab[16]; /* struct r300_shader_key* key; @@ -648,18 +447,16 @@ static void r300_update_derived_shader_state(struct r300_context* r300) /* Reset structures */ memset(r300->rs_block, 0, sizeof(struct r300_rs_block)); memset(r300->vertex_info, 0, sizeof(struct r300_vertex_info)); + memcpy(r300->vertex_info->vinfo.hwfmt, r300->vs->hwfmt, sizeof(uint)*4); - r300_shader_read_vs_outputs(&r300->vs->info, &vs_outputs); - r300_shader_read_fs_inputs(&r300->fs->info, &fs_inputs); - - r300_update_vap_output_fmt(r300, &vs_outputs); - r300_update_rs_block(r300, &vs_outputs, &fs_inputs); + r300_update_rs_block(r300, &r300->vs->outputs, &r300->fs->inputs); if (r300screen->caps->has_tcl) { r300_vertex_psc(r300); } else { - r300_stream_locations_swtcl(&vs_outputs, vs_output_tab); - r300_swtcl_vertex_psc(r300, vs_output_tab); + r300_draw_emit_all_attribs(r300); + draw_compute_vertex_size(&r300->vertex_info->vinfo); + r300_swtcl_vertex_psc(r300); } r300->dirty_state |= R300_NEW_RS_BLOCK; diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 74ef416dc1..49bff3e931 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -1,5 +1,6 @@ /* * Copyright 2009 Corbin Simpson + * Copyright 2009 Marek Olšák * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -23,13 +24,181 @@ #include "r300_vs.h" #include "r300_context.h" +#include "r300_screen.h" #include "r300_tgsi_to_rc.h" +#include "r300_reg.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_parse.h" #include "radeon_compiler.h" +/* Convert info about VS output semantics into r300_shader_semantics. */ +static void r300_shader_read_vs_outputs( + struct tgsi_shader_info* info, + struct r300_shader_semantics* vs_outputs) +{ + int i; + unsigned index; + + r300_shader_semantics_reset(vs_outputs); + + for (i = 0; i < info->num_outputs; i++) { + index = info->output_semantic_index[i]; + + switch (info->output_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + assert(index == 0); + vs_outputs->pos = i; + break; + + case TGSI_SEMANTIC_PSIZE: + assert(index == 0); + vs_outputs->psize = i; + break; + + case TGSI_SEMANTIC_COLOR: + assert(index <= ATTR_COLOR_COUNT); + vs_outputs->color[index] = i; + break; + + case TGSI_SEMANTIC_BCOLOR: + assert(index <= ATTR_COLOR_COUNT); + vs_outputs->bcolor[index] = i; + break; + + case TGSI_SEMANTIC_GENERIC: + assert(index <= ATTR_GENERIC_COUNT); + vs_outputs->generic[index] = i; + break; + + case TGSI_SEMANTIC_FOG: + assert(index == 0); + vs_outputs->fog = i; + break; + + default: + assert(0); + } + } +} + +static void r300_shader_vap_output_fmt( + struct r300_shader_semantics* vs_outputs, + uint* hwfmt) +{ + int i, gen_count; + + /* Do the actual vertex_info setup. + * + * vertex_info has four uints of hardware-specific data in it. + * vinfo.hwfmt[0] is R300_VAP_VTX_STATE_CNTL + * vinfo.hwfmt[1] is R300_VAP_VSM_VTX_ASSM + * vinfo.hwfmt[2] is R300_VAP_OUTPUT_VTX_FMT_0 + * vinfo.hwfmt[3] is R300_VAP_OUTPUT_VTX_FMT_1 */ + + hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */ + + /* Position. */ + if (vs_outputs->pos != ATTR_UNUSED) { + hwfmt[1] |= R300_INPUT_CNTL_POS; + hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + } else { + assert(0); + } + + /* Point size. */ + if (vs_outputs->psize != ATTR_UNUSED) { + hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; + } + + /* Colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->color[i] != ATTR_UNUSED) { + hwfmt[1] |= R300_INPUT_CNTL_COLOR; + hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; + } + } + + /* XXX Back-face colors. */ + + /* Texture coordinates. */ + gen_count = 0; + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (vs_outputs->generic[i] != ATTR_UNUSED) { + hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count); + hwfmt[3] |= (4 << (3 * gen_count)); + gen_count++; + } + } + + /* Fog coordinates. */ + if (vs_outputs->fog != ATTR_UNUSED) { + hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count); + hwfmt[3] |= (4 << (3 * gen_count)); + gen_count++; + } + + /* XXX magic */ + assert(gen_count <= 8); +} + +/* Set VS output stream locations for SWTCL. */ +static void r300_stream_locations_swtcl( + struct r300_shader_semantics* vs_outputs, + int* output_stream_loc) +{ + int i, tabi = 0, gen_count; + + /* XXX Check whether the numbers (0, 1, 2+i, etc.) are correct. + * These should go to VAP_PROG_STREAM_CNTL/DST_VEC_LOC. */ + + /* Position. */ + output_stream_loc[tabi++] = 0; + + /* Point size. */ + if (vs_outputs->psize != ATTR_UNUSED) { + output_stream_loc[tabi++] = 1; + } + + /* Colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->color[i] != ATTR_UNUSED) { + output_stream_loc[tabi++] = 2 + i; + } + } + + /* Back-face colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->bcolor[i] != ATTR_UNUSED) { + output_stream_loc[tabi++] = 4 + i; + } + } + + /* Texture coordinates. */ + gen_count = 0; + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (vs_outputs->bcolor[i] != ATTR_UNUSED) { + assert(tabi < 16); + output_stream_loc[tabi++] = 6 + gen_count; + gen_count++; + } + } + + /* Fog coordinates. */ + if (vs_outputs->fog != ATTR_UNUSED) { + assert(tabi < 16); + output_stream_loc[tabi++] = 6 + gen_count; + gen_count++; + } + + /* XXX magic */ + assert(gen_count <= 8); + + for (; tabi < 16;) { + output_stream_loc[tabi++] = -1; + } +} static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) { @@ -99,20 +268,27 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) default: debug_printf("r300: vs: Bad semantic declaration %d\n", decl->Semantic.SemanticName); - break; + assert(0); } } tgsi_parse_free(&parser); } - void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs) { struct r300_vertex_program_compiler compiler; struct tgsi_to_rc ttr; + /* Initialize. */ + r300_shader_read_vs_outputs(&vs->info, &vs->outputs); + r300_shader_vap_output_fmt(&vs->outputs, vs->hwfmt); + + if (!r300_screen(r300->context.screen)->caps->has_tcl) { + r300_stream_locations_swtcl(&vs->outputs, vs->output_stream_loc_swtcl); + } + /* Setup the compiler */ rc_init(&compiler.Base); @@ -137,7 +313,7 @@ void r300_translate_vertex_shader(struct r300_context* r300, /* Invoke the compiler */ r3xx_compile_vertex_program(&compiler); if (compiler.Base.Error) { - /* Todo: Fail gracefully */ + /* XXX Fail gracefully */ fprintf(stderr, "r300 VP: Compiler error\n"); abort(); } diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 00b02bf510..283dd5a9e8 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -1,5 +1,6 @@ /* * Copyright 2009 Corbin Simpson + * Copyright 2009 Marek Olšák * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -25,15 +26,20 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" - #include "radeon_code.h" +#include "r300_shader_semantics.h" + struct r300_context; struct r300_vertex_shader { /* Parent class */ struct pipe_shader_state state; + struct tgsi_shader_info info; + struct r300_shader_semantics outputs; + int output_stream_loc_swtcl[16]; + uint hwfmt[4]; /* Has this shader been translated yet? */ boolean translated; @@ -42,9 +48,6 @@ struct r300_vertex_shader { struct r300_vertex_program_code code; }; - -extern struct r300_vertex_program_code r300_passthrough_vertex_shader; - void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); -- cgit v1.2.3 From f55c088f89eeaa6d16480f5f373887c6a2965e21 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 27 Nov 2009 06:36:31 +0100 Subject: r300g: simplify allocations of FS input registers --- src/gallium/drivers/r300/r300_fs.c | 43 +++++++++++++------------------------- 1 file changed, 15 insertions(+), 28 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 9cc833e606..79b01bb4dc 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -66,6 +66,7 @@ static void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, } } + static void find_output_registers(struct r300_fragment_program_compiler * compiler, struct r300_fragment_shader * fs) { @@ -93,38 +94,24 @@ static void allocate_hardware_inputs( void (*allocate)(void * data, unsigned input, unsigned hwreg), void * mydata) { - struct tgsi_shader_info* info = &((struct r300_fragment_shader*)c->UserData)->info; - int total_colors = 0; - int colors = 0; - int total_generic = 0; - int generic = 0; - int i; - - for (i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { - case TGSI_SEMANTIC_COLOR: - total_colors++; - break; - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - total_generic++; - break; + struct r300_shader_semantics* inputs = + &((struct r300_fragment_shader*)c->UserData)->inputs; + int i, reg = 0; + + /* Allocate input registers. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (inputs->color[i] != ATTR_UNUSED) { + allocate(mydata, inputs->color[i], reg++); } } - - for(i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { - case TGSI_SEMANTIC_COLOR: - allocate(mydata, i, colors); - colors++; - break; - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - allocate(mydata, i, total_colors + generic); - generic++; - break; + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (inputs->generic[i] != ATTR_UNUSED) { + allocate(mydata, inputs->generic[i], reg++); } } + if (inputs->fog != ATTR_UNUSED) { + allocate(mydata, inputs->fog, reg++); + } } void r300_translate_fragment_shader(struct r300_context* r300, -- cgit v1.2.3 From af3dea36603687067197c22747537eaeb6c4ad2b Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 27 Nov 2009 10:19:20 +0100 Subject: r300g: simplify allocations of VS output registers No need to parse TGSI tokens since it's easier to walk through shader semantics. Also fog coordinates now work reliably. --- src/gallium/drivers/r300/r300_vs.c | 82 ++++++++++++-------------------------- 1 file changed, 26 insertions(+), 56 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 49bff3e931..31248346bc 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -203,76 +203,46 @@ static void r300_stream_locations_swtcl( static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) { struct r300_vertex_shader * vs = c->UserData; + struct r300_shader_semantics* outputs = &vs->outputs; struct tgsi_shader_info* info = &vs->info; - struct tgsi_parse_context parser; - struct tgsi_full_declaration * decl; - boolean pointsize = FALSE; - int out_colors = 0; - int colors = 0; - int out_generic = 0; - int generic = 0; - int i; + int i, reg = 0; /* Fill in the input mapping */ for (i = 0; i < info->num_inputs; i++) c->code->inputs[i] = i; - /* Fill in the output mapping */ - for (i = 0; i < info->num_outputs; i++) { - switch (info->output_semantic_name[i]) { - case TGSI_SEMANTIC_PSIZE: - pointsize = TRUE; - break; - case TGSI_SEMANTIC_COLOR: - out_colors++; - break; - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - out_generic++; - break; - } + /* Position. */ + if (outputs->pos != ATTR_UNUSED) { + c->code->outputs[outputs->pos] = reg++; + } else { + assert(0); } - tgsi_parse_init(&parser, vs->state.tokens); - - while (!tgsi_parse_end_of_tokens(&parser)) { - tgsi_parse_token(&parser); - - if (parser.FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION) - continue; + /* Point size. */ + if (outputs->psize != ATTR_UNUSED) { + c->code->outputs[outputs->psize] = reg++; + } - decl = &parser.FullToken.FullDeclaration; + /* Colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (outputs->color[i] != ATTR_UNUSED) { + c->code->outputs[outputs->color[i]] = reg++; + } + } - if (decl->Declaration.File != TGSI_FILE_OUTPUT) - continue; + /* XXX Back-face colors. */ - switch (decl->Semantic.SemanticName) { - case TGSI_SEMANTIC_POSITION: - c->code->outputs[decl->DeclarationRange.First] = 0; - break; - case TGSI_SEMANTIC_PSIZE: - c->code->outputs[decl->DeclarationRange.First] = 1; - break; - case TGSI_SEMANTIC_COLOR: - c->code->outputs[decl->DeclarationRange.First] = 1 + - (pointsize ? 1 : 0) + - colors++; - break; - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - c->code->outputs[decl->DeclarationRange.First] = 1 + - (pointsize ? 1 : 0) + - out_colors + - generic++; - break; - default: - debug_printf("r300: vs: Bad semantic declaration %d\n", - decl->Semantic.SemanticName); - assert(0); + /* Texture coordinates. */ + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (outputs->generic[i] != ATTR_UNUSED) { + c->code->outputs[outputs->generic[i]] = reg++; } } - tgsi_parse_free(&parser); + /* Fog coordinates. */ + if (outputs->fog != ATTR_UNUSED) { + c->code->outputs[outputs->fog] = reg++; + } } void r300_translate_vertex_shader(struct r300_context* r300, -- cgit v1.2.3 From 9077ddaa2557e1e76c8a052c8d079ef3d443186b Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Wed, 25 Nov 2009 00:33:43 +0100 Subject: svga: Add header files for overlay support --- src/gallium/drivers/svga/include/svga_escape.h | 89 +++++++++++ src/gallium/drivers/svga/include/svga_overlay.h | 201 ++++++++++++++++++++++++ 2 files changed, 290 insertions(+) create mode 100644 src/gallium/drivers/svga/include/svga_escape.h create mode 100644 src/gallium/drivers/svga/include/svga_overlay.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/include/svga_escape.h b/src/gallium/drivers/svga/include/svga_escape.h new file mode 100644 index 0000000000..7b85e9b8c8 --- /dev/null +++ b/src/gallium/drivers/svga/include/svga_escape.h @@ -0,0 +1,89 @@ +/********************************************************** + * Copyright 2007-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/* + * svga_escape.h -- + * + * Definitions for our own (vendor-specific) SVGA Escape commands. + */ + +#ifndef _SVGA_ESCAPE_H_ +#define _SVGA_ESCAPE_H_ + + +/* + * Namespace IDs for the escape command + */ + +#define SVGA_ESCAPE_NSID_VMWARE 0x00000000 +#define SVGA_ESCAPE_NSID_DEVEL 0xFFFFFFFF + + +/* + * Within SVGA_ESCAPE_NSID_VMWARE, we multiplex commands according to + * the first DWORD of escape data (after the nsID and size). As a + * guideline we're using the high word and low word as a major and + * minor command number, respectively. + * + * Major command number allocation: + * + * 0000: Reserved + * 0001: SVGA_ESCAPE_VMWARE_LOG (svga_binary_logger.h) + * 0002: SVGA_ESCAPE_VMWARE_VIDEO (svga_overlay.h) + * 0003: SVGA_ESCAPE_VMWARE_HINT (svga_escape.h) + */ + +#define SVGA_ESCAPE_VMWARE_MAJOR_MASK 0xFFFF0000 + + +/* + * SVGA Hint commands. + * + * These escapes let the SVGA driver provide optional information to + * he host about the state of the guest or guest applications. The + * host can use these hints to make user interface or performance + * decisions. + * + * Notes: + * + * - SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN is deprecated for guests + * that use the SVGA Screen Object extension. Instead of sending + * this escape, use the SVGA_SCREEN_FULLSCREEN_HINT flag on your + * Screen Object. + */ + +#define SVGA_ESCAPE_VMWARE_HINT 0x00030000 +#define SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN 0x00030001 // Deprecated + +typedef +struct { + uint32 command; + uint32 fullscreen; + struct { + int32 x, y; + } monitorPosition; +} SVGAEscapeHintFullscreen; + +#endif /* _SVGA_ESCAPE_H_ */ diff --git a/src/gallium/drivers/svga/include/svga_overlay.h b/src/gallium/drivers/svga/include/svga_overlay.h new file mode 100644 index 0000000000..82c1d3ff3e --- /dev/null +++ b/src/gallium/drivers/svga/include/svga_overlay.h @@ -0,0 +1,201 @@ +/********************************************************** + * Copyright 2007-2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/* + * svga_overlay.h -- + * + * Definitions for video-overlay support. + */ + +#ifndef _SVGA_OVERLAY_H_ +#define _SVGA_OVERLAY_H_ + +#include "svga_reg.h" + +/* + * Video formats we support + */ + +#define VMWARE_FOURCC_YV12 0x32315659 // 'Y' 'V' '1' '2' +#define VMWARE_FOURCC_YUY2 0x32595559 // 'Y' 'U' 'Y' '2' +#define VMWARE_FOURCC_UYVY 0x59565955 // 'U' 'Y' 'V' 'Y' + +typedef enum { + SVGA_OVERLAY_FORMAT_INVALID = 0, + SVGA_OVERLAY_FORMAT_YV12 = VMWARE_FOURCC_YV12, + SVGA_OVERLAY_FORMAT_YUY2 = VMWARE_FOURCC_YUY2, + SVGA_OVERLAY_FORMAT_UYVY = VMWARE_FOURCC_UYVY, +} SVGAOverlayFormat; + +#define SVGA_VIDEO_COLORKEY_MASK 0x00ffffff + +#define SVGA_ESCAPE_VMWARE_VIDEO 0x00020000 + +#define SVGA_ESCAPE_VMWARE_VIDEO_SET_REGS 0x00020001 + /* FIFO escape layout: + * Type, Stream Id, (Register Id, Value) pairs */ + +#define SVGA_ESCAPE_VMWARE_VIDEO_FLUSH 0x00020002 + /* FIFO escape layout: + * Type, Stream Id */ + +typedef +struct SVGAEscapeVideoSetRegs { + struct { + uint32 cmdType; + uint32 streamId; + } header; + + // May include zero or more items. + struct { + uint32 registerId; + uint32 value; + } items[1]; +} SVGAEscapeVideoSetRegs; + +typedef +struct SVGAEscapeVideoFlush { + uint32 cmdType; + uint32 streamId; +} SVGAEscapeVideoFlush; + + +/* + * Struct definitions for the video overlay commands built on + * SVGAFifoCmdEscape. + */ +typedef +struct { + uint32 command; + uint32 overlay; +} SVGAFifoEscapeCmdVideoBase; + +typedef +struct { + SVGAFifoEscapeCmdVideoBase videoCmd; +} SVGAFifoEscapeCmdVideoFlush; + +typedef +struct { + SVGAFifoEscapeCmdVideoBase videoCmd; + struct { + uint32 regId; + uint32 value; + } items[1]; +} SVGAFifoEscapeCmdVideoSetRegs; + +typedef +struct { + SVGAFifoEscapeCmdVideoBase videoCmd; + struct { + uint32 regId; + uint32 value; + } items[SVGA_VIDEO_NUM_REGS]; +} SVGAFifoEscapeCmdVideoSetAllRegs; + + +/* + *---------------------------------------------------------------------- + * + * VMwareVideoGetAttributes -- + * + * Computes the size, pitches and offsets for YUV frames. + * + * Results: + * TRUE on success; otherwise FALSE on failure. + * + * Side effects: + * Pitches and offsets for the given YUV frame are put in 'pitches' + * and 'offsets' respectively. They are both optional though. + * + *---------------------------------------------------------------------- + */ + +static INLINE Bool +VMwareVideoGetAttributes(const SVGAOverlayFormat format, // IN + uint32 *width, // IN / OUT + uint32 *height, // IN / OUT + uint32 *size, // OUT + uint32 *pitches, // OUT (optional) + uint32 *offsets) // OUT (optional) +{ + int tmp; + + *width = (*width + 1) & ~1; + + if (offsets) { + offsets[0] = 0; + } + + switch (format) { + case VMWARE_FOURCC_YV12: + *height = (*height + 1) & ~1; + *size = (*width + 3) & ~3; + + if (pitches) { + pitches[0] = *size; + } + + *size *= *height; + + if (offsets) { + offsets[1] = *size; + } + + tmp = ((*width >> 1) + 3) & ~3; + + if (pitches) { + pitches[1] = pitches[2] = tmp; + } + + tmp *= (*height >> 1); + *size += tmp; + + if (offsets) { + offsets[2] = *size; + } + + *size += tmp; + break; + + case VMWARE_FOURCC_YUY2: + case VMWARE_FOURCC_UYVY: + *size = *width * 2; + + if (pitches) { + pitches[0] = *size; + } + + *size *= *height; + break; + + default: + return FALSE; + } + + return TRUE; +} + +#endif // _SVGA_OVERLAY_H_ -- cgit v1.2.3 From e0399fddf2efd556ece8b81078368e6ab388c3b7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 30 Nov 2009 09:21:49 -0700 Subject: softpipe: setup machine->Face without a conditional --- src/gallium/drivers/softpipe/sp_fs_exec.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index a8999ed347..27fa126b7c 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -127,11 +127,8 @@ exec_run( const struct sp_fragment_shader *base, (float)quad->input.x0, (float)quad->input.y0, &machine->QuadPos); - if (quad->input.facing) { - machine->Face = -1.0f; - } else { - machine->Face = 1.0f; - } + /* convert 0 to 1.0 and 1 to -1.0 */ + machine->Face = (float) (quad->input.facing * -2 + 1); quad->inout.mask &= tgsi_exec_machine_run( machine ); if (quad->inout.mask == 0) -- cgit v1.2.3 From c78748a5274e58bcbb122923edf81065be9bbe16 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Wed, 2 Dec 2009 02:08:26 +0100 Subject: gallium: adapt drivers to interface cleanups --- src/gallium/drivers/cell/ppu/cell_texture.c | 30 +++--- src/gallium/drivers/i915/i915_surface.c | 21 +++-- src/gallium/drivers/i915/i915_texture.c | 125 ++++++++++--------------- src/gallium/drivers/llvmpipe/lp_setup.c | 2 +- src/gallium/drivers/llvmpipe/lp_tex_cache.c | 2 +- src/gallium/drivers/llvmpipe/lp_texture.c | 48 +++++----- src/gallium/drivers/llvmpipe/lp_tile_cache.c | 6 +- src/gallium/drivers/r300/r300_emit.c | 12 +-- src/gallium/drivers/r300/r300_screen.c | 9 +- src/gallium/drivers/r300/r300_texture.c | 12 +-- src/gallium/drivers/svga/svga_screen_texture.c | 37 ++++---- src/gallium/drivers/svga/svga_state_vs.c | 2 +- 12 files changed, 138 insertions(+), 168 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index e6b8a87045..77a57aef14 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -65,14 +65,11 @@ cell_texture_layout(struct cell_texture *ct) w_tile = align(width, TILE_SIZE); h_tile = align(height, TILE_SIZE); - pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w_tile); - pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h_tile); - - ct->stride[level] = pt->nblocksx[level] * pt->block.size; + ct->stride[level] = pf_get_stride(pt->format, w_tile); ct->level_offset[level] = ct->buffer_size; - size = pt->nblocksx[level] * pt->nblocksy[level] * pt->block.size; + size = ct->stride[level] * pf_get_nblocksy(pt->format, h_tile); if (pt->target == PIPE_TEXTURE_CUBE) size *= 6; else @@ -283,10 +280,12 @@ cell_get_tex_surface(struct pipe_screen *screen, ps->zslice = zslice; if (pt->target == PIPE_TEXTURE_CUBE) { - ps->offset += face * pt->nblocksy[level] * ct->stride[level]; + unsigned h_tile = align(ps->height, TILE_SIZE); + ps->offset += face * pf_get_nblocksy(ps->format, h_tile) * ct->stride[level]; } else if (pt->target == PIPE_TEXTURE_3D) { - ps->offset += zslice * pt->nblocksy[level] * ct->stride[level]; + unsigned h_tile = align(ps->height, TILE_SIZE); + ps->offset += zslice * pf_get_nblocksy(ps->format, h_tile) * ct->stride[level]; } else { assert(face == 0); @@ -327,14 +326,10 @@ cell_get_tex_transfer(struct pipe_screen *screen, if (ctrans) { struct pipe_transfer *pt = &ctrans->base; pipe_texture_reference(&pt->texture, texture); - pt->format = texture->format; - pt->block = texture->block; pt->x = x; pt->y = y; pt->width = w; pt->height = h; - pt->nblocksx = texture->nblocksx[level]; - pt->nblocksy = texture->nblocksy[level]; pt->stride = ct->stride[level]; pt->usage = usage; pt->face = face; @@ -344,10 +339,12 @@ cell_get_tex_transfer(struct pipe_screen *screen, ctrans->offset = ct->level_offset[level]; if (texture->target == PIPE_TEXTURE_CUBE) { - ctrans->offset += face * pt->nblocksy * pt->stride; + unsigned h_tile = align(u_minify(texture->height0, level), TILE_SIZE); + ctrans->offset += face * pf_get_nblocksy(texture->format, h_tile) * pt->stride; } else if (texture->target == PIPE_TEXTURE_3D) { - ctrans->offset += zslice * pt->nblocksy * pt->stride; + unsigned h_tile = align(u_minify(texture->height0, level), TILE_SIZE); + ctrans->offset += zslice * pf_get_nblocksy(texture->format, h_tile) * pt->stride; } else { assert(face == 0); @@ -400,7 +397,8 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer) * Create a buffer of ordinary memory for the linear texture. * This is the memory that the user will read/write. */ - size = pt->nblocksx[level] * pt->nblocksy[level] * pt->block.size; + size = pf_get_stride(pt->format, align(texWidth, TILE_SIZE)) * + pf_get_nblocksy(pt->format, align(texHeight, TILE_SIZE)); ctrans->map = align_malloc(size, 16); if (!ctrans->map) @@ -408,7 +406,7 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer) if (transfer->usage & PIPE_TRANSFER_READ) { /* need to untwiddle the texture to make a linear version */ - const uint bpp = pf_get_size(ct->base.format); + const uint bpp = pf_get_blocksize(ct->base.format); if (bpp == 4) { const uint *src = (uint *) (ct->mapped + ctrans->offset); uint *dst = ctrans->map; @@ -451,7 +449,7 @@ cell_transfer_unmap(struct pipe_screen *screen, /* The user wrote new texture data into the mapped buffer. * We need to convert the new linear data into the twiddled/tiled format. */ - const uint bpp = pf_get_size(ct->base.format); + const uint bpp = pf_get_blocksize(ct->base.format); if (bpp == 4) { const uint *src = ctrans->map; uint *dst = (uint *) (ct->mapped + ctrans->offset); diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c index ab8331f3e6..24e1024aaa 100644 --- a/src/gallium/drivers/i915/i915_surface.c +++ b/src/gallium/drivers/i915/i915_surface.c @@ -48,17 +48,19 @@ i915_surface_copy(struct pipe_context *pipe, { struct i915_texture *dst_tex = (struct i915_texture *)dst->texture; struct i915_texture *src_tex = (struct i915_texture *)src->texture; + struct pipe_texture *dpt = &dst_tex->base; + struct pipe_texture *spt = &src_tex->base; assert( dst != src ); - assert( dst_tex->base.block.size == src_tex->base.block.size ); - assert( dst_tex->base.block.width == src_tex->base.block.height ); - assert( dst_tex->base.block.height == src_tex->base.block.height ); - assert( dst_tex->base.block.width == 1 ); - assert( dst_tex->base.block.height == 1 ); + assert( pf_get_blocksize(dpt->format) == pf_get_blocksize(spt->format) ); + assert( pf_get_blockwidth(dpt->format) == pf_get_blockwidth(spt->format) ); + assert( pf_get_blockheight(dpt->format) == pf_get_blockheight(spt->format) ); + assert( pf_get_blockwidth(dpt->format) == 1 ); + assert( pf_get_blockheight(dpt->format) == 1 ); i915_copy_blit( i915_context(pipe), FALSE, - dst_tex->base.block.size, + pf_get_blocksize(dpt->format), (unsigned short) src_tex->stride, src_tex->buffer, src->offset, (unsigned short) dst_tex->stride, dst_tex->buffer, dst->offset, (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height ); @@ -72,12 +74,13 @@ i915_surface_fill(struct pipe_context *pipe, unsigned width, unsigned height, unsigned value) { struct i915_texture *tex = (struct i915_texture *)dst->texture; + struct pipe_texture *pt = &tex->base; - assert(tex->base.block.width == 1); - assert(tex->base.block.height == 1); + assert(pf_get_blockwidth(pt->format) == 1); + assert(pf_get_blockheight(pt->format) == 1); i915_fill_blit( i915_context(pipe), - tex->base.block.size, + pf_get_blocksize(pt->format), (unsigned short) tex->stride, tex->buffer, dst->offset, (short) dstx, (short) dsty, diff --git a/src/gallium/drivers/i915/i915_texture.c b/src/gallium/drivers/i915/i915_texture.c index c7b86dd4c5..b28b413771 100644 --- a/src/gallium/drivers/i915/i915_texture.c +++ b/src/gallium/drivers/i915/i915_texture.c @@ -74,6 +74,9 @@ static const int step_offsets[6][2] = { {-1, 1} }; +/* XXX really need twice the size if x is already pot? + Otherwise just use util_next_power_of_two? +*/ static unsigned power_of_two(unsigned x) { @@ -83,13 +86,6 @@ power_of_two(unsigned x) return value; } -static unsigned -round_up(unsigned n, unsigned multiple) -{ - return (n + multiple - 1) & ~(multiple - 1); -} - - /* * More advanced helper funcs */ @@ -101,13 +97,8 @@ i915_miptree_set_level_info(struct i915_texture *tex, unsigned nr_images, unsigned w, unsigned h, unsigned d) { - struct pipe_texture *pt = &tex->base; - assert(level < PIPE_MAX_TEXTURE_LEVELS); - pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w); - pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h); - tex->nr_images[level] = nr_images; /* @@ -138,7 +129,7 @@ i915_miptree_set_image_offset(struct i915_texture *tex, assert(img < tex->nr_images[level]); - tex->image_offset[level][img] = y * tex->stride + x * tex->base.block.size; + tex->image_offset[level][img] = y * tex->stride + x * pf_get_blocksize(tex->base.format); /* printf("%s level %d img %d pos %d,%d image_offset %x\n", @@ -160,28 +151,28 @@ i915_scanout_layout(struct i915_texture *tex) { struct pipe_texture *pt = &tex->base; - if (pt->last_level > 0 || pt->block.size != 4) + if (pt->last_level > 0 || pf_get_blocksize(pt->format) != 4) return FALSE; i915_miptree_set_level_info(tex, 0, 1, - tex->base.width0, - tex->base.height0, + pt->width0, + pt->height0, 1); i915_miptree_set_image_offset(tex, 0, 0, 0, 0); - if (tex->base.width0 >= 240) { - tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); - tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); + if (pt->width0 >= 240) { + tex->stride = power_of_two(pf_get_stride(pt->format, pt->width0)); + tex->total_nblocksy = align(pf_get_nblocksy(pt->format, pt->height0), 8); tex->hw_tiled = INTEL_TILE_X; - } else if (tex->base.width0 == 64 && tex->base.height0 == 64) { - tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); - tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); + } else if (pt->width0 == 64 && pt->height0 == 64) { + tex->stride = power_of_two(pf_get_stride(pt->format, pt->width0)); + tex->total_nblocksy = align(pf_get_nblocksy(pt->format, pt->height0), 8); } else { return FALSE; } debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, - tex->base.width0, tex->base.height0, pt->block.size, + pt->width0, pt->height0, pf_get_blocksize(pt->format), tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy); return TRUE; @@ -195,25 +186,25 @@ i915_display_target_layout(struct i915_texture *tex) { struct pipe_texture *pt = &tex->base; - if (pt->last_level > 0 || pt->block.size != 4) + if (pt->last_level > 0 || pf_get_blocksize(pt->format) != 4) return FALSE; /* fallback to normal textures for small textures */ - if (tex->base.width0 < 240) + if (pt->width0 < 240) return FALSE; i915_miptree_set_level_info(tex, 0, 1, - tex->base.width0, - tex->base.height0, + pt->width0, + pt->height0, 1); i915_miptree_set_image_offset(tex, 0, 0, 0, 0); - tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); - tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); + tex->stride = power_of_two(pf_get_stride(pt->format, pt->width0)); + tex->total_nblocksy = align(pf_get_nblocksy(pt->format, pt->height0), 8); tex->hw_tiled = INTEL_TILE_X; debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, - tex->base.width0, tex->base.height0, pt->block.size, + pt->width0, pt->height0, pf_get_blocksize(pt->format), tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy); return TRUE; @@ -226,34 +217,32 @@ i915_miptree_layout_2d(struct i915_texture *tex) unsigned level; unsigned width = pt->width0; unsigned height = pt->height0; - unsigned nblocksx = pt->nblocksx[0]; - unsigned nblocksy = pt->nblocksy[0]; + unsigned nblocksy = pf_get_nblocksy(pt->format, pt->width0); /* used for scanouts that need special layouts */ - if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) + if (pt->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) if (i915_scanout_layout(tex)) return; /* for shared buffers we use some very like scanout */ - if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) + if (pt->tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) if (i915_display_target_layout(tex)) return; - tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + tex->stride = align(pf_get_stride(pt->format, pt->width0), 4); tex->total_nblocksy = 0; for (level = 0; level <= pt->last_level; level++) { i915_miptree_set_level_info(tex, level, 1, width, height, 1); i915_miptree_set_image_offset(tex, level, 0, 0, tex->total_nblocksy); - nblocksy = round_up(MAX2(2, nblocksy), 2); + nblocksy = align(MAX2(2, nblocksy), 2); tex->total_nblocksy += nblocksy; width = u_minify(width, 1); height = u_minify(height, 1); - nblocksx = pf_get_nblocksx(&pt->block, width); - nblocksy = pf_get_nblocksy(&pt->block, height); + nblocksy = pf_get_nblocksy(pt->format, height); } } @@ -266,13 +255,12 @@ i915_miptree_layout_3d(struct i915_texture *tex) unsigned width = pt->width0; unsigned height = pt->height0; unsigned depth = pt->depth0; - unsigned nblocksx = pt->nblocksx[0]; - unsigned nblocksy = pt->nblocksy[0]; + unsigned nblocksy = pf_get_nblocksy(pt->format, pt->height0); unsigned stack_nblocksy = 0; /* Calculate the size of a single slice. */ - tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + tex->stride = align(pf_get_stride(pt->format, pt->width0), 4); /* XXX: hardware expects/requires 9 levels at minimum. */ @@ -283,8 +271,7 @@ i915_miptree_layout_3d(struct i915_texture *tex) width = u_minify(width, 1); height = u_minify(height, 1); - nblocksx = pf_get_nblocksx(&pt->block, width); - nblocksy = pf_get_nblocksy(&pt->block, height); + nblocksy = pf_get_nblocksy(pt->format, height); } /* Fixup depth image_offsets: @@ -309,14 +296,14 @@ i915_miptree_layout_cube(struct i915_texture *tex) { struct pipe_texture *pt = &tex->base; unsigned width = pt->width0, height = pt->height0; - const unsigned nblocks = pt->nblocksx[0]; + const unsigned nblocks = pf_get_nblocksx(pt->format, pt->width0); unsigned level; unsigned face; assert(width == height); /* cubemap images are square */ /* double pitch for cube layouts */ - tex->stride = round_up(nblocks * pt->block.size * 2, 4); + tex->stride = align(nblocks * pf_get_blocksize(pt->format) * 2, 4); tex->total_nblocksy = nblocks * 4; for (level = 0; level <= pt->last_level; level++) { @@ -379,8 +366,8 @@ i945_miptree_layout_2d(struct i915_texture *tex) unsigned y = 0; unsigned width = pt->width0; unsigned height = pt->height0; - unsigned nblocksx = pt->nblocksx[0]; - unsigned nblocksy = pt->nblocksy[0]; + unsigned nblocksx = pf_get_nblocksx(pt->format, pt->width0); + unsigned nblocksy = pf_get_nblocksy(pt->format, pt->height0); /* used for scanouts that need special layouts */ if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) @@ -392,7 +379,7 @@ i945_miptree_layout_2d(struct i915_texture *tex) if (i915_display_target_layout(tex)) return; - tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + tex->stride = align(pf_get_stride(pt->format, pt->width0), 4); /* May need to adjust pitch to accomodate the placement of * the 2nd mipmap level. This occurs when the alignment @@ -401,11 +388,11 @@ i945_miptree_layout_2d(struct i915_texture *tex) */ if (pt->last_level > 0) { unsigned mip1_nblocksx - = align(pf_get_nblocksx(&pt->block, u_minify(width, 1)), align_x) - + pf_get_nblocksx(&pt->block, u_minify(width, 2)); + = align(pf_get_nblocksx(pt->format, u_minify(width, 1)), align_x) + + pf_get_nblocksx(pt->format, u_minify(width, 2)); if (mip1_nblocksx > nblocksx) - tex->stride = mip1_nblocksx * pt->block.size; + tex->stride = mip1_nblocksx * pf_get_blocksize(pt->format); } /* Pitch must be a whole number of dwords @@ -435,8 +422,8 @@ i945_miptree_layout_2d(struct i915_texture *tex) width = u_minify(width, 1); height = u_minify(height, 1); - nblocksx = pf_get_nblocksx(&pt->block, width); - nblocksy = pf_get_nblocksy(&pt->block, height); + nblocksx = pf_get_nblocksx(pt->format, width); + nblocksy = pf_get_nblocksy(pt->format, height); } } @@ -447,17 +434,16 @@ i945_miptree_layout_3d(struct i915_texture *tex) unsigned width = pt->width0; unsigned height = pt->height0; unsigned depth = pt->depth0; - unsigned nblocksx = pt->nblocksx[0]; - unsigned nblocksy = pt->nblocksy[0]; + unsigned nblocksy = pf_get_nblocksy(pt->format, pt->width0); unsigned pack_x_pitch, pack_x_nr; unsigned pack_y_pitch; unsigned level; - tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + tex->stride = align(pf_get_stride(pt->format, pt->width0), 4); tex->total_nblocksy = 0; - pack_y_pitch = MAX2(pt->nblocksy[0], 2); - pack_x_pitch = tex->stride / pt->block.size; + pack_y_pitch = MAX2(nblocksy, 2); + pack_x_pitch = tex->stride / pf_get_blocksize(pt->format); pack_x_nr = 1; for (level = 0; level <= pt->last_level; level++) { @@ -482,7 +468,7 @@ i945_miptree_layout_3d(struct i915_texture *tex) if (pack_x_pitch > 4) { pack_x_pitch >>= 1; pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr * pt->block.size <= tex->stride); + assert(pack_x_pitch * pack_x_nr * pf_get_blocksize(pt->format) <= tex->stride); } if (pack_y_pitch > 2) { @@ -492,8 +478,7 @@ i945_miptree_layout_3d(struct i915_texture *tex) width = u_minify(width, 1); height = u_minify(height, 1); depth = u_minify(depth, 1); - nblocksx = pf_get_nblocksx(&pt->block, width); - nblocksy = pf_get_nblocksy(&pt->block, height); + nblocksy = pf_get_nblocksy(pt->format, height); } } @@ -503,7 +488,7 @@ i945_miptree_layout_cube(struct i915_texture *tex) struct pipe_texture *pt = &tex->base; unsigned level; - const unsigned nblocks = pt->nblocksx[0]; + const unsigned nblocks = pf_get_nblocksx(pt->format, pt->width0); unsigned face; unsigned width = pt->width0; unsigned height = pt->height0; @@ -523,9 +508,9 @@ i945_miptree_layout_cube(struct i915_texture *tex) * or the final row of 4x4, 2x2 and 1x1 faces below this. */ if (nblocks > 32) - tex->stride = round_up(nblocks * pt->block.size * 2, 4); + tex->stride = align(nblocks * pf_get_blocksize(pt->format) * 2, 4); else - tex->stride = 14 * 8 * pt->block.size; + tex->stride = 14 * 8 * pf_get_blocksize(pt->format); tex->total_nblocksy = nblocks * 4; @@ -645,9 +630,6 @@ i915_texture_create(struct pipe_screen *screen, pipe_reference_init(&tex->base.reference, 1); tex->base.screen = screen; - tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width0); - tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height0); - if (is->is_i945) { if (!i945_miptree_layout(tex)) goto fail; @@ -829,14 +811,10 @@ i915_get_tex_transfer(struct pipe_screen *screen, trans = CALLOC_STRUCT(i915_transfer); if (trans) { pipe_texture_reference(&trans->base.texture, texture); - trans->base.format = trans->base.format; trans->base.x = x; trans->base.y = y; trans->base.width = w; trans->base.height = h; - trans->base.block = texture->block; - trans->base.nblocksx = texture->nblocksx[level]; - trans->base.nblocksy = texture->nblocksy[level]; trans->base.stride = tex->stride; trans->offset = offset; trans->base.usage = usage; @@ -852,6 +830,7 @@ i915_transfer_map(struct pipe_screen *screen, struct intel_winsys *iws = i915_screen(tex->base.screen)->iws; char *map; boolean write = FALSE; + enum pipe_format format = tex->base.format; if (transfer->usage & PIPE_TRANSFER_WRITE) write = TRUE; @@ -861,8 +840,8 @@ i915_transfer_map(struct pipe_screen *screen, return NULL; return map + i915_transfer(transfer)->offset + - transfer->y / transfer->block.height * transfer->stride + - transfer->x / transfer->block.width * transfer->block.size; + transfer->y / pf_get_blockheight(format) * transfer->stride + + transfer->x / pf_get_blockwidth(format) * pf_get_blocksize(format); } static void diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index ffcbc9a379..b4aabd4d7c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -166,7 +166,7 @@ shade_quads(struct llvmpipe_context *llvmpipe, assert((y % 2) == 0); depth = llvmpipe->zsbuf_map + y*llvmpipe->zsbuf_transfer->stride + - 2*x*llvmpipe->zsbuf_transfer->block.size; + 2*x*pf_get_blocksize(llvmpipe->zsbuf_transfer->texture->format); } else depth = NULL; diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c index c7c4143bc6..5dbc597d2c 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c @@ -291,7 +291,7 @@ lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc, assert(0); } - util_format_read_4ub(tc->tex_trans->format, + util_format_read_4ub(tc->tex_trans->texture->format, (uint8_t *)tile->color, sizeof tile->color[0], tc->tex_trans_map, tc->tex_trans->stride, x, y, w, h); diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 65d62fd072..f099f903bd 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -48,7 +48,6 @@ /* Simple, maximally packed layout. */ - /* Conventional allocation path for non-display textures: */ static boolean @@ -63,20 +62,15 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, unsigned buffer_size = 0; - pf_get_block(lpt->base.format, &lpt->base.block); - for (level = 0; level <= pt->last_level; level++) { unsigned nblocksx, nblocksy; - pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); - /* Allocate storage for whole quads. This is particularly important * for depth surfaces, which are currently stored in a swizzled format. */ - nblocksx = pf_get_nblocksx(&pt->block, align(width, 2)); - nblocksy = pf_get_nblocksy(&pt->block, align(height, 2)); + nblocksx = pf_get_nblocksx(pt->format, align(width, 2)); + nblocksy = pf_get_nblocksy(pt->format, align(height, 2)); - lpt->stride[level] = align(nblocksx*pt->block.size, 16); + lpt->stride[level] = align(nblocksx * pf_get_blocksize(pt->format), 16); lpt->level_offset[level] = buffer_size; @@ -100,10 +94,6 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, { struct llvmpipe_winsys *winsys = screen->winsys; - pf_get_block(lpt->base.format, &lpt->base.block); - lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width0); - lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height0); - lpt->dt = winsys->displaytarget_create(winsys, lpt->base.format, lpt->base.width0, @@ -180,8 +170,6 @@ llvmpipe_texture_blanket(struct pipe_screen * screen, lpt->base = *base; pipe_reference_init(&lpt->base.reference, 1); lpt->base.screen = screen; - lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width0); - lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height0); lpt->stride[0] = stride[0]; pipe_buffer_reference(&lpt->buffer, buffer); @@ -255,11 +243,17 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen, ps->level = level; ps->zslice = zslice; + /* XXX shouldn't that rather be + tex_height = align(ps->height, 2); + to account for alignment done in llvmpipe_texture_layout ? + */ if (pt->target == PIPE_TEXTURE_CUBE) { - ps->offset += face * pt->nblocksy[level] * lpt->stride[level]; + unsigned tex_height = ps->height; + ps->offset += face * pf_get_nblocksy(pt->format, tex_height) * lpt->stride[level]; } else if (pt->target == PIPE_TEXTURE_3D) { - ps->offset += zslice * pt->nblocksy[level] * lpt->stride[level]; + unsigned tex_height = ps->height; + ps->offset += zslice * pf_get_nblocksy(pt->format, tex_height) * lpt->stride[level]; } else { assert(face == 0); @@ -300,14 +294,10 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen, if (lpt) { struct pipe_transfer *pt = &lpt->base; pipe_texture_reference(&pt->texture, texture); - pt->format = texture->format; - pt->block = texture->block; pt->x = x; pt->y = y; pt->width = w; pt->height = h; - pt->nblocksx = texture->nblocksx[level]; - pt->nblocksy = texture->nblocksy[level]; pt->stride = lptex->stride[level]; pt->usage = usage; pt->face = face; @@ -316,11 +306,17 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen, lpt->offset = lptex->level_offset[level]; + /* XXX shouldn't that rather be + tex_height = align(u_minify(texture->height0, level), 2) + to account for alignment done in llvmpipe_texture_layout ? + */ if (texture->target == PIPE_TEXTURE_CUBE) { - lpt->offset += face * pt->nblocksy * pt->stride; + unsigned tex_height = u_minify(texture->height0, level); + lpt->offset += face * pf_get_nblocksy(texture->format, tex_height) * pt->stride; } else if (texture->target == PIPE_TEXTURE_3D) { - lpt->offset += zslice * pt->nblocksy * pt->stride; + unsigned tex_height = u_minify(texture->height0, level); + lpt->offset += zslice * pf_get_nblocksy(texture->format, tex_height) * pt->stride; } else { assert(face == 0); @@ -352,9 +348,11 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, struct llvmpipe_screen *screen = llvmpipe_screen(_screen); ubyte *map, *xfer_map; struct llvmpipe_texture *lpt; + enum pipe_format format; assert(transfer->texture); lpt = llvmpipe_texture(transfer->texture); + format = lpt->base.format; if(lpt->dt) { struct llvmpipe_winsys *winsys = screen->winsys; @@ -379,8 +377,8 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, } xfer_map = map + llvmpipe_transfer(transfer)->offset + - transfer->y / transfer->block.height * transfer->stride + - transfer->x / transfer->block.width * transfer->block.size; + transfer->y / pf_get_blockheight(format) * transfer->stride + + transfer->x / pf_get_blockwidth(format) * pf_get_blocksize(format); /*printf("map = %p xfer map = %p\n", map, xfer_map);*/ return xfer_map; } diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c index ec3e002d62..50891c4227 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c @@ -252,13 +252,13 @@ lp_flush_tile_cache(struct llvmpipe_tile_cache *tc) case LP_TILE_STATUS_CLEAR: /* Actually clear the tiles which were flagged as being in a * clear state. */ - util_fill_rect(tc->transfer_map, &pt->block, pt->stride, + util_fill_rect(tc->transfer_map, pt->texture->format, pt->stride, x, y, w, h, tc->clear_val); break; case LP_TILE_STATUS_DEFINED: - lp_tile_write_4ub(pt->format, + lp_tile_write_4ub(pt->texture->format, tile->color, tc->transfer_map, pt->stride, x, y, w, h); @@ -306,7 +306,7 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc, y &= ~(TILE_SIZE - 1); if (!pipe_clip_tile(x, y, &w, &h, tc->transfer)) - lp_tile_read_4ub(pt->format, + lp_tile_read_4ub(pt->texture->format, tile->color, tc->transfer_map, tc->transfer->stride, x, y, w, h); diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 98a39390bf..a479842f9e 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -631,10 +631,10 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) for (i = 0; i < aos_count - 1; i += 2) { int buf_num1 = velem[i].vertex_buffer_index; int buf_num2 = velem[i+1].vertex_buffer_index; - assert(vbuf[buf_num1].stride % 4 == 0 && pf_get_size(velem[i].src_format) % 4 == 0); - assert(vbuf[buf_num2].stride % 4 == 0 && pf_get_size(velem[i+1].src_format) % 4 == 0); - OUT_CS((pf_get_size(velem[i].src_format) >> 2) | (vbuf[buf_num1].stride << 6) | - (pf_get_size(velem[i+1].src_format) << 14) | (vbuf[buf_num2].stride << 22)); + assert(vbuf[buf_num1].stride % 4 == 0 && pf_get_blocksize(velem[i].src_format) % 4 == 0); + assert(vbuf[buf_num2].stride % 4 == 0 && pf_get_blocksize(velem[i+1].src_format) % 4 == 0); + OUT_CS((pf_get_blocksize(velem[i].src_format) >> 2) | (vbuf[buf_num1].stride << 6) | + (pf_get_blocksize(velem[i+1].src_format) << 14) | (vbuf[buf_num2].stride << 22)); OUT_CS(vbuf[buf_num1].buffer_offset + velem[i].src_offset + offset * vbuf[buf_num1].stride); OUT_CS(vbuf[buf_num2].buffer_offset + velem[i+1].src_offset + @@ -642,8 +642,8 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) } if (aos_count & 1) { int buf_num = velem[i].vertex_buffer_index; - assert(vbuf[buf_num].stride % 4 == 0 && pf_get_size(velem[i].src_format) % 4 == 0); - OUT_CS((pf_get_size(velem[i].src_format) >> 2) | (vbuf[buf_num].stride << 6)); + assert(vbuf[buf_num].stride % 4 == 0 && pf_get_blocksize(velem[i].src_format) % 4 == 0); + OUT_CS((pf_get_blocksize(velem[i].src_format) >> 2) | (vbuf[buf_num].stride << 6)); OUT_CS(vbuf[buf_num].buffer_offset + velem[i].src_offset + offset * vbuf[buf_num].stride); } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 390b63007e..032fa69ec0 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -311,14 +311,10 @@ r300_get_tex_transfer(struct pipe_screen *screen, trans = CALLOC_STRUCT(r300_transfer); if (trans) { pipe_texture_reference(&trans->transfer.texture, texture); - trans->transfer.format = texture->format; trans->transfer.x = x; trans->transfer.y = y; trans->transfer.width = w; trans->transfer.height = h; - trans->transfer.block = texture->block; - trans->transfer.nblocksx = texture->nblocksx[level]; - trans->transfer.nblocksy = texture->nblocksy[level]; trans->transfer.stride = r300_texture_get_stride(tex, level); trans->transfer.usage = usage; @@ -344,6 +340,7 @@ static void* r300_transfer_map(struct pipe_screen* screen, { struct r300_texture* tex = (struct r300_texture*)transfer->texture; char* map; + enum pipe_format format = tex->tex.format; map = pipe_buffer_map(screen, tex->buffer, pipe_transfer_buffer_flags(transfer)); @@ -353,8 +350,8 @@ static void* r300_transfer_map(struct pipe_screen* screen, } return map + r300_transfer(transfer)->offset + - transfer->y / transfer->block.height * transfer->stride + - transfer->x / transfer->block.width * transfer->block.size; + transfer->y / pf_get_blockheight(format) * transfer->stride + + transfer->x / pf_get_blockwidth(format) * pf_get_blocksize(format); } static void r300_transfer_unmap(struct pipe_screen* screen, diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 093a21ebe2..63fc6a235a 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -105,7 +105,7 @@ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) return 0; } - return align(pf_get_stride(&tex->tex.block, u_minify(tex->tex.width0, level)), 32); + return align(pf_get_stride(tex->tex.format, u_minify(tex->tex.width0, level)), 32); } static void r300_setup_miptree(struct r300_texture* tex) @@ -115,11 +115,10 @@ static void r300_setup_miptree(struct r300_texture* tex) int i; for (i = 0; i <= base->last_level; i++) { - base->nblocksx[i] = pf_get_nblocksx(&base->block, u_minify(base->width0, i)); - base->nblocksy[i] = pf_get_nblocksy(&base->block, u_minify(base->height0, i)); + unsigned nblocksy = pf_get_nblocksy(base->format, u_minify(base->height0, i)); stride = r300_texture_get_stride(tex, i); - layer_size = stride * base->nblocksy[i]; + layer_size = stride * nblocksy; if (base->target == PIPE_TEXTURE_CUBE) size = layer_size * 6; @@ -129,7 +128,7 @@ static void r300_setup_miptree(struct r300_texture* tex) tex->offset[i] = align(tex->size, 32); tex->size = tex->offset[i] + size; tex->layer_size[i] = layer_size; - tex->pitch[i] = stride / base->block.size; + tex->pitch[i] = stride / pf_get_blocksize(base->format); debug_printf("r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes)\n", @@ -245,7 +244,7 @@ static struct pipe_texture* tex->tex.screen = screen; tex->stride_override = *stride; - tex->pitch[0] = *stride / base->block.size; + tex->pitch[0] = *stride / pf_get_blocksize(base->format); r300_setup_flags(tex); r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); @@ -283,7 +282,6 @@ r300_video_surface_create(struct pipe_screen *screen, template.width0 = util_next_power_of_two(width); template.height0 = util_next_power_of_two(height); template.depth0 = 1; - pf_get_block(template.format, &template.block); template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET; diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c index fb11b80dcf..410adf881b 100644 --- a/src/gallium/drivers/svga/svga_screen_texture.c +++ b/src/gallium/drivers/svga/svga_screen_texture.c @@ -158,7 +158,8 @@ svga_transfer_dma_band(struct svga_transfer *st, st->base.x + st->base.width, y + h, st->base.zslice + 1, - texture->base.block.size*8/(texture->base.block.width*texture->base.block.height)); + pf_get_blocksize(texture->base.format)*8/ + (pf_get_blockwidth(texture->base.format)*pf_get_blockheight(texture->base.format))); box.x = st->base.x; box.y = y; @@ -208,7 +209,8 @@ svga_transfer_dma(struct svga_transfer *st, } else { unsigned y, h, srcy; - h = st->hw_nblocksy * st->base.block.height; + unsigned blockheight = pf_get_blockheight(st->base.texture->format); + h = st->hw_nblocksy * blockheight; srcy = 0; for(y = 0; y < st->base.height; y += h) { unsigned offset, length; @@ -218,11 +220,11 @@ svga_transfer_dma(struct svga_transfer *st, h = st->base.height - y; /* Transfer band must be aligned to pixel block boundaries */ - assert(y % st->base.block.height == 0); - assert(h % st->base.block.height == 0); + assert(y % blockheight == 0); + assert(h % blockheight == 0); - offset = y * st->base.stride / st->base.block.height; - length = h * st->base.stride / st->base.block.height; + offset = y * st->base.stride / blockheight; + length = h * st->base.stride / blockheight; sw = (uint8_t *)st->swbuf + offset; @@ -291,8 +293,6 @@ svga_texture_create(struct pipe_screen *screen, height = templat->height0; depth = templat->depth0; for(level = 0; level <= templat->last_level; ++level) { - tex->base.nblocksx[level] = pf_get_nblocksx(&tex->base.block, width); - tex->base.nblocksy[level] = pf_get_nblocksy(&tex->base.block, height); width = u_minify(width, 1); height = u_minify(height, 1); depth = u_minify(depth, 1); @@ -750,6 +750,8 @@ svga_get_tex_transfer(struct pipe_screen *screen, struct svga_screen *ss = svga_screen(screen); struct svga_winsys_screen *sws = ss->sws; struct svga_transfer *st; + unsigned nblocksx = pf_get_nblocksx(texture->format, w); + unsigned nblocksy = pf_get_nblocksy(texture->format, h); /* We can't map texture storage directly */ if (usage & PIPE_TRANSFER_MAP_DIRECTLY) @@ -759,21 +761,17 @@ svga_get_tex_transfer(struct pipe_screen *screen, if (!st) return NULL; - st->base.format = texture->format; - st->base.block = texture->block; st->base.x = x; st->base.y = y; st->base.width = w; st->base.height = h; - st->base.nblocksx = pf_get_nblocksx(&texture->block, w); - st->base.nblocksy = pf_get_nblocksy(&texture->block, h); - st->base.stride = st->base.nblocksx*st->base.block.size; + st->base.stride = nblocksx*pf_get_blocksize(texture->format); st->base.usage = usage; st->base.face = face; st->base.level = level; st->base.zslice = zslice; - st->hw_nblocksy = st->base.nblocksy; + st->hw_nblocksy = nblocksy; st->hwbuf = svga_winsys_buffer_create(ss, 1, @@ -789,15 +787,15 @@ svga_get_tex_transfer(struct pipe_screen *screen, if(!st->hwbuf) goto no_hwbuf; - if(st->hw_nblocksy < st->base.nblocksy) { + if(st->hw_nblocksy < nblocksy) { /* We couldn't allocate a hardware buffer big enough for the transfer, * so allocate regular malloc memory instead */ debug_printf("%s: failed to allocate %u KB of DMA, splitting into %u x %u KB DMA transfers\n", __FUNCTION__, - (st->base.nblocksy*st->base.stride + 1023)/1024, - (st->base.nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy, + (nblocksy*st->base.stride + 1023)/1024, + (nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy, (st->hw_nblocksy*st->base.stride + 1023)/1024); - st->swbuf = MALLOC(st->base.nblocksy*st->base.stride); + st->swbuf = MALLOC(nblocksy*st->base.stride); if(!st->swbuf) goto no_swbuf; } @@ -1046,8 +1044,7 @@ svga_screen_buffer_from_texture(struct pipe_texture *texture, svga_translate_format(texture->format), stex->handle); - *stride = pf_get_nblocksx(&texture->block, texture->width0) * - texture->block.size; + *stride = pf_get_stride(texture->format, texture->width0); return *buffer != NULL; } diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index a947745732..f1b0daf9f6 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -210,7 +210,7 @@ static int update_zero_stride( struct svga_context *svga, mapped_buffer = pipe_buffer_map_range(svga->pipe.screen, vbuffer->buffer, vel->src_offset, - pf_get_size(vel->src_format), + pf_get_blocksize(vel->src_format), PIPE_BUFFER_USAGE_CPU_READ); translate->set_buffer(translate, vel->vertex_buffer_index, mapped_buffer, -- cgit v1.2.3 From 94b5c28a98850f42fbcdab9ceda1450279e1e6fd Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Wed, 2 Dec 2009 16:55:33 +0100 Subject: gallium: adapt nv drivers to interface cleanups --- src/gallium/drivers/nv04/nv04_miptree.c | 13 ++------ src/gallium/drivers/nv04/nv04_surface_2d.c | 12 ++++---- src/gallium/drivers/nv04/nv04_transfer.c | 9 +----- src/gallium/drivers/nv10/nv10_miptree.c | 11 ++----- src/gallium/drivers/nv10/nv10_transfer.c | 9 +----- src/gallium/drivers/nv20/nv20_miptree.c | 10 ++----- src/gallium/drivers/nv20/nv20_transfer.c | 9 +----- src/gallium/drivers/nv30/nv30_miptree.c | 11 ++----- src/gallium/drivers/nv30/nv30_transfer.c | 9 +----- src/gallium/drivers/nv40/nv40_miptree.c | 11 ++----- src/gallium/drivers/nv40/nv40_transfer.c | 9 +----- src/gallium/drivers/nv50/nv50_miptree.c | 10 +++---- src/gallium/drivers/nv50/nv50_transfer.c | 48 +++++++++++++----------------- 13 files changed, 51 insertions(+), 120 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c index 4fd72c82e6..eeab6dfa30 100644 --- a/src/gallium/drivers/nv04/nv04_miptree.c +++ b/src/gallium/drivers/nv04/nv04_miptree.c @@ -10,28 +10,21 @@ static void nv04_miptree_layout(struct nv04_miptree *nv04mt) { struct pipe_texture *pt = &nv04mt->base; - uint width = pt->width0, height = pt->height0; uint offset = 0; int nr_faces, l; nr_faces = 1; for (l = 0; l <= pt->last_level; l++) { - - pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); - nv04mt->level[l].pitch = pt->width0; nv04mt->level[l].pitch = (nv04mt->level[l].pitch + 63) & ~63; - - width = u_minify(width, 1); - height = u_minify(height, 1); } for (l = 0; l <= pt->last_level; l++) { - nv04mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); + /* XXX guess was obviously missing */ + nv04mt->level[l].image_offset[0] = offset; offset += nv04mt->level[l].pitch * u_minify(pt->height0, l); } @@ -128,7 +121,7 @@ nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ns->base.zslice = zslice; ns->pitch = nv04mt->level[level].pitch; - ns->base.offset = nv04mt->level[level].image_offset; + ns->base.offset = nv04mt->level[level].image_offset[0]; return &ns->base; } diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c index 8be134b83d..932893eef5 100644 --- a/src/gallium/drivers/nv04/nv04_surface_2d.c +++ b/src/gallium/drivers/nv04/nv04_surface_2d.c @@ -155,10 +155,10 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, sub_w = MIN2(sub_w, w - x); /* Must be 64-byte aligned */ - assert(!((dst->offset + nv04_swizzle_bits(dx+x, dy+y) * dst->texture->block.size) & 63)); + assert(!((dst->offset + nv04_swizzle_bits(dx+x, dy+y) * pf_get_blocksize(dst->texture->format)) & 63)); BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1); - OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(dx+x, dy+y) * dst->texture->block.size, + OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(dx+x, dy+y) * pf_get_blocksize(dst->texture->format), NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9); @@ -177,7 +177,7 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, OUT_RING (chan, src_pitch | NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER | NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE); - OUT_RELOCl(chan, src_bo, src->offset + (sy+y) * src_pitch + (sx+x) * src->texture->block.size, + OUT_RELOCl(chan, src_bo, src->offset + (sy+y) * src_pitch + (sx+x) * pf_get_blocksize(src->texture->format), NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); OUT_RING (chan, 0); } @@ -198,9 +198,9 @@ nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx, unsigned src_pitch = ((struct nv04_surface *)src)->pitch; unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch; unsigned dst_offset = dst->offset + dy * dst_pitch + - dx * dst->texture->block.size; + dx * pf_get_blocksize(dst->texture->format); unsigned src_offset = src->offset + sy * src_pitch + - sx * src->texture->block.size; + sx * pf_get_blocksize(src->texture->format); WAIT_RING (chan, 3 + ((h / 2047) + 1) * 9); BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2); @@ -219,7 +219,7 @@ nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR); OUT_RING (chan, src_pitch); OUT_RING (chan, dst_pitch); - OUT_RING (chan, w * src->texture->block.size); + OUT_RING (chan, w * pf_get_blocksize(src->texture->format)); OUT_RING (chan, count); OUT_RING (chan, 0x0101); OUT_RING (chan, 0); diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c index e6456429f4..e8ff686b4a 100644 --- a/src/gallium/drivers/nv04/nv04_transfer.c +++ b/src/gallium/drivers/nv04/nv04_transfer.c @@ -24,9 +24,6 @@ nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, template->width0 = u_minify(pt->width0, level); template->height0 = u_minify(pt->height0, level); template->depth0 = 1; - template->block = pt->block; - template->nblocksx[0] = pt->nblocksx[level]; - template->nblocksy[0] = pt->nblocksx[level]; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -49,14 +46,10 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&tx->base.texture, pt); - tx->base.format = pt->format; tx->base.x = x; tx->base.y = y; tx->base.width = w; tx->base.height = h; - tx->base.block = pt->block; - tx->base.nblocksx = pt->nblocksx[level]; - tx->base.nblocksy = pt->nblocksy[level]; tx->base.stride = mt->level[level].pitch; tx->base.usage = usage; tx->base.face = face; @@ -158,7 +151,7 @@ nv04_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) pipe_transfer_buffer_flags(ptx)); return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * ptx->block.size; + ptx->y * ns->pitch + ptx->x * pf_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c index b2a6c59b74..439beeccc3 100644 --- a/src/gallium/drivers/nv10/nv10_miptree.c +++ b/src/gallium/drivers/nv10/nv10_miptree.c @@ -11,7 +11,7 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt) { struct pipe_texture *pt = &nv10mt->base; boolean swizzled = FALSE; - uint width = pt->width0, height = pt->height0; + uint width = pt->width0; uint offset = 0; int nr_faces, l, f; @@ -22,21 +22,16 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt) } for (l = 0; l <= pt->last_level; l++) { - - pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); - if (swizzled) - nv10mt->level[l].pitch = pt->nblocksx[l] * pt->block.size; + nv10mt->level[l].pitch = pf_get_stride(pt->format, width); else - nv10mt->level[l].pitch = pt->nblocksx[0] * pt->block.size; + nv10mt->level[l].pitch = pf_get_stride(pt->format, pt->width0); nv10mt->level[l].pitch = (nv10mt->level[l].pitch + 63) & ~63; nv10mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); width = u_minify(width, 1); - height = u_minify(height, 1); } diff --git a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c index ec54297ab0..9e44d37367 100644 --- a/src/gallium/drivers/nv10/nv10_transfer.c +++ b/src/gallium/drivers/nv10/nv10_transfer.c @@ -24,9 +24,6 @@ nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, template->width0 = u_minify(pt->width0, level); template->height0 = u_minify(pt->height0, level); template->depth0 = 1; - template->block = pt->block; - template->nblocksx[0] = pt->nblocksx[level]; - template->nblocksy[0] = pt->nblocksx[level]; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -49,14 +46,10 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&tx->base.texture, pt); - tx->base.format = pt->format; tx->base.x = x; tx->base.y = y; tx->base.width = w; tx->base.height = h; - tx->base.block = pt->block; - tx->base.nblocksx = pt->nblocksx[level]; - tx->base.nblocksy = pt->nblocksy[level]; tx->base.stride = mt->level[level].pitch; tx->base.usage = usage; tx->base.face = face; @@ -158,7 +151,7 @@ nv10_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) pipe_transfer_buffer_flags(ptx)); return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * ptx->block.size; + ptx->y * ns->pitch + ptx->x * pf_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c index 554e28e47d..2bde9fb75b 100644 --- a/src/gallium/drivers/nv20/nv20_miptree.c +++ b/src/gallium/drivers/nv20/nv20_miptree.c @@ -10,7 +10,7 @@ static void nv20_miptree_layout(struct nv20_miptree *nv20mt) { struct pipe_texture *pt = &nv20mt->base; - uint width = pt->width0, height = pt->height0; + uint width = pt->width0; uint offset = 0; int nr_faces, l, f; uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | @@ -26,19 +26,15 @@ nv20_miptree_layout(struct nv20_miptree *nv20mt) } for (l = 0; l <= pt->last_level; l++) { - pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); - if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) - nv20mt->level[l].pitch = align(pt->width0 * pt->block.size, 64); + nv20mt->level[l].pitch = align(pf_get_stride(pt->format, pt->width0), 64); else - nv20mt->level[l].pitch = u_minify(pt->width0, l) * pt->block.size; + nv20mt->level[l].pitch = pf_get_stride(pt->format, width); nv20mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); width = u_minify(width, 1); - height = u_minify(height, 1); } for (f = 0; f < nr_faces; f++) { diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c index 87b5c14a3c..f2e0a34db9 100644 --- a/src/gallium/drivers/nv20/nv20_transfer.c +++ b/src/gallium/drivers/nv20/nv20_transfer.c @@ -24,9 +24,6 @@ nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, template->width0 = u_minify(pt->width0, level); template->height0 = u_minify(pt->height0, level); template->depth0 = 1; - template->block = pt->block; - template->nblocksx[0] = pt->nblocksx[level]; - template->nblocksy[0] = pt->nblocksx[level]; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -49,14 +46,10 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&tx->base.texture, pt); - tx->base.format = pt->format; tx->base.x = x; tx->base.y = y; tx->base.width = w; tx->base.height = h; - tx->base.block = pt->block; - tx->base.nblocksx = pt->nblocksx[level]; - tx->base.nblocksy = pt->nblocksy[level]; tx->base.stride = mt->level[level].pitch; tx->base.usage = usage; tx->base.face = face; @@ -158,7 +151,7 @@ nv20_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) pipe_transfer_buffer_flags(ptx)); return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * ptx->block.size; + ptx->y * ns->pitch + ptx->x * pf_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index b4c306d127..9e50a7cf6b 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -9,7 +9,7 @@ static void nv30_miptree_layout(struct nv30_miptree *nv30mt) { struct pipe_texture *pt = &nv30mt->base; - uint width = pt->width0, height = pt->height0, depth = pt->depth0; + uint width = pt->width0; uint offset = 0; int nr_faces, l, f; uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | @@ -28,20 +28,15 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt) } for (l = 0; l <= pt->last_level; l++) { - pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); - if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) - nv30mt->level[l].pitch = align(pt->width0 * pt->block.size, 64); + nv30mt->level[l].pitch = align(pf_get_stride(pt->format, pt->width0), 64); else - nv30mt->level[l].pitch = u_minify(pt->width0, l) * pt->block.size; + nv30mt->level[l].pitch = pf_get_stride(pt->format, width); nv30mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); width = u_minify(width, 1); - height = u_minify(height, 1); - depth = u_minify(depth, 1); } for (f = 0; f < nr_faces; f++) { diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c index 5e429b4d85..c8c3bd1f17 100644 --- a/src/gallium/drivers/nv30/nv30_transfer.c +++ b/src/gallium/drivers/nv30/nv30_transfer.c @@ -24,9 +24,6 @@ nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, template->width0 = u_minify(pt->width0, level); template->height0 = u_minify(pt->height0, level); template->depth0 = 1; - template->block = pt->block; - template->nblocksx[0] = pt->nblocksx[level]; - template->nblocksy[0] = pt->nblocksx[level]; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -49,14 +46,10 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&tx->base.texture, pt); - tx->base.format = pt->format; tx->base.x = x; tx->base.y = y; tx->base.width = w; tx->base.height = h; - tx->base.block = pt->block; - tx->base.nblocksx = pt->nblocksx[level]; - tx->base.nblocksy = pt->nblocksy[level]; tx->base.stride = mt->level[level].pitch; tx->base.usage = usage; tx->base.face = face; @@ -158,7 +151,7 @@ nv30_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) pipe_transfer_buffer_flags(ptx)); return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * ptx->block.size; + ptx->y * ns->pitch + ptx->x * pf_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index f73bedff6d..8779c5572b 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -9,7 +9,7 @@ static void nv40_miptree_layout(struct nv40_miptree *mt) { struct pipe_texture *pt = &mt->base; - uint width = pt->width0, height = pt->height0, depth = pt->depth0; + uint width = pt->width0; uint offset = 0; int nr_faces, l, f; uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | @@ -28,20 +28,15 @@ nv40_miptree_layout(struct nv40_miptree *mt) } for (l = 0; l <= pt->last_level; l++) { - pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); - if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) - mt->level[l].pitch = align(pt->width0 * pt->block.size, 64); + mt->level[l].pitch = align(pf_get_stride(pt->format, pt->width0), 64); else - mt->level[l].pitch = u_minify(pt->width0, l) * pt->block.size; + mt->level[l].pitch = pf_get_stride(pt->format, width); mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); width = u_minify(width, 1); - height = u_minify(height, 1); - depth = u_minify(depth, 1); } for (f = 0; f < nr_faces; f++) { diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c index 36e253c96f..1ee5cf39e0 100644 --- a/src/gallium/drivers/nv40/nv40_transfer.c +++ b/src/gallium/drivers/nv40/nv40_transfer.c @@ -24,9 +24,6 @@ nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, template->width0 = u_minify(pt->width0, level); template->height0 = u_minify(pt->height0, level); template->depth0 = 1; - template->block = pt->block; - template->nblocksx[0] = pt->nblocksx[level]; - template->nblocksy[0] = pt->nblocksx[level]; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -49,14 +46,10 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&tx->base.texture, pt); - tx->base.format = pt->format; tx->base.x = x; tx->base.y = y; tx->base.width = w; tx->base.height = h; - tx->base.block = pt->block; - tx->base.nblocksx = pt->nblocksx[level]; - tx->base.nblocksy = pt->nblocksy[level]; tx->base.stride = mt->level[level].pitch; tx->base.usage = usage; tx->base.face = face; @@ -158,7 +151,7 @@ nv40_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) pipe_transfer_buffer_flags(ptx)); return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * ptx->block.size; + ptx->y * ns->pitch + ptx->x * pf_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 3d58746793..40ee665999 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -91,13 +91,11 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) for (l = 0; l <= pt->last_level; l++) { struct nv50_miptree_level *lvl = &mt->level[l]; - - pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + unsigned nblocksy = pf_get_nblocksy(pt->format, height); lvl->image_offset = CALLOC(mt->image_nr, sizeof(int)); - lvl->pitch = align(pt->nblocksx[l] * pt->block.size, 64); - lvl->tile_mode = get_tile_mode(pt->nblocksy[l], depth); + lvl->pitch = align(pf_get_stride(pt->format, width), 64); + lvl->tile_mode = get_tile_mode(nblocksy, depth); width = u_minify(width, 1); height = u_minify(height, 1); @@ -118,7 +116,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) unsigned tile_d = get_tile_depth(lvl->tile_mode); size = lvl->pitch; - size *= align(pt->nblocksy[l], tile_h); + size *= align(pf_get_nblocksy(pt->format, u_minify(pt->height0, l)), tile_h); size *= align(u_minify(pt->depth0, l), tile_d); lvl->image_offset[i] = mt->total_size; diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index 39d65279fc..4705f96f57 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -16,6 +16,8 @@ struct nv50_transfer { int level_depth; int level_x; int level_y; + unsigned nblocksx; + unsigned nblocksy; }; static void @@ -151,20 +153,11 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_texture_reference(&tx->base.texture, pt); - tx->base.format = pt->format; + tx->nblocksx = pf_get_nblocksx(pt->format, u_minify(pt->width0, level)); + tx->nblocksy = pf_get_nblocksy(pt->format, u_minify(pt->height0, level)); tx->base.width = w; tx->base.height = h; - tx->base.block = pt->block; - if (!pt->nblocksx[level]) { - tx->base.nblocksx = pf_get_nblocksx(&pt->block, - u_minify(pt->width0, level)); - tx->base.nblocksy = pf_get_nblocksy(&pt->block, - u_minify(pt->height0, level)); - } else { - tx->base.nblocksx = pt->nblocksx[level]; - tx->base.nblocksy = pt->nblocksy[level]; - } - tx->base.stride = tx->base.nblocksx * pt->block.size; + tx->base.stride = tx->nblocksx * pf_get_blocksize(pt->format); tx->base.usage = usage; tx->level_pitch = lvl->pitch; @@ -173,10 +166,10 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->level_depth = u_minify(mt->base.base.depth0, level); tx->level_offset = lvl->image_offset[image]; tx->level_tiling = lvl->tile_mode; - tx->level_x = pf_get_nblocksx(&tx->base.block, x); - tx->level_y = pf_get_nblocksy(&tx->base.block, y); + tx->level_x = pf_get_nblocksx(pt->format, x); + tx->level_y = pf_get_nblocksy(pt->format, y); ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, - tx->base.nblocksy * tx->base.stride, &tx->bo); + tx->nblocksy * tx->base.stride, &tx->bo); if (ret) { FREE(tx); return NULL; @@ -185,22 +178,22 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, if (pt->target == PIPE_TEXTURE_3D) tx->level_offset += get_zslice_offset(lvl->tile_mode, zslice, lvl->pitch, - tx->base.nblocksy); + tx->nblocksy); if (usage & PIPE_TRANSFER_READ) { - nx = pf_get_nblocksx(&tx->base.block, tx->base.width); - ny = pf_get_nblocksy(&tx->base.block, tx->base.height); + nx = pf_get_nblocksx(pt->format, tx->base.width); + ny = pf_get_nblocksy(pt->format, tx->base.height); nv50_transfer_rect_m2mf(pscreen, mt->base.bo, tx->level_offset, tx->level_pitch, tx->level_tiling, x, y, - tx->base.nblocksx, tx->base.nblocksy, + tx->nblocksx, tx->nblocksy, tx->level_depth, tx->bo, 0, tx->base.stride, tx->bo->tile_mode, 0, 0, - tx->base.nblocksx, tx->base.nblocksy, 1, - tx->base.block.size, nx, ny, + tx->nblocksx, tx->nblocksy, 1, + pf_get_blocksize(pt->format), nx, ny, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART, NOUVEAU_BO_GART); } @@ -213,23 +206,24 @@ nv50_transfer_del(struct pipe_transfer *ptx) { struct nv50_transfer *tx = (struct nv50_transfer *)ptx; struct nv50_miptree *mt = nv50_miptree(ptx->texture); + struct pipe_texture *pt = ptx->texture; - unsigned nx = pf_get_nblocksx(&tx->base.block, tx->base.width); - unsigned ny = pf_get_nblocksy(&tx->base.block, tx->base.height); + unsigned nx = pf_get_nblocksx(pt->format, tx->base.width); + unsigned ny = pf_get_nblocksy(pt->format, tx->base.height); if (ptx->usage & PIPE_TRANSFER_WRITE) { - struct pipe_screen *pscreen = ptx->texture->screen; + struct pipe_screen *pscreen = pt->screen; nv50_transfer_rect_m2mf(pscreen, tx->bo, 0, tx->base.stride, tx->bo->tile_mode, 0, 0, - tx->base.nblocksx, tx->base.nblocksy, 1, + tx->nblocksx, tx->nblocksy, 1, mt->base.bo, tx->level_offset, tx->level_pitch, tx->level_tiling, tx->level_x, tx->level_y, - tx->base.nblocksx, tx->base.nblocksy, + tx->nblocksx, tx->nblocksy, tx->level_depth, - tx->base.block.size, nx, ny, + pf_get_blocksize(pt->format), nx, ny, NOUVEAU_BO_GART, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART); } -- cgit v1.2.3 From 429bf7541777de08e070df3920b8566e3ac78223 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 2 Dec 2009 09:23:37 -0700 Subject: cell: fix TGSI breakage --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 4d43f65d29..1895a7940c 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -1351,7 +1351,7 @@ emit_function_call(struct codegen *gen, static boolean emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) { - const uint target = inst->InstructionExtTexture.Texture; + const uint target = inst->Texture.Texture; const uint unit = inst->Src[1].Register.Index; uint addr; int ch; -- cgit v1.2.3 From 792888121b92913733daec7526c9441f27ce1231 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 2 Dec 2009 10:09:53 -0700 Subject: llvmpipe: plug in dummy pipe_context::set_vertex_sampler_textures function Fixes immediate segfault. --- src/gallium/drivers/llvmpipe/lp_context.c | 3 ++- src/gallium/drivers/llvmpipe/lp_state.h | 10 +++++++--- src/gallium/drivers/llvmpipe/lp_state_sampler.c | 14 ++++++++++++-- 3 files changed, 21 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index c081f6de03..66549132d4 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -205,7 +205,8 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state; llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple; llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state; - llvmpipe->pipe.set_fragment_sampler_textures = llvmpipe_set_sampler_textures; + llvmpipe->pipe.set_fragment_sampler_textures = llvmpipe_set_fragment_sampler_textures; + llvmpipe->pipe.set_vertex_sampler_textures = llvmpipe_set_vertex_sampler_textures; llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state; llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 7b26ce61a3..805959af89 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -168,9 +168,13 @@ void llvmpipe_set_polygon_stipple( struct pipe_context *, void llvmpipe_set_scissor_state( struct pipe_context *, const struct pipe_scissor_state * ); -void llvmpipe_set_sampler_textures( struct pipe_context *, - unsigned num, - struct pipe_texture ** ); +void llvmpipe_set_fragment_sampler_textures( struct pipe_context *, + unsigned num, + struct pipe_texture ** ); + +void llvmpipe_set_vertex_sampler_textures( struct pipe_context *, + unsigned num, + struct pipe_texture ** ); void llvmpipe_set_viewport_state( struct pipe_context *, const struct pipe_viewport_state * ); diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 8333805a3f..b61b669093 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -78,8 +78,9 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe, void -llvmpipe_set_sampler_textures(struct pipe_context *pipe, - unsigned num, struct pipe_texture **texture) +llvmpipe_set_fragment_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); uint i; @@ -116,6 +117,15 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe, } +void +llvmpipe_set_vertex_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) +{ + /* XXX to do */ +} + + void llvmpipe_delete_sampler_state(struct pipe_context *pipe, void *sampler) -- cgit v1.2.3 From f42192e783521f49a7caab09b073740e63ab092b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 2 Dec 2009 12:19:31 -0700 Subject: llvmpipe: return 0 for PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS query The driver (and draw module) don't support vertex shader textures yet. --- src/gallium/drivers/llvmpipe/lp_screen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 0fb133486a..a6ecaa0b2b 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -59,7 +59,7 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: return PIPE_MAX_SAMPLERS; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return PIPE_MAX_SAMPLERS; + return 0; case PIPE_CAP_NPOT_TEXTURES: return 1; case PIPE_CAP_TWO_SIDED_STENCIL: -- cgit v1.2.3 From d5e5909f171952bc15f43bc618238fc0699edc09 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 2 Dec 2009 12:20:15 -0700 Subject: Revert "llvmpipe: plug in dummy pipe_context::set_vertex_sampler_textures function" This reverts commit 792888121b92913733daec7526c9441f27ce1231. We're instead returning 0 for the PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS query. --- src/gallium/drivers/llvmpipe/lp_context.c | 3 +-- src/gallium/drivers/llvmpipe/lp_state.h | 10 +++------- src/gallium/drivers/llvmpipe/lp_state_sampler.c | 14 ++------------ 3 files changed, 6 insertions(+), 21 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 66549132d4..c081f6de03 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -205,8 +205,7 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state; llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple; llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state; - llvmpipe->pipe.set_fragment_sampler_textures = llvmpipe_set_fragment_sampler_textures; - llvmpipe->pipe.set_vertex_sampler_textures = llvmpipe_set_vertex_sampler_textures; + llvmpipe->pipe.set_fragment_sampler_textures = llvmpipe_set_sampler_textures; llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state; llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 805959af89..7b26ce61a3 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -168,13 +168,9 @@ void llvmpipe_set_polygon_stipple( struct pipe_context *, void llvmpipe_set_scissor_state( struct pipe_context *, const struct pipe_scissor_state * ); -void llvmpipe_set_fragment_sampler_textures( struct pipe_context *, - unsigned num, - struct pipe_texture ** ); - -void llvmpipe_set_vertex_sampler_textures( struct pipe_context *, - unsigned num, - struct pipe_texture ** ); +void llvmpipe_set_sampler_textures( struct pipe_context *, + unsigned num, + struct pipe_texture ** ); void llvmpipe_set_viewport_state( struct pipe_context *, const struct pipe_viewport_state * ); diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index b61b669093..8333805a3f 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -78,9 +78,8 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe, void -llvmpipe_set_fragment_sampler_textures(struct pipe_context *pipe, - unsigned num, - struct pipe_texture **texture) +llvmpipe_set_sampler_textures(struct pipe_context *pipe, + unsigned num, struct pipe_texture **texture) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); uint i; @@ -117,15 +116,6 @@ llvmpipe_set_fragment_sampler_textures(struct pipe_context *pipe, } -void -llvmpipe_set_vertex_sampler_textures(struct pipe_context *pipe, - unsigned num, - struct pipe_texture **texture) -{ - /* XXX to do */ -} - - void llvmpipe_delete_sampler_state(struct pipe_context *pipe, void *sampler) -- cgit v1.2.3 From 08383af4c749566dcb58db94d7b72ee02e4cab11 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 2 Dec 2009 11:22:55 -0800 Subject: r300g: No vertex textures here. --- src/gallium/drivers/r300/r300_state.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 7505353953..442af70e14 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -566,6 +566,12 @@ static void r300_bind_sampler_states(struct pipe_context* pipe, r300->sampler_count = count; } +static void r300_lacks_vertex_textures(struct pipe_context* pipe, + unsigned count, + void** states) +{ +} + static void r300_delete_sampler_state(struct pipe_context* pipe, void* state) { FREE(state); @@ -823,6 +829,7 @@ void r300_init_state_functions(struct r300_context* r300) r300->context.create_sampler_state = r300_create_sampler_state; r300->context.bind_fragment_sampler_states = r300_bind_sampler_states; + r300->context.bind_vertex_sampler_states = r300_lacks_vertex_textures; r300->context.delete_sampler_state = r300_delete_sampler_state; r300->context.set_fragment_sampler_textures = r300_set_sampler_textures; -- cgit v1.2.3 From 4f77b0103d5f150845300ee8bddcef20d11a9820 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 2 Dec 2009 12:16:19 -0800 Subject: r300g, radeong: De-specialize r300_winsys into radeon_winsys. There's like five good reasons for this, I swear. --- src/gallium/drivers/r300/Makefile | 3 +- src/gallium/drivers/r300/r300_context.c | 9 +- src/gallium/drivers/r300/r300_context.h | 2 +- src/gallium/drivers/r300/r300_cs.h | 5 +- src/gallium/drivers/r300/r300_screen.c | 13 +- src/gallium/drivers/r300/r300_screen.h | 4 +- src/gallium/drivers/r300/r300_vbo.c | 3 +- src/gallium/drivers/r300/r300_winsys.h | 70 +--------- src/gallium/winsys/drm/radeon/core/radeon_buffer.h | 10 +- src/gallium/winsys/drm/radeon/core/radeon_drm.c | 7 +- src/gallium/winsys/drm/radeon/core/radeon_r300.c | 141 ++++++++------------- src/gallium/winsys/drm/radeon/core/radeon_r300.h | 5 +- src/gallium/winsys/drm/radeon/core/radeon_winsys.h | 105 +++++++++++++++ 13 files changed, 190 insertions(+), 187 deletions(-) create mode 100644 src/gallium/winsys/drm/radeon/core/radeon_winsys.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index d13bb7a36b..63ae5c2766 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -23,7 +23,8 @@ C_SOURCES = \ r300_tgsi_to_rc.c LIBRARY_INCLUDES = \ - -I$(TOP)/src/mesa/drivers/dri/r300/compiler + -I$(TOP)/src/mesa/drivers/dri/r300/compiler \ + -I$(TOP)/src/gallium/winsys/drm/radeon/core COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 769733b6dd..68a17dcb63 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -36,7 +36,8 @@ #include "r300_screen.h" #include "r300_state_derived.h" #include "r300_state_invariant.h" -#include "r300_winsys.h" + +#include "radeon_winsys.h" static enum pipe_error r300_clear_hash_table(void* key, void* value, void* data) @@ -105,7 +106,7 @@ static void r300_flush_cb(void *data) } struct pipe_context* r300_create_context(struct pipe_screen* screen, - struct r300_winsys* r300_winsys) + struct radeon_winsys* radeon_winsys) { struct r300_context* r300 = CALLOC_STRUCT(r300_context); struct r300_screen* r300screen = r300_screen(screen); @@ -113,9 +114,9 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, if (!r300) return NULL; - r300->winsys = r300_winsys; + r300->winsys = radeon_winsys; - r300->context.winsys = (struct pipe_winsys*)r300_winsys; + r300->context.winsys = (struct pipe_winsys*)radeon_winsys; r300->context.screen = screen; r300_init_debug(r300); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 39c0914cff..dd3f6ac143 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -237,7 +237,7 @@ struct r300_context { struct pipe_context context; /* The interface to the windowing system, etc. */ - struct r300_winsys* winsys; + struct radeon_winsys* winsys; /* Draw module. Used mostly for SW TCL. */ struct draw_context* draw; diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 86ba91db52..8b100375fd 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -26,7 +26,8 @@ #include "util/u_math.h" #include "r300_reg.h" -#include "r300_winsys.h" + +#include "radeon_winsys.h" /* Yes, I know macros are ugly. However, they are much prettier than the code * that they neatly hide away, and don't have the cost of function setup,so @@ -50,7 +51,7 @@ #define CS_LOCALS(context) \ struct r300_context* const cs_context_copy = (context); \ - struct r300_winsys* cs_winsys = cs_context_copy->winsys; \ + struct radeon_winsys* cs_winsys = cs_context_copy->winsys; \ int cs_count = 0; #define CHECK_CS(size) \ diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 390b63007e..2e7b1423e6 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -27,7 +27,8 @@ #include "r300_context.h" #include "r300_screen.h" #include "r300_texture.h" -#include "r300_winsys.h" + +#include "radeon_winsys.h" /* Return the identifier behind whom the brave coders responsible for this * amalgamation of code, sweat, and duct tape, routinely obscure their names. @@ -372,7 +373,7 @@ static void r300_destroy_screen(struct pipe_screen* pscreen) FREE(r300screen); } -struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys) +struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys) { struct r300_screen* r300screen = CALLOC_STRUCT(r300_screen); struct r300_capabilities* caps = CALLOC_STRUCT(r300_capabilities); @@ -380,14 +381,14 @@ struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys) if (!r300screen || !caps) return NULL; - caps->pci_id = r300_winsys->pci_id; - caps->num_frag_pipes = r300_winsys->gb_pipes; - caps->num_z_pipes = r300_winsys->z_pipes; + caps->pci_id = radeon_winsys->pci_id; + caps->num_frag_pipes = radeon_winsys->gb_pipes; + caps->num_z_pipes = radeon_winsys->z_pipes; r300_parse_chipset(caps); r300screen->caps = caps; - r300screen->screen.winsys = (struct pipe_winsys*)r300_winsys; + r300screen->screen.winsys = (struct pipe_winsys*)radeon_winsys; r300screen->screen.destroy = r300_destroy_screen; r300screen->screen.get_name = r300_get_name; r300screen->screen.get_vendor = r300_get_vendor; diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 1ce5ff3904..2217988add 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -27,7 +27,7 @@ #include "r300_chipset.h" -struct r300_winsys; +struct radeon_winsys; struct r300_screen { /* Parent class */ @@ -58,6 +58,6 @@ r300_transfer(struct pipe_transfer* transfer) } /* Creates a new r300 screen. */ -struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys); +struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys); #endif /* R300_SCREEN_H */ diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c index 6ebaf715dc..d8610dadfa 100644 --- a/src/gallium/drivers/r300/r300_vbo.c +++ b/src/gallium/drivers/r300/r300_vbo.c @@ -32,7 +32,8 @@ #include "r300_context.h" #include "r300_state_inlines.h" #include "r300_reg.h" -#include "r300_winsys.h" + +#include "radeon_winsys.h" static INLINE int get_buffer_offset(struct r300_context *r300, unsigned int buf_nr, diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 864a6146b2..f86985841f 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -35,76 +35,8 @@ extern "C" { #include "pipe/p_state.h" #include "pipe/internal/p_winsys_screen.h" -struct r300_winsys { - /* Parent class */ - struct pipe_winsys base; - - /* Opaque Radeon-specific winsys object. */ - void* radeon_winsys; - - /* PCI ID */ - uint32_t pci_id; - - /* GB pipe count */ - uint32_t gb_pipes; - - /* Z pipe count (rv530 only) */ - uint32_t z_pipes; - - /* GART size. */ - uint32_t gart_size; - - /* VRAM size. */ - uint32_t vram_size; - - /* Add a pipe_buffer to the list of buffer objects to validate. */ - boolean (*add_buffer)(struct r300_winsys* winsys, - struct pipe_buffer* pbuffer, - uint32_t rd, - uint32_t wd); - - /* Revalidate all currently setup pipe_buffers. - * Returns TRUE if a flush is required. */ - boolean (*validate)(struct r300_winsys* winsys); - - /* Check to see if there's room for commands. */ - boolean (*check_cs)(struct r300_winsys* winsys, int size); - - /* Start a command emit. */ - void (*begin_cs)(struct r300_winsys* winsys, - int size, - const char* file, - const char* function, - int line); - - /* Write a dword to the command buffer. */ - void (*write_cs_dword)(struct r300_winsys* winsys, uint32_t dword); - - /* Write a relocated dword to the command buffer. */ - void (*write_cs_reloc)(struct r300_winsys* winsys, - struct pipe_buffer* bo, - uint32_t rd, - uint32_t wd, - uint32_t flags); - - /* Finish a command emit. */ - void (*end_cs)(struct r300_winsys* winsys, - const char* file, - const char* function, - int line); - - /* Flush the CS. */ - void (*flush_cs)(struct r300_winsys* winsys); - - /* winsys flush - callback from winsys when flush required */ - void (*set_flush_cb)(struct r300_winsys *winsys, - void (*flush_cb)(void *), void *data); - - void (*reset_bos)(struct r300_winsys *winsys); -}; - struct pipe_context* r300_create_context(struct pipe_screen* screen, - struct r300_winsys* r300_winsys); + struct radeon_winsys* radeon_winsys); boolean r300_get_texture_buffer(struct pipe_texture* texture, struct pipe_buffer** buffer, diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h index f5153b06af..bfe2221d1e 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h @@ -45,6 +45,8 @@ #include "radeon_drm.h" +#include "radeon_winsys.h" + struct radeon_pipe_buffer { struct pipe_buffer base; struct radeon_bo *bo; @@ -68,14 +70,6 @@ struct radeon_winsys_priv { struct radeon_cs* cs; }; -struct radeon_winsys { - /* Parent class. */ - struct pipe_winsys base; - - /* This corresponds to void* radeon_winsys in r300_winsys. */ - struct radeon_winsys_priv* priv; -}; - struct radeon_winsys* radeon_pipe_winsys(int fb); #if 0 struct pipe_surface *radeon_surface_from_handle(struct radeon_context *radeon_context, diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c index 69f14e54f2..770d7c73eb 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c @@ -41,9 +41,8 @@ struct pipe_screen* radeon_create_screen(struct drm_api* api, if (debug_get_bool_option("RADEON_SOFTPIPE", FALSE)) { return softpipe_create_screen((struct pipe_winsys*)winsys); } else { - struct r300_winsys* r300 = radeon_create_r300_winsys(drmFB, winsys); - FREE(winsys); - return r300_create_screen(r300); + radeon_setup_winsys(drmFB, winsys); + return r300_create_screen(winsys); } } @@ -55,7 +54,7 @@ struct pipe_context* radeon_create_context(struct drm_api* api, return radeon_create_softpipe(screen->winsys); } else { return r300_create_context(screen, - (struct r300_winsys*)screen->winsys); + (struct radeon_winsys*)screen->winsys); } } diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c index d3e468a9ef..b64e9c16e1 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c @@ -22,36 +22,27 @@ #include "radeon_r300.h" -static void radeon_r300_set_flush_cb(struct r300_winsys *winsys, - void (*flush_cb)(void *), - void *data) +static void radeon_set_flush_cb(struct radeon_winsys *winsys, + void (*flush_cb)(void *), + void *data) { - struct radeon_winsys_priv* priv = - (struct radeon_winsys_priv*)winsys->radeon_winsys; - - radeon_cs_space_set_flush(priv->cs, flush_cb, - data); + radeon_cs_space_set_flush(winsys->priv->cs, flush_cb, data); } -static boolean radeon_r300_add_buffer(struct r300_winsys* winsys, - struct pipe_buffer* pbuffer, - uint32_t rd, - uint32_t wd) +static boolean radeon_add_buffer(struct radeon_winsys* winsys, + struct pipe_buffer* pbuffer, + uint32_t rd, + uint32_t wd) { - struct radeon_winsys_priv* priv = - (struct radeon_winsys_priv*)winsys->radeon_winsys; struct radeon_bo* bo = ((struct radeon_pipe_buffer*)pbuffer)->bo; - radeon_cs_space_add_persistent_bo(priv->cs, bo, rd, wd); + radeon_cs_space_add_persistent_bo(winsys->priv->cs, bo, rd, wd); return TRUE; } -static boolean radeon_r300_validate(struct r300_winsys* winsys) +static boolean radeon_validate(struct radeon_winsys* winsys) { - struct radeon_winsys_priv* priv = - (struct radeon_winsys_priv*)winsys->radeon_winsys; - - if (radeon_cs_space_check(priv->cs) < 0) { + if (radeon_cs_space_check(winsys->priv->cs) < 0) { return FALSE; } @@ -59,45 +50,37 @@ static boolean radeon_r300_validate(struct r300_winsys* winsys) return TRUE; } -static boolean radeon_r300_check_cs(struct r300_winsys* winsys, int size) +static boolean radeon_check_cs(struct radeon_winsys* winsys, int size) { /* XXX check size here, lazy ass! */ /* XXX also validate buffers */ return TRUE; } -static void radeon_r300_begin_cs(struct r300_winsys* winsys, - int size, - const char* file, - const char* function, - int line) +static void radeon_begin_cs(struct radeon_winsys* winsys, + int size, + const char* file, + const char* function, + int line) { - struct radeon_winsys_priv* priv = - (struct radeon_winsys_priv*)winsys->radeon_winsys; - - radeon_cs_begin(priv->cs, size, file, function, line); + radeon_cs_begin(winsys->priv->cs, size, file, function, line); } -static void radeon_r300_write_cs_dword(struct r300_winsys* winsys, - uint32_t dword) +static void radeon_write_cs_dword(struct radeon_winsys* winsys, + uint32_t dword) { - struct radeon_winsys_priv* priv = - (struct radeon_winsys_priv*)winsys->radeon_winsys; - - radeon_cs_write_dword(priv->cs, dword); + radeon_cs_write_dword(winsys->priv->cs, dword); } -static void radeon_r300_write_cs_reloc(struct r300_winsys* winsys, - struct pipe_buffer* pbuffer, - uint32_t rd, - uint32_t wd, - uint32_t flags) +static void radeon_write_cs_reloc(struct radeon_winsys* winsys, + struct pipe_buffer* pbuffer, + uint32_t rd, + uint32_t wd, + uint32_t flags) { - struct radeon_winsys_priv* priv = - (struct radeon_winsys_priv*)winsys->radeon_winsys; int retval = 0; - retval = radeon_cs_write_reloc(priv->cs, + retval = radeon_cs_write_reloc(winsys->priv->cs, ((struct radeon_pipe_buffer*)pbuffer)->bo, rd, wd, flags); if (retval) { @@ -106,46 +89,39 @@ static void radeon_r300_write_cs_reloc(struct r300_winsys* winsys, } } -static void radeon_r300_reset_bos(struct r300_winsys *winsys) +static void radeon_reset_bos(struct radeon_winsys *winsys) { - struct radeon_winsys_priv* priv = - (struct radeon_winsys_priv*)winsys->radeon_winsys; - radeon_cs_space_reset_bos(priv->cs); + radeon_cs_space_reset_bos(winsys->priv->cs); } -static void radeon_r300_end_cs(struct r300_winsys* winsys, - const char* file, - const char* function, - int line) +static void radeon_end_cs(struct radeon_winsys* winsys, + const char* file, + const char* function, + int line) { - struct radeon_winsys_priv* priv = - (struct radeon_winsys_priv*)winsys->radeon_winsys; - - radeon_cs_end(priv->cs, file, function, line); + radeon_cs_end(winsys->priv->cs, file, function, line); } -static void radeon_r300_flush_cs(struct r300_winsys* winsys) +static void radeon_flush_cs(struct radeon_winsys* winsys) { - struct radeon_winsys_priv* priv = - (struct radeon_winsys_priv*)winsys->radeon_winsys; int retval; /* Emit the CS. */ - retval = radeon_cs_emit(priv->cs); + retval = radeon_cs_emit(winsys->priv->cs); if (retval) { debug_printf("radeon: Bad CS, dumping...\n"); - radeon_cs_print(priv->cs, stderr); + radeon_cs_print(winsys->priv->cs, stderr); } /* Reset CS. * Someday, when we care about performance, we should really find a way * to rotate between two or three CS objects so that the GPU can be * spinning through one CS while another one is being filled. */ - radeon_cs_erase(priv->cs); + radeon_cs_erase(winsys->priv->cs); } /* Helper function to do the ioctls needed for setup and init. */ -static void do_ioctls(struct r300_winsys* winsys, int fd) +static void do_ioctls(struct radeon_winsys* winsys, int fd) { struct drm_radeon_gem_info gem_info = {0}; struct drm_radeon_info info = {0}; @@ -207,18 +183,17 @@ static void do_ioctls(struct r300_winsys* winsys, int fd) winsys->vram_size = gem_info.vram_visible; } -struct r300_winsys* -radeon_create_r300_winsys(int fd, struct radeon_winsys* old_winsys) +void +radeon_setup_winsys(int fd, struct radeon_winsys* winsys) { - struct r300_winsys* winsys = CALLOC_STRUCT(r300_winsys); - struct radeon_winsys_priv* priv; - + /* XXX is this check needed now? */ if (winsys == NULL) { - return NULL; + return; } - priv = old_winsys->priv; + struct radeon_winsys_priv* priv = winsys->priv; + /* XXX backwards is bad precedent */ do_ioctls(winsys, fd); priv->csm = radeon_cs_manager_gem_ctor(fd); @@ -229,19 +204,15 @@ radeon_create_r300_winsys(int fd, struct radeon_winsys* old_winsys) radeon_cs_set_limit(priv->cs, RADEON_GEM_DOMAIN_VRAM, winsys->vram_size); - winsys->add_buffer = radeon_r300_add_buffer; - winsys->validate = radeon_r300_validate; - - winsys->check_cs = radeon_r300_check_cs; - winsys->begin_cs = radeon_r300_begin_cs; - winsys->write_cs_dword = radeon_r300_write_cs_dword; - winsys->write_cs_reloc = radeon_r300_write_cs_reloc; - winsys->end_cs = radeon_r300_end_cs; - winsys->flush_cs = radeon_r300_flush_cs; - winsys->reset_bos = radeon_r300_reset_bos; - winsys->set_flush_cb = radeon_r300_set_flush_cb; - - memcpy(winsys, old_winsys, sizeof(struct radeon_winsys)); - - return winsys; + winsys->add_buffer = radeon_add_buffer; + winsys->validate = radeon_validate; + + winsys->check_cs = radeon_check_cs; + winsys->begin_cs = radeon_begin_cs; + winsys->write_cs_dword = radeon_write_cs_dword; + winsys->write_cs_reloc = radeon_write_cs_reloc; + winsys->end_cs = radeon_end_cs; + winsys->flush_cs = radeon_flush_cs; + winsys->reset_bos = radeon_reset_bos; + winsys->set_flush_cb = radeon_set_flush_cb; } diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.h b/src/gallium/winsys/drm/radeon/core/radeon_r300.h index 775d7937fd..cfbdb30266 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_r300.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.h @@ -34,9 +34,6 @@ #include "radeon_buffer.h" -struct radeon_winsys; - -struct r300_winsys* -radeon_create_r300_winsys(int fd, struct radeon_winsys* old_winsys); +void radeon_setup_winsys(int fd, struct radeon_winsys* winsys); #endif /* RADEON_R300_H */ diff --git a/src/gallium/winsys/drm/radeon/core/radeon_winsys.h b/src/gallium/winsys/drm/radeon/core/radeon_winsys.h new file mode 100644 index 0000000000..9edc9e038c --- /dev/null +++ b/src/gallium/winsys/drm/radeon/core/radeon_winsys.h @@ -0,0 +1,105 @@ +/* + * Copyright © 2009 Corbin Simpson + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ +/* + * Authors: + * Corbin Simpson + */ +#ifndef RADEON_WINSYS_H +#define RADEON_WINSYS_H + +#include "pipe/internal/p_winsys_screen.h" + +struct radeon_winsys_priv; + +struct radeon_winsys { + /* Parent class. */ + struct pipe_winsys base; + + /* Winsys private */ + struct radeon_winsys_priv* priv; + + /* PCI ID */ + uint32_t pci_id; + + /* GB pipe count */ + uint32_t gb_pipes; + + /* Z pipe count (rv530 only) */ + uint32_t z_pipes; + + /* GART size. */ + uint32_t gart_size; + + /* VRAM size. */ + uint32_t vram_size; + + /* Add a pipe_buffer to the list of buffer objects to validate. */ + boolean (*add_buffer)(struct radeon_winsys* winsys, + struct pipe_buffer* pbuffer, + uint32_t rd, + uint32_t wd); + + /* Revalidate all currently setup pipe_buffers. + * Returns TRUE if a flush is required. */ + boolean (*validate)(struct radeon_winsys* winsys); + + /* Check to see if there's room for commands. */ + boolean (*check_cs)(struct radeon_winsys* winsys, int size); + + /* Start a command emit. */ + void (*begin_cs)(struct radeon_winsys* winsys, + int size, + const char* file, + const char* function, + int line); + + /* Write a dword to the command buffer. */ + void (*write_cs_dword)(struct radeon_winsys* winsys, uint32_t dword); + + /* Write a relocated dword to the command buffer. */ + void (*write_cs_reloc)(struct radeon_winsys* winsys, + struct pipe_buffer* bo, + uint32_t rd, + uint32_t wd, + uint32_t flags); + + /* Finish a command emit. */ + void (*end_cs)(struct radeon_winsys* winsys, + const char* file, + const char* function, + int line); + + /* Flush the CS. */ + void (*flush_cs)(struct radeon_winsys* winsys); + + /* winsys flush - callback from winsys when flush required */ + void (*set_flush_cb)(struct radeon_winsys *winsys, + void (*flush_cb)(void *), void *data); + + void (*reset_bos)(struct radeon_winsys *winsys); +}; + +#endif -- cgit v1.2.3 From 35a15f02634a31c1517363d91aaef8f190e24687 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Thu, 3 Dec 2009 23:15:38 +0100 Subject: gallium: fix reference counting functions to be strict-aliasing compliant Historically, parts of mesa code are not strict-aliasing safe, hence -fno-strict-aliasing is needed to compile (this got forgotten for scons builds for gallium, which indeed not only caused compiler warnings but also unexplicable crashes in non-debug builds). However, we should try to eliminate code not complying with strict-aliasing code at least for gallium. Hence change pipe_reference functions to make them strict-aliasing compliant. This adds a bit more complexity (especially for derived classes) but is the right thing to do, and it does in fact fix a segfault. --- src/gallium/auxiliary/pipebuffer/pb_buffer.h | 3 ++- src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c | 5 ++++- src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 3 +++ src/gallium/drivers/svga/svga_screen_texture.h | 3 ++- src/gallium/include/pipe/p_refcnt.h | 18 ++++++++---------- src/gallium/include/pipe/p_state.h | 9 ++++++--- src/gallium/include/pipe/p_video_state.h | 3 ++- 7 files changed, 27 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index 4ef372233f..eb7e84be84 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -237,8 +237,9 @@ pb_reference(struct pb_buffer **dst, { struct pb_buffer *old = *dst; - if (pipe_reference((struct pipe_reference**)dst, &src->base.reference)) + if (pipe_reference(&(*dst)->base.reference, &src->base.reference)) pb_destroy( old ); + *dst = src; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 2f973684f6..a9375abd21 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -243,6 +243,7 @@ fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list, struct pb_fence_ops *ops = fenced_list->ops; struct list_head *curr, *next; struct fenced_buffer *fenced_buf; + struct pb_buffer *pb_buf; struct pipe_fence_handle *prev_fence = NULL; curr = fenced_list->delayed.next; @@ -271,7 +272,9 @@ fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list, fenced_buffer_remove_locked(fenced_list, fenced_buf); pipe_mutex_unlock(fenced_buf->mutex); - pb_reference((struct pb_buffer **)&fenced_buf, NULL); + pb_buf = &fenced_buf->base; + pb_reference(&pb_buf, NULL); + curr = next; next = curr->next; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index 57d1ede45a..f0c88a0ccb 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -293,8 +293,11 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr, if(buf) { LIST_DEL(&buf->head); pipe_mutex_unlock(mgr->mutex); +#if 0 + /* XXX this didn't do anything right??? */ /* Increase refcount */ pb_reference((struct pb_buffer**)&buf, &buf->base); +#endif return &buf->base; } diff --git a/src/gallium/drivers/svga/svga_screen_texture.h b/src/gallium/drivers/svga/svga_screen_texture.h index 1cc4063e65..727f2c51d2 100644 --- a/src/gallium/drivers/svga/svga_screen_texture.h +++ b/src/gallium/drivers/svga/svga_screen_texture.h @@ -164,8 +164,9 @@ svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_ { struct svga_sampler_view *old = *ptr; - if (pipe_reference((struct pipe_reference **)ptr, &v->reference)) + if (pipe_reference(&(*ptr)->reference, &v->reference)) svga_destroy_sampler_view_priv(old); + *ptr = v; } extern void diff --git a/src/gallium/include/pipe/p_refcnt.h b/src/gallium/include/pipe/p_refcnt.h index 1f9088b3e9..f1875b6b82 100644 --- a/src/gallium/include/pipe/p_refcnt.h +++ b/src/gallium/include/pipe/p_refcnt.h @@ -59,30 +59,28 @@ pipe_is_referenced(struct pipe_reference *reference) /** - * Set 'ptr' to point to 'reference' and update reference counting. - * The old thing pointed to, if any, will be unreferenced first. - * 'reference' may be NULL. + * Update reference counting. + * The old thing pointed to, if any, will be unreferenced. + * Both 'ptr' and 'reference' may be NULL. */ static INLINE bool -pipe_reference(struct pipe_reference **ptr, struct pipe_reference *reference) +pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference) { bool destroy = FALSE; - if(*ptr != reference) { + if(ptr != reference) { /* bump the reference.count first */ if (reference) { assert(pipe_is_referenced(reference)); p_atomic_inc(&reference->count); } - if (*ptr) { - assert(pipe_is_referenced(*ptr)); - if (p_atomic_dec_zero(&(*ptr)->count)) { + if (ptr) { + assert(pipe_is_referenced(ptr)); + if (p_atomic_dec_zero(&ptr->count)) { destroy = TRUE; } } - - *ptr = reference; } return destroy; diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 6de7af6a81..b9dfa1c7d3 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -400,8 +400,9 @@ pipe_buffer_reference(struct pipe_buffer **ptr, struct pipe_buffer *buf) { struct pipe_buffer *old_buf = *ptr; - if (pipe_reference((struct pipe_reference **)ptr, &buf->reference)) + if (pipe_reference(&(*ptr)->reference, &buf->reference)) old_buf->screen->buffer_destroy(old_buf); + *ptr = buf; } static INLINE void @@ -409,8 +410,9 @@ pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf) { struct pipe_surface *old_surf = *ptr; - if (pipe_reference((struct pipe_reference **)ptr, &surf->reference)) + if (pipe_reference(&(*ptr)->reference, &surf->reference)) old_surf->texture->screen->tex_surface_destroy(old_surf); + *ptr = surf; } static INLINE void @@ -418,8 +420,9 @@ pipe_texture_reference(struct pipe_texture **ptr, struct pipe_texture *tex) { struct pipe_texture *old_tex = *ptr; - if (pipe_reference((struct pipe_reference **)ptr, &tex->reference)) + if (pipe_reference(&(*ptr)->reference, &tex->reference)) old_tex->screen->texture_destroy(old_tex); + *ptr = tex; } diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h index 4da26d608c..b85f01c2b0 100644 --- a/src/gallium/include/pipe/p_video_state.h +++ b/src/gallium/include/pipe/p_video_state.h @@ -56,8 +56,9 @@ pipe_video_surface_reference(struct pipe_video_surface **ptr, struct pipe_video_ { struct pipe_video_surface *old_surf = *ptr; - if (pipe_reference((struct pipe_reference **)ptr, &surf->reference)) + if (pipe_reference(&(*ptr)->reference, &surf->reference)) old_surf->screen->video_surface_destroy(old_surf); + *ptr = surf; } struct pipe_video_rect -- cgit v1.2.3 From 4153ec547cfb7fcb26bbeb09ac9ef19fe88d3e4e Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Thu, 3 Dec 2009 23:58:30 +0100 Subject: gallium: fix remaining users of pipe_reference function --- src/gallium/drivers/nouveau/nouveau_stateobj.h | 3 ++- src/gallium/state_trackers/python/st_device.c | 3 ++- src/gallium/winsys/drm/intel/gem/intel_drm_fence.c | 3 ++- src/gallium/winsys/drm/vmware/core/vmw_surface.c | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h index b595405357..62990f9b6a 100644 --- a/src/gallium/drivers/nouveau/nouveau_stateobj.h +++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h @@ -48,13 +48,14 @@ so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso) struct nouveau_stateobj *so = *pso; int i; - if (pipe_reference((struct pipe_reference**)pso, &ref->reference)) { + if (pipe_reference(&(*pso)->reference, &ref->reference)) { free(so->push); for (i = 0; i < so->cur_reloc; i++) nouveau_bo_ref(NULL, &so->reloc[i].bo); free(so->reloc); free(so); } + *pso = ref; } static INLINE void diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c index a791113aba..f19cf4b577 100644 --- a/src/gallium/state_trackers/python/st_device.c +++ b/src/gallium/state_trackers/python/st_device.c @@ -62,8 +62,9 @@ st_device_reference(struct st_device **ptr, struct st_device *st_dev) { struct st_device *old_dev = *ptr; - if (pipe_reference((struct pipe_reference **)ptr, &st_dev->reference)) + if (pipe_reference(&(*ptr)->reference, &st_dev->reference)) st_device_really_destroy(old_dev); + *ptr = st_dev; } diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_fence.c b/src/gallium/winsys/drm/intel/gem/intel_drm_fence.c index e70bfe7b44..b6248a3bcf 100644 --- a/src/gallium/winsys/drm/intel/gem/intel_drm_fence.c +++ b/src/gallium/winsys/drm/intel/gem/intel_drm_fence.c @@ -39,11 +39,12 @@ intel_drm_fence_reference(struct intel_winsys *iws, struct intel_drm_fence *old = (struct intel_drm_fence *)*ptr; struct intel_drm_fence *f = (struct intel_drm_fence *)fence; - if (pipe_reference((struct pipe_reference**)ptr, &f->reference)) { + if (pipe_reference(&(*ptr)->reference, &f->reference)) { if (old->bo) drm_intel_bo_unreference(old->bo); FREE(old); } + *ptr = fence; } static int diff --git a/src/gallium/winsys/drm/vmware/core/vmw_surface.c b/src/gallium/winsys/drm/vmware/core/vmw_surface.c index 64eb32f8b9..5f1b9ad577 100644 --- a/src/gallium/winsys/drm/vmware/core/vmw_surface.c +++ b/src/gallium/winsys/drm/vmware/core/vmw_surface.c @@ -47,7 +47,7 @@ vmw_svga_winsys_surface_reference(struct vmw_svga_winsys_surface **pdst, src_ref = src ? &src->refcnt : NULL; dst_ref = dst ? &dst->refcnt : NULL; - if (pipe_reference(&dst_ref, src_ref)) { + if (pipe_reference(dst_ref, src_ref)) { vmw_ioctl_surface_destroy(dst->screen, dst->sid); #ifdef DEBUG /* to detect dangling pointers */ -- cgit v1.2.3 From a4b3bb12d7627a0bb39dd625e7646c9ef9ccd7fb Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 4 Dec 2009 11:49:42 +0000 Subject: softpipe: fix double-minify in texture layout --- src/gallium/drivers/softpipe/sp_texture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index bd653216c0..4f946ccfcf 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -67,7 +67,7 @@ softpipe_texture_layout(struct pipe_screen *screen, spt->level_offset[level] = buffer_size; - buffer_size += (pf_get_nblocksy(pt->format, u_minify(height, level)) * + buffer_size += (pf_get_nblocksy(pt->format, height) * ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * spt->stride[level]); -- cgit v1.2.3 From 6bb415f862fec94b82915f806beb3a7427bd4bb8 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 4 Dec 2009 14:15:21 +0000 Subject: softpipe: dont claim to support PIPE_FORMAT_NONE --- src/gallium/drivers/softpipe/sp_screen.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 6bf3df8e6a..bd3532de4f 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -149,6 +149,7 @@ softpipe_is_format_supported( struct pipe_screen *screen, case PIPE_FORMAT_B6UG5SR5S_NORM: case PIPE_FORMAT_X8UB8UG8SR8S_NORM: case PIPE_FORMAT_A8B8G8R8_SNORM: + case PIPE_FORMAT_NONE: return FALSE; default: return TRUE; -- cgit v1.2.3 From 225bc70b77fcf107dd8abc93be27a15c27743071 Mon Sep 17 00:00:00 2001 From: Coleman Kane Date: Fri, 4 Dec 2009 08:44:57 -0700 Subject: r300g: use $(MAKE) variable Fixes bug 24501 --- src/gallium/drivers/r300/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index d7a2c8c462..8d0c6e33bb 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -38,4 +38,4 @@ include ../../Makefile.template .PHONY : $(COMPILER_ARCHIVE) $(COMPILER_ARCHIVE): - cd $(TOP)/src/mesa/drivers/dri/r300/compiler; make + $(MAKE) -C $(TOP)/src/mesa/drivers/dri/r300/compiler -- cgit v1.2.3 From c977dd9c7716b0a086eeb0c07f2da148065c3b18 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Fri, 4 Dec 2009 18:23:35 +0100 Subject: svga: fix another pipe_reference strict aliasing violation --- src/gallium/drivers/svga/svga_screen_buffer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_screen_buffer.c b/src/gallium/drivers/svga/svga_screen_buffer.c index 1f8a889672..58a1aba464 100644 --- a/src/gallium/drivers/svga/svga_screen_buffer.c +++ b/src/gallium/drivers/svga/svga_screen_buffer.c @@ -356,7 +356,8 @@ svga_buffer_upload_flush(struct svga_context *svga, sbuf->hw.boxes = NULL; /* Decrement reference count */ - pipe_buffer_reference((struct pipe_buffer **)&sbuf, NULL); + pipe_reference(&(sbuf->base.reference), NULL); + sbuf = NULL; } -- cgit v1.2.3 From 3da8265cd3233e2b22ab0f8a28fbba892984e399 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 4 Dec 2009 16:06:16 +0100 Subject: r300g: fix warnings --- src/gallium/drivers/r300/r300_context.c | 4 ++-- src/gallium/drivers/r300/r300_winsys.h | 2 ++ src/gallium/winsys/drm/radeon/core/radeon_buffer.c | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 68a17dcb63..5b337f03ac 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -36,8 +36,8 @@ #include "r300_screen.h" #include "r300_state_derived.h" #include "r300_state_invariant.h" - -#include "radeon_winsys.h" +#include "r300_texture.h" +#include "r300_winsys.h" static enum pipe_error r300_clear_hash_table(void* key, void* value, void* data) diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index f86985841f..1ae6de70fe 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -35,6 +35,8 @@ extern "C" { #include "pipe/p_state.h" #include "pipe/internal/p_winsys_screen.h" +#include "radeon_winsys.h" + struct pipe_context* r300_create_context(struct pipe_screen* screen, struct radeon_winsys* radeon_winsys); diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c index 65f7babff2..0ca7b39255 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c @@ -35,7 +35,9 @@ #include "radeon_bo_gem.h" #include "softpipe/sp_texture.h" #include "r300_context.h" +#include "util/u_math.h" #include + struct radeon_vl_context { Display *display; -- cgit v1.2.3 From 7679447b5835fd73ab44b3d77b12a034c95af5c5 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 2 Dec 2009 17:15:27 +0100 Subject: r300g, radeong: fix the CS overflow --- src/gallium/drivers/r300/r300_cs.h | 2 +- src/gallium/drivers/r300/r300_emit.c | 9 ++++++++- src/gallium/winsys/drm/radeon/core/radeon_r300.c | 5 +++-- 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 8b100375fd..9fcf3ab538 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -55,7 +55,7 @@ int cs_count = 0; #define CHECK_CS(size) \ - cs_winsys->check_cs(cs_winsys, (size)) + assert(cs_winsys->check_cs(cs_winsys, (size))) #define BEGIN_CS(size) do { \ CHECK_CS(size); \ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index a479842f9e..3bb42f9e43 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -871,10 +871,17 @@ void r300_emit_dirty_state(struct r300_context* r300) return; } + /* Check size of CS. */ + /* Make sure we have at least 8*1024 spare dwords. */ + /* XXX It would be nice to know the number of dwords we really need to + * XXX emit. */ + if (!r300->winsys->check_cs(r300->winsys, 8*1024)) { + r300->context.flush(&r300->context, 0, NULL); + } + /* Clean out BOs. */ r300->winsys->reset_bos(r300->winsys); - /* XXX check size */ validate: /* Color buffers... */ for (i = 0; i < r300->framebuffer_state.nr_cbufs; i++) { diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c index 7362279b77..ba0596c30d 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c @@ -52,8 +52,9 @@ static boolean radeon_validate(struct radeon_winsys* winsys) static boolean radeon_check_cs(struct radeon_winsys* winsys, int size) { - /* XXX check size here, lazy ass! */ - return radeon_validate(winsys); + struct radeon_cs* cs = winsys->priv->cs; + + return radeon_validate(winsys) && cs->cdw + size <= cs->ndw; } static void radeon_begin_cs(struct radeon_winsys* winsys, -- cgit v1.2.3 From 042b524d48ebb15215430149b9b1653f4b46dee3 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 4 Dec 2009 15:54:29 +0100 Subject: radeong: flush CS if a buffer being mapped is referenced by it Also, overlapping occlusion queries seems to work now. --- src/gallium/drivers/r300/r300_emit.c | 2 -- src/gallium/winsys/drm/radeon/core/radeon_buffer.c | 5 +++++ 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 3bb42f9e43..60be03f54f 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -382,8 +382,6 @@ static void r300_emit_query_start(struct r300_context *r300) if (!query) return; - /* XXX This will almost certainly not return good results - * for overlapping queries. */ BEGIN_CS(4); if (caps->family == CHIP_FAMILY_RV530) { OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c index 0ca7b39255..2a8daed051 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c @@ -140,10 +140,15 @@ static void *radeon_buffer_map(struct pipe_winsys *ws, struct pipe_buffer *buffer, unsigned flags) { + struct radeon_winsys_priv *priv = ((struct radeon_winsys *)ws)->priv; struct radeon_pipe_buffer *radeon_buffer = (struct radeon_pipe_buffer*)buffer; int write = 0; + if (radeon_bo_is_referenced_by_cs(radeon_buffer->bo, priv->cs)) { + priv->cs->space_flush_fn(priv->cs->space_flush_data); + } + if (flags & PIPE_BUFFER_USAGE_DONTBLOCK) { uint32_t domain; -- cgit v1.2.3 From 7d9b2edb97419b562a542b5cd701724c009421d4 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Fri, 4 Dec 2009 18:34:52 +0100 Subject: identity: fix copy&paste error --- src/gallium/drivers/identity/id_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index 4509c7b1e5..bedab56f59 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -742,7 +742,7 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.set_polygon_stipple = identity_set_polygon_stipple; id_pipe->base.set_scissor_state = identity_set_scissor_state; id_pipe->base.set_viewport_state = identity_set_viewport_state; - id_pipe->base.set_fragment_sampler_textures = identity_set_vertex_sampler_textures; + id_pipe->base.set_fragment_sampler_textures = identity_set_fragment_sampler_textures; id_pipe->base.set_vertex_sampler_textures = identity_set_vertex_sampler_textures; id_pipe->base.set_vertex_buffers = identity_set_vertex_buffers; id_pipe->base.set_vertex_elements = identity_set_vertex_elements; -- cgit v1.2.3 From b00b06b6e486a87dd88a695ae122863df13ad84e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 4 Dec 2009 18:59:24 +0000 Subject: llvmpipe: Remove debug printf. --- src/gallium/drivers/llvmpipe/lp_tex_cache.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c index 5dbc597d2c..a6d9a2c1ac 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c @@ -155,7 +155,6 @@ lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc) if (lpt->timestamp != tc->timestamp) { /* texture was modified, invalidate all cached tiles */ uint i; - debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp); for (i = 0; i < NUM_ENTRIES; i++) { tc->entries[i].addr.bits.invalid = 1; } -- cgit v1.2.3 From a312e76468435fc1eb7ec5fe0a98601a7fdfec53 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 4 Dec 2009 21:16:14 +0000 Subject: llvmpipe: Ensure transfers are mapped. This shouldn't happen but it does by some misterious reason. Fail the assertion but at least do not segfault on release builds. --- src/gallium/drivers/llvmpipe/lp_tile_cache.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c index 50891c4227..e83210f93b 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c @@ -290,6 +290,10 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc, assert(tc->surface); assert(tc->transfer); + assert(tc->transfer_map); + + if(!tc->transfer_map) + lp_tile_cache_map_transfers(tc); switch(tile->status) { case LP_TILE_STATUS_CLEAR: -- cgit v1.2.3 From c0a13bbae15a471fea278e37b92b874fed1f6b3b Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 4 Dec 2009 21:25:40 +0000 Subject: llvmpipe: Port vertex sampler support from softpipe. Just enough boilerplate code to avoid segfaulting. --- src/gallium/drivers/llvmpipe/lp_context.c | 20 +++++++-- src/gallium/drivers/llvmpipe/lp_context.h | 5 +++ src/gallium/drivers/llvmpipe/lp_screen.c | 4 +- src/gallium/drivers/llvmpipe/lp_state.h | 9 ++++ src/gallium/drivers/llvmpipe/lp_state_derived.c | 12 +++-- src/gallium/drivers/llvmpipe/lp_state_sampler.c | 59 +++++++++++++++++++++++++ 6 files changed, 101 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index c081f6de03..679e244274 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -118,6 +118,11 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) pipe_texture_reference(&llvmpipe->texture[i], NULL); } + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + lp_destroy_tex_tile_cache(llvmpipe->vertex_tex_cache[i]); + pipe_texture_reference(&llvmpipe->vertex_textures[i], NULL); + } + for (i = 0; i < Elements(llvmpipe->constants); i++) { if (llvmpipe->constants[i].buffer) { pipe_buffer_reference(&llvmpipe->constants[i].buffer, NULL); @@ -145,6 +150,11 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe, llvmpipe->framebuffer.zsbuf->texture == texture) return PIPE_REFERENCED_FOR_WRITE; } + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + if (llvmpipe->vertex_tex_cache[i] && + llvmpipe->vertex_tex_cache[i]->texture == texture) + return PIPE_REFERENCED_FOR_READ; + } return PIPE_UNREFERENCED; } @@ -181,6 +191,7 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state; llvmpipe->pipe.bind_fragment_sampler_states = llvmpipe_bind_sampler_states; + llvmpipe->pipe.bind_vertex_sampler_states = llvmpipe_bind_vertex_sampler_states; llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state; llvmpipe->pipe.create_depth_stencil_alpha_state = llvmpipe_create_depth_stencil_state; @@ -206,6 +217,7 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple; llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state; llvmpipe->pipe.set_fragment_sampler_textures = llvmpipe_set_sampler_textures; + llvmpipe->pipe.set_vertex_sampler_textures = llvmpipe_set_vertex_sampler_textures; llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state; llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; @@ -234,13 +246,15 @@ llvmpipe_create( struct pipe_screen *screen ) for (i = 0; i < PIPE_MAX_SAMPLERS; i++) llvmpipe->tex_cache[i] = lp_create_tex_tile_cache( screen ); + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) + llvmpipe->vertex_tex_cache[i] = lp_create_tex_tile_cache(screen); /* vertex shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples; llvmpipe->tgsi.vert_samplers[i].processor = TGSI_PROCESSOR_VERTEX; - llvmpipe->tgsi.vert_samplers[i].cache = llvmpipe->tex_cache[i]; + llvmpipe->tgsi.vert_samplers[i].cache = llvmpipe->vertex_tex_cache[i]; llvmpipe->tgsi.vert_samplers_list[i] = &llvmpipe->tgsi.vert_samplers[i]; } @@ -260,7 +274,7 @@ llvmpipe_create( struct pipe_screen *screen ) goto fail; draw_texture_samplers(llvmpipe->draw, - PIPE_MAX_SAMPLERS, + PIPE_MAX_VERTEX_SAMPLERS, (struct tgsi_sampler **) llvmpipe->tgsi.vert_samplers_list); diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 3ad95d0bfc..cc4d5ad5fd 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -55,6 +55,7 @@ struct llvmpipe_context { /** Constant state objects */ const struct pipe_blend_state *blend; const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; const struct pipe_depth_stencil_alpha_state *depth_stencil; const struct pipe_rasterizer_state *rasterizer; struct lp_fragment_shader *fs; @@ -68,12 +69,15 @@ struct llvmpipe_context { struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; unsigned num_samplers; unsigned num_textures; + unsigned num_vertex_samplers; + unsigned num_vertex_textures; unsigned num_vertex_elements; unsigned num_vertex_buffers; @@ -136,6 +140,7 @@ struct llvmpipe_context { unsigned tex_timestamp; struct llvmpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; + struct llvmpipe_tex_tile_cache *vertex_tex_cache[PIPE_MAX_VERTEX_SAMPLERS]; unsigned no_rast : 1; diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index a6ecaa0b2b..19fe2850fd 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -59,7 +59,9 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: return PIPE_MAX_SAMPLERS; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return 0; + return PIPE_MAX_VERTEX_SAMPLERS; + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS; case PIPE_CAP_NPOT_TEXTURES: return 1; case PIPE_CAP_TWO_SIDED_STENCIL: diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 7b26ce61a3..d1c74ab07b 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -126,6 +126,10 @@ void * llvmpipe_create_sampler_state(struct pipe_context *, const struct pipe_sampler_state *); void llvmpipe_bind_sampler_states(struct pipe_context *, unsigned, void **); +void +llvmpipe_bind_vertex_sampler_states(struct pipe_context *, + unsigned num_samplers, + void **samplers); void llvmpipe_delete_sampler_state(struct pipe_context *, void *); void * @@ -172,6 +176,11 @@ void llvmpipe_set_sampler_textures( struct pipe_context *, unsigned num, struct pipe_texture ** ); +void +llvmpipe_set_vertex_sampler_textures(struct pipe_context *, + unsigned num_textures, + struct pipe_texture **); + void llvmpipe_set_viewport_state( struct pipe_context *, const struct pipe_viewport_state * ); diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index c753b183c0..e703964aaa 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -198,10 +198,14 @@ update_tgsi_samplers( struct llvmpipe_context *llvmpipe ) unsigned i; /* vertex shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - llvmpipe->tgsi.vert_samplers[i].sampler = llvmpipe->sampler[i]; - llvmpipe->tgsi.vert_samplers[i].texture = llvmpipe->texture[i]; - llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples; + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + llvmpipe->tgsi.vert_samplers[i].sampler = llvmpipe->vertex_samplers[i]; + llvmpipe->tgsi.vert_samplers[i].texture = llvmpipe->vertex_textures[i]; + llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples; + } + + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + lp_tex_tile_cache_validate_texture( llvmpipe->vertex_tex_cache[i] ); } /* fragment shader samplers */ diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 8333805a3f..d382f9ca87 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -77,6 +77,34 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe, } +void +llvmpipe_bind_vertex_sampler_states(struct pipe_context *pipe, + unsigned num_samplers, + void **samplers) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + unsigned i; + + assert(num_samplers <= PIPE_MAX_VERTEX_SAMPLERS); + + /* Check for no-op */ + if (num_samplers == llvmpipe->num_vertex_samplers && + !memcmp(llvmpipe->vertex_samplers, samplers, num_samplers * sizeof(void *))) + return; + + draw_flush(llvmpipe->draw); + + for (i = 0; i < num_samplers; ++i) + llvmpipe->vertex_samplers[i] = samplers[i]; + for (i = num_samplers; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) + llvmpipe->vertex_samplers[i] = NULL; + + llvmpipe->num_vertex_samplers = num_samplers; + + llvmpipe->dirty |= LP_NEW_SAMPLER; +} + + void llvmpipe_set_sampler_textures(struct pipe_context *pipe, unsigned num, struct pipe_texture **texture) @@ -116,6 +144,37 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe, } +void +llvmpipe_set_vertex_sampler_textures(struct pipe_context *pipe, + unsigned num_textures, + struct pipe_texture **textures) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + uint i; + + assert(num_textures <= PIPE_MAX_VERTEX_SAMPLERS); + + /* Check for no-op */ + if (num_textures == llvmpipe->num_vertex_textures && + !memcmp(llvmpipe->vertex_textures, textures, num_textures * sizeof(struct pipe_texture *))) { + return; + } + + draw_flush(llvmpipe->draw); + + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + struct pipe_texture *tex = i < num_textures ? textures[i] : NULL; + + pipe_texture_reference(&llvmpipe->vertex_textures[i], tex); + lp_tex_tile_cache_set_texture(llvmpipe->vertex_tex_cache[i], tex); + } + + llvmpipe->num_vertex_textures = num_textures; + + llvmpipe->dirty |= LP_NEW_TEXTURE; +} + + void llvmpipe_delete_sampler_state(struct pipe_context *pipe, void *sampler) -- cgit v1.2.3 From dd51b4f9091abf762e470f0cd4c802215a108290 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 5 Dec 2009 05:43:10 +0000 Subject: llvmpipe: Stop disassembling when an unsupported opcode is found. Otherwise the terminal gets full of garbage. --- src/gallium/drivers/llvmpipe/lp_bld_debug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/drivers/llvmpipe/lp_bld_debug.c index 59d8f492e6..354b3af49e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_debug.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.c @@ -115,7 +115,8 @@ lp_disassemble(const void* func) } } - if (ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret) + if ((ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret) || + ud_obj.mnemonic == UD_Iinvalid) break; } debug_printf("\n"); -- cgit v1.2.3 From 501989bbcd159f8b44148a22151bb46c4800d298 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 5 Dec 2009 05:43:53 +0000 Subject: llvmpipe: Tweak disassembly to match gdb. Helps verifying udis86 output. --- src/gallium/drivers/llvmpipe/lp_bld_debug.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/drivers/llvmpipe/lp_bld_debug.c index 354b3af49e..39dfc51e50 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_debug.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.c @@ -77,10 +77,10 @@ lp_disassemble(const void* func) while (ud_disassemble(&ud_obj)) { #ifdef PIPE_ARCH_X86 - debug_printf("%08lx: ", (unsigned long)ud_insn_off(&ud_obj)); + debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj)); #endif #ifdef PIPE_ARCH_X86_64 - debug_printf("%016llx: ", (unsigned long long)ud_insn_off(&ud_obj)); + debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj)); #endif #if 0 @@ -119,6 +119,12 @@ lp_disassemble(const void* func) ud_obj.mnemonic == UD_Iinvalid) break; } + +#if 0 + /* Print GDB command, useful to verify udis86 output */ + debug_printf("disassemble %p %p\n", func, (void*)(uintptr_t)ud_obj.pc); +#endif + debug_printf("\n"); #else (void)func; -- cgit v1.2.3 From 781d8fccba1bdaadbae042d23bf1d17e25c800fd Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 5 Dec 2009 06:05:56 +0000 Subject: svga: Use _debug_printf, so that output may be dumped in release builds too. The dump calls should be wrapped in #ifdef DEBUG .. #endif. --- src/gallium/drivers/svga/svgadump/svga_dump.c | 1122 ++++++++++---------- src/gallium/drivers/svga/svgadump/svga_dump.py | 22 +- .../drivers/svga/svgadump/svga_shader_dump.c | 230 ++-- 3 files changed, 687 insertions(+), 687 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c index 910afa2528..18e0eb5139 100644 --- a/src/gallium/drivers/svga/svgadump/svga_dump.c +++ b/src/gallium/drivers/svga/svgadump/svga_dump.c @@ -42,554 +42,554 @@ dump_SVGA3dVertexDecl(const SVGA3dVertexDecl *cmd) { switch((*cmd).identity.type) { case SVGA3D_DECLTYPE_FLOAT1: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT1\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT1\n"); break; case SVGA3D_DECLTYPE_FLOAT2: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT2\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT2\n"); break; case SVGA3D_DECLTYPE_FLOAT3: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT3\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT3\n"); break; case SVGA3D_DECLTYPE_FLOAT4: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT4\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT4\n"); break; case SVGA3D_DECLTYPE_D3DCOLOR: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_D3DCOLOR\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_D3DCOLOR\n"); break; case SVGA3D_DECLTYPE_UBYTE4: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4\n"); break; case SVGA3D_DECLTYPE_SHORT2: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2\n"); break; case SVGA3D_DECLTYPE_SHORT4: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4\n"); break; case SVGA3D_DECLTYPE_UBYTE4N: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4N\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4N\n"); break; case SVGA3D_DECLTYPE_SHORT2N: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2N\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2N\n"); break; case SVGA3D_DECLTYPE_SHORT4N: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4N\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4N\n"); break; case SVGA3D_DECLTYPE_USHORT2N: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT2N\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT2N\n"); break; case SVGA3D_DECLTYPE_USHORT4N: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT4N\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT4N\n"); break; case SVGA3D_DECLTYPE_UDEC3: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UDEC3\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UDEC3\n"); break; case SVGA3D_DECLTYPE_DEC3N: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_DEC3N\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_DEC3N\n"); break; case SVGA3D_DECLTYPE_FLOAT16_2: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_2\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_2\n"); break; case SVGA3D_DECLTYPE_FLOAT16_4: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_4\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_4\n"); break; case SVGA3D_DECLTYPE_MAX: - debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_MAX\n"); + _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_MAX\n"); break; default: - debug_printf("\t\t.identity.type = %i\n", (*cmd).identity.type); + _debug_printf("\t\t.identity.type = %i\n", (*cmd).identity.type); break; } switch((*cmd).identity.method) { case SVGA3D_DECLMETHOD_DEFAULT: - debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_DEFAULT\n"); + _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_DEFAULT\n"); break; case SVGA3D_DECLMETHOD_PARTIALU: - debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALU\n"); + _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALU\n"); break; case SVGA3D_DECLMETHOD_PARTIALV: - debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALV\n"); + _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALV\n"); break; case SVGA3D_DECLMETHOD_CROSSUV: - debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_CROSSUV\n"); + _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_CROSSUV\n"); break; case SVGA3D_DECLMETHOD_UV: - debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_UV\n"); + _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_UV\n"); break; case SVGA3D_DECLMETHOD_LOOKUP: - debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUP\n"); + _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUP\n"); break; case SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED: - debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED\n"); + _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED\n"); break; default: - debug_printf("\t\t.identity.method = %i\n", (*cmd).identity.method); + _debug_printf("\t\t.identity.method = %i\n", (*cmd).identity.method); break; } switch((*cmd).identity.usage) { case SVGA3D_DECLUSAGE_POSITION: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITION\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITION\n"); break; case SVGA3D_DECLUSAGE_BLENDWEIGHT: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDWEIGHT\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDWEIGHT\n"); break; case SVGA3D_DECLUSAGE_BLENDINDICES: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDINDICES\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDINDICES\n"); break; case SVGA3D_DECLUSAGE_NORMAL: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_NORMAL\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_NORMAL\n"); break; case SVGA3D_DECLUSAGE_PSIZE: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_PSIZE\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_PSIZE\n"); break; case SVGA3D_DECLUSAGE_TEXCOORD: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TEXCOORD\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TEXCOORD\n"); break; case SVGA3D_DECLUSAGE_TANGENT: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TANGENT\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TANGENT\n"); break; case SVGA3D_DECLUSAGE_BINORMAL: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BINORMAL\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BINORMAL\n"); break; case SVGA3D_DECLUSAGE_TESSFACTOR: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TESSFACTOR\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TESSFACTOR\n"); break; case SVGA3D_DECLUSAGE_POSITIONT: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITIONT\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITIONT\n"); break; case SVGA3D_DECLUSAGE_COLOR: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_COLOR\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_COLOR\n"); break; case SVGA3D_DECLUSAGE_FOG: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_FOG\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_FOG\n"); break; case SVGA3D_DECLUSAGE_DEPTH: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_DEPTH\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_DEPTH\n"); break; case SVGA3D_DECLUSAGE_SAMPLE: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_SAMPLE\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_SAMPLE\n"); break; case SVGA3D_DECLUSAGE_MAX: - debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_MAX\n"); + _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_MAX\n"); break; default: - debug_printf("\t\t.identity.usage = %i\n", (*cmd).identity.usage); + _debug_printf("\t\t.identity.usage = %i\n", (*cmd).identity.usage); break; } - debug_printf("\t\t.identity.usageIndex = %u\n", (*cmd).identity.usageIndex); - debug_printf("\t\t.array.surfaceId = %u\n", (*cmd).array.surfaceId); - debug_printf("\t\t.array.offset = %u\n", (*cmd).array.offset); - debug_printf("\t\t.array.stride = %u\n", (*cmd).array.stride); - debug_printf("\t\t.rangeHint.first = %u\n", (*cmd).rangeHint.first); - debug_printf("\t\t.rangeHint.last = %u\n", (*cmd).rangeHint.last); + _debug_printf("\t\t.identity.usageIndex = %u\n", (*cmd).identity.usageIndex); + _debug_printf("\t\t.array.surfaceId = %u\n", (*cmd).array.surfaceId); + _debug_printf("\t\t.array.offset = %u\n", (*cmd).array.offset); + _debug_printf("\t\t.array.stride = %u\n", (*cmd).array.stride); + _debug_printf("\t\t.rangeHint.first = %u\n", (*cmd).rangeHint.first); + _debug_printf("\t\t.rangeHint.last = %u\n", (*cmd).rangeHint.last); } static void dump_SVGA3dTextureState(const SVGA3dTextureState *cmd) { - debug_printf("\t\t.stage = %u\n", (*cmd).stage); + _debug_printf("\t\t.stage = %u\n", (*cmd).stage); switch((*cmd).name) { case SVGA3D_TS_INVALID: - debug_printf("\t\t.name = SVGA3D_TS_INVALID\n"); + _debug_printf("\t\t.name = SVGA3D_TS_INVALID\n"); break; case SVGA3D_TS_BIND_TEXTURE: - debug_printf("\t\t.name = SVGA3D_TS_BIND_TEXTURE\n"); + _debug_printf("\t\t.name = SVGA3D_TS_BIND_TEXTURE\n"); break; case SVGA3D_TS_COLOROP: - debug_printf("\t\t.name = SVGA3D_TS_COLOROP\n"); + _debug_printf("\t\t.name = SVGA3D_TS_COLOROP\n"); break; case SVGA3D_TS_COLORARG1: - debug_printf("\t\t.name = SVGA3D_TS_COLORARG1\n"); + _debug_printf("\t\t.name = SVGA3D_TS_COLORARG1\n"); break; case SVGA3D_TS_COLORARG2: - debug_printf("\t\t.name = SVGA3D_TS_COLORARG2\n"); + _debug_printf("\t\t.name = SVGA3D_TS_COLORARG2\n"); break; case SVGA3D_TS_ALPHAOP: - debug_printf("\t\t.name = SVGA3D_TS_ALPHAOP\n"); + _debug_printf("\t\t.name = SVGA3D_TS_ALPHAOP\n"); break; case SVGA3D_TS_ALPHAARG1: - debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG1\n"); + _debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG1\n"); break; case SVGA3D_TS_ALPHAARG2: - debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG2\n"); + _debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG2\n"); break; case SVGA3D_TS_ADDRESSU: - debug_printf("\t\t.name = SVGA3D_TS_ADDRESSU\n"); + _debug_printf("\t\t.name = SVGA3D_TS_ADDRESSU\n"); break; case SVGA3D_TS_ADDRESSV: - debug_printf("\t\t.name = SVGA3D_TS_ADDRESSV\n"); + _debug_printf("\t\t.name = SVGA3D_TS_ADDRESSV\n"); break; case SVGA3D_TS_MIPFILTER: - debug_printf("\t\t.name = SVGA3D_TS_MIPFILTER\n"); + _debug_printf("\t\t.name = SVGA3D_TS_MIPFILTER\n"); break; case SVGA3D_TS_MAGFILTER: - debug_printf("\t\t.name = SVGA3D_TS_MAGFILTER\n"); + _debug_printf("\t\t.name = SVGA3D_TS_MAGFILTER\n"); break; case SVGA3D_TS_MINFILTER: - debug_printf("\t\t.name = SVGA3D_TS_MINFILTER\n"); + _debug_printf("\t\t.name = SVGA3D_TS_MINFILTER\n"); break; case SVGA3D_TS_BORDERCOLOR: - debug_printf("\t\t.name = SVGA3D_TS_BORDERCOLOR\n"); + _debug_printf("\t\t.name = SVGA3D_TS_BORDERCOLOR\n"); break; case SVGA3D_TS_TEXCOORDINDEX: - debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDINDEX\n"); + _debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDINDEX\n"); break; case SVGA3D_TS_TEXTURETRANSFORMFLAGS: - debug_printf("\t\t.name = SVGA3D_TS_TEXTURETRANSFORMFLAGS\n"); + _debug_printf("\t\t.name = SVGA3D_TS_TEXTURETRANSFORMFLAGS\n"); break; case SVGA3D_TS_TEXCOORDGEN: - debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDGEN\n"); + _debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDGEN\n"); break; case SVGA3D_TS_BUMPENVMAT00: - debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT00\n"); + _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT00\n"); break; case SVGA3D_TS_BUMPENVMAT01: - debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT01\n"); + _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT01\n"); break; case SVGA3D_TS_BUMPENVMAT10: - debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT10\n"); + _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT10\n"); break; case SVGA3D_TS_BUMPENVMAT11: - debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT11\n"); + _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT11\n"); break; case SVGA3D_TS_TEXTURE_MIPMAP_LEVEL: - debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_MIPMAP_LEVEL\n"); + _debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_MIPMAP_LEVEL\n"); break; case SVGA3D_TS_TEXTURE_LOD_BIAS: - debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_LOD_BIAS\n"); + _debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_LOD_BIAS\n"); break; case SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL: - debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL\n"); + _debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL\n"); break; case SVGA3D_TS_ADDRESSW: - debug_printf("\t\t.name = SVGA3D_TS_ADDRESSW\n"); + _debug_printf("\t\t.name = SVGA3D_TS_ADDRESSW\n"); break; case SVGA3D_TS_GAMMA: - debug_printf("\t\t.name = SVGA3D_TS_GAMMA\n"); + _debug_printf("\t\t.name = SVGA3D_TS_GAMMA\n"); break; case SVGA3D_TS_BUMPENVLSCALE: - debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLSCALE\n"); + _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLSCALE\n"); break; case SVGA3D_TS_BUMPENVLOFFSET: - debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLOFFSET\n"); + _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLOFFSET\n"); break; case SVGA3D_TS_COLORARG0: - debug_printf("\t\t.name = SVGA3D_TS_COLORARG0\n"); + _debug_printf("\t\t.name = SVGA3D_TS_COLORARG0\n"); break; case SVGA3D_TS_ALPHAARG0: - debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG0\n"); + _debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG0\n"); break; case SVGA3D_TS_MAX: - debug_printf("\t\t.name = SVGA3D_TS_MAX\n"); + _debug_printf("\t\t.name = SVGA3D_TS_MAX\n"); break; default: - debug_printf("\t\t.name = %i\n", (*cmd).name); + _debug_printf("\t\t.name = %i\n", (*cmd).name); break; } - debug_printf("\t\t.value = %u\n", (*cmd).value); - debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue); + _debug_printf("\t\t.value = %u\n", (*cmd).value); + _debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue); } static void dump_SVGA3dCopyBox(const SVGA3dCopyBox *cmd) { - debug_printf("\t\t.x = %u\n", (*cmd).x); - debug_printf("\t\t.y = %u\n", (*cmd).y); - debug_printf("\t\t.z = %u\n", (*cmd).z); - debug_printf("\t\t.w = %u\n", (*cmd).w); - debug_printf("\t\t.h = %u\n", (*cmd).h); - debug_printf("\t\t.d = %u\n", (*cmd).d); - debug_printf("\t\t.srcx = %u\n", (*cmd).srcx); - debug_printf("\t\t.srcy = %u\n", (*cmd).srcy); - debug_printf("\t\t.srcz = %u\n", (*cmd).srcz); + _debug_printf("\t\t.x = %u\n", (*cmd).x); + _debug_printf("\t\t.y = %u\n", (*cmd).y); + _debug_printf("\t\t.z = %u\n", (*cmd).z); + _debug_printf("\t\t.w = %u\n", (*cmd).w); + _debug_printf("\t\t.h = %u\n", (*cmd).h); + _debug_printf("\t\t.d = %u\n", (*cmd).d); + _debug_printf("\t\t.srcx = %u\n", (*cmd).srcx); + _debug_printf("\t\t.srcy = %u\n", (*cmd).srcy); + _debug_printf("\t\t.srcz = %u\n", (*cmd).srcz); } static void dump_SVGA3dCmdSetClipPlane(const SVGA3dCmdSetClipPlane *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); - debug_printf("\t\t.index = %u\n", (*cmd).index); - debug_printf("\t\t.plane[0] = %f\n", (*cmd).plane[0]); - debug_printf("\t\t.plane[1] = %f\n", (*cmd).plane[1]); - debug_printf("\t\t.plane[2] = %f\n", (*cmd).plane[2]); - debug_printf("\t\t.plane[3] = %f\n", (*cmd).plane[3]); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.index = %u\n", (*cmd).index); + _debug_printf("\t\t.plane[0] = %f\n", (*cmd).plane[0]); + _debug_printf("\t\t.plane[1] = %f\n", (*cmd).plane[1]); + _debug_printf("\t\t.plane[2] = %f\n", (*cmd).plane[2]); + _debug_printf("\t\t.plane[3] = %f\n", (*cmd).plane[3]); } static void dump_SVGA3dCmdWaitForQuery(const SVGA3dCmdWaitForQuery *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); switch((*cmd).type) { case SVGA3D_QUERYTYPE_OCCLUSION: - debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n"); + _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n"); break; case SVGA3D_QUERYTYPE_MAX: - debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n"); + _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n"); break; default: - debug_printf("\t\t.type = %i\n", (*cmd).type); + _debug_printf("\t\t.type = %i\n", (*cmd).type); break; } - debug_printf("\t\t.guestResult.gmrId = %u\n", (*cmd).guestResult.gmrId); - debug_printf("\t\t.guestResult.offset = %u\n", (*cmd).guestResult.offset); + _debug_printf("\t\t.guestResult.gmrId = %u\n", (*cmd).guestResult.gmrId); + _debug_printf("\t\t.guestResult.offset = %u\n", (*cmd).guestResult.offset); } static void dump_SVGA3dCmdSetRenderTarget(const SVGA3dCmdSetRenderTarget *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); switch((*cmd).type) { case SVGA3D_RT_DEPTH: - debug_printf("\t\t.type = SVGA3D_RT_DEPTH\n"); + _debug_printf("\t\t.type = SVGA3D_RT_DEPTH\n"); break; case SVGA3D_RT_STENCIL: - debug_printf("\t\t.type = SVGA3D_RT_STENCIL\n"); + _debug_printf("\t\t.type = SVGA3D_RT_STENCIL\n"); break; default: - debug_printf("\t\t.type = SVGA3D_RT_COLOR%u\n", (*cmd).type - SVGA3D_RT_COLOR0); + _debug_printf("\t\t.type = SVGA3D_RT_COLOR%u\n", (*cmd).type - SVGA3D_RT_COLOR0); break; } - debug_printf("\t\t.target.sid = %u\n", (*cmd).target.sid); - debug_printf("\t\t.target.face = %u\n", (*cmd).target.face); - debug_printf("\t\t.target.mipmap = %u\n", (*cmd).target.mipmap); + _debug_printf("\t\t.target.sid = %u\n", (*cmd).target.sid); + _debug_printf("\t\t.target.face = %u\n", (*cmd).target.face); + _debug_printf("\t\t.target.mipmap = %u\n", (*cmd).target.mipmap); } static void dump_SVGA3dCmdSetTextureState(const SVGA3dCmdSetTextureState *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); } static void dump_SVGA3dCmdSurfaceCopy(const SVGA3dCmdSurfaceCopy *cmd) { - debug_printf("\t\t.src.sid = %u\n", (*cmd).src.sid); - debug_printf("\t\t.src.face = %u\n", (*cmd).src.face); - debug_printf("\t\t.src.mipmap = %u\n", (*cmd).src.mipmap); - debug_printf("\t\t.dest.sid = %u\n", (*cmd).dest.sid); - debug_printf("\t\t.dest.face = %u\n", (*cmd).dest.face); - debug_printf("\t\t.dest.mipmap = %u\n", (*cmd).dest.mipmap); + _debug_printf("\t\t.src.sid = %u\n", (*cmd).src.sid); + _debug_printf("\t\t.src.face = %u\n", (*cmd).src.face); + _debug_printf("\t\t.src.mipmap = %u\n", (*cmd).src.mipmap); + _debug_printf("\t\t.dest.sid = %u\n", (*cmd).dest.sid); + _debug_printf("\t\t.dest.face = %u\n", (*cmd).dest.face); + _debug_printf("\t\t.dest.mipmap = %u\n", (*cmd).dest.mipmap); } static void dump_SVGA3dCmdSetMaterial(const SVGA3dCmdSetMaterial *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); switch((*cmd).face) { case SVGA3D_FACE_INVALID: - debug_printf("\t\t.face = SVGA3D_FACE_INVALID\n"); + _debug_printf("\t\t.face = SVGA3D_FACE_INVALID\n"); break; case SVGA3D_FACE_NONE: - debug_printf("\t\t.face = SVGA3D_FACE_NONE\n"); + _debug_printf("\t\t.face = SVGA3D_FACE_NONE\n"); break; case SVGA3D_FACE_FRONT: - debug_printf("\t\t.face = SVGA3D_FACE_FRONT\n"); + _debug_printf("\t\t.face = SVGA3D_FACE_FRONT\n"); break; case SVGA3D_FACE_BACK: - debug_printf("\t\t.face = SVGA3D_FACE_BACK\n"); + _debug_printf("\t\t.face = SVGA3D_FACE_BACK\n"); break; case SVGA3D_FACE_FRONT_BACK: - debug_printf("\t\t.face = SVGA3D_FACE_FRONT_BACK\n"); + _debug_printf("\t\t.face = SVGA3D_FACE_FRONT_BACK\n"); break; case SVGA3D_FACE_MAX: - debug_printf("\t\t.face = SVGA3D_FACE_MAX\n"); + _debug_printf("\t\t.face = SVGA3D_FACE_MAX\n"); break; default: - debug_printf("\t\t.face = %i\n", (*cmd).face); + _debug_printf("\t\t.face = %i\n", (*cmd).face); break; } - debug_printf("\t\t.material.diffuse[0] = %f\n", (*cmd).material.diffuse[0]); - debug_printf("\t\t.material.diffuse[1] = %f\n", (*cmd).material.diffuse[1]); - debug_printf("\t\t.material.diffuse[2] = %f\n", (*cmd).material.diffuse[2]); - debug_printf("\t\t.material.diffuse[3] = %f\n", (*cmd).material.diffuse[3]); - debug_printf("\t\t.material.ambient[0] = %f\n", (*cmd).material.ambient[0]); - debug_printf("\t\t.material.ambient[1] = %f\n", (*cmd).material.ambient[1]); - debug_printf("\t\t.material.ambient[2] = %f\n", (*cmd).material.ambient[2]); - debug_printf("\t\t.material.ambient[3] = %f\n", (*cmd).material.ambient[3]); - debug_printf("\t\t.material.specular[0] = %f\n", (*cmd).material.specular[0]); - debug_printf("\t\t.material.specular[1] = %f\n", (*cmd).material.specular[1]); - debug_printf("\t\t.material.specular[2] = %f\n", (*cmd).material.specular[2]); - debug_printf("\t\t.material.specular[3] = %f\n", (*cmd).material.specular[3]); - debug_printf("\t\t.material.emissive[0] = %f\n", (*cmd).material.emissive[0]); - debug_printf("\t\t.material.emissive[1] = %f\n", (*cmd).material.emissive[1]); - debug_printf("\t\t.material.emissive[2] = %f\n", (*cmd).material.emissive[2]); - debug_printf("\t\t.material.emissive[3] = %f\n", (*cmd).material.emissive[3]); - debug_printf("\t\t.material.shininess = %f\n", (*cmd).material.shininess); + _debug_printf("\t\t.material.diffuse[0] = %f\n", (*cmd).material.diffuse[0]); + _debug_printf("\t\t.material.diffuse[1] = %f\n", (*cmd).material.diffuse[1]); + _debug_printf("\t\t.material.diffuse[2] = %f\n", (*cmd).material.diffuse[2]); + _debug_printf("\t\t.material.diffuse[3] = %f\n", (*cmd).material.diffuse[3]); + _debug_printf("\t\t.material.ambient[0] = %f\n", (*cmd).material.ambient[0]); + _debug_printf("\t\t.material.ambient[1] = %f\n", (*cmd).material.ambient[1]); + _debug_printf("\t\t.material.ambient[2] = %f\n", (*cmd).material.ambient[2]); + _debug_printf("\t\t.material.ambient[3] = %f\n", (*cmd).material.ambient[3]); + _debug_printf("\t\t.material.specular[0] = %f\n", (*cmd).material.specular[0]); + _debug_printf("\t\t.material.specular[1] = %f\n", (*cmd).material.specular[1]); + _debug_printf("\t\t.material.specular[2] = %f\n", (*cmd).material.specular[2]); + _debug_printf("\t\t.material.specular[3] = %f\n", (*cmd).material.specular[3]); + _debug_printf("\t\t.material.emissive[0] = %f\n", (*cmd).material.emissive[0]); + _debug_printf("\t\t.material.emissive[1] = %f\n", (*cmd).material.emissive[1]); + _debug_printf("\t\t.material.emissive[2] = %f\n", (*cmd).material.emissive[2]); + _debug_printf("\t\t.material.emissive[3] = %f\n", (*cmd).material.emissive[3]); + _debug_printf("\t\t.material.shininess = %f\n", (*cmd).material.shininess); } static void dump_SVGA3dCmdSetLightData(const SVGA3dCmdSetLightData *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); - debug_printf("\t\t.index = %u\n", (*cmd).index); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.index = %u\n", (*cmd).index); switch((*cmd).data.type) { case SVGA3D_LIGHTTYPE_INVALID: - debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_INVALID\n"); + _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_INVALID\n"); break; case SVGA3D_LIGHTTYPE_POINT: - debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_POINT\n"); + _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_POINT\n"); break; case SVGA3D_LIGHTTYPE_SPOT1: - debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT1\n"); + _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT1\n"); break; case SVGA3D_LIGHTTYPE_SPOT2: - debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT2\n"); + _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT2\n"); break; case SVGA3D_LIGHTTYPE_DIRECTIONAL: - debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_DIRECTIONAL\n"); + _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_DIRECTIONAL\n"); break; case SVGA3D_LIGHTTYPE_MAX: - debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_MAX\n"); + _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_MAX\n"); break; default: - debug_printf("\t\t.data.type = %i\n", (*cmd).data.type); + _debug_printf("\t\t.data.type = %i\n", (*cmd).data.type); break; } - debug_printf("\t\t.data.inWorldSpace = %u\n", (*cmd).data.inWorldSpace); - debug_printf("\t\t.data.diffuse[0] = %f\n", (*cmd).data.diffuse[0]); - debug_printf("\t\t.data.diffuse[1] = %f\n", (*cmd).data.diffuse[1]); - debug_printf("\t\t.data.diffuse[2] = %f\n", (*cmd).data.diffuse[2]); - debug_printf("\t\t.data.diffuse[3] = %f\n", (*cmd).data.diffuse[3]); - debug_printf("\t\t.data.specular[0] = %f\n", (*cmd).data.specular[0]); - debug_printf("\t\t.data.specular[1] = %f\n", (*cmd).data.specular[1]); - debug_printf("\t\t.data.specular[2] = %f\n", (*cmd).data.specular[2]); - debug_printf("\t\t.data.specular[3] = %f\n", (*cmd).data.specular[3]); - debug_printf("\t\t.data.ambient[0] = %f\n", (*cmd).data.ambient[0]); - debug_printf("\t\t.data.ambient[1] = %f\n", (*cmd).data.ambient[1]); - debug_printf("\t\t.data.ambient[2] = %f\n", (*cmd).data.ambient[2]); - debug_printf("\t\t.data.ambient[3] = %f\n", (*cmd).data.ambient[3]); - debug_printf("\t\t.data.position[0] = %f\n", (*cmd).data.position[0]); - debug_printf("\t\t.data.position[1] = %f\n", (*cmd).data.position[1]); - debug_printf("\t\t.data.position[2] = %f\n", (*cmd).data.position[2]); - debug_printf("\t\t.data.position[3] = %f\n", (*cmd).data.position[3]); - debug_printf("\t\t.data.direction[0] = %f\n", (*cmd).data.direction[0]); - debug_printf("\t\t.data.direction[1] = %f\n", (*cmd).data.direction[1]); - debug_printf("\t\t.data.direction[2] = %f\n", (*cmd).data.direction[2]); - debug_printf("\t\t.data.direction[3] = %f\n", (*cmd).data.direction[3]); - debug_printf("\t\t.data.range = %f\n", (*cmd).data.range); - debug_printf("\t\t.data.falloff = %f\n", (*cmd).data.falloff); - debug_printf("\t\t.data.attenuation0 = %f\n", (*cmd).data.attenuation0); - debug_printf("\t\t.data.attenuation1 = %f\n", (*cmd).data.attenuation1); - debug_printf("\t\t.data.attenuation2 = %f\n", (*cmd).data.attenuation2); - debug_printf("\t\t.data.theta = %f\n", (*cmd).data.theta); - debug_printf("\t\t.data.phi = %f\n", (*cmd).data.phi); + _debug_printf("\t\t.data.inWorldSpace = %u\n", (*cmd).data.inWorldSpace); + _debug_printf("\t\t.data.diffuse[0] = %f\n", (*cmd).data.diffuse[0]); + _debug_printf("\t\t.data.diffuse[1] = %f\n", (*cmd).data.diffuse[1]); + _debug_printf("\t\t.data.diffuse[2] = %f\n", (*cmd).data.diffuse[2]); + _debug_printf("\t\t.data.diffuse[3] = %f\n", (*cmd).data.diffuse[3]); + _debug_printf("\t\t.data.specular[0] = %f\n", (*cmd).data.specular[0]); + _debug_printf("\t\t.data.specular[1] = %f\n", (*cmd).data.specular[1]); + _debug_printf("\t\t.data.specular[2] = %f\n", (*cmd).data.specular[2]); + _debug_printf("\t\t.data.specular[3] = %f\n", (*cmd).data.specular[3]); + _debug_printf("\t\t.data.ambient[0] = %f\n", (*cmd).data.ambient[0]); + _debug_printf("\t\t.data.ambient[1] = %f\n", (*cmd).data.ambient[1]); + _debug_printf("\t\t.data.ambient[2] = %f\n", (*cmd).data.ambient[2]); + _debug_printf("\t\t.data.ambient[3] = %f\n", (*cmd).data.ambient[3]); + _debug_printf("\t\t.data.position[0] = %f\n", (*cmd).data.position[0]); + _debug_printf("\t\t.data.position[1] = %f\n", (*cmd).data.position[1]); + _debug_printf("\t\t.data.position[2] = %f\n", (*cmd).data.position[2]); + _debug_printf("\t\t.data.position[3] = %f\n", (*cmd).data.position[3]); + _debug_printf("\t\t.data.direction[0] = %f\n", (*cmd).data.direction[0]); + _debug_printf("\t\t.data.direction[1] = %f\n", (*cmd).data.direction[1]); + _debug_printf("\t\t.data.direction[2] = %f\n", (*cmd).data.direction[2]); + _debug_printf("\t\t.data.direction[3] = %f\n", (*cmd).data.direction[3]); + _debug_printf("\t\t.data.range = %f\n", (*cmd).data.range); + _debug_printf("\t\t.data.falloff = %f\n", (*cmd).data.falloff); + _debug_printf("\t\t.data.attenuation0 = %f\n", (*cmd).data.attenuation0); + _debug_printf("\t\t.data.attenuation1 = %f\n", (*cmd).data.attenuation1); + _debug_printf("\t\t.data.attenuation2 = %f\n", (*cmd).data.attenuation2); + _debug_printf("\t\t.data.theta = %f\n", (*cmd).data.theta); + _debug_printf("\t\t.data.phi = %f\n", (*cmd).data.phi); } static void dump_SVGA3dCmdSetViewport(const SVGA3dCmdSetViewport *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); - debug_printf("\t\t.rect.x = %u\n", (*cmd).rect.x); - debug_printf("\t\t.rect.y = %u\n", (*cmd).rect.y); - debug_printf("\t\t.rect.w = %u\n", (*cmd).rect.w); - debug_printf("\t\t.rect.h = %u\n", (*cmd).rect.h); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.rect.x = %u\n", (*cmd).rect.x); + _debug_printf("\t\t.rect.y = %u\n", (*cmd).rect.y); + _debug_printf("\t\t.rect.w = %u\n", (*cmd).rect.w); + _debug_printf("\t\t.rect.h = %u\n", (*cmd).rect.h); } static void dump_SVGA3dCmdSetScissorRect(const SVGA3dCmdSetScissorRect *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); - debug_printf("\t\t.rect.x = %u\n", (*cmd).rect.x); - debug_printf("\t\t.rect.y = %u\n", (*cmd).rect.y); - debug_printf("\t\t.rect.w = %u\n", (*cmd).rect.w); - debug_printf("\t\t.rect.h = %u\n", (*cmd).rect.h); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.rect.x = %u\n", (*cmd).rect.x); + _debug_printf("\t\t.rect.y = %u\n", (*cmd).rect.y); + _debug_printf("\t\t.rect.w = %u\n", (*cmd).rect.w); + _debug_printf("\t\t.rect.h = %u\n", (*cmd).rect.h); } static void dump_SVGA3dCopyRect(const SVGA3dCopyRect *cmd) { - debug_printf("\t\t.x = %u\n", (*cmd).x); - debug_printf("\t\t.y = %u\n", (*cmd).y); - debug_printf("\t\t.w = %u\n", (*cmd).w); - debug_printf("\t\t.h = %u\n", (*cmd).h); - debug_printf("\t\t.srcx = %u\n", (*cmd).srcx); - debug_printf("\t\t.srcy = %u\n", (*cmd).srcy); + _debug_printf("\t\t.x = %u\n", (*cmd).x); + _debug_printf("\t\t.y = %u\n", (*cmd).y); + _debug_printf("\t\t.w = %u\n", (*cmd).w); + _debug_printf("\t\t.h = %u\n", (*cmd).h); + _debug_printf("\t\t.srcx = %u\n", (*cmd).srcx); + _debug_printf("\t\t.srcy = %u\n", (*cmd).srcy); } static void dump_SVGA3dCmdSetShader(const SVGA3dCmdSetShader *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); switch((*cmd).type) { case SVGA3D_SHADERTYPE_COMPILED_DX8: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); break; case SVGA3D_SHADERTYPE_VS: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); break; case SVGA3D_SHADERTYPE_PS: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); break; case SVGA3D_SHADERTYPE_MAX: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); break; default: - debug_printf("\t\t.type = %i\n", (*cmd).type); + _debug_printf("\t\t.type = %i\n", (*cmd).type); break; } - debug_printf("\t\t.shid = %u\n", (*cmd).shid); + _debug_printf("\t\t.shid = %u\n", (*cmd).shid); } static void dump_SVGA3dCmdEndQuery(const SVGA3dCmdEndQuery *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); switch((*cmd).type) { case SVGA3D_QUERYTYPE_OCCLUSION: - debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n"); + _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n"); break; case SVGA3D_QUERYTYPE_MAX: - debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n"); + _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n"); break; default: - debug_printf("\t\t.type = %i\n", (*cmd).type); + _debug_printf("\t\t.type = %i\n", (*cmd).type); break; } - debug_printf("\t\t.guestResult.gmrId = %u\n", (*cmd).guestResult.gmrId); - debug_printf("\t\t.guestResult.offset = %u\n", (*cmd).guestResult.offset); + _debug_printf("\t\t.guestResult.gmrId = %u\n", (*cmd).guestResult.gmrId); + _debug_printf("\t\t.guestResult.offset = %u\n", (*cmd).guestResult.offset); } static void dump_SVGA3dSize(const SVGA3dSize *cmd) { - debug_printf("\t\t.width = %u\n", (*cmd).width); - debug_printf("\t\t.height = %u\n", (*cmd).height); - debug_printf("\t\t.depth = %u\n", (*cmd).depth); + _debug_printf("\t\t.width = %u\n", (*cmd).width); + _debug_printf("\t\t.height = %u\n", (*cmd).height); + _debug_printf("\t\t.depth = %u\n", (*cmd).depth); } static void dump_SVGA3dCmdDestroySurface(const SVGA3dCmdDestroySurface *cmd) { - debug_printf("\t\t.sid = %u\n", (*cmd).sid); + _debug_printf("\t\t.sid = %u\n", (*cmd).sid); } static void dump_SVGA3dCmdDefineContext(const SVGA3dCmdDefineContext *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); } static void dump_SVGA3dRect(const SVGA3dRect *cmd) { - debug_printf("\t\t.x = %u\n", (*cmd).x); - debug_printf("\t\t.y = %u\n", (*cmd).y); - debug_printf("\t\t.w = %u\n", (*cmd).w); - debug_printf("\t\t.h = %u\n", (*cmd).h); + _debug_printf("\t\t.x = %u\n", (*cmd).x); + _debug_printf("\t\t.y = %u\n", (*cmd).y); + _debug_printf("\t\t.w = %u\n", (*cmd).w); + _debug_printf("\t\t.h = %u\n", (*cmd).h); } static void dump_SVGA3dCmdBeginQuery(const SVGA3dCmdBeginQuery *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); switch((*cmd).type) { case SVGA3D_QUERYTYPE_OCCLUSION: - debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n"); + _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n"); break; case SVGA3D_QUERYTYPE_MAX: - debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n"); + _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n"); break; default: - debug_printf("\t\t.type = %i\n", (*cmd).type); + _debug_printf("\t\t.type = %i\n", (*cmd).type); break; } } @@ -599,336 +599,336 @@ dump_SVGA3dRenderState(const SVGA3dRenderState *cmd) { switch((*cmd).state) { case SVGA3D_RS_INVALID: - debug_printf("\t\t.state = SVGA3D_RS_INVALID\n"); + _debug_printf("\t\t.state = SVGA3D_RS_INVALID\n"); break; case SVGA3D_RS_ZENABLE: - debug_printf("\t\t.state = SVGA3D_RS_ZENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_ZENABLE\n"); break; case SVGA3D_RS_ZWRITEENABLE: - debug_printf("\t\t.state = SVGA3D_RS_ZWRITEENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_ZWRITEENABLE\n"); break; case SVGA3D_RS_ALPHATESTENABLE: - debug_printf("\t\t.state = SVGA3D_RS_ALPHATESTENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_ALPHATESTENABLE\n"); break; case SVGA3D_RS_DITHERENABLE: - debug_printf("\t\t.state = SVGA3D_RS_DITHERENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_DITHERENABLE\n"); break; case SVGA3D_RS_BLENDENABLE: - debug_printf("\t\t.state = SVGA3D_RS_BLENDENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_BLENDENABLE\n"); break; case SVGA3D_RS_FOGENABLE: - debug_printf("\t\t.state = SVGA3D_RS_FOGENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_FOGENABLE\n"); break; case SVGA3D_RS_SPECULARENABLE: - debug_printf("\t\t.state = SVGA3D_RS_SPECULARENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_SPECULARENABLE\n"); break; case SVGA3D_RS_STENCILENABLE: - debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE\n"); break; case SVGA3D_RS_LIGHTINGENABLE: - debug_printf("\t\t.state = SVGA3D_RS_LIGHTINGENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_LIGHTINGENABLE\n"); break; case SVGA3D_RS_NORMALIZENORMALS: - debug_printf("\t\t.state = SVGA3D_RS_NORMALIZENORMALS\n"); + _debug_printf("\t\t.state = SVGA3D_RS_NORMALIZENORMALS\n"); break; case SVGA3D_RS_POINTSPRITEENABLE: - debug_printf("\t\t.state = SVGA3D_RS_POINTSPRITEENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_POINTSPRITEENABLE\n"); break; case SVGA3D_RS_POINTSCALEENABLE: - debug_printf("\t\t.state = SVGA3D_RS_POINTSCALEENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_POINTSCALEENABLE\n"); break; case SVGA3D_RS_STENCILREF: - debug_printf("\t\t.state = SVGA3D_RS_STENCILREF\n"); + _debug_printf("\t\t.state = SVGA3D_RS_STENCILREF\n"); break; case SVGA3D_RS_STENCILMASK: - debug_printf("\t\t.state = SVGA3D_RS_STENCILMASK\n"); + _debug_printf("\t\t.state = SVGA3D_RS_STENCILMASK\n"); break; case SVGA3D_RS_STENCILWRITEMASK: - debug_printf("\t\t.state = SVGA3D_RS_STENCILWRITEMASK\n"); + _debug_printf("\t\t.state = SVGA3D_RS_STENCILWRITEMASK\n"); break; case SVGA3D_RS_FOGSTART: - debug_printf("\t\t.state = SVGA3D_RS_FOGSTART\n"); + _debug_printf("\t\t.state = SVGA3D_RS_FOGSTART\n"); break; case SVGA3D_RS_FOGEND: - debug_printf("\t\t.state = SVGA3D_RS_FOGEND\n"); + _debug_printf("\t\t.state = SVGA3D_RS_FOGEND\n"); break; case SVGA3D_RS_FOGDENSITY: - debug_printf("\t\t.state = SVGA3D_RS_FOGDENSITY\n"); + _debug_printf("\t\t.state = SVGA3D_RS_FOGDENSITY\n"); break; case SVGA3D_RS_POINTSIZE: - debug_printf("\t\t.state = SVGA3D_RS_POINTSIZE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_POINTSIZE\n"); break; case SVGA3D_RS_POINTSIZEMIN: - debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMIN\n"); + _debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMIN\n"); break; case SVGA3D_RS_POINTSIZEMAX: - debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMAX\n"); + _debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMAX\n"); break; case SVGA3D_RS_POINTSCALE_A: - debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_A\n"); + _debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_A\n"); break; case SVGA3D_RS_POINTSCALE_B: - debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_B\n"); + _debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_B\n"); break; case SVGA3D_RS_POINTSCALE_C: - debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_C\n"); + _debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_C\n"); break; case SVGA3D_RS_FOGCOLOR: - debug_printf("\t\t.state = SVGA3D_RS_FOGCOLOR\n"); + _debug_printf("\t\t.state = SVGA3D_RS_FOGCOLOR\n"); break; case SVGA3D_RS_AMBIENT: - debug_printf("\t\t.state = SVGA3D_RS_AMBIENT\n"); + _debug_printf("\t\t.state = SVGA3D_RS_AMBIENT\n"); break; case SVGA3D_RS_CLIPPLANEENABLE: - debug_printf("\t\t.state = SVGA3D_RS_CLIPPLANEENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_CLIPPLANEENABLE\n"); break; case SVGA3D_RS_FOGMODE: - debug_printf("\t\t.state = SVGA3D_RS_FOGMODE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_FOGMODE\n"); break; case SVGA3D_RS_FILLMODE: - debug_printf("\t\t.state = SVGA3D_RS_FILLMODE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_FILLMODE\n"); break; case SVGA3D_RS_SHADEMODE: - debug_printf("\t\t.state = SVGA3D_RS_SHADEMODE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_SHADEMODE\n"); break; case SVGA3D_RS_LINEPATTERN: - debug_printf("\t\t.state = SVGA3D_RS_LINEPATTERN\n"); + _debug_printf("\t\t.state = SVGA3D_RS_LINEPATTERN\n"); break; case SVGA3D_RS_SRCBLEND: - debug_printf("\t\t.state = SVGA3D_RS_SRCBLEND\n"); + _debug_printf("\t\t.state = SVGA3D_RS_SRCBLEND\n"); break; case SVGA3D_RS_DSTBLEND: - debug_printf("\t\t.state = SVGA3D_RS_DSTBLEND\n"); + _debug_printf("\t\t.state = SVGA3D_RS_DSTBLEND\n"); break; case SVGA3D_RS_BLENDEQUATION: - debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATION\n"); + _debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATION\n"); break; case SVGA3D_RS_CULLMODE: - debug_printf("\t\t.state = SVGA3D_RS_CULLMODE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_CULLMODE\n"); break; case SVGA3D_RS_ZFUNC: - debug_printf("\t\t.state = SVGA3D_RS_ZFUNC\n"); + _debug_printf("\t\t.state = SVGA3D_RS_ZFUNC\n"); break; case SVGA3D_RS_ALPHAFUNC: - debug_printf("\t\t.state = SVGA3D_RS_ALPHAFUNC\n"); + _debug_printf("\t\t.state = SVGA3D_RS_ALPHAFUNC\n"); break; case SVGA3D_RS_STENCILFUNC: - debug_printf("\t\t.state = SVGA3D_RS_STENCILFUNC\n"); + _debug_printf("\t\t.state = SVGA3D_RS_STENCILFUNC\n"); break; case SVGA3D_RS_STENCILFAIL: - debug_printf("\t\t.state = SVGA3D_RS_STENCILFAIL\n"); + _debug_printf("\t\t.state = SVGA3D_RS_STENCILFAIL\n"); break; case SVGA3D_RS_STENCILZFAIL: - debug_printf("\t\t.state = SVGA3D_RS_STENCILZFAIL\n"); + _debug_printf("\t\t.state = SVGA3D_RS_STENCILZFAIL\n"); break; case SVGA3D_RS_STENCILPASS: - debug_printf("\t\t.state = SVGA3D_RS_STENCILPASS\n"); + _debug_printf("\t\t.state = SVGA3D_RS_STENCILPASS\n"); break; case SVGA3D_RS_ALPHAREF: - debug_printf("\t\t.state = SVGA3D_RS_ALPHAREF\n"); + _debug_printf("\t\t.state = SVGA3D_RS_ALPHAREF\n"); break; case SVGA3D_RS_FRONTWINDING: - debug_printf("\t\t.state = SVGA3D_RS_FRONTWINDING\n"); + _debug_printf("\t\t.state = SVGA3D_RS_FRONTWINDING\n"); break; case SVGA3D_RS_COORDINATETYPE: - debug_printf("\t\t.state = SVGA3D_RS_COORDINATETYPE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_COORDINATETYPE\n"); break; case SVGA3D_RS_ZBIAS: - debug_printf("\t\t.state = SVGA3D_RS_ZBIAS\n"); + _debug_printf("\t\t.state = SVGA3D_RS_ZBIAS\n"); break; case SVGA3D_RS_RANGEFOGENABLE: - debug_printf("\t\t.state = SVGA3D_RS_RANGEFOGENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_RANGEFOGENABLE\n"); break; case SVGA3D_RS_COLORWRITEENABLE: - debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE\n"); break; case SVGA3D_RS_VERTEXMATERIALENABLE: - debug_printf("\t\t.state = SVGA3D_RS_VERTEXMATERIALENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_VERTEXMATERIALENABLE\n"); break; case SVGA3D_RS_DIFFUSEMATERIALSOURCE: - debug_printf("\t\t.state = SVGA3D_RS_DIFFUSEMATERIALSOURCE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_DIFFUSEMATERIALSOURCE\n"); break; case SVGA3D_RS_SPECULARMATERIALSOURCE: - debug_printf("\t\t.state = SVGA3D_RS_SPECULARMATERIALSOURCE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_SPECULARMATERIALSOURCE\n"); break; case SVGA3D_RS_AMBIENTMATERIALSOURCE: - debug_printf("\t\t.state = SVGA3D_RS_AMBIENTMATERIALSOURCE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_AMBIENTMATERIALSOURCE\n"); break; case SVGA3D_RS_EMISSIVEMATERIALSOURCE: - debug_printf("\t\t.state = SVGA3D_RS_EMISSIVEMATERIALSOURCE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_EMISSIVEMATERIALSOURCE\n"); break; case SVGA3D_RS_TEXTUREFACTOR: - debug_printf("\t\t.state = SVGA3D_RS_TEXTUREFACTOR\n"); + _debug_printf("\t\t.state = SVGA3D_RS_TEXTUREFACTOR\n"); break; case SVGA3D_RS_LOCALVIEWER: - debug_printf("\t\t.state = SVGA3D_RS_LOCALVIEWER\n"); + _debug_printf("\t\t.state = SVGA3D_RS_LOCALVIEWER\n"); break; case SVGA3D_RS_SCISSORTESTENABLE: - debug_printf("\t\t.state = SVGA3D_RS_SCISSORTESTENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_SCISSORTESTENABLE\n"); break; case SVGA3D_RS_BLENDCOLOR: - debug_printf("\t\t.state = SVGA3D_RS_BLENDCOLOR\n"); + _debug_printf("\t\t.state = SVGA3D_RS_BLENDCOLOR\n"); break; case SVGA3D_RS_STENCILENABLE2SIDED: - debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE2SIDED\n"); + _debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE2SIDED\n"); break; case SVGA3D_RS_CCWSTENCILFUNC: - debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFUNC\n"); + _debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFUNC\n"); break; case SVGA3D_RS_CCWSTENCILFAIL: - debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFAIL\n"); + _debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFAIL\n"); break; case SVGA3D_RS_CCWSTENCILZFAIL: - debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILZFAIL\n"); + _debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILZFAIL\n"); break; case SVGA3D_RS_CCWSTENCILPASS: - debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILPASS\n"); + _debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILPASS\n"); break; case SVGA3D_RS_VERTEXBLEND: - debug_printf("\t\t.state = SVGA3D_RS_VERTEXBLEND\n"); + _debug_printf("\t\t.state = SVGA3D_RS_VERTEXBLEND\n"); break; case SVGA3D_RS_SLOPESCALEDEPTHBIAS: - debug_printf("\t\t.state = SVGA3D_RS_SLOPESCALEDEPTHBIAS\n"); + _debug_printf("\t\t.state = SVGA3D_RS_SLOPESCALEDEPTHBIAS\n"); break; case SVGA3D_RS_DEPTHBIAS: - debug_printf("\t\t.state = SVGA3D_RS_DEPTHBIAS\n"); + _debug_printf("\t\t.state = SVGA3D_RS_DEPTHBIAS\n"); break; case SVGA3D_RS_OUTPUTGAMMA: - debug_printf("\t\t.state = SVGA3D_RS_OUTPUTGAMMA\n"); + _debug_printf("\t\t.state = SVGA3D_RS_OUTPUTGAMMA\n"); break; case SVGA3D_RS_ZVISIBLE: - debug_printf("\t\t.state = SVGA3D_RS_ZVISIBLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_ZVISIBLE\n"); break; case SVGA3D_RS_LASTPIXEL: - debug_printf("\t\t.state = SVGA3D_RS_LASTPIXEL\n"); + _debug_printf("\t\t.state = SVGA3D_RS_LASTPIXEL\n"); break; case SVGA3D_RS_CLIPPING: - debug_printf("\t\t.state = SVGA3D_RS_CLIPPING\n"); + _debug_printf("\t\t.state = SVGA3D_RS_CLIPPING\n"); break; case SVGA3D_RS_WRAP0: - debug_printf("\t\t.state = SVGA3D_RS_WRAP0\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP0\n"); break; case SVGA3D_RS_WRAP1: - debug_printf("\t\t.state = SVGA3D_RS_WRAP1\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP1\n"); break; case SVGA3D_RS_WRAP2: - debug_printf("\t\t.state = SVGA3D_RS_WRAP2\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP2\n"); break; case SVGA3D_RS_WRAP3: - debug_printf("\t\t.state = SVGA3D_RS_WRAP3\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP3\n"); break; case SVGA3D_RS_WRAP4: - debug_printf("\t\t.state = SVGA3D_RS_WRAP4\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP4\n"); break; case SVGA3D_RS_WRAP5: - debug_printf("\t\t.state = SVGA3D_RS_WRAP5\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP5\n"); break; case SVGA3D_RS_WRAP6: - debug_printf("\t\t.state = SVGA3D_RS_WRAP6\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP6\n"); break; case SVGA3D_RS_WRAP7: - debug_printf("\t\t.state = SVGA3D_RS_WRAP7\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP7\n"); break; case SVGA3D_RS_WRAP8: - debug_printf("\t\t.state = SVGA3D_RS_WRAP8\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP8\n"); break; case SVGA3D_RS_WRAP9: - debug_printf("\t\t.state = SVGA3D_RS_WRAP9\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP9\n"); break; case SVGA3D_RS_WRAP10: - debug_printf("\t\t.state = SVGA3D_RS_WRAP10\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP10\n"); break; case SVGA3D_RS_WRAP11: - debug_printf("\t\t.state = SVGA3D_RS_WRAP11\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP11\n"); break; case SVGA3D_RS_WRAP12: - debug_printf("\t\t.state = SVGA3D_RS_WRAP12\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP12\n"); break; case SVGA3D_RS_WRAP13: - debug_printf("\t\t.state = SVGA3D_RS_WRAP13\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP13\n"); break; case SVGA3D_RS_WRAP14: - debug_printf("\t\t.state = SVGA3D_RS_WRAP14\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP14\n"); break; case SVGA3D_RS_WRAP15: - debug_printf("\t\t.state = SVGA3D_RS_WRAP15\n"); + _debug_printf("\t\t.state = SVGA3D_RS_WRAP15\n"); break; case SVGA3D_RS_MULTISAMPLEANTIALIAS: - debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEANTIALIAS\n"); + _debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEANTIALIAS\n"); break; case SVGA3D_RS_MULTISAMPLEMASK: - debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEMASK\n"); + _debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEMASK\n"); break; case SVGA3D_RS_INDEXEDVERTEXBLENDENABLE: - debug_printf("\t\t.state = SVGA3D_RS_INDEXEDVERTEXBLENDENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_INDEXEDVERTEXBLENDENABLE\n"); break; case SVGA3D_RS_TWEENFACTOR: - debug_printf("\t\t.state = SVGA3D_RS_TWEENFACTOR\n"); + _debug_printf("\t\t.state = SVGA3D_RS_TWEENFACTOR\n"); break; case SVGA3D_RS_ANTIALIASEDLINEENABLE: - debug_printf("\t\t.state = SVGA3D_RS_ANTIALIASEDLINEENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_ANTIALIASEDLINEENABLE\n"); break; case SVGA3D_RS_COLORWRITEENABLE1: - debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE1\n"); + _debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE1\n"); break; case SVGA3D_RS_COLORWRITEENABLE2: - debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE2\n"); + _debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE2\n"); break; case SVGA3D_RS_COLORWRITEENABLE3: - debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE3\n"); + _debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE3\n"); break; case SVGA3D_RS_SEPARATEALPHABLENDENABLE: - debug_printf("\t\t.state = SVGA3D_RS_SEPARATEALPHABLENDENABLE\n"); + _debug_printf("\t\t.state = SVGA3D_RS_SEPARATEALPHABLENDENABLE\n"); break; case SVGA3D_RS_SRCBLENDALPHA: - debug_printf("\t\t.state = SVGA3D_RS_SRCBLENDALPHA\n"); + _debug_printf("\t\t.state = SVGA3D_RS_SRCBLENDALPHA\n"); break; case SVGA3D_RS_DSTBLENDALPHA: - debug_printf("\t\t.state = SVGA3D_RS_DSTBLENDALPHA\n"); + _debug_printf("\t\t.state = SVGA3D_RS_DSTBLENDALPHA\n"); break; case SVGA3D_RS_BLENDEQUATIONALPHA: - debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATIONALPHA\n"); + _debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATIONALPHA\n"); break; case SVGA3D_RS_MAX: - debug_printf("\t\t.state = SVGA3D_RS_MAX\n"); + _debug_printf("\t\t.state = SVGA3D_RS_MAX\n"); break; default: - debug_printf("\t\t.state = %i\n", (*cmd).state); + _debug_printf("\t\t.state = %i\n", (*cmd).state); break; } - debug_printf("\t\t.uintValue = %u\n", (*cmd).uintValue); - debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue); + _debug_printf("\t\t.uintValue = %u\n", (*cmd).uintValue); + _debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue); } static void dump_SVGA3dVertexDivisor(const SVGA3dVertexDivisor *cmd) { - debug_printf("\t\t.value = %u\n", (*cmd).value); - debug_printf("\t\t.count = %u\n", (*cmd).count); - debug_printf("\t\t.indexedData = %u\n", (*cmd).indexedData); - debug_printf("\t\t.instanceData = %u\n", (*cmd).instanceData); + _debug_printf("\t\t.value = %u\n", (*cmd).value); + _debug_printf("\t\t.count = %u\n", (*cmd).count); + _debug_printf("\t\t.indexedData = %u\n", (*cmd).indexedData); + _debug_printf("\t\t.instanceData = %u\n", (*cmd).instanceData); } static void dump_SVGA3dCmdDefineShader(const SVGA3dCmdDefineShader *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); - debug_printf("\t\t.shid = %u\n", (*cmd).shid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.shid = %u\n", (*cmd).shid); switch((*cmd).type) { case SVGA3D_SHADERTYPE_COMPILED_DX8: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); break; case SVGA3D_SHADERTYPE_VS: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); break; case SVGA3D_SHADERTYPE_PS: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); break; case SVGA3D_SHADERTYPE_MAX: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); break; default: - debug_printf("\t\t.type = %i\n", (*cmd).type); + _debug_printf("\t\t.type = %i\n", (*cmd).type); break; } } @@ -936,53 +936,53 @@ dump_SVGA3dCmdDefineShader(const SVGA3dCmdDefineShader *cmd) static void dump_SVGA3dCmdSetShaderConst(const SVGA3dCmdSetShaderConst *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); - debug_printf("\t\t.reg = %u\n", (*cmd).reg); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.reg = %u\n", (*cmd).reg); switch((*cmd).type) { case SVGA3D_SHADERTYPE_COMPILED_DX8: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); break; case SVGA3D_SHADERTYPE_VS: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); break; case SVGA3D_SHADERTYPE_PS: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); break; case SVGA3D_SHADERTYPE_MAX: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); break; default: - debug_printf("\t\t.type = %i\n", (*cmd).type); + _debug_printf("\t\t.type = %i\n", (*cmd).type); break; } switch((*cmd).ctype) { case SVGA3D_CONST_TYPE_FLOAT: - debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_FLOAT\n"); - debug_printf("\t\t.values[0] = %f\n", *(const float *)&(*cmd).values[0]); - debug_printf("\t\t.values[1] = %f\n", *(const float *)&(*cmd).values[1]); - debug_printf("\t\t.values[2] = %f\n", *(const float *)&(*cmd).values[2]); - debug_printf("\t\t.values[3] = %f\n", *(const float *)&(*cmd).values[3]); + _debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_FLOAT\n"); + _debug_printf("\t\t.values[0] = %f\n", *(const float *)&(*cmd).values[0]); + _debug_printf("\t\t.values[1] = %f\n", *(const float *)&(*cmd).values[1]); + _debug_printf("\t\t.values[2] = %f\n", *(const float *)&(*cmd).values[2]); + _debug_printf("\t\t.values[3] = %f\n", *(const float *)&(*cmd).values[3]); break; case SVGA3D_CONST_TYPE_INT: - debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_INT\n"); - debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]); - debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]); - debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]); - debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]); + _debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_INT\n"); + _debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]); + _debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]); + _debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]); + _debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]); break; case SVGA3D_CONST_TYPE_BOOL: - debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_BOOL\n"); - debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]); - debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]); - debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]); - debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]); + _debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_BOOL\n"); + _debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]); + _debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]); + _debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]); + _debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]); break; default: - debug_printf("\t\t.ctype = %i\n", (*cmd).ctype); - debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]); - debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]); - debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]); - debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]); + _debug_printf("\t\t.ctype = %i\n", (*cmd).ctype); + _debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]); + _debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]); + _debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]); + _debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]); break; } } @@ -990,25 +990,25 @@ dump_SVGA3dCmdSetShaderConst(const SVGA3dCmdSetShaderConst *cmd) static void dump_SVGA3dCmdSetZRange(const SVGA3dCmdSetZRange *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); - debug_printf("\t\t.zRange.min = %f\n", (*cmd).zRange.min); - debug_printf("\t\t.zRange.max = %f\n", (*cmd).zRange.max); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.zRange.min = %f\n", (*cmd).zRange.min); + _debug_printf("\t\t.zRange.max = %f\n", (*cmd).zRange.max); } static void dump_SVGA3dCmdDrawPrimitives(const SVGA3dCmdDrawPrimitives *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); - debug_printf("\t\t.numVertexDecls = %u\n", (*cmd).numVertexDecls); - debug_printf("\t\t.numRanges = %u\n", (*cmd).numRanges); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.numVertexDecls = %u\n", (*cmd).numVertexDecls); + _debug_printf("\t\t.numRanges = %u\n", (*cmd).numRanges); } static void dump_SVGA3dCmdSetLightEnabled(const SVGA3dCmdSetLightEnabled *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); - debug_printf("\t\t.index = %u\n", (*cmd).index); - debug_printf("\t\t.enabled = %u\n", (*cmd).enabled); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.index = %u\n", (*cmd).index); + _debug_printf("\t\t.enabled = %u\n", (*cmd).enabled); } static void @@ -1016,86 +1016,86 @@ dump_SVGA3dPrimitiveRange(const SVGA3dPrimitiveRange *cmd) { switch((*cmd).primType) { case SVGA3D_PRIMITIVE_INVALID: - debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_INVALID\n"); + _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_INVALID\n"); break; case SVGA3D_PRIMITIVE_TRIANGLELIST: - debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLELIST\n"); + _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLELIST\n"); break; case SVGA3D_PRIMITIVE_POINTLIST: - debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_POINTLIST\n"); + _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_POINTLIST\n"); break; case SVGA3D_PRIMITIVE_LINELIST: - debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINELIST\n"); + _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINELIST\n"); break; case SVGA3D_PRIMITIVE_LINESTRIP: - debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINESTRIP\n"); + _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINESTRIP\n"); break; case SVGA3D_PRIMITIVE_TRIANGLESTRIP: - debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLESTRIP\n"); + _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLESTRIP\n"); break; case SVGA3D_PRIMITIVE_TRIANGLEFAN: - debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLEFAN\n"); + _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLEFAN\n"); break; case SVGA3D_PRIMITIVE_MAX: - debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_MAX\n"); + _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_MAX\n"); break; default: - debug_printf("\t\t.primType = %i\n", (*cmd).primType); + _debug_printf("\t\t.primType = %i\n", (*cmd).primType); break; } - debug_printf("\t\t.primitiveCount = %u\n", (*cmd).primitiveCount); - debug_printf("\t\t.indexArray.surfaceId = %u\n", (*cmd).indexArray.surfaceId); - debug_printf("\t\t.indexArray.offset = %u\n", (*cmd).indexArray.offset); - debug_printf("\t\t.indexArray.stride = %u\n", (*cmd).indexArray.stride); - debug_printf("\t\t.indexWidth = %u\n", (*cmd).indexWidth); - debug_printf("\t\t.indexBias = %i\n", (*cmd).indexBias); + _debug_printf("\t\t.primitiveCount = %u\n", (*cmd).primitiveCount); + _debug_printf("\t\t.indexArray.surfaceId = %u\n", (*cmd).indexArray.surfaceId); + _debug_printf("\t\t.indexArray.offset = %u\n", (*cmd).indexArray.offset); + _debug_printf("\t\t.indexArray.stride = %u\n", (*cmd).indexArray.stride); + _debug_printf("\t\t.indexWidth = %u\n", (*cmd).indexWidth); + _debug_printf("\t\t.indexBias = %i\n", (*cmd).indexBias); } static void dump_SVGA3dCmdPresent(const SVGA3dCmdPresent *cmd) { - debug_printf("\t\t.sid = %u\n", (*cmd).sid); + _debug_printf("\t\t.sid = %u\n", (*cmd).sid); } static void dump_SVGA3dCmdSetRenderState(const SVGA3dCmdSetRenderState *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); } static void dump_SVGA3dCmdSurfaceStretchBlt(const SVGA3dCmdSurfaceStretchBlt *cmd) { - debug_printf("\t\t.src.sid = %u\n", (*cmd).src.sid); - debug_printf("\t\t.src.face = %u\n", (*cmd).src.face); - debug_printf("\t\t.src.mipmap = %u\n", (*cmd).src.mipmap); - debug_printf("\t\t.dest.sid = %u\n", (*cmd).dest.sid); - debug_printf("\t\t.dest.face = %u\n", (*cmd).dest.face); - debug_printf("\t\t.dest.mipmap = %u\n", (*cmd).dest.mipmap); - debug_printf("\t\t.boxSrc.x = %u\n", (*cmd).boxSrc.x); - debug_printf("\t\t.boxSrc.y = %u\n", (*cmd).boxSrc.y); - debug_printf("\t\t.boxSrc.z = %u\n", (*cmd).boxSrc.z); - debug_printf("\t\t.boxSrc.w = %u\n", (*cmd).boxSrc.w); - debug_printf("\t\t.boxSrc.h = %u\n", (*cmd).boxSrc.h); - debug_printf("\t\t.boxSrc.d = %u\n", (*cmd).boxSrc.d); - debug_printf("\t\t.boxDest.x = %u\n", (*cmd).boxDest.x); - debug_printf("\t\t.boxDest.y = %u\n", (*cmd).boxDest.y); - debug_printf("\t\t.boxDest.z = %u\n", (*cmd).boxDest.z); - debug_printf("\t\t.boxDest.w = %u\n", (*cmd).boxDest.w); - debug_printf("\t\t.boxDest.h = %u\n", (*cmd).boxDest.h); - debug_printf("\t\t.boxDest.d = %u\n", (*cmd).boxDest.d); + _debug_printf("\t\t.src.sid = %u\n", (*cmd).src.sid); + _debug_printf("\t\t.src.face = %u\n", (*cmd).src.face); + _debug_printf("\t\t.src.mipmap = %u\n", (*cmd).src.mipmap); + _debug_printf("\t\t.dest.sid = %u\n", (*cmd).dest.sid); + _debug_printf("\t\t.dest.face = %u\n", (*cmd).dest.face); + _debug_printf("\t\t.dest.mipmap = %u\n", (*cmd).dest.mipmap); + _debug_printf("\t\t.boxSrc.x = %u\n", (*cmd).boxSrc.x); + _debug_printf("\t\t.boxSrc.y = %u\n", (*cmd).boxSrc.y); + _debug_printf("\t\t.boxSrc.z = %u\n", (*cmd).boxSrc.z); + _debug_printf("\t\t.boxSrc.w = %u\n", (*cmd).boxSrc.w); + _debug_printf("\t\t.boxSrc.h = %u\n", (*cmd).boxSrc.h); + _debug_printf("\t\t.boxSrc.d = %u\n", (*cmd).boxSrc.d); + _debug_printf("\t\t.boxDest.x = %u\n", (*cmd).boxDest.x); + _debug_printf("\t\t.boxDest.y = %u\n", (*cmd).boxDest.y); + _debug_printf("\t\t.boxDest.z = %u\n", (*cmd).boxDest.z); + _debug_printf("\t\t.boxDest.w = %u\n", (*cmd).boxDest.w); + _debug_printf("\t\t.boxDest.h = %u\n", (*cmd).boxDest.h); + _debug_printf("\t\t.boxDest.d = %u\n", (*cmd).boxDest.d); switch((*cmd).mode) { case SVGA3D_STRETCH_BLT_POINT: - debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_POINT\n"); + _debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_POINT\n"); break; case SVGA3D_STRETCH_BLT_LINEAR: - debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_LINEAR\n"); + _debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_LINEAR\n"); break; case SVGA3D_STRETCH_BLT_MAX: - debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_MAX\n"); + _debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_MAX\n"); break; default: - debug_printf("\t\t.mode = %i\n", (*cmd).mode); + _debug_printf("\t\t.mode = %i\n", (*cmd).mode); break; } } @@ -1103,21 +1103,21 @@ dump_SVGA3dCmdSurfaceStretchBlt(const SVGA3dCmdSurfaceStretchBlt *cmd) static void dump_SVGA3dCmdSurfaceDMA(const SVGA3dCmdSurfaceDMA *cmd) { - debug_printf("\t\t.guest.ptr.gmrId = %u\n", (*cmd).guest.ptr.gmrId); - debug_printf("\t\t.guest.ptr.offset = %u\n", (*cmd).guest.ptr.offset); - debug_printf("\t\t.guest.pitch = %u\n", (*cmd).guest.pitch); - debug_printf("\t\t.host.sid = %u\n", (*cmd).host.sid); - debug_printf("\t\t.host.face = %u\n", (*cmd).host.face); - debug_printf("\t\t.host.mipmap = %u\n", (*cmd).host.mipmap); + _debug_printf("\t\t.guest.ptr.gmrId = %u\n", (*cmd).guest.ptr.gmrId); + _debug_printf("\t\t.guest.ptr.offset = %u\n", (*cmd).guest.ptr.offset); + _debug_printf("\t\t.guest.pitch = %u\n", (*cmd).guest.pitch); + _debug_printf("\t\t.host.sid = %u\n", (*cmd).host.sid); + _debug_printf("\t\t.host.face = %u\n", (*cmd).host.face); + _debug_printf("\t\t.host.mipmap = %u\n", (*cmd).host.mipmap); switch((*cmd).transfer) { case SVGA3D_WRITE_HOST_VRAM: - debug_printf("\t\t.transfer = SVGA3D_WRITE_HOST_VRAM\n"); + _debug_printf("\t\t.transfer = SVGA3D_WRITE_HOST_VRAM\n"); break; case SVGA3D_READ_HOST_VRAM: - debug_printf("\t\t.transfer = SVGA3D_READ_HOST_VRAM\n"); + _debug_printf("\t\t.transfer = SVGA3D_READ_HOST_VRAM\n"); break; default: - debug_printf("\t\t.transfer = %i\n", (*cmd).transfer); + _debug_printf("\t\t.transfer = %i\n", (*cmd).transfer); break; } } @@ -1125,107 +1125,107 @@ dump_SVGA3dCmdSurfaceDMA(const SVGA3dCmdSurfaceDMA *cmd) static void dump_SVGA3dCmdSurfaceDMASuffix(const SVGA3dCmdSurfaceDMASuffix *cmd) { - debug_printf("\t\t.suffixSize = %u\n", (*cmd).suffixSize); - debug_printf("\t\t.maximumOffset = %u\n", (*cmd).maximumOffset); - debug_printf("\t\t.flags.discard = %u\n", (*cmd).flags.discard); - debug_printf("\t\t.flags.unsynchronized = %u\n", (*cmd).flags.unsynchronized); + _debug_printf("\t\t.suffixSize = %u\n", (*cmd).suffixSize); + _debug_printf("\t\t.maximumOffset = %u\n", (*cmd).maximumOffset); + _debug_printf("\t\t.flags.discard = %u\n", (*cmd).flags.discard); + _debug_printf("\t\t.flags.unsynchronized = %u\n", (*cmd).flags.unsynchronized); } static void dump_SVGA3dCmdSetTransform(const SVGA3dCmdSetTransform *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); switch((*cmd).type) { case SVGA3D_TRANSFORM_INVALID: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_INVALID\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_INVALID\n"); break; case SVGA3D_TRANSFORM_WORLD: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD\n"); break; case SVGA3D_TRANSFORM_VIEW: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_VIEW\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_VIEW\n"); break; case SVGA3D_TRANSFORM_PROJECTION: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_PROJECTION\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_PROJECTION\n"); break; case SVGA3D_TRANSFORM_TEXTURE0: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE0\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE0\n"); break; case SVGA3D_TRANSFORM_TEXTURE1: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE1\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE1\n"); break; case SVGA3D_TRANSFORM_TEXTURE2: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE2\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE2\n"); break; case SVGA3D_TRANSFORM_TEXTURE3: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE3\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE3\n"); break; case SVGA3D_TRANSFORM_TEXTURE4: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE4\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE4\n"); break; case SVGA3D_TRANSFORM_TEXTURE5: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE5\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE5\n"); break; case SVGA3D_TRANSFORM_TEXTURE6: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE6\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE6\n"); break; case SVGA3D_TRANSFORM_TEXTURE7: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE7\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE7\n"); break; case SVGA3D_TRANSFORM_WORLD1: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD1\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD1\n"); break; case SVGA3D_TRANSFORM_WORLD2: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD2\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD2\n"); break; case SVGA3D_TRANSFORM_WORLD3: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD3\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD3\n"); break; case SVGA3D_TRANSFORM_MAX: - debug_printf("\t\t.type = SVGA3D_TRANSFORM_MAX\n"); + _debug_printf("\t\t.type = SVGA3D_TRANSFORM_MAX\n"); break; default: - debug_printf("\t\t.type = %i\n", (*cmd).type); + _debug_printf("\t\t.type = %i\n", (*cmd).type); break; } - debug_printf("\t\t.matrix[0] = %f\n", (*cmd).matrix[0]); - debug_printf("\t\t.matrix[1] = %f\n", (*cmd).matrix[1]); - debug_printf("\t\t.matrix[2] = %f\n", (*cmd).matrix[2]); - debug_printf("\t\t.matrix[3] = %f\n", (*cmd).matrix[3]); - debug_printf("\t\t.matrix[4] = %f\n", (*cmd).matrix[4]); - debug_printf("\t\t.matrix[5] = %f\n", (*cmd).matrix[5]); - debug_printf("\t\t.matrix[6] = %f\n", (*cmd).matrix[6]); - debug_printf("\t\t.matrix[7] = %f\n", (*cmd).matrix[7]); - debug_printf("\t\t.matrix[8] = %f\n", (*cmd).matrix[8]); - debug_printf("\t\t.matrix[9] = %f\n", (*cmd).matrix[9]); - debug_printf("\t\t.matrix[10] = %f\n", (*cmd).matrix[10]); - debug_printf("\t\t.matrix[11] = %f\n", (*cmd).matrix[11]); - debug_printf("\t\t.matrix[12] = %f\n", (*cmd).matrix[12]); - debug_printf("\t\t.matrix[13] = %f\n", (*cmd).matrix[13]); - debug_printf("\t\t.matrix[14] = %f\n", (*cmd).matrix[14]); - debug_printf("\t\t.matrix[15] = %f\n", (*cmd).matrix[15]); + _debug_printf("\t\t.matrix[0] = %f\n", (*cmd).matrix[0]); + _debug_printf("\t\t.matrix[1] = %f\n", (*cmd).matrix[1]); + _debug_printf("\t\t.matrix[2] = %f\n", (*cmd).matrix[2]); + _debug_printf("\t\t.matrix[3] = %f\n", (*cmd).matrix[3]); + _debug_printf("\t\t.matrix[4] = %f\n", (*cmd).matrix[4]); + _debug_printf("\t\t.matrix[5] = %f\n", (*cmd).matrix[5]); + _debug_printf("\t\t.matrix[6] = %f\n", (*cmd).matrix[6]); + _debug_printf("\t\t.matrix[7] = %f\n", (*cmd).matrix[7]); + _debug_printf("\t\t.matrix[8] = %f\n", (*cmd).matrix[8]); + _debug_printf("\t\t.matrix[9] = %f\n", (*cmd).matrix[9]); + _debug_printf("\t\t.matrix[10] = %f\n", (*cmd).matrix[10]); + _debug_printf("\t\t.matrix[11] = %f\n", (*cmd).matrix[11]); + _debug_printf("\t\t.matrix[12] = %f\n", (*cmd).matrix[12]); + _debug_printf("\t\t.matrix[13] = %f\n", (*cmd).matrix[13]); + _debug_printf("\t\t.matrix[14] = %f\n", (*cmd).matrix[14]); + _debug_printf("\t\t.matrix[15] = %f\n", (*cmd).matrix[15]); } static void dump_SVGA3dCmdDestroyShader(const SVGA3dCmdDestroyShader *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); - debug_printf("\t\t.shid = %u\n", (*cmd).shid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.shid = %u\n", (*cmd).shid); switch((*cmd).type) { case SVGA3D_SHADERTYPE_COMPILED_DX8: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n"); break; case SVGA3D_SHADERTYPE_VS: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n"); break; case SVGA3D_SHADERTYPE_PS: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n"); break; case SVGA3D_SHADERTYPE_MAX: - debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); + _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n"); break; default: - debug_printf("\t\t.type = %i\n", (*cmd).type); + _debug_printf("\t\t.type = %i\n", (*cmd).type); break; } } @@ -1233,187 +1233,187 @@ dump_SVGA3dCmdDestroyShader(const SVGA3dCmdDestroyShader *cmd) static void dump_SVGA3dCmdDestroyContext(const SVGA3dCmdDestroyContext *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); } static void dump_SVGA3dCmdClear(const SVGA3dCmdClear *cmd) { - debug_printf("\t\t.cid = %u\n", (*cmd).cid); + _debug_printf("\t\t.cid = %u\n", (*cmd).cid); switch((*cmd).clearFlag) { case SVGA3D_CLEAR_COLOR: - debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_COLOR\n"); + _debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_COLOR\n"); break; case SVGA3D_CLEAR_DEPTH: - debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_DEPTH\n"); + _debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_DEPTH\n"); break; case SVGA3D_CLEAR_STENCIL: - debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_STENCIL\n"); + _debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_STENCIL\n"); break; default: - debug_printf("\t\t.clearFlag = %i\n", (*cmd).clearFlag); + _debug_printf("\t\t.clearFlag = %i\n", (*cmd).clearFlag); break; } - debug_printf("\t\t.color = %u\n", (*cmd).color); - debug_printf("\t\t.depth = %f\n", (*cmd).depth); - debug_printf("\t\t.stencil = %u\n", (*cmd).stencil); + _debug_printf("\t\t.color = %u\n", (*cmd).color); + _debug_printf("\t\t.depth = %f\n", (*cmd).depth); + _debug_printf("\t\t.stencil = %u\n", (*cmd).stencil); } static void dump_SVGA3dCmdDefineSurface(const SVGA3dCmdDefineSurface *cmd) { - debug_printf("\t\t.sid = %u\n", (*cmd).sid); + _debug_printf("\t\t.sid = %u\n", (*cmd).sid); switch((*cmd).surfaceFlags) { case SVGA3D_SURFACE_CUBEMAP: - debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_CUBEMAP\n"); + _debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_CUBEMAP\n"); break; case SVGA3D_SURFACE_HINT_STATIC: - debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_STATIC\n"); + _debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_STATIC\n"); break; case SVGA3D_SURFACE_HINT_DYNAMIC: - debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_DYNAMIC\n"); + _debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_DYNAMIC\n"); break; case SVGA3D_SURFACE_HINT_INDEXBUFFER: - debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_INDEXBUFFER\n"); + _debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_INDEXBUFFER\n"); break; case SVGA3D_SURFACE_HINT_VERTEXBUFFER: - debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_VERTEXBUFFER\n"); + _debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_VERTEXBUFFER\n"); break; default: - debug_printf("\t\t.surfaceFlags = %i\n", (*cmd).surfaceFlags); + _debug_printf("\t\t.surfaceFlags = %i\n", (*cmd).surfaceFlags); break; } switch((*cmd).format) { case SVGA3D_FORMAT_INVALID: - debug_printf("\t\t.format = SVGA3D_FORMAT_INVALID\n"); + _debug_printf("\t\t.format = SVGA3D_FORMAT_INVALID\n"); break; case SVGA3D_X8R8G8B8: - debug_printf("\t\t.format = SVGA3D_X8R8G8B8\n"); + _debug_printf("\t\t.format = SVGA3D_X8R8G8B8\n"); break; case SVGA3D_A8R8G8B8: - debug_printf("\t\t.format = SVGA3D_A8R8G8B8\n"); + _debug_printf("\t\t.format = SVGA3D_A8R8G8B8\n"); break; case SVGA3D_R5G6B5: - debug_printf("\t\t.format = SVGA3D_R5G6B5\n"); + _debug_printf("\t\t.format = SVGA3D_R5G6B5\n"); break; case SVGA3D_X1R5G5B5: - debug_printf("\t\t.format = SVGA3D_X1R5G5B5\n"); + _debug_printf("\t\t.format = SVGA3D_X1R5G5B5\n"); break; case SVGA3D_A1R5G5B5: - debug_printf("\t\t.format = SVGA3D_A1R5G5B5\n"); + _debug_printf("\t\t.format = SVGA3D_A1R5G5B5\n"); break; case SVGA3D_A4R4G4B4: - debug_printf("\t\t.format = SVGA3D_A4R4G4B4\n"); + _debug_printf("\t\t.format = SVGA3D_A4R4G4B4\n"); break; case SVGA3D_Z_D32: - debug_printf("\t\t.format = SVGA3D_Z_D32\n"); + _debug_printf("\t\t.format = SVGA3D_Z_D32\n"); break; case SVGA3D_Z_D16: - debug_printf("\t\t.format = SVGA3D_Z_D16\n"); + _debug_printf("\t\t.format = SVGA3D_Z_D16\n"); break; case SVGA3D_Z_D24S8: - debug_printf("\t\t.format = SVGA3D_Z_D24S8\n"); + _debug_printf("\t\t.format = SVGA3D_Z_D24S8\n"); break; case SVGA3D_Z_D15S1: - debug_printf("\t\t.format = SVGA3D_Z_D15S1\n"); + _debug_printf("\t\t.format = SVGA3D_Z_D15S1\n"); break; case SVGA3D_LUMINANCE8: - debug_printf("\t\t.format = SVGA3D_LUMINANCE8\n"); + _debug_printf("\t\t.format = SVGA3D_LUMINANCE8\n"); break; case SVGA3D_LUMINANCE4_ALPHA4: - debug_printf("\t\t.format = SVGA3D_LUMINANCE4_ALPHA4\n"); + _debug_printf("\t\t.format = SVGA3D_LUMINANCE4_ALPHA4\n"); break; case SVGA3D_LUMINANCE16: - debug_printf("\t\t.format = SVGA3D_LUMINANCE16\n"); + _debug_printf("\t\t.format = SVGA3D_LUMINANCE16\n"); break; case SVGA3D_LUMINANCE8_ALPHA8: - debug_printf("\t\t.format = SVGA3D_LUMINANCE8_ALPHA8\n"); + _debug_printf("\t\t.format = SVGA3D_LUMINANCE8_ALPHA8\n"); break; case SVGA3D_DXT1: - debug_printf("\t\t.format = SVGA3D_DXT1\n"); + _debug_printf("\t\t.format = SVGA3D_DXT1\n"); break; case SVGA3D_DXT2: - debug_printf("\t\t.format = SVGA3D_DXT2\n"); + _debug_printf("\t\t.format = SVGA3D_DXT2\n"); break; case SVGA3D_DXT3: - debug_printf("\t\t.format = SVGA3D_DXT3\n"); + _debug_printf("\t\t.format = SVGA3D_DXT3\n"); break; case SVGA3D_DXT4: - debug_printf("\t\t.format = SVGA3D_DXT4\n"); + _debug_printf("\t\t.format = SVGA3D_DXT4\n"); break; case SVGA3D_DXT5: - debug_printf("\t\t.format = SVGA3D_DXT5\n"); + _debug_printf("\t\t.format = SVGA3D_DXT5\n"); break; case SVGA3D_BUMPU8V8: - debug_printf("\t\t.format = SVGA3D_BUMPU8V8\n"); + _debug_printf("\t\t.format = SVGA3D_BUMPU8V8\n"); break; case SVGA3D_BUMPL6V5U5: - debug_printf("\t\t.format = SVGA3D_BUMPL6V5U5\n"); + _debug_printf("\t\t.format = SVGA3D_BUMPL6V5U5\n"); break; case SVGA3D_BUMPX8L8V8U8: - debug_printf("\t\t.format = SVGA3D_BUMPX8L8V8U8\n"); + _debug_printf("\t\t.format = SVGA3D_BUMPX8L8V8U8\n"); break; case SVGA3D_BUMPL8V8U8: - debug_printf("\t\t.format = SVGA3D_BUMPL8V8U8\n"); + _debug_printf("\t\t.format = SVGA3D_BUMPL8V8U8\n"); break; case SVGA3D_ARGB_S10E5: - debug_printf("\t\t.format = SVGA3D_ARGB_S10E5\n"); + _debug_printf("\t\t.format = SVGA3D_ARGB_S10E5\n"); break; case SVGA3D_ARGB_S23E8: - debug_printf("\t\t.format = SVGA3D_ARGB_S23E8\n"); + _debug_printf("\t\t.format = SVGA3D_ARGB_S23E8\n"); break; case SVGA3D_A2R10G10B10: - debug_printf("\t\t.format = SVGA3D_A2R10G10B10\n"); + _debug_printf("\t\t.format = SVGA3D_A2R10G10B10\n"); break; case SVGA3D_V8U8: - debug_printf("\t\t.format = SVGA3D_V8U8\n"); + _debug_printf("\t\t.format = SVGA3D_V8U8\n"); break; case SVGA3D_Q8W8V8U8: - debug_printf("\t\t.format = SVGA3D_Q8W8V8U8\n"); + _debug_printf("\t\t.format = SVGA3D_Q8W8V8U8\n"); break; case SVGA3D_CxV8U8: - debug_printf("\t\t.format = SVGA3D_CxV8U8\n"); + _debug_printf("\t\t.format = SVGA3D_CxV8U8\n"); break; case SVGA3D_X8L8V8U8: - debug_printf("\t\t.format = SVGA3D_X8L8V8U8\n"); + _debug_printf("\t\t.format = SVGA3D_X8L8V8U8\n"); break; case SVGA3D_A2W10V10U10: - debug_printf("\t\t.format = SVGA3D_A2W10V10U10\n"); + _debug_printf("\t\t.format = SVGA3D_A2W10V10U10\n"); break; case SVGA3D_ALPHA8: - debug_printf("\t\t.format = SVGA3D_ALPHA8\n"); + _debug_printf("\t\t.format = SVGA3D_ALPHA8\n"); break; case SVGA3D_R_S10E5: - debug_printf("\t\t.format = SVGA3D_R_S10E5\n"); + _debug_printf("\t\t.format = SVGA3D_R_S10E5\n"); break; case SVGA3D_R_S23E8: - debug_printf("\t\t.format = SVGA3D_R_S23E8\n"); + _debug_printf("\t\t.format = SVGA3D_R_S23E8\n"); break; case SVGA3D_RG_S10E5: - debug_printf("\t\t.format = SVGA3D_RG_S10E5\n"); + _debug_printf("\t\t.format = SVGA3D_RG_S10E5\n"); break; case SVGA3D_RG_S23E8: - debug_printf("\t\t.format = SVGA3D_RG_S23E8\n"); + _debug_printf("\t\t.format = SVGA3D_RG_S23E8\n"); break; case SVGA3D_BUFFER: - debug_printf("\t\t.format = SVGA3D_BUFFER\n"); + _debug_printf("\t\t.format = SVGA3D_BUFFER\n"); break; case SVGA3D_Z_D24X8: - debug_printf("\t\t.format = SVGA3D_Z_D24X8\n"); + _debug_printf("\t\t.format = SVGA3D_Z_D24X8\n"); break; case SVGA3D_FORMAT_MAX: - debug_printf("\t\t.format = SVGA3D_FORMAT_MAX\n"); + _debug_printf("\t\t.format = SVGA3D_FORMAT_MAX\n"); break; default: - debug_printf("\t\t.format = %i\n", (*cmd).format); + _debug_printf("\t\t.format = %i\n", (*cmd).format); break; } - debug_printf("\t\t.face[0].numMipLevels = %u\n", (*cmd).face[0].numMipLevels); - debug_printf("\t\t.face[1].numMipLevels = %u\n", (*cmd).face[1].numMipLevels); - debug_printf("\t\t.face[2].numMipLevels = %u\n", (*cmd).face[2].numMipLevels); - debug_printf("\t\t.face[3].numMipLevels = %u\n", (*cmd).face[3].numMipLevels); - debug_printf("\t\t.face[4].numMipLevels = %u\n", (*cmd).face[4].numMipLevels); - debug_printf("\t\t.face[5].numMipLevels = %u\n", (*cmd).face[5].numMipLevels); + _debug_printf("\t\t.face[0].numMipLevels = %u\n", (*cmd).face[0].numMipLevels); + _debug_printf("\t\t.face[1].numMipLevels = %u\n", (*cmd).face[1].numMipLevels); + _debug_printf("\t\t.face[2].numMipLevels = %u\n", (*cmd).face[2].numMipLevels); + _debug_printf("\t\t.face[3].numMipLevels = %u\n", (*cmd).face[3].numMipLevels); + _debug_printf("\t\t.face[4].numMipLevels = %u\n", (*cmd).face[4].numMipLevels); + _debug_printf("\t\t.face[5].numMipLevels = %u\n", (*cmd).face[5].numMipLevels); } @@ -1438,7 +1438,7 @@ svga_dump_commands(const void *commands, uint32_t size) switch(cmd_id) { case SVGA_3D_CMD_SURFACE_DEFINE: - debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n"); + _debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n"); { const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body; dump_SVGA3dCmdDefineSurface(cmd); @@ -1450,7 +1450,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_SURFACE_DESTROY: - debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n"); + _debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n"); { const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body; dump_SVGA3dCmdDestroySurface(cmd); @@ -1458,7 +1458,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_SURFACE_COPY: - debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n"); + _debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n"); { const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body; dump_SVGA3dCmdSurfaceCopy(cmd); @@ -1470,7 +1470,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_SURFACE_STRETCHBLT: - debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n"); + _debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n"); { const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body; dump_SVGA3dCmdSurfaceStretchBlt(cmd); @@ -1478,7 +1478,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_SURFACE_DMA: - debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n"); + _debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n"); { const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body; dump_SVGA3dCmdSurfaceDMA(cmd); @@ -1494,7 +1494,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_CONTEXT_DEFINE: - debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n"); + _debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n"); { const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body; dump_SVGA3dCmdDefineContext(cmd); @@ -1502,7 +1502,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_CONTEXT_DESTROY: - debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n"); + _debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n"); { const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body; dump_SVGA3dCmdDestroyContext(cmd); @@ -1510,7 +1510,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_SETTRANSFORM: - debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n"); + _debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n"); { const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body; dump_SVGA3dCmdSetTransform(cmd); @@ -1518,7 +1518,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_SETZRANGE: - debug_printf("\tSVGA_3D_CMD_SETZRANGE\n"); + _debug_printf("\tSVGA_3D_CMD_SETZRANGE\n"); { const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body; dump_SVGA3dCmdSetZRange(cmd); @@ -1526,7 +1526,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_SETRENDERSTATE: - debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n"); + _debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n"); { const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body; dump_SVGA3dCmdSetRenderState(cmd); @@ -1538,7 +1538,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_SETRENDERTARGET: - debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n"); + _debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n"); { const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body; dump_SVGA3dCmdSetRenderTarget(cmd); @@ -1546,7 +1546,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_SETTEXTURESTATE: - debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n"); + _debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n"); { const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body; dump_SVGA3dCmdSetTextureState(cmd); @@ -1558,7 +1558,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_SETMATERIAL: - debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n"); + _debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n"); { const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body; dump_SVGA3dCmdSetMaterial(cmd); @@ -1566,7 +1566,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_SETLIGHTDATA: - debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n"); + _debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n"); { const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body; dump_SVGA3dCmdSetLightData(cmd); @@ -1574,7 +1574,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_SETLIGHTENABLED: - debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n"); + _debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n"); { const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body; dump_SVGA3dCmdSetLightEnabled(cmd); @@ -1582,7 +1582,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_SETVIEWPORT: - debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n"); + _debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n"); { const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body; dump_SVGA3dCmdSetViewport(cmd); @@ -1590,7 +1590,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_SETCLIPPLANE: - debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n"); + _debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n"); { const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body; dump_SVGA3dCmdSetClipPlane(cmd); @@ -1598,7 +1598,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_CLEAR: - debug_printf("\tSVGA_3D_CMD_CLEAR\n"); + _debug_printf("\tSVGA_3D_CMD_CLEAR\n"); { const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body; dump_SVGA3dCmdClear(cmd); @@ -1610,7 +1610,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_PRESENT: - debug_printf("\tSVGA_3D_CMD_PRESENT\n"); + _debug_printf("\tSVGA_3D_CMD_PRESENT\n"); { const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body; dump_SVGA3dCmdPresent(cmd); @@ -1622,7 +1622,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_SHADER_DEFINE: - debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n"); + _debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n"); { const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body; dump_SVGA3dCmdDefineShader(cmd); @@ -1634,7 +1634,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_SHADER_DESTROY: - debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n"); + _debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n"); { const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body; dump_SVGA3dCmdDestroyShader(cmd); @@ -1642,7 +1642,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_SET_SHADER: - debug_printf("\tSVGA_3D_CMD_SET_SHADER\n"); + _debug_printf("\tSVGA_3D_CMD_SET_SHADER\n"); { const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body; dump_SVGA3dCmdSetShader(cmd); @@ -1650,7 +1650,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_SET_SHADER_CONST: - debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n"); + _debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n"); { const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body; dump_SVGA3dCmdSetShaderConst(cmd); @@ -1658,7 +1658,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_DRAW_PRIMITIVES: - debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n"); + _debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n"); { const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body; unsigned i, j; @@ -1679,7 +1679,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_SETSCISSORRECT: - debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n"); + _debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n"); { const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body; dump_SVGA3dCmdSetScissorRect(cmd); @@ -1687,7 +1687,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_BEGIN_QUERY: - debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n"); + _debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n"); { const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body; dump_SVGA3dCmdBeginQuery(cmd); @@ -1695,7 +1695,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_END_QUERY: - debug_printf("\tSVGA_3D_CMD_END_QUERY\n"); + _debug_printf("\tSVGA_3D_CMD_END_QUERY\n"); { const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body; dump_SVGA3dCmdEndQuery(cmd); @@ -1703,7 +1703,7 @@ svga_dump_commands(const void *commands, uint32_t size) } break; case SVGA_3D_CMD_WAIT_FOR_QUERY: - debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n"); + _debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n"); { const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body; dump_SVGA3dCmdWaitForQuery(cmd); @@ -1711,24 +1711,24 @@ svga_dump_commands(const void *commands, uint32_t size) } break; default: - debug_printf("\t0x%08x\n", cmd_id); + _debug_printf("\t0x%08x\n", cmd_id); break; } while(body + sizeof(uint32_t) <= next) { - debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); + _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); body += sizeof(uint32_t); } while(body + sizeof(uint32_t) <= next) - debug_printf("\t\t0x%02x\n", *body++); + _debug_printf("\t\t0x%02x\n", *body++); } else if(cmd_id == SVGA_CMD_FENCE) { - debug_printf("\tSVGA_CMD_FENCE\n"); - debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]); + _debug_printf("\tSVGA_CMD_FENCE\n"); + _debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]); next += 2*sizeof(uint32_t); } else { - debug_printf("\t0x%08x\n", cmd_id); + _debug_printf("\t0x%08x\n", cmd_id); next += sizeof(uint32_t); } } diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.py b/src/gallium/drivers/svga/svgadump/svga_dump.py index 288e753296..dc5f3267e2 100755 --- a/src/gallium/drivers/svga/svgadump/svga_dump.py +++ b/src/gallium/drivers/svga/svgadump/svga_dump.py @@ -71,14 +71,14 @@ class decl_dumper_t(decl_visitor.decl_visitor_t): print ' switch(%s) {' % ("(*cmd)" + self._instance,) for name, value in self.decl.values: print ' case %s:' % (name,) - print ' debug_printf("\\t\\t%s = %s\\n");' % (self._instance, name) + print ' _debug_printf("\\t\\t%s = %s\\n");' % (self._instance, name) print ' break;' print ' default:' - print ' debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance) + print ' _debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance) print ' break;' print ' }' else: - print ' debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance) + print ' _debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance) def dump_decl(instance, decl): @@ -154,7 +154,7 @@ class type_dumper_t(type_visitor.type_visitor_t): dump_decl(self.instance, decl) def print_instance(self, format): - print ' debug_printf("\\t\\t%s = %s\\n", %s);' % (self.instance, format, "(*cmd)" + self.instance) + print ' _debug_printf("\\t\\t%s = %s\\n", %s);' % (self.instance, format, "(*cmd)" + self.instance) def dump_type(instance, type_): @@ -230,7 +230,7 @@ svga_dump_commands(const void *commands, uint32_t size) indexes = 'ijklmn' for id, header, body, footer in cmds: print ' case %s:' % id - print ' debug_printf("\\t%s\\n");' % id + print ' _debug_printf("\\t%s\\n");' % id print ' {' print ' const %s *cmd = (const %s *)body;' % (header, header) if len(body): @@ -255,25 +255,25 @@ svga_dump_commands(const void *commands, uint32_t size) print ' }' print ' break;' print ' default:' - print ' debug_printf("\\t0x%08x\\n", cmd_id);' + print ' _debug_printf("\\t0x%08x\\n", cmd_id);' print ' break;' print ' }' print r''' while(body + sizeof(uint32_t) <= next) { - debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); + _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); body += sizeof(uint32_t); } while(body + sizeof(uint32_t) <= next) - debug_printf("\t\t0x%02x\n", *body++); + _debug_printf("\t\t0x%02x\n", *body++); } else if(cmd_id == SVGA_CMD_FENCE) { - debug_printf("\tSVGA_CMD_FENCE\n"); - debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]); + _debug_printf("\tSVGA_CMD_FENCE\n"); + _debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]); next += 2*sizeof(uint32_t); } else { - debug_printf("\t0x%08x\n", cmd_id); + _debug_printf("\t0x%08x\n", cmd_id); next += sizeof(uint32_t); } } diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c index b0e7fdf378..70e27d86d3 100644 --- a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c +++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c @@ -50,16 +50,16 @@ static void dump_op( struct sh_op op, const char *mnemonic ) assert( op.is_reg == 0 ); if (op.coissue) - debug_printf( "+" ); - debug_printf( "%s", mnemonic ); + _debug_printf( "+" ); + _debug_printf( "%s", mnemonic ); switch (op.control) { case 0: break; case SVGA3DOPCONT_PROJECT: - debug_printf( "p" ); + _debug_printf( "p" ); break; case SVGA3DOPCONT_BIAS: - debug_printf( "b" ); + _debug_printf( "b" ); break; default: assert( 0 ); @@ -72,28 +72,28 @@ static void dump_comp_op( struct sh_op op, const char *mnemonic ) assert( op.is_reg == 0 ); if (op.coissue) - debug_printf( "+" ); - debug_printf( "%s", mnemonic ); + _debug_printf( "+" ); + _debug_printf( "%s", mnemonic ); switch (op.control) { case SVGA3DOPCOMP_RESERVED0: break; case SVGA3DOPCOMP_GT: - debug_printf("_gt"); + _debug_printf("_gt"); break; case SVGA3DOPCOMP_EQ: - debug_printf("_eq"); + _debug_printf("_eq"); break; case SVGA3DOPCOMP_GE: - debug_printf("_ge"); + _debug_printf("_ge"); break; case SVGA3DOPCOMP_LT: - debug_printf("_lt"); + _debug_printf("_lt"); break; case SVGA3DOPCOMPC_NE: - debug_printf("_ne"); + _debug_printf("_ne"); break; case SVGA3DOPCOMP_LE: - debug_printf("_le"); + _debug_printf("_le"); break; case SVGA3DOPCOMP_RESERVED1: default: @@ -109,93 +109,93 @@ static void dump_reg( struct sh_reg reg, struct sh_srcreg *indreg, const struct switch (sh_reg_type( reg )) { case SVGA3DREG_TEMP: - debug_printf( "r%u", reg.number ); + _debug_printf( "r%u", reg.number ); break; case SVGA3DREG_INPUT: - debug_printf( "v%u", reg.number ); + _debug_printf( "v%u", reg.number ); break; case SVGA3DREG_CONST: if (reg.relative) { if (sh_srcreg_type( *indreg ) == SVGA3DREG_LOOP) - debug_printf( "c[aL+%u]", reg.number ); + _debug_printf( "c[aL+%u]", reg.number ); else - debug_printf( "c[a%u.x+%u]", indreg->number, reg.number ); + _debug_printf( "c[a%u.x+%u]", indreg->number, reg.number ); } else - debug_printf( "c%u", reg.number ); + _debug_printf( "c%u", reg.number ); break; case SVGA3DREG_ADDR: /* VS */ /* SVGA3DREG_TEXTURE */ /* PS */ if (di->is_ps) - debug_printf( "t%u", reg.number ); + _debug_printf( "t%u", reg.number ); else - debug_printf( "a%u", reg.number ); + _debug_printf( "a%u", reg.number ); break; case SVGA3DREG_RASTOUT: switch (reg.number) { case 0 /*POSITION*/: - debug_printf( "oPos" ); + _debug_printf( "oPos" ); break; case 1 /*FOG*/: - debug_printf( "oFog" ); + _debug_printf( "oFog" ); break; case 2 /*POINT_SIZE*/: - debug_printf( "oPts" ); + _debug_printf( "oPts" ); break; default: assert( 0 ); - debug_printf( "???" ); + _debug_printf( "???" ); } break; case SVGA3DREG_ATTROUT: assert( reg.number < 2 ); - debug_printf( "oD%u", reg.number ); + _debug_printf( "oD%u", reg.number ); break; case SVGA3DREG_TEXCRDOUT: /* SVGA3DREG_OUTPUT */ - debug_printf( "oT%u", reg.number ); + _debug_printf( "oT%u", reg.number ); break; case SVGA3DREG_COLOROUT: - debug_printf( "oC%u", reg.number ); + _debug_printf( "oC%u", reg.number ); break; case SVGA3DREG_DEPTHOUT: - debug_printf( "oD%u", reg.number ); + _debug_printf( "oD%u", reg.number ); break; case SVGA3DREG_SAMPLER: - debug_printf( "s%u", reg.number ); + _debug_printf( "s%u", reg.number ); break; case SVGA3DREG_CONSTBOOL: assert( !reg.relative ); - debug_printf( "b%u", reg.number ); + _debug_printf( "b%u", reg.number ); break; case SVGA3DREG_CONSTINT: assert( !reg.relative ); - debug_printf( "i%u", reg.number ); + _debug_printf( "i%u", reg.number ); break; case SVGA3DREG_LOOP: assert( reg.number == 0 ); - debug_printf( "aL" ); + _debug_printf( "aL" ); break; case SVGA3DREG_MISCTYPE: switch (reg.number) { case SVGA3DMISCREG_POSITION: - debug_printf( "vPos" ); + _debug_printf( "vPos" ); break; case SVGA3DMISCREG_FACE: - debug_printf( "vFace" ); + _debug_printf( "vFace" ); break; default: assert(0); @@ -204,46 +204,46 @@ static void dump_reg( struct sh_reg reg, struct sh_srcreg *indreg, const struct break; case SVGA3DREG_LABEL: - debug_printf( "l%u", reg.number ); + _debug_printf( "l%u", reg.number ); break; case SVGA3DREG_PREDICATE: - debug_printf( "p%u", reg.number ); + _debug_printf( "p%u", reg.number ); break; default: assert( 0 ); - debug_printf( "???" ); + _debug_printf( "???" ); } } static void dump_cdata( struct sh_cdata cdata ) { - debug_printf( "%f, %f, %f, %f", cdata.xyzw[0], cdata.xyzw[1], cdata.xyzw[2], cdata.xyzw[3] ); + _debug_printf( "%f, %f, %f, %f", cdata.xyzw[0], cdata.xyzw[1], cdata.xyzw[2], cdata.xyzw[3] ); } static void dump_idata( struct sh_idata idata ) { - debug_printf( "%d, %d, %d, %d", idata.xyzw[0], idata.xyzw[1], idata.xyzw[2], idata.xyzw[3] ); + _debug_printf( "%d, %d, %d, %d", idata.xyzw[0], idata.xyzw[1], idata.xyzw[2], idata.xyzw[3] ); } static void dump_bdata( boolean bdata ) { - debug_printf( bdata ? "TRUE" : "FALSE" ); + _debug_printf( bdata ? "TRUE" : "FALSE" ); } static void dump_sampleinfo( struct ps_sampleinfo sampleinfo ) { switch (sampleinfo.texture_type) { case SVGA3DSAMP_2D: - debug_printf( "_2d" ); + _debug_printf( "_2d" ); break; case SVGA3DSAMP_CUBE: - debug_printf( "_cube" ); + _debug_printf( "_cube" ); break; case SVGA3DSAMP_VOLUME: - debug_printf( "_volume" ); + _debug_printf( "_volume" ); break; default: assert( 0 ); @@ -255,46 +255,46 @@ static void dump_usageinfo( struct vs_semantic semantic ) { switch (semantic.usage) { case SVGA3D_DECLUSAGE_POSITION: - debug_printf("_position" ); + _debug_printf("_position" ); break; case SVGA3D_DECLUSAGE_BLENDWEIGHT: - debug_printf("_blendweight" ); + _debug_printf("_blendweight" ); break; case SVGA3D_DECLUSAGE_BLENDINDICES: - debug_printf("_blendindices" ); + _debug_printf("_blendindices" ); break; case SVGA3D_DECLUSAGE_NORMAL: - debug_printf("_normal" ); + _debug_printf("_normal" ); break; case SVGA3D_DECLUSAGE_PSIZE: - debug_printf("_psize" ); + _debug_printf("_psize" ); break; case SVGA3D_DECLUSAGE_TEXCOORD: - debug_printf("_texcoord"); + _debug_printf("_texcoord"); break; case SVGA3D_DECLUSAGE_TANGENT: - debug_printf("_tangent" ); + _debug_printf("_tangent" ); break; case SVGA3D_DECLUSAGE_BINORMAL: - debug_printf("_binormal" ); + _debug_printf("_binormal" ); break; case SVGA3D_DECLUSAGE_TESSFACTOR: - debug_printf("_tessfactor" ); + _debug_printf("_tessfactor" ); break; case SVGA3D_DECLUSAGE_POSITIONT: - debug_printf("_positiont" ); + _debug_printf("_positiont" ); break; case SVGA3D_DECLUSAGE_COLOR: - debug_printf("_color" ); + _debug_printf("_color" ); break; case SVGA3D_DECLUSAGE_FOG: - debug_printf("_fog" ); + _debug_printf("_fog" ); break; case SVGA3D_DECLUSAGE_DEPTH: - debug_printf("_depth" ); + _debug_printf("_depth" ); break; case SVGA3D_DECLUSAGE_SAMPLE: - debug_printf("_sample"); + _debug_printf("_sample"); break; default: assert( 0 ); @@ -302,7 +302,7 @@ static void dump_usageinfo( struct vs_semantic semantic ) } if (semantic.usage_index != 0) { - debug_printf("%d", semantic.usage_index ); + _debug_printf("%d", semantic.usage_index ); } } @@ -316,47 +316,47 @@ static void dump_dstreg( struct sh_dstreg dstreg, const struct dump_info *di ) assert( (dstreg.modifier & (SVGA3DDSTMOD_SATURATE | SVGA3DDSTMOD_PARTIALPRECISION)) == dstreg.modifier ); if (dstreg.modifier & SVGA3DDSTMOD_SATURATE) - debug_printf( "_sat" ); + _debug_printf( "_sat" ); if (dstreg.modifier & SVGA3DDSTMOD_PARTIALPRECISION) - debug_printf( "_pp" ); + _debug_printf( "_pp" ); switch (dstreg.shift_scale) { case 0: break; case 1: - debug_printf( "_x2" ); + _debug_printf( "_x2" ); break; case 2: - debug_printf( "_x4" ); + _debug_printf( "_x4" ); break; case 3: - debug_printf( "_x8" ); + _debug_printf( "_x8" ); break; case 13: - debug_printf( "_d8" ); + _debug_printf( "_d8" ); break; case 14: - debug_printf( "_d4" ); + _debug_printf( "_d4" ); break; case 15: - debug_printf( "_d2" ); + _debug_printf( "_d2" ); break; default: assert( 0 ); } - debug_printf( " " ); + _debug_printf( " " ); u.dstreg = dstreg; dump_reg( u.reg, NULL, di ); if (dstreg.write_mask != SVGA3DWRITEMASK_ALL) { - debug_printf( "." ); + _debug_printf( "." ); if (dstreg.write_mask & SVGA3DWRITEMASK_0) - debug_printf( "x" ); + _debug_printf( "x" ); if (dstreg.write_mask & SVGA3DWRITEMASK_1) - debug_printf( "y" ); + _debug_printf( "y" ); if (dstreg.write_mask & SVGA3DWRITEMASK_2) - debug_printf( "z" ); + _debug_printf( "z" ); if (dstreg.write_mask & SVGA3DWRITEMASK_3) - debug_printf( "w" ); + _debug_printf( "w" ); } } @@ -372,19 +372,19 @@ static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, cons case SVGA3DSRCMOD_BIASNEG: case SVGA3DSRCMOD_SIGNNEG: case SVGA3DSRCMOD_X2NEG: - debug_printf( "-" ); + _debug_printf( "-" ); break; case SVGA3DSRCMOD_ABS: - debug_printf( "|" ); + _debug_printf( "|" ); break; case SVGA3DSRCMOD_ABSNEG: - debug_printf( "-|" ); + _debug_printf( "-|" ); break; case SVGA3DSRCMOD_COMP: - debug_printf( "1-" ); + _debug_printf( "1-" ); break; case SVGA3DSRCMOD_NOT: - debug_printf( "!" ); + _debug_printf( "!" ); } u.srcreg = srcreg; @@ -397,39 +397,39 @@ static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, cons break; case SVGA3DSRCMOD_ABS: case SVGA3DSRCMOD_ABSNEG: - debug_printf( "|" ); + _debug_printf( "|" ); break; case SVGA3DSRCMOD_BIAS: case SVGA3DSRCMOD_BIASNEG: - debug_printf( "_bias" ); + _debug_printf( "_bias" ); break; case SVGA3DSRCMOD_SIGN: case SVGA3DSRCMOD_SIGNNEG: - debug_printf( "_bx2" ); + _debug_printf( "_bx2" ); break; case SVGA3DSRCMOD_X2: case SVGA3DSRCMOD_X2NEG: - debug_printf( "_x2" ); + _debug_printf( "_x2" ); break; case SVGA3DSRCMOD_DZ: - debug_printf( "_dz" ); + _debug_printf( "_dz" ); break; case SVGA3DSRCMOD_DW: - debug_printf( "_dw" ); + _debug_printf( "_dw" ); break; default: assert( 0 ); } if (srcreg.swizzle_x != 0 || srcreg.swizzle_y != 1 || srcreg.swizzle_z != 2 || srcreg.swizzle_w != 3) { - debug_printf( "." ); + _debug_printf( "." ); if (srcreg.swizzle_x == srcreg.swizzle_y && srcreg.swizzle_y == srcreg.swizzle_z && srcreg.swizzle_z == srcreg.swizzle_w) { - debug_printf( "%c", "xyzw"[srcreg.swizzle_x] ); + _debug_printf( "%c", "xyzw"[srcreg.swizzle_x] ); } else { - debug_printf( "%c", "xyzw"[srcreg.swizzle_x] ); - debug_printf( "%c", "xyzw"[srcreg.swizzle_y] ); - debug_printf( "%c", "xyzw"[srcreg.swizzle_z] ); - debug_printf( "%c", "xyzw"[srcreg.swizzle_w] ); + _debug_printf( "%c", "xyzw"[srcreg.swizzle_x] ); + _debug_printf( "%c", "xyzw"[srcreg.swizzle_y] ); + _debug_printf( "%c", "xyzw"[srcreg.swizzle_z] ); + _debug_printf( "%c", "xyzw"[srcreg.swizzle_w] ); } } } @@ -447,15 +447,15 @@ svga_shader_dump( if (do_binary) { for (i = 0; i < dwords; i++) - debug_printf(" 0x%08x,\n", assem[i]); + _debug_printf(" 0x%08x,\n", assem[i]); - debug_printf("\n\n"); + _debug_printf("\n\n"); } di.version.value = *assem++; di.is_ps = (di.version.type == SVGA3D_PS_TYPE); - debug_printf( + _debug_printf( "%s_%u_%u\n", di.is_ps ? "ps" : "vs", di.version.major, @@ -465,7 +465,7 @@ svga_shader_dump( struct sh_op op = *(struct sh_op *) assem; if (assem - start >= dwords) { - debug_printf("... ran off end of buffer\n"); + _debug_printf("... ran off end of buffer\n"); assert(0); return; } @@ -475,7 +475,7 @@ svga_shader_dump( { struct sh_dcl dcl = *(struct sh_dcl *) assem; - debug_printf( "dcl" ); + _debug_printf( "dcl" ); if (sh_dstreg_type( dcl.reg ) == SVGA3DREG_SAMPLER) dump_sampleinfo( dcl.u.ps.sampleinfo ); else if (di.is_ps) { @@ -486,7 +486,7 @@ svga_shader_dump( else dump_usageinfo( dcl.u.vs.semantic ); dump_dstreg( dcl.reg, &di ); - debug_printf( "\n" ); + _debug_printf( "\n" ); assem += sizeof( struct sh_dcl ) / sizeof( unsigned ); } break; @@ -495,11 +495,11 @@ svga_shader_dump( { struct sh_defb defb = *(struct sh_defb *) assem; - debug_printf( "defb " ); + _debug_printf( "defb " ); dump_reg( defb.reg, NULL, &di ); - debug_printf( ", " ); + _debug_printf( ", " ); dump_bdata( defb.data ); - debug_printf( "\n" ); + _debug_printf( "\n" ); assem += sizeof( struct sh_defb ) / sizeof( unsigned ); } break; @@ -508,11 +508,11 @@ svga_shader_dump( { struct sh_defi defi = *(struct sh_defi *) assem; - debug_printf( "defi " ); + _debug_printf( "defi " ); dump_reg( defi.reg, NULL, &di ); - debug_printf( ", " ); + _debug_printf( ", " ); dump_idata( defi.idata ); - debug_printf( "\n" ); + _debug_printf( "\n" ); assem += sizeof( struct sh_defi ) / sizeof( unsigned ); } break; @@ -528,11 +528,11 @@ svga_shader_dump( else { struct sh_unaryop unaryop = *(struct sh_unaryop *) assem; dump_dstreg( unaryop.dst, &di ); - debug_printf( ", " ); + _debug_printf( ", " ); dump_srcreg( unaryop.src, NULL, &di ); assem += sizeof( struct sh_unaryop ) / sizeof( unsigned ); } - debug_printf( "\n" ); + _debug_printf( "\n" ); break; case SVGA3DOP_TEX: @@ -549,7 +549,7 @@ svga_shader_dump( struct sh_unaryop unaryop = *(struct sh_unaryop *) assem; dump_dstreg( unaryop.dst, &di ); - debug_printf( ", " ); + _debug_printf( ", " ); dump_srcreg( unaryop.src, NULL, &di ); assem += sizeof( struct sh_unaryop ) / sizeof( unsigned ); } @@ -559,30 +559,30 @@ svga_shader_dump( dump_op( op, "texld" ); dump_dstreg( binaryop.dst, &di ); - debug_printf( ", " ); + _debug_printf( ", " ); dump_srcreg( binaryop.src0, NULL, &di ); - debug_printf( ", " ); + _debug_printf( ", " ); dump_srcreg( binaryop.src1, NULL, &di ); assem += sizeof( struct sh_binaryop ) / sizeof( unsigned ); } - debug_printf( "\n" ); + _debug_printf( "\n" ); break; case SVGA3DOP_DEF: { struct sh_def def = *(struct sh_def *) assem; - debug_printf( "def " ); + _debug_printf( "def " ); dump_reg( def.reg, NULL, &di ); - debug_printf( ", " ); + _debug_printf( ", " ); dump_cdata( def.cdata ); - debug_printf( "\n" ); + _debug_printf( "\n" ); assem += sizeof( struct sh_def ) / sizeof( unsigned ); } break; case SVGA3DOP_PHASE: - debug_printf( "phase\n" ); + _debug_printf( "phase\n" ); assem += sizeof( struct sh_op ) / sizeof( unsigned ); break; @@ -596,12 +596,12 @@ svga_shader_dump( break; case SVGA3DOP_RET: - debug_printf( "ret\n" ); + _debug_printf( "ret\n" ); assem += sizeof( struct sh_op ) / sizeof( unsigned ); break; case SVGA3DOP_END: - debug_printf( "end\n" ); + _debug_printf( "end\n" ); finished = TRUE; break; @@ -640,14 +640,14 @@ svga_shader_dump( } if (not_first_arg) - debug_printf( ", " ); + _debug_printf( ", " ); else - debug_printf( " " ); + _debug_printf( " " ); dump_srcreg( srcreg, &indreg, &di ); not_first_arg = TRUE; } - debug_printf( "\n" ); + _debug_printf( "\n" ); } } } -- cgit v1.2.3 From 5b1a7843f841b2bfdd54538a2eaad9dadae3e09d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 5 Dec 2009 06:34:59 +0000 Subject: svga: Dump SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN commands. --- src/gallium/drivers/svga/svgadump/svga_dump.c | 38 ++++++++++++++++++++++++++ src/gallium/drivers/svga/svgadump/svga_dump.py | 9 +++--- 2 files changed, 43 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c index 18e0eb5139..e6d4a74e86 100644 --- a/src/gallium/drivers/svga/svgadump/svga_dump.c +++ b/src/gallium/drivers/svga/svgadump/svga_dump.c @@ -1416,6 +1416,32 @@ dump_SVGA3dCmdDefineSurface(const SVGA3dCmdDefineSurface *cmd) _debug_printf("\t\t.face[5].numMipLevels = %u\n", (*cmd).face[5].numMipLevels); } +static void +dump_SVGASignedRect(const SVGASignedRect *cmd) +{ + _debug_printf("\t\t.left = %i\n", (*cmd).left); + _debug_printf("\t\t.top = %i\n", (*cmd).top); + _debug_printf("\t\t.right = %i\n", (*cmd).right); + _debug_printf("\t\t.bottom = %i\n", (*cmd).bottom); +} + +static void +dump_SVGA3dCmdBlitSurfaceToScreen(const SVGA3dCmdBlitSurfaceToScreen *cmd) +{ + _debug_printf("\t\t.srcImage.sid = %u\n", (*cmd).srcImage.sid); + _debug_printf("\t\t.srcImage.face = %u\n", (*cmd).srcImage.face); + _debug_printf("\t\t.srcImage.mipmap = %u\n", (*cmd).srcImage.mipmap); + _debug_printf("\t\t.srcRect.left = %i\n", (*cmd).srcRect.left); + _debug_printf("\t\t.srcRect.top = %i\n", (*cmd).srcRect.top); + _debug_printf("\t\t.srcRect.right = %i\n", (*cmd).srcRect.right); + _debug_printf("\t\t.srcRect.bottom = %i\n", (*cmd).srcRect.bottom); + _debug_printf("\t\t.destScreenId = %u\n", (*cmd).destScreenId); + _debug_printf("\t\t.destRect.left = %i\n", (*cmd).destRect.left); + _debug_printf("\t\t.destRect.top = %i\n", (*cmd).destRect.top); + _debug_printf("\t\t.destRect.right = %i\n", (*cmd).destRect.right); + _debug_printf("\t\t.destRect.bottom = %i\n", (*cmd).destRect.bottom); +} + void svga_dump_commands(const void *commands, uint32_t size) @@ -1710,6 +1736,18 @@ svga_dump_commands(const void *commands, uint32_t size) body = (const uint8_t *)&cmd[1]; } break; + case SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN: + _debug_printf("\tSVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN\n"); + { + const SVGA3dCmdBlitSurfaceToScreen *cmd = (const SVGA3dCmdBlitSurfaceToScreen *)body; + dump_SVGA3dCmdBlitSurfaceToScreen(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGASignedRect) <= next) { + dump_SVGASignedRect((const SVGASignedRect *)body); + body += sizeof(SVGASignedRect); + } + } + break; default: _debug_printf("\t0x%08x\n", cmd_id); break; diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.py b/src/gallium/drivers/svga/svgadump/svga_dump.py index dc5f3267e2..a1ada29ef8 100755 --- a/src/gallium/drivers/svga/svgadump/svga_dump.py +++ b/src/gallium/drivers/svga/svgadump/svga_dump.py @@ -202,6 +202,7 @@ cmds = [ ('SVGA_3D_CMD_END_QUERY', 'SVGA3dCmdEndQuery', (), None), ('SVGA_3D_CMD_WAIT_FOR_QUERY', 'SVGA3dCmdWaitForQuery', (), None), #('SVGA_3D_CMD_PRESENT_READBACK', None, (), None), + ('SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN', 'SVGA3dCmdBlitSurfaceToScreen', (), 'SVGASignedRect'), ] def dump_cmds(): @@ -294,18 +295,18 @@ def main(): print '#include "svga_shader_dump.h"' print '#include "svga3d_reg.h"' print - print '#include "pipe/p_debug.h"' + print '#include "util/u_debug.h"' print '#include "svga_dump.h"' print config = parser.config_t( - include_paths = ['include'], + include_paths = ['../../../include', '../include'], compiler = 'gcc', ) headers = [ - 'include/svga_types.h', - 'include/svga3d_reg.h', + 'svga_types.h', + 'svga3d_reg.h', ] decls = parser.parse(headers, config, parser.COMPILATION_MODE.ALL_AT_ONCE) -- cgit v1.2.3 From 56a4342a0493ad1d502d4791ab941ef171d36e60 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Sat, 5 Dec 2009 17:48:00 +0100 Subject: r300g: Need to emit a hardware scissor rectangle even if scissor is disabled. Just make it cover the whole framebuffer in that case. Otherwise the kernel CS checker may complain, e.g. running progs/demos/gearbox. That runs fast now here, but doesn't look right yet. --- src/gallium/drivers/r300/r300_context.h | 2 ++ src/gallium/drivers/r300/r300_emit.c | 9 +++++++-- src/gallium/drivers/r300/r300_state.c | 19 +++++++++++++++++++ 3 files changed, 28 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index dd3f6ac143..11cd9f855f 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -101,6 +101,8 @@ struct r300_sampler_state { struct r300_scissor_state { uint32_t scissor_top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */ uint32_t scissor_bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ + uint32_t no_scissor_top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */ + uint32_t no_scissor_bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ }; struct r300_texture_state { diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 60be03f54f..04dca29216 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -570,8 +570,13 @@ void r300_emit_scissor_state(struct r300_context* r300, BEGIN_CS(3); OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); - OUT_CS(scissor->scissor_top_left); - OUT_CS(scissor->scissor_bottom_right); + if (r300->rs_state->rs.scissor) { + OUT_CS(scissor->scissor_top_left); + OUT_CS(scissor->scissor_bottom_right); + } else { + OUT_CS(scissor->no_scissor_top_left); + OUT_CS(scissor->no_scissor_bottom_right); + } END_CS; } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 442af70e14..2bc2b79c02 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -302,6 +302,25 @@ static void r300->framebuffer_state = *state; r300->dirty_state |= R300_NEW_FRAMEBUFFERS; + + if (r300_screen(r300->context.screen)->caps->is_r500) { + r300->scissor_state->no_scissor_top_left = + (0 << R300_SCISSORS_X_SHIFT) | + (0 << R300_SCISSORS_Y_SHIFT); + r300->scissor_state->no_scissor_bottom_right = + ((state->width - 1) << R300_SCISSORS_X_SHIFT) | + ((state->height - 1) << R300_SCISSORS_Y_SHIFT); + } else { + /* Offset of 1440 in non-R500 chipsets. */ + r300->scissor_state->no_scissor_top_left = + ((0 + 1440) << R300_SCISSORS_X_SHIFT) | + ((0 + 1440) << R300_SCISSORS_Y_SHIFT); + r300->scissor_state->no_scissor_bottom_right = + (((state->width - 1) + 1440) << R300_SCISSORS_X_SHIFT) | + (((state->height - 1) + 1440) << R300_SCISSORS_Y_SHIFT); + } + + r300->dirty_state |= R300_NEW_SCISSOR; } /* Create fragment shader state. */ -- cgit v1.2.3 From e1380cae885df37d4a211d0271f59487d9f2db78 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 5 Dec 2009 19:17:20 +0100 Subject: r300g: remove redundant code and clean up --- src/gallium/drivers/r300/r300_context.h | 11 +++-- src/gallium/drivers/r300/r300_emit.c | 19 +++++---- src/gallium/drivers/r300/r300_state.c | 73 +++++++++++++++++---------------- 3 files changed, 57 insertions(+), 46 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 11cd9f855f..23ea32c57e 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -98,11 +98,14 @@ struct r300_sampler_state { unsigned min_lod, max_lod; }; +struct r300_scissor_regs { + uint32_t top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */ + uint32_t bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ +}; + struct r300_scissor_state { - uint32_t scissor_top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */ - uint32_t scissor_bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ - uint32_t no_scissor_top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */ - uint32_t no_scissor_bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ + struct r300_scissor_regs framebuffer; + struct r300_scissor_regs scissor; }; struct r300_texture_state { diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 04dca29216..dbf316a9b5 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -563,21 +563,26 @@ void r300_emit_rs_block_state(struct r300_context* r300, END_CS; } -void r300_emit_scissor_state(struct r300_context* r300, - struct r300_scissor_state* scissor) +static void r300_emit_scissor_regs(struct r300_context* r300, + struct r300_scissor_regs* scissor) { CS_LOCALS(r300); BEGIN_CS(3); OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); + OUT_CS(scissor->top_left); + OUT_CS(scissor->bottom_right); + END_CS; +} + +void r300_emit_scissor_state(struct r300_context* r300, + struct r300_scissor_state* scissor) +{ if (r300->rs_state->rs.scissor) { - OUT_CS(scissor->scissor_top_left); - OUT_CS(scissor->scissor_bottom_right); + r300_emit_scissor_regs(r300, &scissor->scissor); } else { - OUT_CS(scissor->no_scissor_top_left); - OUT_CS(scissor->no_scissor_bottom_right); + r300_emit_scissor_regs(r300, &scissor->framebuffer); } - END_CS; } void r300_emit_texture(struct r300_context* r300, diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 2bc2b79c02..d3233557ce 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -289,11 +289,34 @@ static void r300_set_edgeflags(struct pipe_context* pipe, /* XXX and even worse, I have no idea WTF the bitfield is */ } +static void r300_set_scissor_regs(const struct pipe_scissor_state* state, + struct r300_scissor_regs *scissor, + boolean is_r500) +{ + if (is_r500) { + scissor->top_left = + (state->minx << R300_SCISSORS_X_SHIFT) | + (state->miny << R300_SCISSORS_Y_SHIFT); + scissor->bottom_right = + ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) | + ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT); + } else { + /* Offset of 1440 in non-R500 chipsets. */ + scissor->top_left = + ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) | + ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT); + scissor->bottom_right = + (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | + (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); + } +} + static void r300_set_framebuffer_state(struct pipe_context* pipe, const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); + struct pipe_scissor_state scissor; if (r300->draw) { draw_flush(r300->draw); @@ -301,26 +324,17 @@ static void r300->framebuffer_state = *state; - r300->dirty_state |= R300_NEW_FRAMEBUFFERS; + scissor.minx = scissor.miny = 0; + scissor.maxx = state->width; + scissor.maxy = state->height; + r300_set_scissor_regs(&scissor, &r300->scissor_state->framebuffer, + r300_screen(r300->context.screen)->caps->is_r500); - if (r300_screen(r300->context.screen)->caps->is_r500) { - r300->scissor_state->no_scissor_top_left = - (0 << R300_SCISSORS_X_SHIFT) | - (0 << R300_SCISSORS_Y_SHIFT); - r300->scissor_state->no_scissor_bottom_right = - ((state->width - 1) << R300_SCISSORS_X_SHIFT) | - ((state->height - 1) << R300_SCISSORS_Y_SHIFT); - } else { - /* Offset of 1440 in non-R500 chipsets. */ - r300->scissor_state->no_scissor_top_left = - ((0 + 1440) << R300_SCISSORS_X_SHIFT) | - ((0 + 1440) << R300_SCISSORS_Y_SHIFT); - r300->scissor_state->no_scissor_bottom_right = - (((state->width - 1) + 1440) << R300_SCISSORS_X_SHIFT) | - (((state->height - 1) + 1440) << R300_SCISSORS_Y_SHIFT); + /* Don't rely on the order of states being set for the first time. */ + if (!r300->rs_state || !r300->rs_state->rs.scissor) { + r300->dirty_state |= R300_NEW_SCISSOR; } - - r300->dirty_state |= R300_NEW_SCISSOR; + r300->dirty_state |= R300_NEW_FRAMEBUFFERS; } /* Create fragment shader state. */ @@ -642,24 +656,13 @@ static void r300_set_scissor_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - if (r300_screen(r300->context.screen)->caps->is_r500) { - r300->scissor_state->scissor_top_left = - (state->minx << R300_SCISSORS_X_SHIFT) | - (state->miny << R300_SCISSORS_Y_SHIFT); - r300->scissor_state->scissor_bottom_right = - ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) | - ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT); - } else { - /* Offset of 1440 in non-R500 chipsets. */ - r300->scissor_state->scissor_top_left = - ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) | - ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT); - r300->scissor_state->scissor_bottom_right = - (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | - (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); - } + r300_set_scissor_regs(state, &r300->scissor_state->scissor, + r300_screen(r300->context.screen)->caps->is_r500); - r300->dirty_state |= R300_NEW_SCISSOR; + /* Don't rely on the order of states being set for the first time. */ + if (!r300->rs_state || r300->rs_state->rs.scissor) { + r300->dirty_state |= R300_NEW_SCISSOR; + } } static void r300_set_viewport_state(struct pipe_context* pipe, -- cgit v1.2.3 From 07487643515edb731c6abc3e931c329a89dd9293 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 5 Dec 2009 20:39:11 +0100 Subject: r300g: don't render if everything is culled by scissoring Otherwise a CS is refused by kernel 2.6.31 (and maybe all later versions, not sure). --- src/gallium/drivers/r300/r300_context.h | 3 +++ src/gallium/drivers/r300/r300_render.c | 23 +++++++++++++++++++++++ src/gallium/drivers/r300/r300_state.c | 3 +++ 3 files changed, 29 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 23ea32c57e..0be190392a 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -101,6 +101,9 @@ struct r300_sampler_state { struct r300_scissor_regs { uint32_t top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */ uint32_t bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ + + /* Whether everything is culled by scissoring. */ + boolean empty_area; }; struct r300_scissor_state { diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 4c5fb405c6..35b335df6a 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -70,6 +70,12 @@ uint32_t r300_translate_primitive(unsigned prim) } } +static boolean r300_nothing_to_draw(struct r300_context *r300) +{ + return r300->rs_state->rs.scissor && + r300->scissor_state->scissor.empty_area; +} + static void r300_emit_draw_arrays(struct r300_context *r300, unsigned mode, unsigned count) @@ -173,10 +179,15 @@ boolean r300_draw_range_elements(struct pipe_context* pipe, return FALSE; } + if (count > 65535) { return FALSE; } + if (r300_nothing_to_draw(r300)) { + return TRUE; + } + r300_update_derived_state(r300); if (!r300_setup_vertex_buffers(r300)) { @@ -218,6 +229,10 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, return FALSE; } + if (r300_nothing_to_draw(r300)) { + return TRUE; + } + r300_update_derived_state(r300); if (!r300_setup_vertex_buffers(r300)) { @@ -251,6 +266,10 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, return FALSE; } + if (r300_nothing_to_draw(r300)) { + return TRUE; + } + for (i = 0; i < r300->vertex_buffer_count; i++) { void* buf = pipe_buffer_map(pipe->screen, r300->vertex_buffer[i].buffer, @@ -292,6 +311,10 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, return FALSE; } + if (r300_nothing_to_draw(r300)) { + return TRUE; + } + for (i = 0; i < r300->vertex_buffer_count; i++) { void* buf = pipe_buffer_map(pipe->screen, r300->vertex_buffer[i].buffer, diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index d3233557ce..8ef0b3b268 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -309,6 +309,9 @@ static void r300_set_scissor_regs(const struct pipe_scissor_state* state, (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); } + + scissor->empty_area = state->minx >= state->maxx || + state->miny >= state->maxy; } static void -- cgit v1.2.3 From c574f515f0aa20ccc3841cf61a6124bc5996e7b2 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sun, 6 Dec 2009 12:26:55 -0500 Subject: nouveau: Work around nv04-nv40 miptrees not matching nouveau_miptree. Thanks to Bob Gleitsmann for the patch. I'll clean this up in a better way later if noone else beats me to it. --- src/gallium/drivers/nv04/nv04_miptree.c | 3 ++- src/gallium/drivers/nv04/nv04_state.h | 1 + src/gallium/drivers/nv10/nv10_miptree.c | 2 ++ src/gallium/drivers/nv10/nv10_state.h | 1 + src/gallium/drivers/nv20/nv20_miptree.c | 2 ++ src/gallium/drivers/nv20/nv20_state.h | 1 + src/gallium/drivers/nv30/nv30_miptree.c | 2 ++ src/gallium/drivers/nv30/nv30_state.h | 1 + src/gallium/drivers/nv40/nv40_miptree.c | 5 ++++- src/gallium/drivers/nv40/nv40_state.h | 1 + 10 files changed, 17 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c index eeab6dfa30..e0a6948aeb 100644 --- a/src/gallium/drivers/nv04/nv04_miptree.c +++ b/src/gallium/drivers/nv04/nv04_miptree.c @@ -55,7 +55,7 @@ nv04_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) FREE(mt); return NULL; } - + mt->bo = nouveau_bo(mt->buffer); return &mt->base; } @@ -81,6 +81,7 @@ nv04_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); pipe_buffer_reference(&mt->buffer, pb); + mt->bo = nouveau_bo(mt->buffer); return &mt->base; } diff --git a/src/gallium/drivers/nv04/nv04_state.h b/src/gallium/drivers/nv04/nv04_state.h index 399f750dbe..81d1d2ebaa 100644 --- a/src/gallium/drivers/nv04/nv04_state.h +++ b/src/gallium/drivers/nv04/nv04_state.h @@ -31,6 +31,7 @@ struct nv04_rasterizer_state { struct nv04_miptree { struct pipe_texture base; + struct nouveau_bo *bo; struct pipe_buffer *buffer; uint total_size; diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c index 439beeccc3..6a52b6af36 100644 --- a/src/gallium/drivers/nv10/nv10_miptree.c +++ b/src/gallium/drivers/nv10/nv10_miptree.c @@ -67,6 +67,7 @@ nv10_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); pipe_buffer_reference(&mt->buffer, pb); + mt->bo = nouveau_bo(mt->buffer); return &mt->base; } @@ -90,6 +91,7 @@ nv10_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) FREE(mt); return NULL; } + mt->bo = nouveau_bo(mt->buffer); return &mt->base; } diff --git a/src/gallium/drivers/nv10/nv10_state.h b/src/gallium/drivers/nv10/nv10_state.h index 3a3fd0d4f4..2524ac02e2 100644 --- a/src/gallium/drivers/nv10/nv10_state.h +++ b/src/gallium/drivers/nv10/nv10_state.h @@ -126,6 +126,7 @@ struct nv10_depth_stencil_alpha_state { struct nv10_miptree { struct pipe_texture base; + struct nouveau_bo *bo; struct pipe_buffer *buffer; uint total_size; diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c index 2bde9fb75b..e2e01bd849 100644 --- a/src/gallium/drivers/nv20/nv20_miptree.c +++ b/src/gallium/drivers/nv20/nv20_miptree.c @@ -77,6 +77,7 @@ nv20_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); pipe_buffer_reference(&mt->buffer, pb); + mt->bo = nouveau_bo(mt->buffer); return &mt->base; } @@ -132,6 +133,7 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) FREE(mt); return NULL; } + mt->bo = nouveau_bo(mt->buffer); return &mt->base; } diff --git a/src/gallium/drivers/nv20/nv20_state.h b/src/gallium/drivers/nv20/nv20_state.h index 34f402fdcb..dde4106568 100644 --- a/src/gallium/drivers/nv20/nv20_state.h +++ b/src/gallium/drivers/nv20/nv20_state.h @@ -126,6 +126,7 @@ struct nv20_depth_stencil_alpha_state { struct nv20_miptree { struct pipe_texture base; + struct nouveau_bo *bo; struct pipe_buffer *buffer; uint total_size; diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index 9e50a7cf6b..920fe64c32 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -115,6 +115,7 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) FREE(mt); return NULL; } + mt->bo = nouveau_bo(mt->buffer); return &mt->base; } @@ -144,6 +145,7 @@ nv30_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; pipe_buffer_reference(&mt->buffer, pb); + mt->bo = nouveau_bo(mt->buffer); return &mt->base; } diff --git a/src/gallium/drivers/nv30/nv30_state.h b/src/gallium/drivers/nv30/nv30_state.h index e6f23bf166..e42e872de7 100644 --- a/src/gallium/drivers/nv30/nv30_state.h +++ b/src/gallium/drivers/nv30/nv30_state.h @@ -72,6 +72,7 @@ struct nv30_fragment_program { struct nv30_miptree { struct pipe_texture base; + struct nouveau_bo *bo; struct pipe_buffer *buffer; uint total_size; diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index 8779c5572b..89ddf373e9 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -5,6 +5,8 @@ #include "nv40_context.h" + + static void nv40_miptree_layout(struct nv40_miptree *mt) { @@ -109,7 +111,7 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) FREE(mt); return NULL; } - + mt->bo = nouveau_bo(mt->buffer); return &mt->base; } @@ -138,6 +140,7 @@ nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; pipe_buffer_reference(&mt->buffer, pb); + mt->bo = nouveau_bo(mt->buffer); return &mt->base; } diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h index 8a9d8c8fdf..192074e747 100644 --- a/src/gallium/drivers/nv40/nv40_state.h +++ b/src/gallium/drivers/nv40/nv40_state.h @@ -75,6 +75,7 @@ struct nv40_fragment_program { struct nv40_miptree { struct pipe_texture base; + struct nouveau_bo *bo; struct pipe_buffer *buffer; uint total_size; -- cgit v1.2.3 From 7091afed789dbba8364deaea0b7a5a99a12ff25e Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 5 Dec 2009 01:27:59 +0100 Subject: r300g: enhance ZTOP conditions --- src/gallium/drivers/r300/r300_state_derived.c | 37 ++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index cd969d633b..d448866ef0 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -462,6 +462,31 @@ static void r300_update_derived_shader_state(struct r300_context* r300) r300->dirty_state |= R300_NEW_RS_BLOCK; } +static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa) +{ + /* We are interested only in the cases when a new depth or stencil value + * can be written and changed. */ + + /* We might optionally check for [Z func: never] and inspect the stencil + * state in a similar fashion, but it's not terribly important. */ + return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) + || + (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) + || + ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) && + (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK)); +} + +static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa) +{ + /* We are interested only in the cases when alpha testing can kill + * a fragment. */ + uint32_t af = dsa->alpha_function; + + return (af & R300_FG_ALPHA_FUNC_ENABLE) && + (af & R300_FG_ALPHA_FUNC_ALWAYS) != R300_FG_ALPHA_FUNC_ALWAYS; +} + static void r300_update_ztop(struct r300_context* r300) { r300->ztop_state.z_buffer_top = R300_ZTOP_ENABLE; @@ -484,13 +509,13 @@ static void r300_update_ztop(struct r300_context* r300) * * ~C. */ - if (r300->dsa_state->alpha_function) { - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300->fs->info.uses_kill) { - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300_fragment_shader_writes_depth(r300->fs)) { + + if (r300->query_current || + r300_fragment_shader_writes_depth(r300->fs)) { r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300->query_current) { + } else if (r300_dsa_writes_depth_stencil(r300->dsa_state) && + (r300->fs->info.uses_kill || + r300_dsa_alpha_test_enabled(r300->dsa_state))) { r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; } } -- cgit v1.2.3 From c99fb991a3b46c5978248b00eef0efd742127a44 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 6 Dec 2009 23:33:41 -0800 Subject: r300g: Clean up previous commit. If *I* can't read it, there's a strong possibility others can't, either. --- src/gallium/drivers/r300/r300_state_derived.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index d448866ef0..6af49888b9 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -469,10 +469,8 @@ static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa) /* We might optionally check for [Z func: never] and inspect the stencil * state in a similar fashion, but it's not terribly important. */ - return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) - || - (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) - || + return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) || + (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) || ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) && (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK)); } @@ -503,19 +501,25 @@ static void r300_update_ztop(struct r300_context* r300) * The docs claim that for the first three cases, if no ZS writes happen, * then ZTOP can be used. * + * (3) will never apply since we do not support chroma-keyed operations. + * (4) will need to be re-examined (and this comment updated) if/when + * Hyper-Z becomes supported. + * * Additionally, the following conditions require disabled ZTOP: - * ~) Depth writes in fragment shader - * ~) Outstanding occlusion queries + * 5) Depth writes in fragment shader + * 6) Outstanding occlusion queries * * ~C. */ - if (r300->query_current || - r300_fragment_shader_writes_depth(r300->fs)) { + /* ZS writes */ + if (r300_dsa_writes_depth_stencil(r300->dsa_state) && + (r300_dsa_alpha_test_enabled(r300->dsa_state) || /* (1) */ + r300->fs->info.uses_kill)) { /* (2) */ + r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */ r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300_dsa_writes_depth_stencil(r300->dsa_state) && - (r300->fs->info.uses_kill || - r300_dsa_alpha_test_enabled(r300->dsa_state))) { + } else if (r300->query_current) { /* (6) */ r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; } } -- cgit v1.2.3 From d8d8b0d244d9abfd16f99de7f2f30c635033f66f Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 6 Dec 2009 23:49:02 -0800 Subject: softpipe: sp_winsys.h should define/include what it needs. --- src/gallium/drivers/softpipe/sp_winsys.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_winsys.h b/src/gallium/drivers/softpipe/sp_winsys.h index 9e571862b7..f203ded29e 100644 --- a/src/gallium/drivers/softpipe/sp_winsys.h +++ b/src/gallium/drivers/softpipe/sp_winsys.h @@ -34,15 +34,17 @@ #ifndef SP_WINSYS_H #define SP_WINSYS_H - #ifdef __cplusplus extern "C" { #endif +#include "pipe/p_defines.h" struct pipe_screen; struct pipe_winsys; struct pipe_context; +struct pipe_texture; +struct pipe_buffer; struct pipe_context *softpipe_create( struct pipe_screen * ); -- cgit v1.2.3 From 3456f9149b3009fcfce80054759d05883d3c4ee5 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Mon, 7 Dec 2009 20:35:42 +0100 Subject: gallium/util: fix util_color_[un]pack[-ub] to be strict aliasing safe use pointer to union instead of void pointer. gcc complained a lot, depending what the pointer originally actually was. Looks like it's in fact maybe legal to cast for instance uint pointers to union pointers as long as union contains a uint type, hence use this with some callers, other just use union util_color in the first place. --- src/gallium/auxiliary/util/u_clear.h | 8 +- src/gallium/auxiliary/util/u_pack_color.h | 147 +++++++++++-------------- src/gallium/drivers/cell/ppu/cell_clear.c | 6 +- src/gallium/drivers/llvmpipe/lp_clear.c | 5 +- src/gallium/drivers/r300/r300_state.c | 4 +- src/gallium/drivers/softpipe/sp_clear.c | 7 +- src/gallium/drivers/svga/svga_pipe_clear.c | 6 +- src/gallium/drivers/svga/svga_pipe_sampler.c | 2 +- src/gallium/state_trackers/vega/vg_translate.c | 54 ++++----- src/mesa/state_tracker/st_atom_pixeltransfer.c | 2 +- 10 files changed, 113 insertions(+), 128 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_clear.h b/src/gallium/auxiliary/util/u_clear.h index 1e65a035ae..2c32db6175 100644 --- a/src/gallium/auxiliary/util/u_clear.h +++ b/src/gallium/auxiliary/util/u_clear.h @@ -46,13 +46,13 @@ util_clear(struct pipe_context *pipe, { if (buffers & PIPE_CLEAR_COLOR) { struct pipe_surface *ps = framebuffer->cbufs[0]; - unsigned color; + union util_color uc; - util_pack_color(rgba, ps->format, &color); + util_pack_color(rgba, ps->format, &uc); if (pipe->surface_fill) { - pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, color); + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, uc.ui); } else { - util_surface_fill(pipe, ps, 0, 0, ps->width, ps->height, color); + util_surface_fill(pipe, ps, 0, 0, ps->width, ps->height, uc.ui); } } diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 9dacc6d83d..a2e0f26686 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -40,101 +40,97 @@ #include "util/u_math.h" + +union util_color { + ubyte ub; + ushort us; + uint ui; + float f[4]; +}; + /** * Pack ubyte R,G,B,A into dest pixel. */ static INLINE void util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a, - enum pipe_format format, void *dest) + enum pipe_format format, union util_color *uc) { switch (format) { case PIPE_FORMAT_R8G8B8A8_UNORM: { - uint *d = (uint *) dest; - *d = (r << 24) | (g << 16) | (b << 8) | a; + uc->ui = (r << 24) | (g << 16) | (b << 8) | a; } return; case PIPE_FORMAT_R8G8B8X8_UNORM: { - uint *d = (uint *) dest; - *d = (r << 24) | (g << 16) | (b << 8) | 0xff; + uc->ui = (r << 24) | (g << 16) | (b << 8) | 0xff; } return; case PIPE_FORMAT_A8R8G8B8_UNORM: { - uint *d = (uint *) dest; - *d = (a << 24) | (r << 16) | (g << 8) | b; + uc->ui = (a << 24) | (r << 16) | (g << 8) | b; } return; case PIPE_FORMAT_X8R8G8B8_UNORM: { - uint *d = (uint *) dest; - *d = (0xff << 24) | (r << 16) | (g << 8) | b; + uc->ui = (0xff << 24) | (r << 16) | (g << 8) | b; } return; case PIPE_FORMAT_B8G8R8A8_UNORM: { - uint *d = (uint *) dest; - *d = (b << 24) | (g << 16) | (r << 8) | a; + uc->ui = (b << 24) | (g << 16) | (r << 8) | a; } return; case PIPE_FORMAT_B8G8R8X8_UNORM: { - uint *d = (uint *) dest; - *d = (b << 24) | (g << 16) | (r << 8) | 0xff; + uc->ui = (b << 24) | (g << 16) | (r << 8) | 0xff; } return; case PIPE_FORMAT_R5G6B5_UNORM: { - ushort *d = (ushort *) dest; - *d = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); + uc->us = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); } return; case PIPE_FORMAT_A1R5G5B5_UNORM: { - ushort *d = (ushort *) dest; - *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); + uc->us = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); } return; case PIPE_FORMAT_A4R4G4B4_UNORM: { - ushort *d = (ushort *) dest; - *d = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); + uc->us = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); } return; case PIPE_FORMAT_A8_UNORM: { - ubyte *d = (ubyte *) dest; - *d = a; + uc->ub = a; } return; case PIPE_FORMAT_L8_UNORM: case PIPE_FORMAT_I8_UNORM: { - ubyte *d = (ubyte *) dest; - *d = r; + uc->ub = a; } return; case PIPE_FORMAT_R32G32B32A32_FLOAT: { - float *d = (float *) dest; - d[0] = (float)r / 255.0f; - d[1] = (float)g / 255.0f; - d[2] = (float)b / 255.0f; - d[3] = (float)a / 255.0f; + uc->f[0] = (float)r / 255.0f; + uc->f[1] = (float)g / 255.0f; + uc->f[2] = (float)b / 255.0f; + uc->f[3] = (float)a / 255.0f; } return; case PIPE_FORMAT_R32G32B32_FLOAT: { - float *d = (float *) dest; - d[0] = (float)r / 255.0f; - d[1] = (float)g / 255.0f; - d[2] = (float)b / 255.0f; + uc->f[0] = (float)r / 255.0f; + uc->f[1] = (float)g / 255.0f; + uc->f[2] = (float)b / 255.0f; } return; /* XXX lots more cases to add */ default: + uc->ui = 0; /* keep compiler happy */ debug_print_format("gallium: unhandled format in util_pack_color_ub()", format); assert(0); } @@ -145,13 +141,13 @@ util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a, * Unpack RGBA from a packed pixel, returning values as ubytes in [0,255]. */ static INLINE void -util_unpack_color_ub(enum pipe_format format, const void *src, +util_unpack_color_ub(enum pipe_format format, union util_color *uc, ubyte *r, ubyte *g, ubyte *b, ubyte *a) { switch (format) { case PIPE_FORMAT_R8G8B8A8_UNORM: { - uint p = ((const uint *) src)[0]; + uint p = uc->ui; *r = (ubyte) ((p >> 24) & 0xff); *g = (ubyte) ((p >> 16) & 0xff); *b = (ubyte) ((p >> 8) & 0xff); @@ -160,7 +156,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_R8G8B8X8_UNORM: { - uint p = ((const uint *) src)[0]; + uint p = uc->ui; *r = (ubyte) ((p >> 24) & 0xff); *g = (ubyte) ((p >> 16) & 0xff); *b = (ubyte) ((p >> 8) & 0xff); @@ -169,7 +165,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_A8R8G8B8_UNORM: { - uint p = ((const uint *) src)[0]; + uint p = uc->ui; *r = (ubyte) ((p >> 16) & 0xff); *g = (ubyte) ((p >> 8) & 0xff); *b = (ubyte) ((p >> 0) & 0xff); @@ -178,7 +174,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_X8R8G8B8_UNORM: { - uint p = ((const uint *) src)[0]; + uint p = uc->ui; *r = (ubyte) ((p >> 16) & 0xff); *g = (ubyte) ((p >> 8) & 0xff); *b = (ubyte) ((p >> 0) & 0xff); @@ -187,7 +183,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_B8G8R8A8_UNORM: { - uint p = ((const uint *) src)[0]; + uint p = uc->ui; *r = (ubyte) ((p >> 8) & 0xff); *g = (ubyte) ((p >> 16) & 0xff); *b = (ubyte) ((p >> 24) & 0xff); @@ -196,7 +192,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_B8G8R8X8_UNORM: { - uint p = ((const uint *) src)[0]; + uint p = uc->ui; *r = (ubyte) ((p >> 8) & 0xff); *g = (ubyte) ((p >> 16) & 0xff); *b = (ubyte) ((p >> 24) & 0xff); @@ -205,7 +201,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_R5G6B5_UNORM: { - ushort p = ((const ushort *) src)[0]; + ushort p = uc->us; *r = (ubyte) (((p >> 8) & 0xf8) | ((p >> 13) & 0x7)); *g = (ubyte) (((p >> 3) & 0xfc) | ((p >> 9) & 0x3)); *b = (ubyte) (((p << 3) & 0xf8) | ((p >> 2) & 0x7)); @@ -214,7 +210,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_A1R5G5B5_UNORM: { - ushort p = ((const ushort *) src)[0]; + ushort p = uc->us; *r = (ubyte) (((p >> 7) & 0xf8) | ((p >> 12) & 0x7)); *g = (ubyte) (((p >> 2) & 0xf8) | ((p >> 7) & 0x7)); *b = (ubyte) (((p << 3) & 0xf8) | ((p >> 2) & 0x7)); @@ -223,7 +219,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_A4R4G4B4_UNORM: { - ushort p = ((const ushort *) src)[0]; + ushort p = uc->us; *r = (ubyte) (((p >> 4) & 0xf0) | ((p >> 8) & 0xf)); *g = (ubyte) (((p >> 0) & 0xf0) | ((p >> 4) & 0xf)); *b = (ubyte) (((p << 4) & 0xf0) | ((p >> 0) & 0xf)); @@ -232,27 +228,27 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_A8_UNORM: { - ubyte p = ((const ubyte *) src)[0]; + ubyte p = uc->ub; *r = *g = *b = (ubyte) 0xff; *a = p; } return; case PIPE_FORMAT_L8_UNORM: { - ubyte p = ((const ubyte *) src)[0]; + ubyte p = uc->ub; *r = *g = *b = p; *a = (ubyte) 0xff; } return; case PIPE_FORMAT_I8_UNORM: { - ubyte p = ((const ubyte *) src)[0]; + ubyte p = uc->ub; *r = *g = *b = *a = p; } return; case PIPE_FORMAT_R32G32B32A32_FLOAT: { - const float *p = (const float *) src; + const float *p = &uc->f[0]; *r = float_to_ubyte(p[0]); *g = float_to_ubyte(p[1]); *b = float_to_ubyte(p[2]); @@ -261,7 +257,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, return; case PIPE_FORMAT_R32G32B32_FLOAT: { - const float *p = (const float *) src; + const float *p = &uc->f[0]; *r = float_to_ubyte(p[0]); *g = float_to_ubyte(p[1]); *b = float_to_ubyte(p[2]); @@ -271,7 +267,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, case PIPE_FORMAT_R32G32_FLOAT: { - const float *p = (const float *) src; + const float *p = &uc->f[0]; *r = float_to_ubyte(p[0]); *g = float_to_ubyte(p[1]); *b = *a = (ubyte) 0xff; @@ -280,7 +276,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src, case PIPE_FORMAT_R32_FLOAT: { - const float *p = (const float *) src; + const float *p = &uc->f[0]; *r = float_to_ubyte(p[0]); *g = *b = *a = (ubyte) 0xff; } @@ -293,14 +289,13 @@ util_unpack_color_ub(enum pipe_format format, const void *src, assert(0); } } - /** * Note rgba outside [0,1] will be clamped for int pixel formats. */ static INLINE void -util_pack_color(const float rgba[4], enum pipe_format format, void *dest) +util_pack_color(const float rgba[4], enum pipe_format format, union util_color *uc) { ubyte r = 0; ubyte g = 0; @@ -318,90 +313,78 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest) switch (format) { case PIPE_FORMAT_R8G8B8A8_UNORM: { - uint *d = (uint *) dest; - *d = (r << 24) | (g << 16) | (b << 8) | a; + uc->ui = (r << 24) | (g << 16) | (b << 8) | a; } return; case PIPE_FORMAT_R8G8B8X8_UNORM: { - uint *d = (uint *) dest; - *d = (r << 24) | (g << 16) | (b << 8) | 0xff; + uc->ui = (r << 24) | (g << 16) | (b << 8) | 0xff; } return; case PIPE_FORMAT_A8R8G8B8_UNORM: { - uint *d = (uint *) dest; - *d = (a << 24) | (r << 16) | (g << 8) | b; + uc->ui = (a << 24) | (r << 16) | (g << 8) | b; } return; case PIPE_FORMAT_X8R8G8B8_UNORM: { - uint *d = (uint *) dest; - *d = (0xff << 24) | (r << 16) | (g << 8) | b; + uc->ui = (0xff << 24) | (r << 16) | (g << 8) | b; } return; case PIPE_FORMAT_B8G8R8A8_UNORM: { - uint *d = (uint *) dest; - *d = (b << 24) | (g << 16) | (r << 8) | a; + uc->ui = (b << 24) | (g << 16) | (r << 8) | a; } return; case PIPE_FORMAT_B8G8R8X8_UNORM: { - uint *d = (uint *) dest; - *d = (b << 24) | (g << 16) | (r << 8) | 0xff; + uc->ui = (b << 24) | (g << 16) | (r << 8) | 0xff; } return; case PIPE_FORMAT_R5G6B5_UNORM: { - ushort *d = (ushort *) dest; - *d = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); + uc->us = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); } return; case PIPE_FORMAT_A1R5G5B5_UNORM: { - ushort *d = (ushort *) dest; - *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); + uc->us = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); } return; case PIPE_FORMAT_A4R4G4B4_UNORM: { - ushort *d = (ushort *) dest; - *d = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); + uc->ub = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); } return; case PIPE_FORMAT_A8_UNORM: { - ubyte *d = (ubyte *) dest; - *d = a; + uc->ub = a; } return; case PIPE_FORMAT_L8_UNORM: case PIPE_FORMAT_I8_UNORM: { - ubyte *d = (ubyte *) dest; - *d = r; + uc->ub = r; } return; case PIPE_FORMAT_R32G32B32A32_FLOAT: { - float *d = (float *) dest; - d[0] = rgba[0]; - d[1] = rgba[1]; - d[2] = rgba[2]; - d[3] = rgba[3]; + uc->f[0] = rgba[0]; + uc->f[1] = rgba[1]; + uc->f[2] = rgba[2]; + uc->f[3] = rgba[3]; } return; case PIPE_FORMAT_R32G32B32_FLOAT: { - float *d = (float *) dest; - d[0] = rgba[0]; - d[1] = rgba[1]; - d[2] = rgba[2]; + uc->f[0] = rgba[0]; + uc->f[1] = rgba[1]; + uc->f[2] = rgba[2]; } return; /* XXX lots more cases to add */ default: + uc->ui = 0; /* keep compiler happy */ debug_print_format("gallium: unhandled format in util_pack_color()", format); assert(0); } diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c index 79ad687ea9..3a3f968a49 100644 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -59,9 +59,9 @@ cell_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, if (buffers & PIPE_CLEAR_COLOR) { uint surfIndex = 0; - uint clearValue; + union util_color uc; - util_pack_color(rgba, cell->framebuffer.cbufs[0]->format, &clearValue); + util_pack_color(rgba, cell->framebuffer.cbufs[0]->format, &uc); /* Build a CLEAR command and place it in the current batch buffer */ STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0); @@ -70,7 +70,7 @@ cell_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, cell_batch_alloc16(cell, sizeof(*clr)); clr->opcode[0] = CELL_CMD_CLEAR_SURFACE; clr->surface = surfIndex; - clr->value = clearValue; + clr->value = uc.ui; } if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c index bdcff94b9b..08d9f2e273 100644 --- a/src/gallium/drivers/llvmpipe/lp_clear.c +++ b/src/gallium/drivers/llvmpipe/lp_clear.c @@ -50,6 +50,7 @@ llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, double depth, unsigned stencil) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + union util_color uc; unsigned cv; uint i; @@ -64,8 +65,8 @@ llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { struct pipe_surface *ps = llvmpipe->framebuffer.cbufs[i]; - util_pack_color(rgba, ps->format, &cv); - lp_tile_cache_clear(llvmpipe->cbuf_cache[i], rgba, cv); + util_pack_color(rgba, ps->format, &uc); + lp_tile_cache_clear(llvmpipe->cbuf_cache[i], rgba, uc.ui); } llvmpipe->dirty_render_cache = TRUE; } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 442af70e14..4ddbb357b6 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -153,7 +153,7 @@ static void r300_set_blend_color(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM, - &r300->blend_color_state->blend_color); + (union util_color *)&r300->blend_color_state->blend_color); /* XXX if FP16 blending is enabled, we should use the FP16 format */ r300->blend_color_state->blend_color_red_alpha = @@ -535,7 +535,7 @@ static void* sampler->filter1 |= r300_anisotropy(state->max_anisotropy); util_pack_color(state->border_color, PIPE_FORMAT_A8R8G8B8_UNORM, - &sampler->border_color); + (union util_color *)&sampler->border_color); /* R500-specific fixups and optimizations */ if (r300_screen(r300->context.screen)->caps->is_r500) { diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index 8fac8e6e05..f98087deb8 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -48,6 +48,7 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, double depth, unsigned stencil) { struct softpipe_context *softpipe = softpipe_context(pipe); + union util_color uc; unsigned cv; uint i; @@ -62,12 +63,12 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) { struct pipe_surface *ps = softpipe->framebuffer.cbufs[i]; - util_pack_color(rgba, ps->format, &cv); - sp_tile_cache_clear(softpipe->cbuf_cache[i], rgba, cv); + util_pack_color(rgba, ps->format, &uc); + sp_tile_cache_clear(softpipe->cbuf_cache[i], rgba, uc.ui); #if !TILE_CLEAR_OPTIMIZATION /* non-cached surface */ - pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, cv); + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, uc.ui); #endif } } diff --git a/src/gallium/drivers/svga/svga_pipe_clear.c b/src/gallium/drivers/svga/svga_pipe_clear.c index 6195c3897e..409b3b41cb 100644 --- a/src/gallium/drivers/svga/svga_pipe_clear.c +++ b/src/gallium/drivers/svga/svga_pipe_clear.c @@ -46,7 +46,7 @@ try_clear(struct svga_context *svga, boolean restore_viewport = FALSE; SVGA3dClearFlag flags = 0; struct pipe_framebuffer_state *fb = &svga->curr.framebuffer; - unsigned color = 0; + union util_color uc; ret = svga_update_state(svga, SVGA_STATE_HW_CLEAR); if (ret) @@ -54,7 +54,7 @@ try_clear(struct svga_context *svga, if ((buffers & PIPE_CLEAR_COLOR) && fb->cbufs[0]) { flags |= SVGA3D_CLEAR_COLOR; - util_pack_color(rgba, PIPE_FORMAT_A8R8G8B8_UNORM, &color); + util_pack_color(rgba, PIPE_FORMAT_A8R8G8B8_UNORM, &uc); rect.w = fb->cbufs[0]->width; rect.h = fb->cbufs[0]->height; @@ -77,7 +77,7 @@ try_clear(struct svga_context *svga, return ret; } - ret = SVGA3D_ClearRect(svga->swc, flags, color, depth, stencil, + ret = SVGA3D_ClearRect(svga->swc, flags, uc.ui, depth, stencil, rect.x, rect.y, rect.w, rect.h); if (ret != PIPE_OK) return ret; diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c index b4e57c5d15..7f530083d6 100644 --- a/src/gallium/drivers/svga/svga_pipe_sampler.c +++ b/src/gallium/drivers/svga/svga_pipe_sampler.c @@ -122,7 +122,7 @@ svga_create_sampler_state(struct pipe_context *pipe, util_pack_color_ub( r, g, b, a, PIPE_FORMAT_B8G8R8A8_UNORM, - &cso->bordercolor ); + (union util_color *)&cso->bordercolor ); } /* No SVGA3D support for: diff --git a/src/gallium/state_trackers/vega/vg_translate.c b/src/gallium/state_trackers/vega/vg_translate.c index 00e0764706..5051d83831 100644 --- a/src/gallium/state_trackers/vega/vg_translate.c +++ b/src/gallium/state_trackers/vega/vg_translate.c @@ -487,7 +487,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, a = 0xff; util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } } @@ -503,7 +503,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, a = (*src >> 0) & 0xff; util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } return; @@ -520,7 +520,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, a = (*src >> 0) & 0xff; util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } return; @@ -537,7 +537,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, clr[3] = 1.f; util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } } @@ -553,7 +553,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, clr[3] = ((*src >> 0) & 1)/1.; util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } } @@ -569,7 +569,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, clr[3] = ((*src >> 0) & 15)/15.; util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } } @@ -579,7 +579,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, src += offset; for (i = 0; i < n; ++i) { util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } } @@ -595,7 +595,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, a = 0xff; util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } } @@ -611,7 +611,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, a = (*src >> 0) & 0xff; util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } return; @@ -628,7 +628,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, a = (*src >> 0) & 0xff; util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } return; @@ -639,7 +639,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, src += offset; for (i = 0; i < n; ++i) { util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } } @@ -649,7 +649,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, src += offset; for (i = 0; i < n; ++i) { util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } } @@ -668,7 +668,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, clr[3] = 1.f; util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i+j]); + (union util_color *)rgba[i+j]); } ++src; } @@ -689,7 +689,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, clr[3] = (((*src) & (1<> shift); util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i+j]); + (union util_color *)rgba[i+j]); } ++src; } @@ -716,7 +716,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, clr[3] = ((*src) & (bitter)) >> shift; util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i +j]); + (union util_color *)rgba[i +j]); } ++src; } @@ -736,7 +736,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, b = (*src >> 0) & 0xff; util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } return; @@ -753,7 +753,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, b = (*src >> 0) & 0xff; util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } return; @@ -776,7 +776,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, b = (*src >> 0) & 0xff; util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } return; @@ -793,7 +793,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, b = (*src >> 0) & 0xff; util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } return; @@ -812,7 +812,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, a = (*src >> 0) & 0xff; util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } return; @@ -829,7 +829,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, a = (*src >> 0) & 0xff; util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } return; @@ -854,7 +854,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, a = (*src >> 0) & 0xff; util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } return; @@ -871,7 +871,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, a = (*src >> 0) & 0xff; util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } return; @@ -890,7 +890,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, r = (*src >> 0) & 0xff; util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } return; @@ -907,7 +907,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, r = (*src >> 0) & 0xff; util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } return; @@ -930,7 +930,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, r = (*src >> 0) & 0xff; util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } return; @@ -947,7 +947,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, r = (*src >> 0) & 0xff; util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - rgba[i]); + (union util_color *)rgba[i]); ++src; } return; diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c index 4b35f59cc2..5e2ae1bb36 100644 --- a/src/mesa/state_tracker/st_atom_pixeltransfer.c +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c @@ -167,7 +167,7 @@ load_color_map_texture(GLcontext *ctx, struct pipe_texture *pt) ubyte g = ctx->PixelMaps.GtoG.Map8[i * gSize / texSize]; ubyte b = ctx->PixelMaps.BtoB.Map8[j * bSize / texSize]; ubyte a = ctx->PixelMaps.AtoA.Map8[i * aSize / texSize]; - util_pack_color_ub(r, g, b, a, pt->format, dest + k); + util_pack_color_ub(r, g, b, a, pt->format, (union util_color *)(dest + k)); } } -- cgit v1.2.3 From add6dfbba64260c9b314b4a95c8def084e05bd3b Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Mon, 7 Dec 2009 19:04:07 -0800 Subject: llvmpipe: Initialize variables in emit_instruction. --- src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index d4d18febec..f588bde983 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -496,9 +496,9 @@ emit_instruction( if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { - LLVMValueRef *p_floor_log2; - LLVMValueRef *p_exp; - LLVMValueRef *p_log2; + LLVMValueRef *p_floor_log2 = NULL; + LLVMValueRef *p_exp = NULL; + LLVMValueRef *p_log2 = NULL; src0 = emit_fetch( bld, inst, 0, CHAN_X ); src0 = lp_build_abs( &bld->base, src0 ); -- cgit v1.2.3 From 2aebc5e01fbab6046f80c881d30717f788a390bc Mon Sep 17 00:00:00 2001 From: Alan Hourihane Date: Tue, 8 Dec 2009 13:11:09 +0000 Subject: move assert to avoid crash in debug build. --- src/gallium/drivers/llvmpipe/lp_tile_cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c index e83210f93b..7a1ecf5107 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c @@ -290,11 +290,12 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc, assert(tc->surface); assert(tc->transfer); - assert(tc->transfer_map); if(!tc->transfer_map) lp_tile_cache_map_transfers(tc); + assert(tc->transfer_map); + switch(tile->status) { case LP_TILE_STATUS_CLEAR: /* don't get tile from framebuffer, just clear it */ -- cgit v1.2.3 From ee1720b99dfb5964962f2346406a4e3e88374a68 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Tue, 8 Dec 2009 19:13:48 +0100 Subject: gallium: fix more potential strict aliasing issues In particular, gcc man page warns that union a_union { int i; double d; }; int f() { double d = 3.0; return ((union a_union *) &d)->i; } "might" not be ok (why not?), even though it doesn't seem to generate any warnings. Hence don't use this and do the extra step to actually use assignment to get the values in/out of the union. This changes parts of 3456f9149b3009fcfce80054759d05883d3c4ee5. --- src/gallium/drivers/r300/r300_state.c | 10 +- src/gallium/drivers/svga/svga_pipe_sampler.c | 5 +- src/gallium/state_trackers/vega/vg_translate.c | 190 ++++++++++++++++++------- src/mesa/state_tracker/st_atom_pixeltransfer.c | 4 +- 4 files changed, 148 insertions(+), 61 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 4ddbb357b6..a83075df92 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -151,9 +151,10 @@ static void r300_set_blend_color(struct pipe_context* pipe, const struct pipe_blend_color* color) { struct r300_context* r300 = r300_context(pipe); + union util_color uc; - util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM, - (union util_color *)&r300->blend_color_state->blend_color); + util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM, &uc); + r300->blend_color_state->blend_color = uc.ui; /* XXX if FP16 blending is enabled, we should use the FP16 format */ r300->blend_color_state->blend_color_red_alpha = @@ -513,6 +514,7 @@ static void* struct r300_context* r300 = r300_context(pipe); struct r300_sampler_state* sampler = CALLOC_STRUCT(r300_sampler_state); int lod_bias; + union util_color uc; sampler->filter0 |= (r300_translate_wrap(state->wrap_s) << R300_TX_WRAP_S_SHIFT) | @@ -534,8 +536,8 @@ static void* sampler->filter1 |= r300_anisotropy(state->max_anisotropy); - util_pack_color(state->border_color, PIPE_FORMAT_A8R8G8B8_UNORM, - (union util_color *)&sampler->border_color); + util_pack_color(state->border_color, PIPE_FORMAT_A8R8G8B8_UNORM, &uc); + sampler->border_color = uc.ui; /* R500-specific fixups and optimizations */ if (r300_screen(r300->context.screen)->caps->is_r500) { diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c index 7f530083d6..78053e755e 100644 --- a/src/gallium/drivers/svga/svga_pipe_sampler.c +++ b/src/gallium/drivers/svga/svga_pipe_sampler.c @@ -101,6 +101,7 @@ svga_create_sampler_state(struct pipe_context *pipe, { struct svga_context *svga = svga_context(pipe); struct svga_sampler_state *cso = CALLOC_STRUCT( svga_sampler_state ); + union util_color uc; cso->mipfilter = translate_mip_filter(sampler->min_mip_filter); cso->magfilter = translate_img_filter( sampler->mag_img_filter ); @@ -121,8 +122,8 @@ svga_create_sampler_state(struct pipe_context *pipe, ubyte a = float_to_ubyte(sampler->border_color[3]); util_pack_color_ub( r, g, b, a, - PIPE_FORMAT_B8G8R8A8_UNORM, - (union util_color *)&cso->bordercolor ); + PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + cso->bordercolor = uc.ui; } /* No SVGA3D support for: diff --git a/src/gallium/state_trackers/vega/vg_translate.c b/src/gallium/state_trackers/vega/vg_translate.c index 5051d83831..03575ca3dd 100644 --- a/src/gallium/state_trackers/vega/vg_translate.c +++ b/src/gallium/state_trackers/vega/vg_translate.c @@ -474,6 +474,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, VGfloat rgba[][4]) { VGint i; + union util_color uc; switch (dataFormat) { case VG_sRGBX_8888: { @@ -486,8 +487,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, b = (*src >> 8) & 0xff; a = 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } } @@ -502,8 +506,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, b = (*src >> 8) & 0xff; a = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -519,8 +526,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, b = (*src >> 8) & 0xff; a = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -536,8 +546,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, clr[2] = ((*src >> 0) & 31)/31.; clr[3] = 1.f; - util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } } @@ -552,8 +565,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, clr[2] = ((*src >> 1) & 31)/31.; clr[3] = ((*src >> 0) & 1)/1.; - util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } } @@ -568,8 +584,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, clr[2] = ((*src >> 4) & 15)/15.; clr[3] = ((*src >> 0) & 15)/15.; - util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } } @@ -578,8 +597,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, VGubyte *src = (VGubyte *)data; src += offset; for (i = 0; i < n; ++i) { - util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } } @@ -594,8 +616,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, b = (*src >> 8) & 0xff; a = 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } } @@ -610,8 +635,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, b = (*src >> 8) & 0xff; a = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -627,8 +655,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, b = (*src >> 8) & 0xff; a = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -638,8 +669,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, VGubyte *src = (VGubyte *)data; src += offset; for (i = 0; i < n; ++i) { - util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } } @@ -648,8 +682,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, VGubyte *src = (VGubyte *)data; src += offset; for (i = 0; i < n; ++i) { - util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } } @@ -667,8 +704,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, clr[2] = clr[0]; clr[3] = 1.f; - util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i+j]); + util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i+j][0] = uc.f[0]; + rgba[i+j][1] = uc.f[1]; + rgba[i+j][2] = uc.f[2]; + rgba[i+j][3] = uc.f[3]; } ++src; } @@ -688,8 +728,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, clr[2] = 0.f; clr[3] = (((*src) & (1<> shift); - util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i+j]); + util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i+j][0] = uc.f[0]; + rgba[i+j][1] = uc.f[1]; + rgba[i+j][2] = uc.f[2]; + rgba[i+j][3] = uc.f[3]; } ++src; } @@ -715,8 +758,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, clr[2] = 0.f; clr[3] = ((*src) & (bitter)) >> shift; - util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i +j]); + util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i+j][0] = uc.f[0]; + rgba[i+j][1] = uc.f[1]; + rgba[i+j][2] = uc.f[2]; + rgba[i+j][3] = uc.f[3]; } ++src; } @@ -735,8 +781,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, g = (*src >> 8) & 0xff; b = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -752,8 +801,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, g = (*src >> 8) & 0xff; b = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -775,8 +827,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, g = (*src >> 8) & 0xff; b = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -792,8 +847,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, g = (*src >> 8) & 0xff; b = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -811,8 +869,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, r = (*src >> 8) & 0xff; a = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -828,8 +889,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, r = (*src >> 8) & 0xff; a = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -853,8 +917,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, r = (*src >> 8) & 0xff; a = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -870,8 +937,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, r = (*src >> 8) & 0xff; a = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -889,8 +959,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, g = (*src >> 8) & 0xff; r = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -906,8 +979,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, g = (*src >> 8) & 0xff; r = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -929,8 +1005,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, g = (*src >> 8) & 0xff; r = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; @@ -946,8 +1025,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx, g = (*src >> 8) & 0xff; r = (*src >> 0) & 0xff; - util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, - (union util_color *)rgba[i]); + util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc); + rgba[i][0] = uc.f[0]; + rgba[i][1] = uc.f[1]; + rgba[i][2] = uc.f[2]; + rgba[i][3] = uc.f[3]; ++src; } return; diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c index 5e2ae1bb36..6a5854e9ba 100644 --- a/src/mesa/state_tracker/st_atom_pixeltransfer.c +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c @@ -162,12 +162,14 @@ load_color_map_texture(GLcontext *ctx, struct pipe_texture *pt) */ for (i = 0; i < texSize; i++) { for (j = 0; j < texSize; j++) { + union util_color uc; int k = (i * texSize + j); ubyte r = ctx->PixelMaps.RtoR.Map8[j * rSize / texSize]; ubyte g = ctx->PixelMaps.GtoG.Map8[i * gSize / texSize]; ubyte b = ctx->PixelMaps.BtoB.Map8[j * bSize / texSize]; ubyte a = ctx->PixelMaps.AtoA.Map8[i * aSize / texSize]; - util_pack_color_ub(r, g, b, a, pt->format, (union util_color *)(dest + k)); + util_pack_color_ub(r, g, b, a, pt->format, &uc); + *(dest + k) = uc.ui; } } -- cgit v1.2.3 From 849a0644ada6ed7c3576babc3b348bee227118ff Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Tue, 8 Dec 2009 17:44:51 +0100 Subject: cell: use boolean instead of bool --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 1895a7940c..1d8a11a4ac 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -995,7 +995,7 @@ static boolean emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg; - bool complement = FALSE; + boolean complement = FALSE; one_reg = get_const_one_reg(gen); -- cgit v1.2.3 From 47c780180b888e115b630cd940fe9c29dd53b4c5 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Tue, 8 Dec 2009 17:51:19 +0100 Subject: nouveau: use boolean instead of bool --- src/gallium/drivers/nv04/nv04_transfer.c | 2 +- src/gallium/drivers/nv10/nv10_transfer.c | 2 +- src/gallium/drivers/nv20/nv20_transfer.c | 2 +- src/gallium/drivers/nv30/nv30_transfer.c | 2 +- src/gallium/drivers/nv40/nv40_transfer.c | 2 +- src/gallium/drivers/nv50/nv50_context.h | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c index e8ff686b4a..d66d6c6346 100644 --- a/src/gallium/drivers/nv04/nv04_transfer.c +++ b/src/gallium/drivers/nv04/nv04_transfer.c @@ -11,7 +11,7 @@ struct nv04_transfer { struct pipe_transfer base; struct pipe_surface *surface; - bool direct; + boolean direct; }; static void diff --git a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c index 9e44d37367..06bb513417 100644 --- a/src/gallium/drivers/nv10/nv10_transfer.c +++ b/src/gallium/drivers/nv10/nv10_transfer.c @@ -11,7 +11,7 @@ struct nv10_transfer { struct pipe_transfer base; struct pipe_surface *surface; - bool direct; + boolean direct; }; static void diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c index f2e0a34db9..26a73c5143 100644 --- a/src/gallium/drivers/nv20/nv20_transfer.c +++ b/src/gallium/drivers/nv20/nv20_transfer.c @@ -11,7 +11,7 @@ struct nv20_transfer { struct pipe_transfer base; struct pipe_surface *surface; - bool direct; + boolean direct; }; static void diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c index c8c3bd1f17..e29bfbd3ef 100644 --- a/src/gallium/drivers/nv30/nv30_transfer.c +++ b/src/gallium/drivers/nv30/nv30_transfer.c @@ -11,7 +11,7 @@ struct nv30_transfer { struct pipe_transfer base; struct pipe_surface *surface; - bool direct; + boolean direct; }; static void diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c index 1ee5cf39e0..ed5be1cf87 100644 --- a/src/gallium/drivers/nv40/nv40_transfer.c +++ b/src/gallium/drivers/nv40/nv40_transfer.c @@ -11,7 +11,7 @@ struct nv40_transfer { struct pipe_transfer base; struct pipe_surface *surface; - bool direct; + boolean direct; }; static void diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 4b0f062295..79135f2f36 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -65,7 +65,7 @@ struct nv50_rasterizer_stateobj { }; struct nv50_sampler_stateobj { - bool normalized; + boolean normalized; unsigned tsc[8]; }; -- cgit v1.2.3 From 8cc570a48c2e8e18622027cbd76f16a746b430bc Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 9 Dec 2009 00:55:51 +0100 Subject: r300g: clean up r300_emit_aos --- src/gallium/drivers/r300/r300_cs.h | 9 +++++ src/gallium/drivers/r300/r300_emit.c | 71 +++++++++++++++++++++++------------- src/gallium/drivers/r300/r300_reg.h | 5 +++ 3 files changed, 59 insertions(+), 26 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 9fcf3ab538..d142fee050 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -115,6 +115,15 @@ cs_count -= 3; \ } while (0) +#define OUT_CS_RELOC_NO_OFFSET(bo, rd, wd, flags) do { \ + DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, " \ + "domains (%d, %d, %d)\n", \ + bo, rd, wd, flags); \ + assert(bo); \ + cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \ + cs_count -= 2; \ +} while (0) + #define END_CS do { \ if (VERY_VERBOSE_CS) { \ DBG(cs_context_copy, DBG_CS, "r300: END_CS in %s (%s:%d)\n", __FUNCTION__, \ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index dbf316a9b5..7620c73cac 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson + * Copyright 2009 Marek Olšák * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -623,50 +624,68 @@ void r300_emit_texture(struct r300_context* r300, END_CS; } -/* XXX I can't read this and that's not good */ -void r300_emit_aos(struct r300_context* r300, unsigned offset) +static boolean r300_validate_aos(struct r300_context *r300) { struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; struct pipe_vertex_element *velem = r300->vertex_element; - CS_LOCALS(r300); int i; - unsigned aos_count = r300->vertex_element_count; + /* Check if formats and strides are aligned to the size of DWORD. */ + for (i = 0; i < r300->vertex_element_count; i++) { + if (vbuf[velem[i].vertex_buffer_index].stride % 4 != 0 || + pf_get_blocksize(velem[i].src_format) % 4 != 0) { + return FALSE; + } + } + return TRUE; +} + +void r300_emit_aos(struct r300_context* r300, unsigned offset) +{ + struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer; + struct pipe_vertex_element *velem = r300->vertex_element; + int i; + unsigned size1, size2, aos_count = r300->vertex_element_count; unsigned packet_size = (aos_count * 3 + 1) / 2; + CS_LOCALS(r300); + + /* XXX Move this checking to a more approriate place. */ + if (!r300_validate_aos(r300)) { + /* XXX We should fallback using Draw. */ + assert(0); + } + BEGIN_CS(2 + packet_size + aos_count * 2); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); OUT_CS(aos_count); + for (i = 0; i < aos_count - 1; i += 2) { - int buf_num1 = velem[i].vertex_buffer_index; - int buf_num2 = velem[i+1].vertex_buffer_index; - assert(vbuf[buf_num1].stride % 4 == 0 && pf_get_blocksize(velem[i].src_format) % 4 == 0); - assert(vbuf[buf_num2].stride % 4 == 0 && pf_get_blocksize(velem[i+1].src_format) % 4 == 0); - OUT_CS((pf_get_blocksize(velem[i].src_format) >> 2) | (vbuf[buf_num1].stride << 6) | - (pf_get_blocksize(velem[i+1].src_format) << 14) | (vbuf[buf_num2].stride << 22)); - OUT_CS(vbuf[buf_num1].buffer_offset + velem[i].src_offset + - offset * vbuf[buf_num1].stride); - OUT_CS(vbuf[buf_num2].buffer_offset + velem[i+1].src_offset + - offset * vbuf[buf_num2].stride); + vb1 = &vbuf[velem[i].vertex_buffer_index]; + vb2 = &vbuf[velem[i+1].vertex_buffer_index]; + size1 = pf_get_blocksize(velem[i].src_format); + size2 = pf_get_blocksize(velem[i+1].src_format); + + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | + R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride)); + OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); + OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride); } + if (aos_count & 1) { - int buf_num = velem[i].vertex_buffer_index; - assert(vbuf[buf_num].stride % 4 == 0 && pf_get_blocksize(velem[i].src_format) % 4 == 0); - OUT_CS((pf_get_blocksize(velem[i].src_format) >> 2) | (vbuf[buf_num].stride << 6)); - OUT_CS(vbuf[buf_num].buffer_offset + velem[i].src_offset + - offset * vbuf[buf_num].stride); + vb1 = &vbuf[velem[i].vertex_buffer_index]; + size1 = pf_get_blocksize(velem[i].src_format); + + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); + OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); } - /* XXX bare CS reloc */ for (i = 0; i < aos_count; i++) { - cs_winsys->write_cs_reloc(cs_winsys, - vbuf[velem[i].vertex_buffer_index].buffer, - RADEON_GEM_DOMAIN_GTT, - 0, - 0); - cs_count -= 2; + OUT_CS_RELOC_NO_OFFSET(vbuf[velem[i].vertex_buffer_index].buffer, + RADEON_GEM_DOMAIN_GTT, 0, 0); } END_CS; } + #if 0 void r300_emit_draw_packet(struct r300_context* r300) { diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 85b1ea568a..c1ea87d11e 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -3293,6 +3293,11 @@ enum { */ #define R300_PACKET3_3D_LOAD_VBPNTR 0x00002F00 +# define R300_VBPNTR_SIZE0(x) ((x) >> 2) +# define R300_VBPNTR_STRIDE0(x) (((x) >> 2) << 8) +# define R300_VBPNTR_SIZE1(x) (((x) >> 2) << 16) +# define R300_VBPNTR_STRIDE1(x) (((x) >> 2) << 24) + #define R300_PACKET3_INDX_BUFFER 0x00003300 # define R300_INDX_BUFFER_DST_SHIFT 0 # define R300_INDX_BUFFER_SKIP_SHIFT 16 -- cgit v1.2.3 From 87b822e024797ef2fdb51ec9364f21eeb4d07161 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 8 Dec 2009 04:55:32 +0100 Subject: r300g: make pow(0,0) return 1 instead of NaN in the R500 fragment shader Unfortunately we can't fix this easily in the R300 fragment shader, and it's probably not worth the effort. --- src/gallium/drivers/r300/r300_emit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 7620c73cac..55bc2b3528 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -277,7 +277,7 @@ void r500_emit_fragment_program_code(struct r300_context* r300, BEGIN_CS(13 + ((code->inst_end + 1) * 6)); - OUT_CS_REG(R500_US_CONFIG, 0); + OUT_CS_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); OUT_CS_REG(R500_US_PIXSIZE, code->max_temp_idx); OUT_CS_REG(R500_US_CODE_RANGE, R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end)); -- cgit v1.2.3 From 6de7ac73bf027b9ace6f5f0c8063cbf724d95cee Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 8 Dec 2009 21:53:19 +0100 Subject: r300g: always disable unused colorbuffers --- src/gallium/drivers/r300/r300_emit.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 55bc2b3528..f784e1fa8e 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -331,7 +331,13 @@ void r300_emit_fb_state(struct r300_context* r300, int i; CS_LOCALS(r300); - BEGIN_CS((10 * fb->nr_cbufs) + (fb->zsbuf ? 10 : 0) + 4); + /* Shouldn't fail unless there is a bug in the state tracker. */ + assert(fb->nr_cbufs <= 4); + + BEGIN_CS((10 * fb->nr_cbufs) + (2 * (4 - fb->nr_cbufs)) + + (fb->zsbuf ? 10 : 0) + 4); + + /* Flush and free renderbuffer caches. */ OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); @@ -339,6 +345,7 @@ void r300_emit_fb_state(struct r300_context* r300, R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + /* Set up colorbuffers. */ for (i = 0; i < fb->nr_cbufs; i++) { surf = fb->cbufs[i]; tex = (struct r300_texture*)surf->texture; @@ -356,6 +363,12 @@ void r300_emit_fb_state(struct r300_context* r300, r300_translate_out_fmt(surf->format)); } + /* Disable unused colorbuffers. */ + for (; i < 4; i++) { + OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), R300_US_OUT_FMT_UNUSED); + } + + /* Set up a zbuffer. */ if (fb->zsbuf) { surf = fb->zsbuf; tex = (struct r300_texture*)surf->texture; -- cgit v1.2.3 From c6b450033d7ec2a415b1d761da1d94588358c94b Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 9 Dec 2009 00:45:18 +0100 Subject: r300g: fix routing of vertex streams if TCL is bypassed Generating mipmaps finally works, among other things. Yay! --- src/gallium/drivers/r300/r300_state.c | 2 -- src/gallium/drivers/r300/r300_state_derived.c | 17 +++++++++++---- src/gallium/drivers/r300/r300_vs.c | 31 +++++++++++---------------- src/gallium/drivers/r300/r300_vs.h | 4 +++- 4 files changed, 29 insertions(+), 25 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 68c5408a64..edf7114bbb 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -419,8 +419,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe, if (state->bypass_vs_clip_and_viewport || !r300_screen(pipe->screen)->caps->has_tcl) { rs->vap_control_status |= R300_VAP_TCL_BYPASS; - } else { - rs->rs.bypass_vs_clip_and_viewport = TRUE; } rs->point_size = pack_float_16_6x(state->point_size) | diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 6af49888b9..29bc701a86 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -134,6 +134,16 @@ static void r300_vertex_psc(struct r300_context* r300) uint16_t type, swizzle; enum pipe_format format; unsigned i; + int identity[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + int* stream_tab; + + /* If TCL is bypassed, map vertex streams to equivalent VS output + * locations. */ + if (r300->rs_state->enable_vte) { + stream_tab = identity; + } else { + stream_tab = r300->vs->stream_loc_notcl; + } /* Vertex shaders have no semantics on their inputs, * so PSC should just route stuff based on the vertex elements, @@ -147,10 +157,10 @@ static void r300_vertex_psc(struct r300_context* r300) format = r300->vertex_element[i].src_format; type = r300_translate_vertex_data_type(format) | - (i << R300_DST_VEC_LOC_SHIFT); + (stream_tab[i] << R300_DST_VEC_LOC_SHIFT); swizzle = r300_translate_vertex_data_swizzle(format); - if (i % 2) { + if (i & 1) { vformat->vap_prog_stream_cntl[i >> 1] |= type << 16; vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; } else { @@ -159,7 +169,6 @@ static void r300_vertex_psc(struct r300_context* r300) } } - assert(i <= 15); /* Set the last vector in the PSC. */ @@ -178,7 +187,7 @@ static void r300_swtcl_vertex_psc(struct r300_context* r300) uint16_t type, swizzle; enum pipe_format format; unsigned i, attrib_count; - int* vs_output_tab = r300->vs->output_stream_loc_swtcl; + int* vs_output_tab = r300->vs->stream_loc_notcl; /* For each Draw attribute, route it to the fragment shader according * to the vs_output_tab. */ diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 31248346bc..fa207c939c 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -143,35 +143,33 @@ static void r300_shader_vap_output_fmt( assert(gen_count <= 8); } -/* Set VS output stream locations for SWTCL. */ -static void r300_stream_locations_swtcl( +/* Sets up stream mapping to equivalent VS outputs if TCL is bypassed + * or isn't present. */ +static void r300_stream_locations_notcl( struct r300_shader_semantics* vs_outputs, - int* output_stream_loc) + int* stream_loc) { int i, tabi = 0, gen_count; - /* XXX Check whether the numbers (0, 1, 2+i, etc.) are correct. - * These should go to VAP_PROG_STREAM_CNTL/DST_VEC_LOC. */ - /* Position. */ - output_stream_loc[tabi++] = 0; + stream_loc[tabi++] = 0; /* Point size. */ if (vs_outputs->psize != ATTR_UNUSED) { - output_stream_loc[tabi++] = 1; + stream_loc[tabi++] = 1; } /* Colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { if (vs_outputs->color[i] != ATTR_UNUSED) { - output_stream_loc[tabi++] = 2 + i; + stream_loc[tabi++] = 2 + i; } } /* Back-face colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { if (vs_outputs->bcolor[i] != ATTR_UNUSED) { - output_stream_loc[tabi++] = 4 + i; + stream_loc[tabi++] = 4 + i; } } @@ -180,7 +178,7 @@ static void r300_stream_locations_swtcl( for (i = 0; i < ATTR_GENERIC_COUNT; i++) { if (vs_outputs->bcolor[i] != ATTR_UNUSED) { assert(tabi < 16); - output_stream_loc[tabi++] = 6 + gen_count; + stream_loc[tabi++] = 6 + gen_count; gen_count++; } } @@ -188,7 +186,7 @@ static void r300_stream_locations_swtcl( /* Fog coordinates. */ if (vs_outputs->fog != ATTR_UNUSED) { assert(tabi < 16); - output_stream_loc[tabi++] = 6 + gen_count; + stream_loc[tabi++] = 6 + gen_count; gen_count++; } @@ -196,7 +194,7 @@ static void r300_stream_locations_swtcl( assert(gen_count <= 8); for (; tabi < 16;) { - output_stream_loc[tabi++] = -1; + stream_loc[tabi++] = -1; } } @@ -254,10 +252,7 @@ void r300_translate_vertex_shader(struct r300_context* r300, /* Initialize. */ r300_shader_read_vs_outputs(&vs->info, &vs->outputs); r300_shader_vap_output_fmt(&vs->outputs, vs->hwfmt); - - if (!r300_screen(r300->context.screen)->caps->has_tcl) { - r300_stream_locations_swtcl(&vs->outputs, vs->output_stream_loc_swtcl); - } + r300_stream_locations_notcl(&vs->outputs, vs->stream_loc_notcl); /* Setup the compiler */ rc_init(&compiler.Base); @@ -283,7 +278,7 @@ void r300_translate_vertex_shader(struct r300_context* r300, /* Invoke the compiler */ r3xx_compile_vertex_program(&compiler); if (compiler.Base.Error) { - /* XXX Fail gracefully */ + /* XXX We should fallback using Draw. */ fprintf(stderr, "r300 VP: Compiler error\n"); abort(); } diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 283dd5a9e8..67e9db5366 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -38,9 +38,11 @@ struct r300_vertex_shader { struct tgsi_shader_info info; struct r300_shader_semantics outputs; - int output_stream_loc_swtcl[16]; uint hwfmt[4]; + /* Stream locations for SWTCL or if TCL is bypassed. */ + int stream_loc_notcl[16]; + /* Has this shader been translated yet? */ boolean translated; -- cgit v1.2.3 From 770323e33e62169827454af74e9f90f09997f962 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 11 Dec 2009 12:09:02 +0000 Subject: svga: Fix mixed signed comparisons. --- src/gallium/drivers/svga/svga_screen_texture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c index e7301aba84..ed83ba48f0 100644 --- a/src/gallium/drivers/svga/svga_screen_texture.c +++ b/src/gallium/drivers/svga/svga_screen_texture.c @@ -528,7 +528,7 @@ svga_texture_view_surface(struct pipe_context *pipe, { struct svga_screen *ss = svga_screen(tex->base.screen); struct svga_winsys_surface *handle; - int i, j; + uint32_t i, j; unsigned z_offset = 0; SVGA_DBG(DEBUG_PERF, -- cgit v1.2.3 From 16876b8328059446b6fa0951f7848e5d500244ab Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 11 Dec 2009 12:29:02 +0000 Subject: svga: Keep tight control of texture handle ownership. The texture owns the surface handle. All derivatives need to keep a reference to texture. This fixes several assertions failures starting up Jedi Knight 2. Should cause no change for DRM surface sharing -- reference count still done as before there. --- src/gallium/drivers/svga/svga_screen_texture.c | 35 ++++++++++++++------------ src/gallium/drivers/svga/svga_screen_texture.h | 9 ++++++- 2 files changed, 27 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c index ed83ba48f0..1eb03db280 100644 --- a/src/gallium/drivers/svga/svga_screen_texture.c +++ b/src/gallium/drivers/svga/svga_screen_texture.c @@ -657,13 +657,11 @@ svga_get_tex_surface(struct pipe_screen *screen, s->real_level = 0; s->real_zslice = 0; } else { - struct svga_winsys_screen *sws = svga_winsys_screen(screen); - SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, face %u, z %u, %p\n", pt, level, face, zslice, s); memset(&s->key, 0, sizeof s->key); - sws->surface_reference(sws, &s->handle, tex->handle); + s->handle = tex->handle; s->real_face = face; s->real_level = level; s->real_zslice = zslice; @@ -677,11 +675,14 @@ static void svga_tex_surface_destroy(struct pipe_surface *surf) { struct svga_surface *s = svga_surface(surf); + struct svga_texture *t = svga_texture(surf->texture); struct svga_screen *ss = svga_screen(surf->texture->screen); - SVGA_DBG(DEBUG_DMA, "unref sid %p (tex surface)\n", s->handle); - assert(s->key.cachable == 0); - svga_screen_surface_destroy(ss, &s->key, &s->handle); + if(s->handle != t->handle) { + SVGA_DBG(DEBUG_DMA, "unref sid %p (tex surface)\n", s->handle); + svga_screen_surface_destroy(ss, &s->key, &s->handle); + } + pipe_texture_reference(&surf->texture, NULL); FREE(surf); } @@ -910,7 +911,6 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt, unsigned min_lod, unsigned max_lod) { struct svga_screen *ss = svga_screen(pt->screen); - struct svga_winsys_screen *sws = ss->sws; struct svga_texture *tex = svga_texture(pt); struct svga_sampler_view *sv = NULL; SVGA3dSurfaceFormat format = svga_translate_format(pt->format); @@ -961,7 +961,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt, sv = CALLOC_STRUCT(svga_sampler_view); pipe_reference_init(&sv->reference, 1); - sv->texture = tex; + pipe_texture_reference(&sv->texture, pt); sv->min_lod = min_lod; sv->max_lod = max_lod; @@ -976,7 +976,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt, pt->depth[0], pt->last_level); sv->key.cachable = 0; - sws->surface_reference(sws, &sv->handle, tex->handle); + sv->handle = tex->handle; return sv; } @@ -999,7 +999,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt, if (!sv->handle) { assert(0); sv->key.cachable = 0; - sws->surface_reference(sws, &sv->handle, tex->handle); + sv->handle = tex->handle; return sv; } @@ -1013,14 +1013,14 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt, void svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *v) { - struct svga_texture *tex = v->texture; + struct svga_texture *tex = svga_texture(v->texture); unsigned numFaces; unsigned age = 0; int i, k; assert(svga); - if (v->handle == v->texture->handle) + if (v->handle == tex->handle) return; age = tex->age; @@ -1048,11 +1048,14 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view * void svga_destroy_sampler_view_priv(struct svga_sampler_view *v) { - struct svga_screen *ss = svga_screen(v->texture->base.screen); - - SVGA_DBG(DEBUG_DMA, "unref sid %p (sampler view)\n", v->handle); - svga_screen_surface_destroy(ss, &v->key, &v->handle); + struct svga_texture *tex = svga_texture(v->texture); + if(v->handle != tex->handle) { + struct svga_screen *ss = svga_screen(v->texture->screen); + SVGA_DBG(DEBUG_DMA, "unref sid %p (sampler view)\n", v->handle); + svga_screen_surface_destroy(ss, &v->key, &v->handle); + } + pipe_texture_reference(&v->texture, NULL); FREE(v); } diff --git a/src/gallium/drivers/svga/svga_screen_texture.h b/src/gallium/drivers/svga/svga_screen_texture.h index 1cc4063e65..8cfdfea693 100644 --- a/src/gallium/drivers/svga/svga_screen_texture.h +++ b/src/gallium/drivers/svga/svga_screen_texture.h @@ -61,7 +61,7 @@ struct svga_sampler_view { struct pipe_reference reference; - struct svga_texture *texture; + struct pipe_texture *texture; int min_lod; int max_lod; @@ -94,6 +94,13 @@ struct svga_texture * operation. */ struct svga_host_surface_cache_key key; + + /** + * Handle for the host side surface. + * + * This handle is owned by this texture. Views should hold on to a reference + * to this texture and never destroy this handle directly. + */ struct svga_winsys_surface *handle; }; -- cgit v1.2.3 From 8469baf41bd4775eab2403ecf08ed013343943a5 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 11 Dec 2009 13:15:12 +0000 Subject: svga: Always pass SVGA3D_SURFACE_HINT_DYNAMIC. Since we're reusing buffers we're effectively transforming all of them into dynamic buffers. It would be nice to not cache long lived static buffers. But there is no way to detect the long lived from short lived ones yet. A good heuristic would be buffer size. --- src/gallium/drivers/svga/svga_screen_cache.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_screen_cache.c b/src/gallium/drivers/svga/svga_screen_cache.c index 8a06383f61..eff36e0bcc 100644 --- a/src/gallium/drivers/svga/svga_screen_cache.c +++ b/src/gallium/drivers/svga/svga_screen_cache.c @@ -277,6 +277,15 @@ svga_screen_surface_create(struct svga_screen *svgascreen, while(size < key->size.width) size <<= 1; key->size.width = size; + /* Since we're reusing buffers we're effectively transforming all + * of them into dynamic buffers. + * + * It would be nice to not cache long lived static buffers. But there + * is no way to detect the long lived from short lived ones yet. A + * good heuristic would be buffer size. + */ + key->flags &= ~SVGA3D_SURFACE_HINT_STATIC; + key->flags |= SVGA3D_SURFACE_HINT_DYNAMIC; } handle = svga_screen_cache_lookup(svgascreen, key); -- cgit v1.2.3 From ffae1f938d61165fce620bfd76ea7ae74dc63289 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 11 Dec 2009 14:14:03 +0000 Subject: svga: Add a missing dependency from the prescale state. Thanks for Keith to finding this. Fixes Jedi Knight 2 menus. --- src/gallium/drivers/svga/svga_state_constants.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c index 18cce7dde1..a5777d4fbd 100644 --- a/src/gallium/drivers/svga/svga_state_constants.c +++ b/src/gallium/drivers/svga/svga_state_constants.c @@ -231,7 +231,8 @@ static int emit_vs_consts( struct svga_context *svga, struct svga_tracked_state svga_hw_vs_parameters = { "hw vs params", - (SVGA_NEW_VS_CONST_BUFFER | + (SVGA_NEW_PRESCALE | + SVGA_NEW_VS_CONST_BUFFER | SVGA_NEW_ZERO_STRIDE | SVGA_NEW_VS_RESULT), emit_vs_consts -- cgit v1.2.3