diff options
Diffstat (limited to 'src/gallium/auxiliary')
154 files changed, 13307 insertions, 7535 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 1d0930e024..c4d6b528b7 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -105,9 +105,16 @@ C_SOURCES = \ util/u_cpu_detect.c \ util/u_dl.c \ util/u_draw_quad.c \ - util/u_format_access.c \ + util/u_format.c \ + util/u_format_other.c \ + util/u_format_s3tc.c \ + util/u_format_srgb.c \ util/u_format_table.c \ + util/u_format_tests.c \ + util/u_format_yuv.c \ + util/u_format_zs.c \ util/u_gen_mipmap.c \ + util/u_half.c \ util/u_handle_table.c \ util/u_hash_table.c \ util/u_hash.c \ @@ -118,19 +125,23 @@ C_SOURCES = \ util/u_mm.c \ util/u_rect.c \ util/u_ringbuffer.c \ + util/u_sampler.c \ util/u_simple_shaders.c \ util/u_snprintf.c \ util/u_surface.c \ util/u_texture.c \ util/u_tile.c \ - util/u_timed_winsys.c \ + util/u_transfer.c \ + util/u_resource.c \ util/u_upload_mgr.c \ - util/u_simple_screen.c \ - vl/vl_bitstream_parser.c \ - vl/vl_mpeg12_mc_renderer.c \ - vl/vl_compositor.c \ - vl/vl_csc.c \ - vl/vl_shader_build.c + target-helpers/wrap_screen.c + + # Disabling until pipe-video branch gets merged in + #vl/vl_bitstream_parser.c \ + #vl/vl_mpeg12_mc_renderer.c \ + #vl/vl_compositor.c \ + #vl/vl_csc.c \ + #vl/vl_shader_build.c \ GALLIVM_SOURCES = \ gallivm/lp_bld_alpha.c \ @@ -143,29 +154,31 @@ GALLIVM_SOURCES = \ gallivm/lp_bld_debug.c \ gallivm/lp_bld_depth.c \ gallivm/lp_bld_flow.c \ - gallivm/lp_bld_format_aos.c \ - gallivm/lp_bld_format_query.c \ gallivm/lp_bld_format_soa.c \ + gallivm/lp_bld_init.c \ gallivm/lp_bld_interp.c \ gallivm/lp_bld_intr.c \ gallivm/lp_bld_logic.c \ gallivm/lp_bld_pack.c \ + gallivm/lp_bld_printf.c \ gallivm/lp_bld_sample.c \ gallivm/lp_bld_sample_soa.c \ gallivm/lp_bld_struct.c \ gallivm/lp_bld_swizzle.c \ gallivm/lp_bld_tgsi_soa.c \ - gallivm/lp_bld_type.c + gallivm/lp_bld_type.c \ + draw/draw_llvm.c \ + draw/draw_pt_fetch_shade_pipeline_llvm.c \ + draw/draw_llvm_translate.c -GALLIVM_CPP_SOURCES = \ - gallivm/lp_bld_init.cpp +GALLIVM_CPP_SOURCES = GENERATED_SOURCES = \ indices/u_indices_gen.c \ indices/u_unfilled_gen.c \ - util/u_format_access.c \ - util/u_format_pack.h \ - util/u_format_table.c + util/u_format_srgb.c \ + util/u_format_table.c \ + util/u_half.c ifeq ($(MESA_LLVM),1) @@ -188,12 +201,11 @@ indices/u_indices_gen.c: indices/u_indices_gen.py indices/u_unfilled_gen.c: indices/u_unfilled_gen.py python $< > $@ -util/u_format_table.c: util/u_format_table.py util/u_format_parse.py util/u_format.csv - python util/u_format_table.py util/u_format.csv > $@ - -util/u_format_pack.h: util/u_format_pack.py util/u_format_parse.py util/u_format.csv - python util/u_format_pack.py util/u_format.csv > $@ +util/u_format_srgb.c: util/u_format_srgb.py + python $< > $@ -util/u_format_access.c: util/u_format_access.py util/u_format_parse.py util/u_format.csv - python util/u_format_access.py util/u_format.csv > $@ +util/u_format_table.c: util/u_format_table.py util/u_format_pack.py util/u_format_parse.py util/u_format.csv + python util/u_format_table.py util/u_format.csv > $@ +util/u_half.c: util/u_half.py + python util/u_half.py > $@ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index f365c4bbdd..3ad0600326 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -7,6 +7,8 @@ env.Append(CPPPATH = [ 'util', ]) +env.Tool('udis86') + env.CodeGenerate( target = 'indices/u_indices_gen.c', script = 'indices/u_indices_gen.py', @@ -22,26 +24,31 @@ env.CodeGenerate( ) env.CodeGenerate( - target = 'util/u_format_table.c', - script = 'util/u_format_table.py', - source = ['util/u_format.csv'], - command = 'python $SCRIPT $SOURCE > $TARGET' + target = 'util/u_format_srgb.c', + script = 'util/u_format_srgb.py', + source = [], + command = python_cmd + ' $SCRIPT > $TARGET' ) env.CodeGenerate( - target = File('util/u_format_pack.h').srcnode(), - script = 'util/u_format_pack.py', + target = 'util/u_format_table.c', + script = 'util/u_format_table.py', source = ['util/u_format.csv'], command = 'python $SCRIPT $SOURCE > $TARGET' ) env.CodeGenerate( - target = 'util/u_format_access.c', - script = 'util/u_format_access.py', - source = ['util/u_format.csv'], - command = 'python $SCRIPT $SOURCE > $TARGET' + target = 'util/u_half.c', + script = 'util/u_half.py', + source = [], + command = 'python $SCRIPT > $TARGET' ) +env.Depends('util/u_format_table.c', [ + 'util/u_format_parse.py', + 'util/u_format_pack.py', +]) + source = [ 'cso_cache/cso_context.c', 'cso_cache/cso_cache.c', @@ -147,9 +154,16 @@ source = [ 'util/u_dump_state.c', 'util/u_dl.c', 'util/u_draw_quad.c', - 'util/u_format_access.c', + 'util/u_format.c', + 'util/u_format_other.c', + 'util/u_format_s3tc.c', + 'util/u_format_srgb.c', 'util/u_format_table.c', + 'util/u_format_tests.c', + 'util/u_format_yuv.c', + 'util/u_format_zs.c', 'util/u_gen_mipmap.c', + 'util/u_half.c', 'util/u_handle_table.c', 'util/u_hash.c', 'util/u_hash_table.c', @@ -158,23 +172,26 @@ source = [ 'util/u_math.c', 'util/u_mm.c', 'util/u_rect.c', + 'util/u_resource.c', 'util/u_ringbuffer.c', + 'util/u_sampler.c', 'util/u_simple_shaders.c', 'util/u_snprintf.c', 'util/u_surface.c', 'util/u_texture.c', 'util/u_tile.c', - 'util/u_timed_winsys.c', + 'util/u_transfer.c', 'util/u_upload_mgr.c', - 'util/u_simple_screen.c', - 'vl/vl_bitstream_parser.c', - 'vl/vl_mpeg12_mc_renderer.c', - 'vl/vl_compositor.c', - 'vl/vl_csc.c', - 'vl/vl_shader_build.c', + # Disabling until pipe-video branch gets merged in + #'vl/vl_bitstream_parser.c', + #'vl/vl_mpeg12_mc_renderer.c', + #'vl/vl_compositor.c', + #'vl/vl_csc.c', + #'vl/vl_shader_build.c', + 'target-helpers/wrap_screen.c', ] -if drawllvm: +if env['llvm']: source += [ 'gallivm/lp_bld_alpha.c', 'gallivm/lp_bld_arit.c', @@ -186,20 +203,22 @@ if drawllvm: 'gallivm/lp_bld_debug.c', 'gallivm/lp_bld_depth.c', 'gallivm/lp_bld_flow.c', - 'gallivm/lp_bld_format_aos.c', - 'gallivm/lp_bld_format_query.c', 'gallivm/lp_bld_format_soa.c', 'gallivm/lp_bld_interp.c', 'gallivm/lp_bld_intr.c', 'gallivm/lp_bld_logic.c', - 'gallivm/lp_bld_init.cpp', + 'gallivm/lp_bld_init.c', 'gallivm/lp_bld_pack.c', + 'gallivm/lp_bld_printf.c', 'gallivm/lp_bld_sample.c', 'gallivm/lp_bld_sample_soa.c', 'gallivm/lp_bld_struct.c', 'gallivm/lp_bld_swizzle.c', 'gallivm/lp_bld_tgsi_soa.c', 'gallivm/lp_bld_type.c', + 'draw/draw_llvm.c', + 'draw/draw_pt_fetch_shade_pipeline_llvm.c', + 'draw/draw_llvm_translate.c' ] gallium = env.ConvenienceLibrary( diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index a6a07e72c2..900c64df4b 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -43,6 +43,7 @@ struct cso_cache { struct cso_hash *vs_hash; struct cso_hash *rasterizer_hash; struct cso_hash *sampler_hash; + struct cso_hash *velements_hash; int max_size; cso_sanitize_callback sanitize_cb; @@ -108,6 +109,9 @@ static struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_ case CSO_VERTEX_SHADER: hash = sc->vs_hash; break; + case CSO_VELEMENTS: + hash = sc->velements_hash; + break; } return hash; @@ -161,6 +165,13 @@ static void delete_vs_state(void *state, void *data) FREE(state); } +static void delete_velements(void *state, void *data) +{ + struct cso_velements *cso = (struct cso_velements *)state; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); +} static INLINE void delete_cso(void *state, enum cso_cache_type type) { @@ -183,6 +194,9 @@ static INLINE void delete_cso(void *state, enum cso_cache_type type) case CSO_VERTEX_SHADER: delete_vs_state(state, 0); break; + case CSO_VELEMENTS: + delete_velements(state, 0); + break; default: assert(0); FREE(state); @@ -294,6 +308,7 @@ struct cso_cache *cso_cache_create(void) sc->rasterizer_hash = cso_hash_create(); sc->fs_hash = cso_hash_create(); sc->vs_hash = cso_hash_create(); + sc->velements_hash = cso_hash_create(); sc->sanitize_cb = sanitize_cb; sc->sanitize_data = 0; @@ -325,6 +340,9 @@ void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, case CSO_VERTEX_SHADER: hash = sc->vs_hash; break; + case CSO_VELEMENTS: + hash = sc->velements_hash; + break; } iter = cso_hash_first_node(hash); @@ -351,6 +369,7 @@ void cso_cache_delete(struct cso_cache *sc) cso_for_each_state(sc, CSO_VERTEX_SHADER, delete_vs_state, 0); cso_for_each_state(sc, CSO_RASTERIZER, delete_rasterizer_state, 0); cso_for_each_state(sc, CSO_SAMPLER, delete_sampler_state, 0); + cso_for_each_state(sc, CSO_VELEMENTS, delete_velements, 0); cso_hash_delete(sc->blend_hash); cso_hash_delete(sc->sampler_hash); @@ -358,6 +377,7 @@ void cso_cache_delete(struct cso_cache *sc) cso_hash_delete(sc->rasterizer_hash); cso_hash_delete(sc->fs_hash); cso_hash_delete(sc->vs_hash); + cso_hash_delete(sc->velements_hash); FREE(sc); } @@ -372,6 +392,7 @@ void cso_set_maximum_cache_size(struct cso_cache *sc, int number) sanitize_hash(sc, sc->vs_hash, CSO_VERTEX_SHADER, sc->max_size); sanitize_hash(sc, sc->rasterizer_hash, CSO_RASTERIZER, sc->max_size); sanitize_hash(sc, sc->sampler_hash, CSO_SAMPLER, sc->max_size); + sanitize_hash(sc, sc->velements_hash, CSO_VELEMENTS, sc->max_size); } int cso_maximum_cache_size(const struct cso_cache *sc) diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h index eea60b940b..fb09b83c62 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.h +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -53,6 +53,7 @@ * - rasterizer (old setup) * - sampler * - vertex shader + * - vertex elements * * Things that are not constant state objects include: * - blend_color @@ -90,7 +91,8 @@ enum cso_cache_type { CSO_DEPTH_STENCIL_ALPHA, CSO_RASTERIZER, CSO_FRAGMENT_SHADER, - CSO_VERTEX_SHADER + CSO_VERTEX_SHADER, + CSO_VELEMENTS }; typedef void (*cso_state_callback)(void *ctx, void *obj); @@ -144,6 +146,18 @@ struct cso_sampler { struct pipe_context *context; }; +struct cso_velems_state { + unsigned count; + struct pipe_vertex_element velems[PIPE_MAX_ATTRIBS]; +}; + +struct cso_velements { + struct cso_velems_state state; + void *data; + cso_state_callback delete_state; + struct pipe_context *context; +}; + unsigned cso_construct_key(void *item, int item_size); struct cso_cache *cso_cache_create(void); diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index a7335c340c..6fd4bd3642 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -37,6 +37,7 @@ #include "pipe/p_state.h" #include "util/u_inlines.h" +#include "util/u_math.h" #include "util/u_memory.h" #include "tgsi/tgsi_parse.h" @@ -69,17 +70,17 @@ struct cso_context { unsigned nr_vertex_samplers_saved; void *vertex_samplers_saved[PIPE_MAX_VERTEX_SAMPLERS]; - struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; - uint nr_textures; + uint nr_fragment_sampler_views; + struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS]; - struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS]; - uint nr_vertex_textures; + uint nr_vertex_sampler_views; + struct pipe_sampler_view *vertex_sampler_views[PIPE_MAX_VERTEX_SAMPLERS]; - uint nr_textures_saved; - struct pipe_texture *textures_saved[PIPE_MAX_SAMPLERS]; + uint nr_fragment_sampler_views_saved; + struct pipe_sampler_view *fragment_sampler_views_saved[PIPE_MAX_SAMPLERS]; - uint nr_vertex_textures_saved; - struct pipe_texture *vertex_textures_saved[PIPE_MAX_SAMPLERS]; + uint nr_vertex_sampler_views_saved; + struct pipe_sampler_view *vertex_sampler_views_saved[PIPE_MAX_VERTEX_SAMPLERS]; /** Current and saved state. * The saved state is used as a 1-deep stack. @@ -89,6 +90,7 @@ struct cso_context { void *rasterizer, *rasterizer_saved; void *fragment_shader, *fragment_shader_saved, *geometry_shader; void *vertex_shader, *vertex_shader_saved, *geometry_shader_saved; + void *velements, *velements_saved; struct pipe_clip_state clip; struct pipe_clip_state clip_saved; @@ -174,6 +176,20 @@ static boolean delete_vs_state(struct cso_context *ctx, void *state) return FALSE; } +static boolean delete_vertex_elements(struct cso_context *ctx, + void *state) +{ + struct cso_velements *cso = (struct cso_velements *)state; + + if (ctx->velements == cso->data) + return FALSE; + + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); + return TRUE; +} + static INLINE boolean delete_cso(struct cso_context *ctx, void *state, enum cso_cache_type type) @@ -197,6 +213,9 @@ static INLINE boolean delete_cso(struct cso_context *ctx, case CSO_VERTEX_SHADER: return delete_vs_state(ctx, state); break; + case CSO_VELEMENTS: + return delete_vertex_elements(ctx, state); + break; default: assert(0); FREE(state); @@ -271,16 +290,17 @@ void cso_release_all( struct cso_context *ctx ) ctx->pipe->bind_depth_stencil_alpha_state( ctx->pipe, NULL ); ctx->pipe->bind_fs_state( ctx->pipe, NULL ); ctx->pipe->bind_vs_state( ctx->pipe, NULL ); + ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL ); } for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - pipe_texture_reference(&ctx->textures[i], NULL); - pipe_texture_reference(&ctx->textures_saved[i], NULL); + pipe_sampler_view_reference(&ctx->fragment_sampler_views[i], NULL); + pipe_sampler_view_reference(&ctx->fragment_sampler_views_saved[i], NULL); } for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - pipe_texture_reference(&ctx->vertex_textures[i], NULL); - pipe_texture_reference(&ctx->vertex_textures_saved[i], NULL); + pipe_sampler_view_reference(&ctx->vertex_sampler_views[i], NULL); + pipe_sampler_view_reference(&ctx->vertex_sampler_views_saved[i], NULL); } free_framebuffer_state(&ctx->fb); @@ -597,114 +617,6 @@ cso_restore_vertex_samplers(struct cso_context *ctx) } -enum pipe_error cso_set_sampler_textures( struct cso_context *ctx, - uint count, - struct pipe_texture **textures ) -{ - uint i; - - ctx->nr_textures = count; - - for (i = 0; i < count; i++) - pipe_texture_reference(&ctx->textures[i], textures[i]); - for ( ; i < PIPE_MAX_SAMPLERS; i++) - pipe_texture_reference(&ctx->textures[i], NULL); - - ctx->pipe->set_fragment_sampler_textures(ctx->pipe, count, textures); - - return PIPE_OK; -} - -void cso_save_sampler_textures( struct cso_context *ctx ) -{ - uint i; - - ctx->nr_textures_saved = ctx->nr_textures; - for (i = 0; i < ctx->nr_textures; i++) { - assert(!ctx->textures_saved[i]); - pipe_texture_reference(&ctx->textures_saved[i], ctx->textures[i]); - } -} - -void cso_restore_sampler_textures( struct cso_context *ctx ) -{ - uint i; - - ctx->nr_textures = ctx->nr_textures_saved; - - for (i = 0; i < ctx->nr_textures; i++) { - pipe_texture_reference(&ctx->textures[i], NULL); - ctx->textures[i] = ctx->textures_saved[i]; - ctx->textures_saved[i] = NULL; - } - for ( ; i < PIPE_MAX_SAMPLERS; i++) - pipe_texture_reference(&ctx->textures[i], NULL); - - ctx->pipe->set_fragment_sampler_textures(ctx->pipe, ctx->nr_textures, ctx->textures); - - ctx->nr_textures_saved = 0; -} - - - -enum pipe_error -cso_set_vertex_sampler_textures(struct cso_context *ctx, - uint count, - struct pipe_texture **textures) -{ - uint i; - - ctx->nr_vertex_textures = count; - - for (i = 0; i < count; i++) { - pipe_texture_reference(&ctx->vertex_textures[i], textures[i]); - } - for ( ; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - pipe_texture_reference(&ctx->vertex_textures[i], NULL); - } - - ctx->pipe->set_vertex_sampler_textures(ctx->pipe, count, textures); - - return PIPE_OK; -} - -void -cso_save_vertex_sampler_textures(struct cso_context *ctx) -{ - uint i; - - ctx->nr_vertex_textures_saved = ctx->nr_vertex_textures; - for (i = 0; i < ctx->nr_vertex_textures; i++) { - assert(!ctx->vertex_textures_saved[i]); - pipe_texture_reference(&ctx->vertex_textures_saved[i], ctx->vertex_textures[i]); - } -} - -void -cso_restore_vertex_sampler_textures(struct cso_context *ctx) -{ - uint i; - - ctx->nr_vertex_textures = ctx->nr_vertex_textures_saved; - - for (i = 0; i < ctx->nr_vertex_textures; i++) { - pipe_texture_reference(&ctx->vertex_textures[i], NULL); - ctx->vertex_textures[i] = ctx->vertex_textures_saved[i]; - ctx->vertex_textures_saved[i] = NULL; - } - for ( ; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - pipe_texture_reference(&ctx->vertex_textures[i], NULL); - } - - ctx->pipe->set_vertex_sampler_textures(ctx->pipe, - ctx->nr_vertex_textures, - ctx->vertex_textures); - - ctx->nr_vertex_textures_saved = 0; -} - - - enum pipe_error cso_set_depth_stencil_alpha(struct cso_context *ctx, const struct pipe_depth_stencil_alpha_state *templ) { @@ -1130,7 +1042,6 @@ void cso_restore_geometry_shader(struct cso_context *ctx) ctx->geometry_shader_saved = NULL; } - /* clip state */ static INLINE void @@ -1180,3 +1091,185 @@ cso_restore_clip(struct cso_context *ctx) ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip_saved); } } + +enum pipe_error cso_set_vertex_elements(struct cso_context *ctx, + unsigned count, + const struct pipe_vertex_element *states) +{ + unsigned key_size, hash_key; + struct cso_hash_iter iter; + void *handle; + struct cso_velems_state velems_state; + + /* need to include the count into the stored state data too. + Otherwise first few count pipe_vertex_elements could be identical even if count + is different, and there's no guarantee the hash would be different in that + case neither */ + key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned); + velems_state.count = count; + memcpy(velems_state.velems, states, sizeof(struct pipe_vertex_element) * count); + hash_key = cso_construct_key((void*)&velems_state, key_size); + iter = cso_find_state_template(ctx->cache, hash_key, CSO_VELEMENTS, (void*)&velems_state, key_size); + + if (cso_hash_iter_is_null(iter)) { + struct cso_velements *cso = MALLOC(sizeof(struct cso_velements)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; + + memcpy(&cso->state, &velems_state, key_size); + cso->data = ctx->pipe->create_vertex_elements_state(ctx->pipe, count, &cso->state.velems[0]); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_vertex_elements_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_VELEMENTS, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + handle = cso->data; + } + else { + handle = ((struct cso_velements *)cso_hash_iter_data(iter))->data; + } + + if (ctx->velements != handle) { + ctx->velements = handle; + ctx->pipe->bind_vertex_elements_state(ctx->pipe, handle); + } + return PIPE_OK; +} + +void cso_save_vertex_elements(struct cso_context *ctx) +{ + assert(!ctx->velements_saved); + ctx->velements_saved = ctx->velements; +} + +void cso_restore_vertex_elements(struct cso_context *ctx) +{ + if (ctx->velements != ctx->velements_saved) { + ctx->velements = ctx->velements_saved; + ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->velements_saved); + } + ctx->velements_saved = NULL; +} + +/* fragment sampler view state */ + +void +cso_set_fragment_sampler_views(struct cso_context *cso, + uint count, + struct pipe_sampler_view **views) +{ + uint i; + + for (i = 0; i < count; i++) { + pipe_sampler_view_reference(&cso->fragment_sampler_views[i], views[i]); + } + for (; i < cso->nr_fragment_sampler_views; i++) { + pipe_sampler_view_reference(&cso->fragment_sampler_views[i], NULL); + } + + cso->pipe->set_fragment_sampler_views(cso->pipe, + MAX2(count, cso->nr_fragment_sampler_views), + cso->fragment_sampler_views); + + cso->nr_fragment_sampler_views = count; +} + +void +cso_save_fragment_sampler_views(struct cso_context *cso) +{ + uint i; + + cso->nr_fragment_sampler_views_saved = cso->nr_fragment_sampler_views; + + for (i = 0; i < cso->nr_fragment_sampler_views; i++) { + assert(!cso->fragment_sampler_views_saved[i]); + + pipe_sampler_view_reference(&cso->fragment_sampler_views_saved[i], + cso->fragment_sampler_views[i]); + } +} + +void +cso_restore_fragment_sampler_views(struct cso_context *cso) +{ + uint i; + + for (i = 0; i < cso->nr_fragment_sampler_views_saved; i++) { + pipe_sampler_view_reference(&cso->fragment_sampler_views[i], cso->fragment_sampler_views_saved[i]); + pipe_sampler_view_reference(&cso->fragment_sampler_views_saved[i], NULL); + } + for (; i < cso->nr_fragment_sampler_views; i++) { + pipe_sampler_view_reference(&cso->fragment_sampler_views[i], NULL); + } + + cso->pipe->set_fragment_sampler_views(cso->pipe, + MAX2(cso->nr_fragment_sampler_views, cso->nr_fragment_sampler_views_saved), + cso->fragment_sampler_views); + + cso->nr_fragment_sampler_views = cso->nr_fragment_sampler_views_saved; + cso->nr_fragment_sampler_views_saved = 0; +} + + +/* vertex sampler view state */ + +void +cso_set_vertex_sampler_views(struct cso_context *cso, + uint count, + struct pipe_sampler_view **views) +{ + uint i; + + for (i = 0; i < count; i++) { + pipe_sampler_view_reference(&cso->vertex_sampler_views[i], views[i]); + } + for (; i < cso->nr_vertex_sampler_views; i++) { + pipe_sampler_view_reference(&cso->vertex_sampler_views[i], NULL); + } + + cso->pipe->set_vertex_sampler_views(cso->pipe, + MAX2(count, cso->nr_vertex_sampler_views), + cso->vertex_sampler_views); + + cso->nr_vertex_sampler_views = count; +} + +void +cso_save_vertex_sampler_views(struct cso_context *cso) +{ + uint i; + + cso->nr_vertex_sampler_views_saved = cso->nr_vertex_sampler_views; + + for (i = 0; i < cso->nr_vertex_sampler_views; i++) { + assert(!cso->vertex_sampler_views_saved[i]); + + pipe_sampler_view_reference(&cso->vertex_sampler_views_saved[i], + cso->vertex_sampler_views[i]); + } +} + +void +cso_restore_vertex_sampler_views(struct cso_context *cso) +{ + uint i; + + for (i = 0; i < cso->nr_vertex_sampler_views_saved; i++) { + pipe_sampler_view_reference(&cso->vertex_sampler_views[i], cso->vertex_sampler_views_saved[i]); + pipe_sampler_view_reference(&cso->vertex_sampler_views_saved[i], NULL); + } + for (; i < cso->nr_vertex_sampler_views; i++) { + pipe_sampler_view_reference(&cso->vertex_sampler_views[i], NULL); + } + + cso->pipe->set_vertex_sampler_views(cso->pipe, + MAX2(cso->nr_vertex_sampler_views, cso->nr_vertex_sampler_views_saved), + cso->vertex_sampler_views); + + cso->nr_vertex_sampler_views = cso->nr_vertex_sampler_views_saved; + cso->nr_vertex_sampler_views_saved = 0; +} diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index 251a9a644f..d6bcb1fe8f 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -103,24 +103,11 @@ void cso_single_vertex_sampler_done(struct cso_context *cso); - -enum pipe_error cso_set_sampler_textures( struct cso_context *cso, - uint count, - struct pipe_texture **textures ); -void cso_save_sampler_textures( struct cso_context *cso ); -void cso_restore_sampler_textures( struct cso_context *cso ); - - - -enum pipe_error -cso_set_vertex_sampler_textures(struct cso_context *cso, - uint count, - struct pipe_texture **textures); -void -cso_save_vertex_sampler_textures(struct cso_context *cso); -void -cso_restore_vertex_sampler_textures(struct cso_context *cso); - +enum pipe_error cso_set_vertex_elements(struct cso_context *ctx, + unsigned count, + const struct pipe_vertex_element *states); +void cso_save_vertex_elements(struct cso_context *ctx); +void cso_restore_vertex_elements(struct cso_context *ctx); /* These aren't really sensible -- most of the time the api provides @@ -157,7 +144,6 @@ void cso_save_geometry_shader(struct cso_context *cso); void cso_restore_geometry_shader(struct cso_context *cso); - enum pipe_error cso_set_framebuffer(struct cso_context *cso, const struct pipe_framebuffer_state *fb); void cso_save_framebuffer(struct cso_context *cso); @@ -193,6 +179,34 @@ void cso_restore_clip(struct cso_context *cso); +/* fragment sampler view state */ + +void +cso_set_fragment_sampler_views(struct cso_context *cso, + uint count, + struct pipe_sampler_view **views); + +void +cso_save_fragment_sampler_views(struct cso_context *cso); + +void +cso_restore_fragment_sampler_views(struct cso_context *cso); + + +/* vertex sampler view state */ + +void +cso_set_vertex_sampler_views(struct cso_context *cso, + uint count, + struct pipe_sampler_view **views); + +void +cso_save_vertex_sampler_views(struct cso_context *cso); + +void +cso_restore_vertex_sampler_views(struct cso_context *cso); + + #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index bb0988543f..b6574a9fea 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -44,6 +44,18 @@ struct draw_context *draw_create( void ) if (draw == NULL) goto fail; + if (!draw_init(draw)) + goto fail; + + return draw; + +fail: + draw_destroy( draw ); + return NULL; +} + +boolean draw_init(struct draw_context *draw) +{ ASSIGN_4V( draw->plane[0], -1, 0, 0, 1 ); ASSIGN_4V( draw->plane[1], 1, 0, 0, 1 ); ASSIGN_4V( draw->plane[2], 0, -1, 0, 1 ); @@ -57,22 +69,18 @@ struct draw_context *draw_create( void ) if (!draw_pipeline_init( draw )) - goto fail; + return FALSE; if (!draw_pt_init( draw )) - goto fail; + return FALSE; if (!draw_vs_init( draw )) - goto fail; + return FALSE; if (!draw_gs_init( draw )) - goto fail; + return FALSE; - return draw; - -fail: - draw_destroy( draw ); - return NULL; + return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index acd81b9712..1af4961716 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -40,7 +40,6 @@ #include "pipe/p_state.h" - struct pipe_context; struct draw_context; struct draw_stage; @@ -197,6 +196,11 @@ boolean draw_need_pipeline(const struct draw_context *draw, const struct pipe_rasterizer_state *rasterizer, unsigned prim ); - +#ifdef HAVE_LLVM +/******************************************************************************* + * LLVM integration + */ +struct draw_context *draw_create_with_llvm(void); +#endif #endif /* DRAW_CONTEXT_H */ diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 7069aa6b18..131deed43e 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -342,10 +342,10 @@ void draw_geometry_shader_delete(struct draw_geometry_shader *shader) void draw_geometry_shader_prepare(struct draw_geometry_shader *shader, struct draw_context *draw) { - if (shader->machine->Tokens != shader->state.tokens) { - tgsi_exec_machine_bind_shader(shader->machine, - shader->state.tokens, - draw->gs.num_samplers, - draw->gs.samplers); - } + if (shader && shader->machine->Tokens != shader->state.tokens) { + tgsi_exec_machine_bind_shader(shader->machine, + shader->state.tokens, + draw->gs.num_samplers, + draw->gs.samplers); + } } diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c new file mode 100644 index 0000000000..364a560eca --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -0,0 +1,727 @@ +#include "draw_llvm.h" + +#include "draw_context.h" +#include "draw_vs.h" + +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_interp.h" +#include "gallivm/lp_bld_struct.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_tgsi.h" +#include "gallivm/lp_bld_printf.h" +#include "gallivm/lp_bld_init.h" + +#include "util/u_cpu_detect.h" +#include "util/u_string.h" + +#include <llvm-c/Transforms/Scalar.h> + +#define DEBUG_STORE 0 + + +/* generates the draw jit function */ +static void +draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var); + +static void +init_globals(struct draw_llvm *llvm) +{ + LLVMTypeRef texture_type; + + /* struct draw_jit_texture */ + { + LLVMTypeRef elem_types[4]; + + elem_types[DRAW_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); + elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); + elem_types[DRAW_JIT_TEXTURE_STRIDE] = LLVMInt32Type(); + elem_types[DRAW_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0); + + texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); + + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_WIDTH); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_HEIGHT); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, stride, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_STRIDE); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_DATA); + LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, + llvm->target, texture_type); + + LLVMAddTypeName(llvm->module, "texture", texture_type); + } + + + /* struct draw_jit_context */ + { + LLVMTypeRef elem_types[3]; + LLVMTypeRef context_type; + + elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ + elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ + elem_types[2] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ + + context_type = LLVMStructType(elem_types, Elements(elem_types), 0); + + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants, + llvm->target, context_type, 0); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants, + llvm->target, context_type, 1); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures, + llvm->target, context_type, + DRAW_JIT_CONTEXT_TEXTURES_INDEX); + LP_CHECK_STRUCT_SIZE(struct draw_jit_context, + llvm->target, context_type); + + LLVMAddTypeName(llvm->module, "draw_jit_context", context_type); + + llvm->context_ptr_type = LLVMPointerType(context_type, 0); + } + { + LLVMTypeRef buffer_ptr = LLVMPointerType(LLVMIntType(8), 0); + llvm->buffer_ptr_type = LLVMPointerType(buffer_ptr, 0); + } + /* struct pipe_vertex_buffer */ + { + LLVMTypeRef elem_types[4]; + LLVMTypeRef vb_type; + + elem_types[0] = LLVMInt32Type(); + elem_types[1] = LLVMInt32Type(); + elem_types[2] = LLVMInt32Type(); + elem_types[3] = LLVMPointerType(LLVMOpaqueType(), 0); /* vs_constants */ + + vb_type = LLVMStructType(elem_types, Elements(elem_types), 0); + + LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride, + llvm->target, vb_type, 0); + LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset, + llvm->target, vb_type, 2); + LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, + llvm->target, vb_type); + + LLVMAddTypeName(llvm->module, "pipe_vertex_buffer", vb_type); + + llvm->vb_ptr_type = LLVMPointerType(vb_type, 0); + } +} + +static LLVMTypeRef +create_vertex_header(struct draw_llvm *llvm, int data_elems) +{ + /* struct vertex_header */ + LLVMTypeRef elem_types[3]; + LLVMTypeRef vertex_header; + char struct_name[24]; + + util_snprintf(struct_name, 23, "vertex_header%d", data_elems); + + elem_types[0] = LLVMIntType(32); + elem_types[1] = LLVMArrayType(LLVMFloatType(), 4); + elem_types[2] = LLVMArrayType(elem_types[1], data_elems); + + vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0); + + /* these are bit-fields and we can't take address of them + LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_CLIPMASK); + LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_EDGEFLAG); + LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_PAD); + LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_VERTEX_ID); + */ + LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_CLIP); + LP_CHECK_MEMBER_OFFSET(struct vertex_header, data, + llvm->target, vertex_header, + DRAW_JIT_VERTEX_DATA); + + LLVMAddTypeName(llvm->module, struct_name, vertex_header); + + return LLVMPointerType(vertex_header, 0); +} + +struct draw_llvm * +draw_llvm_create(struct draw_context *draw) +{ + struct draw_llvm *llvm = CALLOC_STRUCT( draw_llvm ); + + util_cpu_detect(); + + llvm->draw = draw; + llvm->engine = draw->engine; + + debug_assert(llvm->engine); + + llvm->module = LLVMModuleCreateWithName("draw_llvm"); + llvm->provider = LLVMCreateModuleProviderForExistingModule(llvm->module); + + LLVMAddModuleProvider(llvm->engine, llvm->provider); + + llvm->target = LLVMGetExecutionEngineTargetData(llvm->engine); + + llvm->pass = LLVMCreateFunctionPassManager(llvm->provider); + LLVMAddTargetData(llvm->target, llvm->pass); + /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, + * but there are more on SVN. */ + /* TODO: Add more passes */ + LLVMAddConstantPropagationPass(llvm->pass); + if(util_cpu_caps.has_sse4_1) { + /* FIXME: There is a bug in this pass, whereby the combination of fptosi + * and sitofp (necessary for trunc/floor/ceil/round implementation) + * somehow becomes invalid code. + */ + LLVMAddInstructionCombiningPass(llvm->pass); + } + LLVMAddPromoteMemoryToRegisterPass(llvm->pass); + LLVMAddGVNPass(llvm->pass); + LLVMAddCFGSimplificationPass(llvm->pass); + + init_globals(llvm); + + +#if 1 + LLVMDumpModule(llvm->module); +#endif + + return llvm; +} + +void +draw_llvm_destroy(struct draw_llvm *llvm) +{ + free(llvm); +} + +struct draw_llvm_variant * +draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs) +{ + struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant)); + + draw_llvm_make_variant_key(llvm, &variant->key); + + llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs); + + draw_llvm_generate(llvm, variant); + + return variant; +} + + +struct draw_context *draw_create_with_llvm(void) +{ + struct draw_context *draw = CALLOC_STRUCT( draw_context ); + if (draw == NULL) + goto fail; + + assert(lp_build_engine); + draw->engine = lp_build_engine; + + if (!draw_init(draw)) + goto fail; + + return draw; + +fail: + draw_destroy( draw ); + return NULL; +} + +static void +generate_vs(struct draw_llvm *llvm, + LLVMBuilderRef builder, + LLVMValueRef (*outputs)[NUM_CHANNELS], + const LLVMValueRef (*inputs)[NUM_CHANNELS], + LLVMValueRef context_ptr) +{ + const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens; + struct lp_type vs_type; + LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr); + + memset(&vs_type, 0, sizeof vs_type); + vs_type.floating = TRUE; /* floating point values */ + vs_type.sign = TRUE; /* values are signed */ + vs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ + vs_type.width = 32; /* 32-bit float */ + vs_type.length = 4; /* 4 elements per vector */ +#if 0 + num_vs = 4; /* number of vertices per block */ +#endif + + /*tgsi_dump(tokens, 0);*/ + lp_build_tgsi_soa(builder, + tokens, + vs_type, + NULL /*struct lp_build_mask_context *mask*/, + consts_ptr, + NULL /*pos*/, + inputs, + outputs, + NULL/*sampler*/); +} + +#if DEBUG_STORE +static void print_vectorf(LLVMBuilderRef builder, + LLVMValueRef vec) +{ + LLVMValueRef val[4]; + val[0] = LLVMBuildExtractElement(builder, vec, + LLVMConstInt(LLVMInt32Type(), 0, 0), ""); + val[1] = LLVMBuildExtractElement(builder, vec, + LLVMConstInt(LLVMInt32Type(), 1, 0), ""); + val[2] = LLVMBuildExtractElement(builder, vec, + LLVMConstInt(LLVMInt32Type(), 2, 0), ""); + val[3] = LLVMBuildExtractElement(builder, vec, + LLVMConstInt(LLVMInt32Type(), 3, 0), ""); + lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n", + val[0], val[1], val[2], val[3]); +} +#endif + +static void +generate_fetch(LLVMBuilderRef builder, + LLVMValueRef vbuffers_ptr, + LLVMValueRef *res, + struct pipe_vertex_element *velem, + LLVMValueRef vbuf, + LLVMValueRef index) +{ + LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0); + LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, + &indices, 1, ""); + LLVMValueRef vb_stride = draw_jit_vbuffer_stride(builder, vbuf); + LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(builder, vbuf); + LLVMValueRef stride = LLVMBuildMul(builder, + vb_stride, + index, ""); + + vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer"); + + stride = LLVMBuildAdd(builder, stride, + vb_buffer_offset, + ""); + stride = LLVMBuildAdd(builder, stride, + LLVMConstInt(LLVMInt32Type(), velem->src_offset, 0), + ""); + + /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/ + vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, ""); + + *res = draw_llvm_translate_from(builder, vbuffer_ptr, velem->src_format); +} + +static LLVMValueRef +aos_to_soa(LLVMBuilderRef builder, + LLVMValueRef val0, + LLVMValueRef val1, + LLVMValueRef val2, + LLVMValueRef val3, + LLVMValueRef channel) +{ + LLVMValueRef ex, res; + + ex = LLVMBuildExtractElement(builder, val0, + channel, ""); + res = LLVMBuildInsertElement(builder, + LLVMConstNull(LLVMTypeOf(val0)), + ex, + LLVMConstInt(LLVMInt32Type(), 0, 0), + ""); + + ex = LLVMBuildExtractElement(builder, val1, + channel, ""); + res = LLVMBuildInsertElement(builder, + res, ex, + LLVMConstInt(LLVMInt32Type(), 1, 0), + ""); + + ex = LLVMBuildExtractElement(builder, val2, + channel, ""); + res = LLVMBuildInsertElement(builder, + res, ex, + LLVMConstInt(LLVMInt32Type(), 2, 0), + ""); + + ex = LLVMBuildExtractElement(builder, val3, + channel, ""); + res = LLVMBuildInsertElement(builder, + res, ex, + LLVMConstInt(LLVMInt32Type(), 3, 0), + ""); + + return res; +} + +static void +soa_to_aos(LLVMBuilderRef builder, + LLVMValueRef soa[NUM_CHANNELS], + LLVMValueRef aos[NUM_CHANNELS]) +{ + LLVMValueRef comp; + int i = 0; + + debug_assert(NUM_CHANNELS == 4); + + aos[0] = LLVMConstNull(LLVMTypeOf(soa[0])); + aos[1] = aos[2] = aos[3] = aos[0]; + + for (i = 0; i < NUM_CHANNELS; ++i) { + LLVMValueRef channel = LLVMConstInt(LLVMInt32Type(), i, 0); + + comp = LLVMBuildExtractElement(builder, soa[i], + LLVMConstInt(LLVMInt32Type(), 0, 0), ""); + aos[0] = LLVMBuildInsertElement(builder, aos[0], comp, channel, ""); + + comp = LLVMBuildExtractElement(builder, soa[i], + LLVMConstInt(LLVMInt32Type(), 1, 0), ""); + aos[1] = LLVMBuildInsertElement(builder, aos[1], comp, channel, ""); + + comp = LLVMBuildExtractElement(builder, soa[i], + LLVMConstInt(LLVMInt32Type(), 2, 0), ""); + aos[2] = LLVMBuildInsertElement(builder, aos[2], comp, channel, ""); + + comp = LLVMBuildExtractElement(builder, soa[i], + LLVMConstInt(LLVMInt32Type(), 3, 0), ""); + aos[3] = LLVMBuildInsertElement(builder, aos[3], comp, channel, ""); + + } +} + +static void +convert_to_soa(LLVMBuilderRef builder, + LLVMValueRef (*aos)[NUM_CHANNELS], + LLVMValueRef (*soa)[NUM_CHANNELS], + int num_attribs) +{ + int i; + + debug_assert(NUM_CHANNELS == 4); + + for (i = 0; i < num_attribs; ++i) { + LLVMValueRef val0 = aos[i][0]; + LLVMValueRef val1 = aos[i][1]; + LLVMValueRef val2 = aos[i][2]; + LLVMValueRef val3 = aos[i][3]; + + soa[i][0] = aos_to_soa(builder, val0, val1, val2, val3, + LLVMConstInt(LLVMInt32Type(), 0, 0)); + soa[i][1] = aos_to_soa(builder, val0, val1, val2, val3, + LLVMConstInt(LLVMInt32Type(), 1, 0)); + soa[i][2] = aos_to_soa(builder, val0, val1, val2, val3, + LLVMConstInt(LLVMInt32Type(), 2, 0)); + soa[i][3] = aos_to_soa(builder, val0, val1, val2, val3, + LLVMConstInt(LLVMInt32Type(), 3, 0)); + } +} + +static void +store_aos(LLVMBuilderRef builder, + LLVMValueRef io_ptr, + LLVMValueRef index, + LLVMValueRef value) +{ + LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr); + LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr); + LLVMValueRef indices[3]; + + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[1] = index; + indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); + + /* undefined vertex */ + LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), + 0xffff, 0), id_ptr); + +#if DEBUG_STORE + lp_build_printf(builder, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr); +#endif +#if 0 + /*lp_build_printf(builder, " ---- %p storing at %d (%p) ", io_ptr, index, data_ptr); + print_vectorf(builder, value);*/ + data_ptr = LLVMBuildBitCast(builder, data_ptr, + LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatType(), 4), 0), 0), + "datavec"); + data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 2, ""); + + LLVMBuildStore(builder, value, data_ptr); +#else + { + LLVMValueRef x, y, z, w; + LLVMValueRef idx0, idx1, idx2, idx3; + LLVMValueRef gep0, gep1, gep2, gep3; + data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, ""); + + idx0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + idx1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + idx2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + idx3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + + x = LLVMBuildExtractElement(builder, value, + idx0, ""); + y = LLVMBuildExtractElement(builder, value, + idx1, ""); + z = LLVMBuildExtractElement(builder, value, + idx2, ""); + w = LLVMBuildExtractElement(builder, value, + idx3, ""); + + gep0 = LLVMBuildGEP(builder, data_ptr, &idx0, 1, ""); + gep1 = LLVMBuildGEP(builder, data_ptr, &idx1, 1, ""); + gep2 = LLVMBuildGEP(builder, data_ptr, &idx2, 1, ""); + gep3 = LLVMBuildGEP(builder, data_ptr, &idx3, 1, ""); + + /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n", + x, gep0, y, gep1, z, gep2, w, gep3);*/ + LLVMBuildStore(builder, x, gep0); + LLVMBuildStore(builder, y, gep1); + LLVMBuildStore(builder, z, gep2); + LLVMBuildStore(builder, w, gep3); + } +#endif +} + +static void +store_aos_array(LLVMBuilderRef builder, + LLVMValueRef io_ptr, + LLVMValueRef aos[NUM_CHANNELS], + int attrib, + int num_outputs) +{ + LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0); + LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; + + debug_assert(NUM_CHANNELS == 4); + + io0_ptr = LLVMBuildGEP(builder, io_ptr, + &ind0, 1, ""); + io1_ptr = LLVMBuildGEP(builder, io_ptr, + &ind1, 1, ""); + io2_ptr = LLVMBuildGEP(builder, io_ptr, + &ind2, 1, ""); + io3_ptr = LLVMBuildGEP(builder, io_ptr, + &ind3, 1, ""); + +#if DEBUG_STORE + lp_build_printf(builder, " io = %p, indexes[%d, %d, %d, %d]\n", + io_ptr, ind0, ind1, ind2, ind3); +#endif + + store_aos(builder, io0_ptr, attr_index, aos[0]); + store_aos(builder, io1_ptr, attr_index, aos[1]); + store_aos(builder, io2_ptr, attr_index, aos[2]); + store_aos(builder, io3_ptr, attr_index, aos[3]); +} + +static void +convert_to_aos(LLVMBuilderRef builder, + LLVMValueRef io, + LLVMValueRef (*outputs)[NUM_CHANNELS], + int num_outputs, + int max_vertices) +{ + unsigned chan, attrib; + +#if DEBUG_STORE + lp_build_printf(builder, " # storing begin\n"); +#endif + for (attrib = 0; attrib < num_outputs; ++attrib) { + LLVMValueRef soa[4]; + LLVMValueRef aos[4]; + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + if(outputs[attrib][chan]) { + LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); + lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]); + /*lp_build_printf(builder, "output %d : %d ", + LLVMConstInt(LLVMInt32Type(), attrib, 0), + LLVMConstInt(LLVMInt32Type(), chan, 0)); + print_vectorf(builder, out);*/ + soa[chan] = out; + } else + soa[chan] = 0; + } + soa_to_aos(builder, soa, aos); + store_aos_array(builder, + io, + aos, + attrib, + num_outputs); + } +#if DEBUG_STORE + lp_build_printf(builder, " # storing end\n"); +#endif +} + +static void +draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) +{ + LLVMTypeRef arg_types[7]; + LLVMTypeRef func_type; + LLVMValueRef context_ptr; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + LLVMValueRef start, end, count, stride, step, io_itr; + LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; + struct draw_context *draw = llvm->draw; + unsigned i, j; + struct lp_build_context bld; + struct lp_build_loop_state lp_loop; + struct lp_type vs_type = lp_type_float_vec(32); + const int max_vertices = 4; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; + + arg_types[0] = llvm->context_ptr_type; /* context */ + arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ + arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ + arg_types[3] = LLVMInt32Type(); /* start */ + arg_types[4] = LLVMInt32Type(); /* count */ + arg_types[5] = LLVMInt32Type(); /* stride */ + arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ + + func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + + variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type); + LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); + for(i = 0; i < Elements(arg_types); ++i) + if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) + LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute); + + context_ptr = LLVMGetParam(variant->function, 0); + io_ptr = LLVMGetParam(variant->function, 1); + vbuffers_ptr = LLVMGetParam(variant->function, 2); + start = LLVMGetParam(variant->function, 3); + count = LLVMGetParam(variant->function, 4); + stride = LLVMGetParam(variant->function, 5); + vb_ptr = LLVMGetParam(variant->function, 6); + + lp_build_name(context_ptr, "context"); + lp_build_name(io_ptr, "io"); + lp_build_name(vbuffers_ptr, "vbuffers"); + lp_build_name(start, "start"); + lp_build_name(count, "count"); + lp_build_name(stride, "stride"); + lp_build_name(vb_ptr, "vb"); + + /* + * Function body + */ + + block = LLVMAppendBasicBlock(variant->function, "entry"); + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, block); + + lp_build_context_init(&bld, builder, vs_type); + + end = lp_build_add(&bld, start, count); + + step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); + +#if DEBUG_STORE + lp_build_printf(builder, "start = %d, end = %d, step = %d\n", + start, end, step); +#endif + lp_build_loop_begin(builder, start, &lp_loop); + { + LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; + LLVMValueRef io; + const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; + + io_itr = LLVMBuildSub(builder, lp_loop.counter, start, ""); + io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); +#if DEBUG_STORE + lp_build_printf(builder, " --- io %d = %p, loop counter %d\n", + io_itr, io, lp_loop.counter); +#endif + for (i = 0; i < NUM_CHANNELS; ++i) { + LLVMValueRef true_index = LLVMBuildAdd( + builder, + lp_loop.counter, + LLVMConstInt(LLVMInt32Type(), i, 0), ""); + for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { + struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; + LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(), + velem->vertex_buffer_index, + 0); + LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, + &vb_index, 1, ""); + generate_fetch(builder, vbuffers_ptr, + &aos_attribs[j][i], velem, vb, true_index); + } + } + convert_to_soa(builder, aos_attribs, inputs, + draw->pt.nr_vertex_elements); + + ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; + generate_vs(llvm, + builder, + outputs, + ptr_aos, + context_ptr); + + convert_to_aos(builder, io, outputs, + draw->vs.vertex_shader->info.num_outputs, + max_vertices); + } + lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop); + + LLVMBuildRetVoid(builder); + + LLVMDisposeBuilder(builder); + + /* + * Translate the LLVM IR into machine code. + */ +#ifdef DEBUG + if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { + LLVMDumpValue(variant->function); + assert(0); + } +#endif + + LLVMRunFunctionPassManager(llvm->pass, variant->function); + + if (0) { + LLVMDumpValue(variant->function); + debug_printf("\n"); + } + variant->jit_func = (draw_jit_vert_func)LLVMGetPointerToGlobal(llvm->draw->engine, variant->function); + + if (0) + lp_disassemble(variant->jit_func); +} + +void +draw_llvm_make_variant_key(struct draw_llvm *llvm, + struct draw_llvm_variant_key *key) +{ + memset(key, 0, sizeof(struct draw_llvm_variant_key)); + + key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements; + + memcpy(key->vertex_element, + llvm->draw->pt.vertex_element, + sizeof(struct pipe_vertex_element) * key->nr_vertex_elements); + + memcpy(&key->vs, + &llvm->draw->vs.vertex_shader->state, + sizeof(struct pipe_shader_state)); +} diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h new file mode 100644 index 0000000000..28b9044a81 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -0,0 +1,144 @@ +#ifndef HAVE_LLVM_H +#define HAVE_LLVM_H + +#include "draw/draw_private.h" + +#include "pipe/p_context.h" + +#include <llvm-c/Core.h> +#include <llvm-c/Analysis.h> +#include <llvm-c/Target.h> +#include <llvm-c/ExecutionEngine.h> + +struct draw_jit_texture +{ + uint32_t width; + uint32_t height; + uint32_t stride; + const void *data; +}; + +enum { + DRAW_JIT_TEXTURE_WIDTH = 0, + DRAW_JIT_TEXTURE_HEIGHT, + DRAW_JIT_TEXTURE_STRIDE, + DRAW_JIT_TEXTURE_DATA +}; + +enum { + DRAW_JIT_VERTEX_VERTEX_ID = 0, + DRAW_JIT_VERTEX_CLIP, + DRAW_JIT_VERTEX_DATA +}; + +/** + * This structure is passed directly to the generated vertex shader. + * + * It contains the derived state. + * + * Changes here must be reflected in the draw_jit_context_* macros. + * Changes to the ordering should be avoided. + * + * Only use types with a clear size and padding here, in particular prefer the + * stdint.h types to the basic integer types. + */ +struct draw_jit_context +{ + const float *vs_constants; + const float *gs_constants; + + + struct draw_jit_texture textures[PIPE_MAX_SAMPLERS]; +}; + + +#define draw_jit_context_vs_constants(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 0, "vs_constants") + +#define draw_jit_context_gs_constants(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 1, "gs_constants") + +#define DRAW_JIT_CONTEXT_TEXTURES_INDEX 2 + +#define draw_jit_context_textures(_builder, _ptr) \ + lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CONTEXT_TEXTURES_INDEX, "textures") + + + +#define draw_jit_header_id(_builder, _ptr) \ + lp_build_struct_get_ptr(_builder, _ptr, 0, "id") + +#define draw_jit_header_clip(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 1, "clip") + +#define draw_jit_header_data(_builder, _ptr) \ + lp_build_struct_get_ptr(_builder, _ptr, 2, "data") + + +#define draw_jit_vbuffer_stride(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 0, "stride") + +#define draw_jit_vbuffer_offset(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 2, "buffer_offset") + + +typedef void +(*draw_jit_vert_func)(struct draw_jit_context *context, + struct vertex_header *io, + const char *vbuffers[PIPE_MAX_ATTRIBS], + unsigned start, + unsigned count, + unsigned stride, + struct pipe_vertex_buffer *vertex_buffers); + +struct draw_llvm { + struct draw_context *draw; + + struct draw_jit_context jit_context; + + LLVMModuleRef module; + LLVMExecutionEngineRef engine; + LLVMModuleProviderRef provider; + LLVMTargetDataRef target; + LLVMPassManagerRef pass; + + LLVMTypeRef context_ptr_type; + LLVMTypeRef vertex_header_ptr_type; + LLVMTypeRef buffer_ptr_type; + LLVMTypeRef vb_ptr_type; +}; + +struct draw_llvm_variant_key +{ + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_elements; + struct pipe_shader_state vs; +}; + +struct draw_llvm_variant +{ + struct draw_llvm_variant_key key; + LLVMValueRef function; + draw_jit_vert_func jit_func; + + struct draw_llvm_variant *next; +}; + +struct draw_llvm * +draw_llvm_create(struct draw_context *draw); + +void +draw_llvm_destroy(struct draw_llvm *llvm); + +struct draw_llvm_variant * +draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs); + +void +draw_llvm_make_variant_key(struct draw_llvm *llvm, + struct draw_llvm_variant_key *key); + +LLVMValueRef +draw_llvm_translate_from(LLVMBuilderRef builder, + LLVMValueRef vbuffer, + enum pipe_format from_format); +#endif diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c new file mode 100644 index 0000000000..2a95823f2c --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_llvm_translate.c @@ -0,0 +1,481 @@ +#include "draw_private.h" +#include "draw_context.h" + +#include "draw_llvm.h" + +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_interp.h" +#include "gallivm/lp_bld_struct.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_debug.h" + +#include "util/u_memory.h" +#include "pipe/p_state.h" + + +#define DRAW_DBG 0 + +static LLVMValueRef +from_64_float(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMDoubleType(), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + return LLVMBuildFPTrunc(builder, l, LLVMFloatType(), ""); +} + +static LLVMValueRef +from_32_float(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMFloatType(), 0) , ""); + return LLVMBuildLoad(builder, bc, ""); +} + +static INLINE LLVMValueRef +from_8_uscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef l = LLVMBuildLoad(builder, val, ""); + return LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); +} + +static INLINE LLVMValueRef +from_16_uscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(16), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + return LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); +} + +static INLINE LLVMValueRef +from_32_uscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(32), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + return LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); +} + +static INLINE LLVMValueRef +from_8_sscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef l = LLVMBuildLoad(builder, val, ""); + return LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); +} + +static INLINE LLVMValueRef +from_16_sscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(16), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + return LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); +} + +static INLINE LLVMValueRef +from_32_sscaled(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(32), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + return LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); +} + + +static INLINE LLVMValueRef +from_8_unorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef l = LLVMBuildLoad(builder, val, ""); + LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 255.), ""); +} + +static INLINE LLVMValueRef +from_16_unorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(16), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 65535.), ""); +} + +static INLINE LLVMValueRef +from_32_unorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(32), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), ""); + + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 4294967295.), ""); +} + +static INLINE LLVMValueRef +from_8_snorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef l = LLVMBuildLoad(builder, val, ""); + LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 127.0), ""); +} + +static INLINE LLVMValueRef +from_16_snorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(16), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 32767.0f), ""); +} + +static INLINE LLVMValueRef +from_32_snorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(32), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); + + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 2147483647.0), ""); +} + +static INLINE LLVMValueRef +from_32_fixed(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef bc = LLVMBuildBitCast(builder, val, + LLVMPointerType(LLVMIntType(32), 0) , ""); + LLVMValueRef l = LLVMBuildLoad(builder, bc, ""); + LLVMValueRef uscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), ""); + + return LLVMBuildFDiv(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 65536.0), ""); +} + +static LLVMValueRef +to_64_float(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPExt(builder, l, LLVMDoubleType(), ""); +} + +static LLVMValueRef +to_32_float(LLVMBuilderRef builder, LLVMValueRef fp) +{ + return LLVMBuildLoad(builder, fp, ""); +} + +static INLINE LLVMValueRef +to_8_uscaled(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPToUI(builder, l, LLVMIntType(8), ""); +} + +static INLINE LLVMValueRef +to_16_uscaled(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPToUI(builder, l, LLVMIntType(16), ""); +} + +static INLINE LLVMValueRef +to_32_uscaled(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPToUI(builder, l, LLVMIntType(32), ""); +} + +static INLINE LLVMValueRef +to_8_sscaled(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPToSI(builder, l, LLVMIntType(8), ""); +} + +static INLINE LLVMValueRef +to_16_sscaled(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPToSI(builder, l, LLVMIntType(16), ""); +} + +static INLINE LLVMValueRef +to_32_sscaled(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + return LLVMBuildFPToSI(builder, l, LLVMIntType(32), ""); +} + +static INLINE LLVMValueRef +to_8_unorm(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToUI(builder, l, LLVMIntType(8), ""); + return LLVMBuildFMul(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 255.), ""); +} + +static INLINE LLVMValueRef +to_16_unorm(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToUI(builder, l, LLVMIntType(32), ""); + return LLVMBuildFMul(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 65535.), ""); +} + +static INLINE LLVMValueRef +to_32_unorm(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToUI(builder, l, LLVMIntType(32), ""); + + return LLVMBuildFMul(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 4294967295.), ""); +} + +static INLINE LLVMValueRef +to_8_snorm(LLVMBuilderRef builder, LLVMValueRef val) +{ + LLVMValueRef l = LLVMBuildLoad(builder, val, ""); + LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(8), ""); + return LLVMBuildFMul(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 127.0), ""); +} + +static INLINE LLVMValueRef +to_16_snorm(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(16), ""); + return LLVMBuildFMul(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 32767.0f), ""); +} + +static INLINE LLVMValueRef +to_32_snorm(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(32), ""); + + return LLVMBuildFMul(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 2147483647.0), ""); +} + +static INLINE LLVMValueRef +to_32_fixed(LLVMBuilderRef builder, LLVMValueRef fp) +{ + LLVMValueRef l = LLVMBuildLoad(builder, fp, ""); + LLVMValueRef uscaled = LLVMBuildFPToSI(builder, l, LLVMIntType(32), ""); + + return LLVMBuildFMul(builder, uscaled, + LLVMConstReal(LLVMFloatType(), 65536.0), ""); +} + +typedef LLVMValueRef (*from_func)(LLVMBuilderRef, LLVMValueRef); +typedef LLVMValueRef (*to_func)(LLVMBuilderRef, LLVMValueRef); + +/* so that underneath can avoid function calls which are prohibited + * for static initialization we need this conversion */ +enum ll_type { + LL_Double, + LL_Float, + LL_Int32, + LL_Int16, + LL_Int8 +}; + +static INLINE LLVMTypeRef +ll_type_to_llvm(enum ll_type type) +{ + switch (type) { + case LL_Double: + return LLVMDoubleType(); + case LL_Float: + return LLVMFloatType(); + case LL_Int32: + return LLVMInt32Type(); + case LL_Int16: + return LLVMIntType(16); + case LL_Int8: + return LLVMIntType(8); + } + return LLVMIntType(8); +} + +static INLINE int +ll_type_size(enum ll_type type) +{ + switch (type) { + case LL_Double: + return 8; + case LL_Float: + return 4; + case LL_Int32: + return 4; + case LL_Int16: + return 2; + case LL_Int8: + return 1; + } + return 1; +} + +struct draw_llvm_translate { + int format; + from_func from; + to_func to; + enum ll_type type; + int num_components; +} translates[] = +{ + {PIPE_FORMAT_R64_FLOAT, from_64_float, to_64_float, LL_Double, 1}, + {PIPE_FORMAT_R64G64_FLOAT, from_64_float, to_64_float, LL_Double, 2}, + {PIPE_FORMAT_R64G64B64_FLOAT, from_64_float, to_64_float, LL_Double, 3}, + {PIPE_FORMAT_R64G64B64A64_FLOAT, from_64_float, to_64_float, LL_Double, 4}, + {PIPE_FORMAT_R32_FLOAT, from_32_float, to_32_float, LL_Float, 1}, + {PIPE_FORMAT_R32G32_FLOAT, from_32_float, to_32_float, LL_Float, 2}, + {PIPE_FORMAT_R32G32B32_FLOAT, from_32_float, to_32_float, LL_Float, 3}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, from_32_float, to_32_float, LL_Float, 4}, + + {PIPE_FORMAT_R32_UNORM, from_32_unorm, to_32_unorm, LL_Int32, 1}, + {PIPE_FORMAT_R32G32_UNORM, from_32_unorm, to_32_unorm, LL_Int32, 2}, + {PIPE_FORMAT_R32G32B32_UNORM, from_32_unorm, to_32_unorm, LL_Int32, 3}, + {PIPE_FORMAT_R32G32B32A32_UNORM, from_32_unorm, to_32_unorm, LL_Int32, 4}, + + {PIPE_FORMAT_R32_USCALED, from_32_uscaled, to_32_uscaled, LL_Int32, 1}, + {PIPE_FORMAT_R32G32_USCALED, from_32_uscaled, to_32_uscaled, LL_Int32, 2}, + {PIPE_FORMAT_R32G32B32_USCALED, from_32_uscaled, to_32_uscaled, LL_Int32, 3}, + {PIPE_FORMAT_R32G32B32A32_USCALED, from_32_uscaled, to_32_uscaled, LL_Int32, 4}, + + {PIPE_FORMAT_R32_SNORM, from_32_snorm, to_32_snorm, LL_Int32, 1}, + {PIPE_FORMAT_R32G32_SNORM, from_32_snorm, to_32_snorm, LL_Int32, 2}, + {PIPE_FORMAT_R32G32B32_SNORM, from_32_snorm, to_32_snorm, LL_Int32, 3}, + {PIPE_FORMAT_R32G32B32A32_SNORM, from_32_snorm, to_32_snorm, LL_Int32, 4}, + + {PIPE_FORMAT_R32_SSCALED, from_32_sscaled, to_32_sscaled, LL_Int32, 1}, + {PIPE_FORMAT_R32G32_SSCALED, from_32_sscaled, to_32_sscaled, LL_Int32, 2}, + {PIPE_FORMAT_R32G32B32_SSCALED, from_32_sscaled, to_32_sscaled, LL_Int32, 3}, + {PIPE_FORMAT_R32G32B32A32_SSCALED, from_32_sscaled, to_32_sscaled, LL_Int32, 4}, + + {PIPE_FORMAT_R16_UNORM, from_16_unorm, to_16_unorm, LL_Int16, 1}, + {PIPE_FORMAT_R16G16_UNORM, from_16_unorm, to_16_unorm, LL_Int16, 2}, + {PIPE_FORMAT_R16G16B16_UNORM, from_16_unorm, to_16_unorm, LL_Int16, 3}, + {PIPE_FORMAT_R16G16B16A16_UNORM, from_16_unorm, to_16_unorm, LL_Int16, 4}, + + {PIPE_FORMAT_R16_USCALED, from_16_uscaled, to_16_uscaled, LL_Int16, 1}, + {PIPE_FORMAT_R16G16_USCALED, from_16_uscaled, to_16_uscaled, LL_Int16, 2}, + {PIPE_FORMAT_R16G16B16_USCALED, from_16_uscaled, to_16_uscaled, LL_Int16, 3}, + {PIPE_FORMAT_R16G16B16A16_USCALED, from_16_uscaled, to_16_uscaled, LL_Int16, 4}, + + {PIPE_FORMAT_R16_SNORM, from_16_snorm, to_16_snorm, LL_Int16, 1}, + {PIPE_FORMAT_R16G16_SNORM, from_16_snorm, to_16_snorm, LL_Int16, 2}, + {PIPE_FORMAT_R16G16B16_SNORM, from_16_snorm, to_16_snorm, LL_Int16, 3}, + {PIPE_FORMAT_R16G16B16A16_SNORM, from_16_snorm, to_16_snorm, LL_Int16, 4}, + + {PIPE_FORMAT_R16_SSCALED, from_16_sscaled, to_16_sscaled, LL_Int16, 1}, + {PIPE_FORMAT_R16G16_SSCALED, from_16_sscaled, to_16_sscaled, LL_Int16, 2}, + {PIPE_FORMAT_R16G16B16_SSCALED, from_16_sscaled, to_16_sscaled, LL_Int16, 3}, + {PIPE_FORMAT_R16G16B16A16_SSCALED, from_16_sscaled, to_16_sscaled, LL_Int16, 4}, + + {PIPE_FORMAT_R8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 1}, + {PIPE_FORMAT_R8G8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 2}, + {PIPE_FORMAT_R8G8B8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 3}, + {PIPE_FORMAT_R8G8B8A8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 4}, + + {PIPE_FORMAT_R8_USCALED, from_8_uscaled, to_8_uscaled, LL_Int8, 1}, + {PIPE_FORMAT_R8G8_USCALED, from_8_uscaled, to_8_uscaled, LL_Int8, 2}, + {PIPE_FORMAT_R8G8B8_USCALED, from_8_uscaled, to_8_uscaled, LL_Int8, 3}, + {PIPE_FORMAT_R8G8B8A8_USCALED, from_8_uscaled, to_8_uscaled, LL_Int8, 4}, + + {PIPE_FORMAT_R8_SNORM, from_8_snorm, to_8_snorm, LL_Int8, 1}, + {PIPE_FORMAT_R8G8_SNORM, from_8_snorm, to_8_snorm, LL_Int8, 2}, + {PIPE_FORMAT_R8G8B8_SNORM, from_8_snorm, to_8_snorm, LL_Int8, 3}, + {PIPE_FORMAT_R8G8B8A8_SNORM, from_8_snorm, to_8_snorm, LL_Int8, 4}, + + {PIPE_FORMAT_R8_SSCALED, from_8_sscaled, to_8_sscaled, LL_Int8, 1}, + {PIPE_FORMAT_R8G8_SSCALED, from_8_sscaled, to_8_sscaled, LL_Int8, 2}, + {PIPE_FORMAT_R8G8B8_SSCALED, from_8_sscaled, to_8_sscaled, LL_Int8, 3}, + {PIPE_FORMAT_R8G8B8A8_SSCALED, from_8_sscaled, to_8_sscaled, LL_Int8, 4}, + + {PIPE_FORMAT_R32_FIXED, from_32_fixed, to_32_fixed, LL_Int32, 1}, + {PIPE_FORMAT_R32G32_FIXED, from_32_fixed, to_32_fixed, LL_Int32, 2}, + {PIPE_FORMAT_R32G32B32_FIXED, from_32_fixed, to_32_fixed, LL_Int32, 3}, + {PIPE_FORMAT_R32G32B32A32_FIXED, from_32_fixed, to_32_fixed, LL_Int32, 4}, + + {PIPE_FORMAT_A8R8G8B8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 4}, + {PIPE_FORMAT_B8G8R8A8_UNORM, from_8_unorm, to_8_unorm, LL_Int8, 4}, +}; + + +static LLVMValueRef +fetch(LLVMBuilderRef builder, + LLVMValueRef ptr, int val_size, int nr_components, + from_func func) +{ + int i; + int offset = 0; + LLVMValueRef res = LLVMConstNull( + LLVMVectorType(LLVMFloatType(), 4)); + LLVMValueRef defaults[4]; + + defaults[0] = LLVMConstReal(LLVMFloatType(), 0); + defaults[1] = LLVMConstReal(LLVMFloatType(), 0); + defaults[2] = LLVMConstReal(LLVMFloatType(), 0); + defaults[3] = LLVMConstReal(LLVMFloatType(), 1); + + for (i = 0; i < nr_components; ++i) { + LLVMValueRef src_index = LLVMConstInt(LLVMInt32Type(), offset, 0); + LLVMValueRef dst_index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef src_tmp; + LLVMValueRef component; + + src_tmp = LLVMBuildGEP(builder, ptr, &src_index, 1, "src_tmp"); + + /* convert src_tmp to float */ + component = func(builder, src_tmp); + + /* vec.comp = component */ + res = LLVMBuildInsertElement(builder, + res, + component, + dst_index, ""); + offset += val_size; + } + for (; i < 4; ++i) { + LLVMValueRef dst_index = LLVMConstInt(LLVMInt32Type(), i, 0); + res = LLVMBuildInsertElement(builder, + res, + defaults[i], + dst_index, ""); + } + return res; +} + + +LLVMValueRef +draw_llvm_translate_from(LLVMBuilderRef builder, + LLVMValueRef vbuffer, + enum pipe_format from_format) +{ + int i; + for (i = 0; i < Elements(translates); ++i) { + if (translates[i].format == from_format) { + /*LLVMTypeRef type = ll_type_to_llvm(translates[i].type);*/ + return fetch(builder, + vbuffer, + ll_type_size(translates[i].type), + translates[i].num_components, + translates[i].from); + } + } + return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)); +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 8f6ca15dfa..3844c04dd3 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -40,6 +40,7 @@ #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_sampler.h" #include "tgsi/tgsi_transform.h" #include "tgsi/tgsi_dump.h" @@ -87,9 +88,10 @@ struct aaline_stage uint pos_slot; void *sampler_cso; - struct pipe_texture *texture; + struct pipe_resource *texture; + struct pipe_sampler_view *sampler_view; uint num_samplers; - uint num_textures; + uint num_sampler_views; /* @@ -98,7 +100,7 @@ struct aaline_stage struct aaline_fragment_shader *fs; struct { void *sampler[PIPE_MAX_SAMPLERS]; - struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; } state; /* @@ -111,8 +113,9 @@ struct aaline_stage void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, void **); - void (*driver_set_sampler_textures)(struct pipe_context *, unsigned, - struct pipe_texture **); + void (*driver_set_sampler_views)(struct pipe_context *, + unsigned, + struct pipe_sampler_view **); struct pipe_context *pipe; }; @@ -393,7 +396,8 @@ aaline_create_texture(struct aaline_stage *aaline) { struct pipe_context *pipe = aaline->pipe; struct pipe_screen *screen = pipe->screen; - struct pipe_texture texTemp; + struct pipe_resource texTemp; + struct pipe_sampler_view viewTempl; uint level; memset(&texTemp, 0, sizeof(texTemp)); @@ -404,27 +408,44 @@ aaline_create_texture(struct aaline_stage *aaline) texTemp.height0 = 1 << MAX_TEXTURE_LEVEL; texTemp.depth0 = 1; - aaline->texture = screen->texture_create(screen, &texTemp); + aaline->texture = screen->resource_create(screen, &texTemp); if (!aaline->texture) return FALSE; + u_sampler_view_default_template(&viewTempl, + aaline->texture, + aaline->texture->format); + aaline->sampler_view = pipe->create_sampler_view(pipe, + aaline->texture, + &viewTempl); + if (!aaline->sampler_view) { + return FALSE; + } + /* Fill in mipmap images. * Basically each level is solid opaque, except for the outermost * texels which are zero. Special case the 1x1 and 2x2 levels. */ for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) { struct pipe_transfer *transfer; + struct pipe_box box; const uint size = u_minify(aaline->texture->width0, level); ubyte *data; uint i, j; assert(aaline->texture->width0 == aaline->texture->height0); + u_box_origin_2d( size, size, &box ); + /* This texture is new, no need to flush. */ - transfer = screen->get_tex_transfer(screen, aaline->texture, 0, level, 0, - PIPE_TRANSFER_WRITE, 0, 0, size, size); - data = screen->transfer_map(screen, transfer); + transfer = pipe->get_transfer(pipe, + aaline->texture, + u_subresource(0, level), + PIPE_TRANSFER_WRITE, + &box); + + data = pipe->transfer_map(pipe, transfer); if (data == NULL) return FALSE; @@ -448,8 +469,8 @@ aaline_create_texture(struct aaline_stage *aaline) } /* unmap */ - screen->transfer_unmap(screen, transfer); - screen->tex_transfer_destroy(transfer); + pipe->transfer_unmap(pipe, transfer); + pipe->transfer_destroy(pipe, transfer); } return TRUE; } @@ -669,16 +690,16 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) /* how many samplers? */ /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ - num_samplers = MAX2(aaline->num_textures, aaline->num_samplers); + num_samplers = MAX2(aaline->num_sampler_views, aaline->num_samplers); num_samplers = MAX2(num_samplers, aaline->fs->sampler_unit + 1); aaline->state.sampler[aaline->fs->sampler_unit] = aaline->sampler_cso; - pipe_texture_reference(&aaline->state.texture[aaline->fs->sampler_unit], - aaline->texture); + pipe_sampler_view_reference(&aaline->state.sampler_views[aaline->fs->sampler_unit], + aaline->sampler_view); draw->suspend_flushing = TRUE; aaline->driver_bind_sampler_states(pipe, num_samplers, aaline->state.sampler); - aaline->driver_set_sampler_textures(pipe, num_samplers, aaline->state.texture); + aaline->driver_set_sampler_views(pipe, num_samplers, aaline->state.sampler_views); draw->suspend_flushing = FALSE; /* now really draw first line */ @@ -702,8 +723,9 @@ aaline_flush(struct draw_stage *stage, unsigned flags) aaline->driver_bind_fs_state(pipe, aaline->fs->driver_fs); aaline->driver_bind_sampler_states(pipe, aaline->num_samplers, aaline->state.sampler); - aaline->driver_set_sampler_textures(pipe, aaline->num_textures, - aaline->state.texture); + aaline->driver_set_sampler_views(pipe, + aaline->num_sampler_views, + aaline->state.sampler_views); draw->suspend_flushing = FALSE; draw->extra_shader_outputs.slot = 0; @@ -724,14 +746,18 @@ aaline_destroy(struct draw_stage *stage) uint i; for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - pipe_texture_reference(&aaline->state.texture[i], NULL); + pipe_sampler_view_reference(&aaline->state.sampler_views[i], NULL); } if (aaline->sampler_cso) aaline->pipe->delete_sampler_state(aaline->pipe, aaline->sampler_cso); if (aaline->texture) - pipe_texture_reference(&aaline->texture, NULL); + pipe_resource_reference(&aaline->texture, NULL); + + if (aaline->sampler_view) { + pipe_sampler_view_reference(&aaline->sampler_view, NULL); + } draw_free_temp_verts( stage ); @@ -844,23 +870,24 @@ aaline_bind_sampler_states(struct pipe_context *pipe, static void -aaline_set_sampler_textures(struct pipe_context *pipe, - unsigned num, struct pipe_texture **texture) +aaline_set_sampler_views(struct pipe_context *pipe, + unsigned num, + struct pipe_sampler_view **views) { struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); uint i; /* save current */ for (i = 0; i < num; i++) { - pipe_texture_reference(&aaline->state.texture[i], texture[i]); + pipe_sampler_view_reference(&aaline->state.sampler_views[i], views[i]); } for ( ; i < PIPE_MAX_SAMPLERS; i++) { - pipe_texture_reference(&aaline->state.texture[i], NULL); + pipe_sampler_view_reference(&aaline->state.sampler_views[i], NULL); } - aaline->num_textures = num; + aaline->num_sampler_views = num; /* pass-through */ - aaline->driver_set_sampler_textures(aaline->pipe, num, texture); + aaline->driver_set_sampler_views(aaline->pipe, num, views); } @@ -898,7 +925,7 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) aaline->driver_delete_fs_state = pipe->delete_fs_state; aaline->driver_bind_sampler_states = pipe->bind_fragment_sampler_states; - aaline->driver_set_sampler_textures = pipe->set_fragment_sampler_textures; + aaline->driver_set_sampler_views = pipe->set_fragment_sampler_views; /* override the driver's functions */ pipe->create_fs_state = aaline_create_fs_state; @@ -906,7 +933,7 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) pipe->delete_fs_state = aaline_delete_fs_state; pipe->bind_fragment_sampler_states = aaline_bind_sampler_states; - pipe->set_fragment_sampler_textures = aaline_set_sampler_textures; + pipe->set_fragment_sampler_views = aaline_set_sampler_views; /* Install once everything is known to be OK: */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index d0d99aa331..bd433deeca 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -42,6 +42,7 @@ #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_sampler.h" #include "tgsi/tgsi_transform.h" #include "tgsi/tgsi_dump.h" @@ -74,9 +75,10 @@ struct pstip_stage struct draw_stage stage; void *sampler_cso; - struct pipe_texture *texture; + struct pipe_resource *texture; + struct pipe_sampler_view *sampler_view; uint num_samplers; - uint num_textures; + uint num_sampler_views; /* * Currently bound state @@ -84,7 +86,7 @@ struct pstip_stage struct pstip_fragment_shader *fs; struct { void *samplers[PIPE_MAX_SAMPLERS]; - struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; const struct pipe_poly_stipple *stipple; } state; @@ -98,8 +100,9 @@ struct pstip_stage void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, void **); - void (*driver_set_sampler_textures)(struct pipe_context *, unsigned, - struct pipe_texture **); + void (*driver_set_sampler_views)(struct pipe_context *, + unsigned, + struct pipe_sampler_view **); void (*driver_set_polygon_stipple)(struct pipe_context *, const struct pipe_poly_stipple *); @@ -374,19 +377,21 @@ pstip_update_texture(struct pstip_stage *pstip) { static const uint bit31 = 1 << 31; struct pipe_context *pipe = pstip->pipe; - struct pipe_screen *screen = pipe->screen; struct pipe_transfer *transfer; const uint *stipple = pstip->state.stipple->stipple; uint i, j; ubyte *data; /* XXX: want to avoid flushing just because we use stipple: + * + * Flush should no longer be necessary if driver is properly + * interleaving drawing and transfers on a given context: */ pipe->flush( pipe, PIPE_FLUSH_TEXTURE_CACHE, NULL ); - transfer = screen->get_tex_transfer(screen, pstip->texture, 0, 0, 0, - PIPE_TRANSFER_WRITE, 0, 0, 32, 32); - data = screen->transfer_map(screen, transfer); + transfer = pipe_get_transfer(pipe, pstip->texture, 0, 0, 0, + PIPE_TRANSFER_WRITE, 0, 0, 32, 32); + data = pipe->transfer_map(pipe, transfer); /* * Load alpha texture. @@ -408,8 +413,8 @@ pstip_update_texture(struct pstip_stage *pstip) } /* unmap */ - screen->transfer_unmap(screen, transfer); - screen->tex_transfer_destroy(transfer); + pipe->transfer_unmap(pipe, transfer); + pipe->transfer_destroy(pipe, transfer); } @@ -421,7 +426,8 @@ pstip_create_texture(struct pstip_stage *pstip) { struct pipe_context *pipe = pstip->pipe; struct pipe_screen *screen = pipe->screen; - struct pipe_texture texTemp; + struct pipe_resource texTemp; + struct pipe_sampler_view viewTempl; memset(&texTemp, 0, sizeof(texTemp)); texTemp.target = PIPE_TEXTURE_2D; @@ -431,10 +437,20 @@ pstip_create_texture(struct pstip_stage *pstip) texTemp.height0 = 32; texTemp.depth0 = 1; - pstip->texture = screen->texture_create(screen, &texTemp); + pstip->texture = screen->resource_create(screen, &texTemp); if (pstip->texture == NULL) return FALSE; + u_sampler_view_default_template(&viewTempl, + pstip->texture, + pstip->texture->format); + pstip->sampler_view = pipe->create_sampler_view(pipe, + pstip->texture, + &viewTempl); + if (!pstip->sampler_view) { + return FALSE; + } + return TRUE; } @@ -513,19 +529,19 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header) /* how many samplers? */ /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ - num_samplers = MAX2(pstip->num_textures, pstip->num_samplers); + num_samplers = MAX2(pstip->num_sampler_views, pstip->num_samplers); num_samplers = MAX2(num_samplers, pstip->fs->sampler_unit + 1); /* plug in our sampler, texture */ pstip->state.samplers[pstip->fs->sampler_unit] = pstip->sampler_cso; - pipe_texture_reference(&pstip->state.textures[pstip->fs->sampler_unit], - pstip->texture); + pipe_sampler_view_reference(&pstip->state.sampler_views[pstip->fs->sampler_unit], + pstip->sampler_view); assert(num_samplers <= PIPE_MAX_SAMPLERS); draw->suspend_flushing = TRUE; pstip->driver_bind_sampler_states(pipe, num_samplers, pstip->state.samplers); - pstip->driver_set_sampler_textures(pipe, num_samplers, pstip->state.textures); + pstip->driver_set_sampler_views(pipe, num_samplers, pstip->state.sampler_views); draw->suspend_flushing = FALSE; /* now really draw first triangle */ @@ -549,8 +565,9 @@ pstip_flush(struct draw_stage *stage, unsigned flags) pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs); pstip->driver_bind_sampler_states(pipe, pstip->num_samplers, pstip->state.samplers); - pstip->driver_set_sampler_textures(pipe, pstip->num_textures, - pstip->state.textures); + pstip->driver_set_sampler_views(pipe, + pstip->num_sampler_views, + pstip->state.sampler_views); draw->suspend_flushing = FALSE; } @@ -569,12 +586,16 @@ pstip_destroy(struct draw_stage *stage) uint i; for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - pipe_texture_reference(&pstip->state.textures[i], NULL); + pipe_sampler_view_reference(&pstip->state.sampler_views[i], NULL); } pstip->pipe->delete_sampler_state(pstip->pipe, pstip->sampler_cso); - pipe_texture_reference(&pstip->texture, NULL); + pipe_resource_reference(&pstip->texture, NULL); + + if (pstip->sampler_view) { + pipe_sampler_view_reference(&pstip->sampler_view, NULL); + } draw_free_temp_verts( stage ); FREE( stage ); @@ -680,24 +701,25 @@ pstip_bind_sampler_states(struct pipe_context *pipe, static void -pstip_set_sampler_textures(struct pipe_context *pipe, - unsigned num, struct pipe_texture **texture) +pstip_set_sampler_views(struct pipe_context *pipe, + unsigned num, + struct pipe_sampler_view **views) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); uint i; /* save current */ for (i = 0; i < num; i++) { - pipe_texture_reference(&pstip->state.textures[i], texture[i]); + pipe_sampler_view_reference(&pstip->state.sampler_views[i], views[i]); } for (; i < PIPE_MAX_SAMPLERS; i++) { - pipe_texture_reference(&pstip->state.textures[i], NULL); + pipe_sampler_view_reference(&pstip->state.sampler_views[i], NULL); } - pstip->num_textures = num; + pstip->num_sampler_views = num; /* pass-through */ - pstip->driver_set_sampler_textures(pstip->pipe, num, texture); + pstip->driver_set_sampler_views(pstip->pipe, num, views); } @@ -754,7 +776,7 @@ draw_install_pstipple_stage(struct draw_context *draw, pstip->driver_delete_fs_state = pipe->delete_fs_state; pstip->driver_bind_sampler_states = pipe->bind_fragment_sampler_states; - pstip->driver_set_sampler_textures = pipe->set_fragment_sampler_textures; + pstip->driver_set_sampler_views = pipe->set_fragment_sampler_views; pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple; /* override the driver's functions */ @@ -763,7 +785,7 @@ draw_install_pstipple_stage(struct draw_context *draw, pipe->delete_fs_state = pstip_delete_fs_state; pipe->bind_fragment_sampler_states = pstip_bind_sampler_states; - pipe->set_fragment_sampler_textures = pstip_set_sampler_textures; + pipe->set_fragment_sampler_views = pstip_set_sampler_views; pipe->set_polygon_stipple = pstip_set_polygon_stipple; return TRUE; diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 2709957961..1c7db0005a 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -238,38 +238,15 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim ) unsigned output_format; unsigned src_offset = (vbuf->vinfo->attrib[i].src_index * 4 * sizeof(float) ); - switch (vbuf->vinfo->attrib[i].emit) { - case EMIT_4F: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - emit_sz = 4 * sizeof(float); - break; - case EMIT_3F: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - emit_sz = 3 * sizeof(float); - break; - case EMIT_2F: - output_format = PIPE_FORMAT_R32G32_FLOAT; - emit_sz = 2 * sizeof(float); - break; - case EMIT_1F: - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); - break; - case EMIT_1F_PSIZE: - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); + output_format = draw_translate_vinfo_format(vbuf->vinfo->attrib[i].emit); + emit_sz = draw_translate_vinfo_size(vbuf->vinfo->attrib[i].emit); + + /* doesn't handle EMIT_OMIT */ + assert(emit_sz != 0); + + if (vbuf->vinfo->attrib[i].emit == EMIT_1F_PSIZE) { src_buffer = 1; src_offset = 0; - break; - case EMIT_4UB: - output_format = PIPE_FORMAT_A8R8G8B8_UNORM; - emit_sz = 4 * sizeof(ubyte); - break; - default: - assert(0); - output_format = PIPE_FORMAT_NONE; - emit_sz = 0; - break; } hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index fdabce7d44..6864b4015b 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -113,7 +113,10 @@ static void set_texcoords(const struct widepoint_stage *wide, /* put gl_PointCoord into the extra vertex slot */ uint slot = wide->stage.draw->extra_shader_outputs.slot; v->data[slot][0] = tc[0]; - v->data[slot][1] = tc[1]; + if (wide->texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT) + v->data[slot][1] = 1.0f - tc[1]; + else + v->data[slot][1] = tc[1]; v->data[slot][2] = 0.0F; v->data[slot][3] = 1.0F; } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 1e6e01af9e..da64102d9d 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -46,6 +46,10 @@ #include "tgsi/tgsi_scan.h" +#ifdef HAVE_LLVM +#include <llvm-c/ExecutionEngine.h> +#endif + struct pipe_context; struct draw_vertex_shader; @@ -237,9 +241,16 @@ struct draw_context unsigned instance_id; +#ifdef HAVE_LLVM + LLVMExecutionEngineRef engine; +#endif void *driver_private; }; +/******************************************************************************* + * Draw common initialization code + */ +boolean draw_init(struct draw_context *draw); /******************************************************************************* * Vertex shader code: diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 6d90a6c42f..43f6c5650a 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -140,7 +140,14 @@ boolean draw_pt_init( struct draw_context *draw ) if (!draw->pt.middle.fetch_shade_emit) return FALSE; - draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); +#if HAVE_LLVM + draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit_llvm( draw ); +#else + draw->pt.middle.general = NULL; +#endif + + if (!draw->pt.middle.general) + draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); if (!draw->pt.middle.general) return FALSE; @@ -307,9 +314,8 @@ draw_arrays_instanced(struct draw_context *draw, tgsi_dump(draw->vs.vertex_shader->state.tokens, 0); debug_printf("Elements:\n"); for (i = 0; i < draw->pt.nr_vertex_elements; i++) { - debug_printf(" format=%s comps=%u\n", - util_format_name(draw->pt.vertex_element[i].src_format), - draw->pt.vertex_element[i].nr_components); + debug_printf(" format=%s\n", + util_format_name(draw->pt.vertex_element[i].src_format)); } debug_printf("Buffers:\n"); for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index d5e0d92a60..c2797a759e 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -147,6 +147,7 @@ struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw); struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw); +struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw); diff --git a/src/gallium/auxiliary/draw/draw_pt_decompose.h b/src/gallium/auxiliary/draw/draw_pt_decompose.h index 4ca5b52020..3c44f7c11e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_decompose.h +++ b/src/gallium/auxiliary/draw/draw_pt_decompose.h @@ -105,40 +105,20 @@ static void FUNC( ARGS, case PIPE_PRIM_QUADS: - if (flatfirst) { - for (i = 0; i+3 < count; i += 4) { - QUAD( (i + 1), - (i + 2), - (i + 3), - (i + 0) ); - } - } - else { - for (i = 0; i+3 < count; i += 4) { - QUAD( (i + 0), - (i + 1), - (i + 2), - (i + 3)); - } + for (i = 0; i+3 < count; i += 4) { + QUAD( (i + 0), + (i + 1), + (i + 2), + (i + 3)); } break; case PIPE_PRIM_QUAD_STRIP: - if (flatfirst) { - for (i = 0; i+3 < count; i += 2) { - QUAD( (i + 1), - (i + 3), - (i + 2), - (i + 0) ); - } - } - else { - for (i = 0; i+3 < count; i += 2) { - QUAD( (i + 2), - (i + 0), - (i + 1), - (i + 3)); - } + for (i = 0; i+3 < count; i += 2) { + QUAD( (i + 2), + (i + 0), + (i + 1), + (i + 3)); } break; diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index ae357b5122..a7917f54b0 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -86,40 +86,15 @@ void draw_pt_emit_prepare( struct pt_emit *emit, unsigned output_format; unsigned src_offset = (vinfo->attrib[i].src_index * 4 * sizeof(float) ); + output_format = draw_translate_vinfo_format(vinfo->attrib[i].emit); + emit_sz = draw_translate_vinfo_size(vinfo->attrib[i].emit); - - switch (vinfo->attrib[i].emit) { - case EMIT_4F: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - emit_sz = 4 * sizeof(float); - break; - case EMIT_3F: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - emit_sz = 3 * sizeof(float); - break; - case EMIT_2F: - output_format = PIPE_FORMAT_R32G32_FLOAT; - emit_sz = 2 * sizeof(float); - break; - case EMIT_1F: - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); - break; - case EMIT_1F_PSIZE: - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); + /* doesn't handle EMIT_OMIT */ + assert(emit_sz != 0); + + if (vinfo->attrib[i].emit == EMIT_1F_PSIZE) { src_buffer = 1; src_offset = 0; - break; - case EMIT_4UB: - output_format = PIPE_FORMAT_A8R8G8B8_UNORM; - emit_sz = 4 * sizeof(ubyte); - break; - default: - assert(0); - output_format = PIPE_FORMAT_NONE; - emit_sz = 0; - break; } hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 2a604470e9..1994ddf2bc 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -129,41 +129,16 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, unsigned input_offset = src->src_offset; unsigned output_format; - switch (vinfo->attrib[i].emit) { - case EMIT_4UB: - output_format = PIPE_FORMAT_R8G8B8A8_UNORM; - emit_sz = 4 * sizeof(unsigned char); - break; - case EMIT_4F: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - emit_sz = 4 * sizeof(float); - break; - case EMIT_3F: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - emit_sz = 3 * sizeof(float); - break; - case EMIT_2F: - output_format = PIPE_FORMAT_R32G32_FLOAT; - emit_sz = 2 * sizeof(float); - break; - case EMIT_1F: - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); - break; - case EMIT_1F_PSIZE: + output_format = draw_translate_vinfo_format(vinfo->attrib[i].emit); + emit_sz = draw_translate_vinfo_size(vinfo->attrib[i].emit); + + if (vinfo->attrib[i].emit == EMIT_OMIT) + continue; + + if (vinfo->attrib[i].emit == EMIT_1F_PSIZE) { input_format = PIPE_FORMAT_R32_FLOAT; input_buffer = draw->pt.nr_vertex_buffers; input_offset = 0; - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); - break; - case EMIT_OMIT: - continue; - default: - assert(0); - output_format = PIPE_FORMAT_NONE; - emit_sz = 0; - continue; } key.element[i].type = TRANSLATE_ELEMENT_NORMAL; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 1aecb51077..389e2b105e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -130,31 +130,10 @@ static void fse_prepare( struct draw_pt_middle_end *middle, unsigned dst_offset = 0; for (i = 0; i < vinfo->num_attribs; i++) { - unsigned emit_sz = 0; - - switch (vinfo->attrib[i].emit) { - case EMIT_4F: - emit_sz = 4 * sizeof(float); - break; - case EMIT_3F: - emit_sz = 3 * sizeof(float); - break; - case EMIT_2F: - emit_sz = 2 * sizeof(float); - break; - case EMIT_1F: - emit_sz = 1 * sizeof(float); - break; - case EMIT_1F_PSIZE: - emit_sz = 1 * sizeof(float); - break; - case EMIT_4UB: - emit_sz = 4 * sizeof(ubyte); - break; - default: - assert(0); - break; - } + unsigned emit_sz = draw_translate_vinfo_size(vinfo->attrib[i].emit); + + /* doesn't handle EMIT_OMIT */ + assert(emit_sz != 0); /* The elements in the key correspond to vertex shader output * numbers, not to positions in the hw vertex description -- diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c new file mode 100644 index 0000000000..f71271bd91 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -0,0 +1,440 @@ +/************************************************************************** + * + * Copyright 2010 VMWare, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "draw/draw_vs.h" +#include "draw/draw_gs.h" +#include "draw/draw_llvm.h" + + +struct llvm_middle_end { + struct draw_pt_middle_end base; + struct draw_context *draw; + + struct pt_emit *emit; + struct pt_fetch *fetch; + struct pt_post_vs *post_vs; + + + unsigned vertex_data_offset; + unsigned vertex_size; + unsigned prim; + unsigned opt; + + struct draw_llvm *llvm; + struct draw_llvm_variant *variants; + struct draw_llvm_variant *current_variant; + int nr_variants; +}; + + +static void +llvm_middle_end_prepare( struct draw_pt_middle_end *middle, + unsigned prim, + unsigned opt, + unsigned *max_vertices ) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *vs = draw->vs.vertex_shader; + struct draw_geometry_shader *gs = draw->gs.geometry_shader; + struct draw_llvm_variant_key key; + struct draw_llvm_variant *variant = NULL; + unsigned i; + unsigned instance_id_index = ~0; + + /* Add one to num_outputs because the pipeline occasionally tags on + * an additional texcoord, eg for AA lines. + */ + unsigned nr = MAX2( vs->info.num_inputs, + vs->info.num_outputs + 1 ); + + /* Scan for instanceID system value. + */ + for (i = 0; i < vs->info.num_inputs; i++) { + if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) { + instance_id_index = i; + break; + } + } + + fpme->prim = prim; + fpme->opt = opt; + + /* Always leave room for the vertex header whether we need it or + * not. It's hard to get rid of it in particular because of the + * viewport code in draw_pt_post_vs.c. + */ + fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); + + + + draw_pt_fetch_prepare( fpme->fetch, + vs->info.num_inputs, + fpme->vertex_size, + instance_id_index ); + if (opt & PT_SHADE) { + vs->prepare(vs, draw); + draw_geometry_shader_prepare(gs, draw); + } + + + /* XXX: it's not really gl rasterization rules we care about here, + * but gl vs dx9 clip spaces. + */ + draw_pt_post_vs_prepare( fpme->post_vs, + (boolean)draw->bypass_clipping, + (boolean)(draw->identity_viewport), + (boolean)draw->rasterizer->gl_rasterization_rules, + (draw->vs.edgeflag_output ? true : false) ); + + if (!(opt & PT_PIPELINE)) { + draw_pt_emit_prepare( fpme->emit, + prim, + max_vertices ); + + *max_vertices = MAX2( *max_vertices, + DRAW_PIPE_MAX_VERTICES ); + } + else { + *max_vertices = DRAW_PIPE_MAX_VERTICES; + } + + /* return even number */ + *max_vertices = *max_vertices & ~1; + + draw_llvm_make_variant_key(fpme->llvm, &key); + + variant = fpme->variants; + while(variant) { + if(memcmp(&variant->key, &key, sizeof key) == 0) + break; + + variant = variant->next; + } + + if (!variant) { + variant = draw_llvm_prepare(fpme->llvm, nr); + variant->next = fpme->variants; + fpme->variants = variant; + ++fpme->nr_variants; + } + fpme->current_variant = variant; + + /*XXX we only support one constant buffer */ + fpme->llvm->jit_context.vs_constants = + draw->pt.user.vs_constants[0]; + fpme->llvm->jit_context.gs_constants = + draw->pt.user.gs_constants[0]; +} + + + +static void llvm_middle_end_run( struct draw_pt_middle_end *middle, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *vshader = draw->vs.vertex_shader; + struct draw_geometry_shader *gshader = draw->gs.geometry_shader; + unsigned opt = fpme->opt; + unsigned alloc_count = align( fetch_count, 4 ); + + struct vertex_header *pipeline_verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + + if (!pipeline_verts) { + /* Not much we can do here - just skip the rendering. + */ + assert(0); + return; + } + + /* Fetch into our vertex buffer + */ + draw_pt_fetch_run( fpme->fetch, + fetch_elts, + fetch_count, + (char *)pipeline_verts ); + + /* Run the shader, note that this overwrites the data[] parts of + * the pipeline verts. If there is no shader, eg if + * bypass_vs_clip_and_viewport, then the inputs == outputs, and are + * already in the correct place.*/ + if (opt & PT_SHADE) + { + vshader->run_linear(vshader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + draw->pt.user.vs_constants, + fetch_count, + fpme->vertex_size, + fpme->vertex_size); + if (gshader) + draw_geometry_shader_run(gshader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + draw->pt.user.gs_constants, + fetch_count, + fpme->vertex_size, + fpme->vertex_size); + } + + if (draw_pt_post_vs_run( fpme->post_vs, + pipeline_verts, + fetch_count, + fpme->vertex_size )) + { + opt |= PT_PIPELINE; + } + + /* Do we need to run the pipeline? + */ + if (opt & PT_PIPELINE) { + draw_pipeline_run( fpme->draw, + fpme->prim, + pipeline_verts, + fetch_count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + else { + draw_pt_emit( fpme->emit, + (const float (*)[4])pipeline_verts->data, + fetch_count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + + + FREE(pipeline_verts); +} + + +static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_context *draw = fpme->draw; + unsigned opt = fpme->opt; + unsigned alloc_count = align( count, 4 ); + + struct vertex_header *pipeline_verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + + if (!pipeline_verts) { + /* Not much we can do here - just skip the rendering. + */ + assert(0); + return; + } + +#if 0 + debug_printf("#### Pipeline = %p (data = %p)\n", + pipeline_verts, pipeline_verts->data); +#endif + fpme->current_variant->jit_func( &fpme->llvm->jit_context, + pipeline_verts, + (const char **)draw->pt.user.vbuffer, + start, + count, + fpme->vertex_size, + draw->pt.vertex_buffer ); + + if (draw_pt_post_vs_run( fpme->post_vs, + pipeline_verts, + count, + fpme->vertex_size )) + { + opt |= PT_PIPELINE; + } + + /* Do we need to run the pipeline? + */ + if (opt & PT_PIPELINE) { + draw_pipeline_run_linear( fpme->draw, + fpme->prim, + pipeline_verts, + count, + fpme->vertex_size); + } + else { + draw_pt_emit_linear( fpme->emit, + (const float (*)[4])pipeline_verts->data, + fpme->vertex_size, + count ); + } + + FREE(pipeline_verts); +} + + + +static boolean +llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_context *draw = fpme->draw; + unsigned opt = fpme->opt; + unsigned alloc_count = align( count, 4 ); + + struct vertex_header *pipeline_verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + + if (!pipeline_verts) + return FALSE; + + fpme->current_variant->jit_func( &fpme->llvm->jit_context, + pipeline_verts, + (const char **)draw->pt.user.vbuffer, + start, + count, + fpme->vertex_size, + draw->pt.vertex_buffer ); + + if (draw_pt_post_vs_run( fpme->post_vs, + pipeline_verts, + count, + fpme->vertex_size )) + { + opt |= PT_PIPELINE; + } + + /* Do we need to run the pipeline? + */ + if (opt & PT_PIPELINE) { + draw_pipeline_run( fpme->draw, + fpme->prim, + pipeline_verts, + count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + else { + draw_pt_emit( fpme->emit, + (const float (*)[4])pipeline_verts->data, + count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + + FREE(pipeline_verts); + return TRUE; +} + + + +static void llvm_middle_end_finish( struct draw_pt_middle_end *middle ) +{ + /* nothing to do */ +} + +static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle ) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + + if (fpme->fetch) + draw_pt_fetch_destroy( fpme->fetch ); + + if (fpme->emit) + draw_pt_emit_destroy( fpme->emit ); + + if (fpme->post_vs) + draw_pt_post_vs_destroy( fpme->post_vs ); + + if (fpme->llvm) + draw_llvm_destroy( fpme->llvm ); + + FREE(middle); +} + + +struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_context *draw ) +{ + struct llvm_middle_end *fpme = 0; + + if (!draw->engine) + return NULL; + + fpme = CALLOC_STRUCT( llvm_middle_end ); + if (!fpme) + goto fail; + + fpme->base.prepare = llvm_middle_end_prepare; + fpme->base.run = llvm_middle_end_run; + fpme->base.run_linear = llvm_middle_end_linear_run; + fpme->base.run_linear_elts = llvm_middle_end_linear_run_elts; + fpme->base.finish = llvm_middle_end_finish; + fpme->base.destroy = llvm_middle_end_destroy; + + fpme->draw = draw; + + fpme->fetch = draw_pt_fetch_create( draw ); + if (!fpme->fetch) + goto fail; + + fpme->post_vs = draw_pt_post_vs_create( draw ); + if (!fpme->post_vs) + goto fail; + + fpme->emit = draw_pt_emit_create( draw ); + if (!fpme->emit) + goto fail; + + fpme->llvm = draw_llvm_create(draw); + if (!fpme->llvm) + goto fail; + + fpme->variants = NULL; + fpme->current_variant = NULL; + fpme->nr_variants = 0; + + return &fpme->base; + + fail: + if (fpme) + llvm_middle_end_destroy( &fpme->base ); + + return NULL; +} diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index 9728d5c2bd..5525dfc748 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -108,6 +108,11 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, for (j = 0; j < count; j++) { float *position = out->data[pos]; +#if 0 + debug_printf("%d) io = %p, data = %p = [%f, %f, %f, %f]\n", + j, out, position, position[0], position[1], position[2], position[3]); +#endif + out->clip[0] = position[0]; out->clip[1] = position[1]; out->clip[2] = position[2]; diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h index 62822a3d56..7cba8547f1 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h @@ -118,39 +118,21 @@ static void FUNC( struct draw_pt_front_end *frontend, case PIPE_PRIM_QUADS: for (i = 0; i+3 < count; i += 4) { - if (flatfirst) { - QUAD( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, i + 3) ); - } - else { - QUAD( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, i + 3) ); - } + QUAD( vcache, + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, i + 3) ); } break; case PIPE_PRIM_QUAD_STRIP: for (i = 0; i+3 < count; i += 2) { - if (flatfirst) { - QUAD( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 3), - get_elt(elts, i + 2) ); - } - else { - QUAD( vcache, - get_elt(elts, i + 2), - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 3) ); - } + QUAD( vcache, + get_elt(elts, i + 2), + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 3) ); } break; diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c index 3214213e44..a4f5e882c0 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.c +++ b/src/gallium/auxiliary/draw/draw_vertex.c @@ -48,30 +48,12 @@ draw_compute_vertex_size(struct vertex_info *vinfo) uint i; vinfo->size = 0; - for (i = 0; i < vinfo->num_attribs; i++) { - switch (vinfo->attrib[i].emit) { - case EMIT_OMIT: - break; - case EMIT_4UB: - /* fall-through */ - case EMIT_1F_PSIZE: - /* fall-through */ - case EMIT_1F: - vinfo->size += 1; - break; - case EMIT_2F: - vinfo->size += 2; - break; - case EMIT_3F: - vinfo->size += 3; - break; - case EMIT_4F: - vinfo->size += 4; - break; - default: - assert(0); - } - } + for (i = 0; i < vinfo->num_attribs; i++) + vinfo->size += draw_translate_vinfo_size(vinfo->attrib[i].emit); + + assert(vinfo->size % 4 == 0); + /* in dwords */ + vinfo->size /= 4; } @@ -120,6 +102,13 @@ draw_dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) debug_printf("%u ", *data++); debug_printf("%u ", *data++); break; + case EMIT_4UB_BGRA: + debug_printf("EMIT_4UB_BGRA:\t"); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + break; default: assert(0); } diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index 8c3c7befbc..ca27237126 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -54,7 +54,8 @@ enum attrib_emit { EMIT_2F, EMIT_3F, EMIT_4F, - EMIT_4UB /**< XXX may need variations for RGBA vs BGRA, etc */ + EMIT_4UB, /**< is RGBA like the rest */ + EMIT_4UB_BGRA }; @@ -141,9 +142,11 @@ void draw_dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data); -static INLINE unsigned draw_translate_vinfo_format(unsigned format ) +static INLINE unsigned draw_translate_vinfo_format(enum attrib_emit emit) { - switch (format) { + switch (emit) { + case EMIT_OMIT: + return PIPE_FORMAT_NONE; case EMIT_1F: case EMIT_1F_PSIZE: return PIPE_FORMAT_R32_FLOAT; @@ -155,10 +158,36 @@ static INLINE unsigned draw_translate_vinfo_format(unsigned format ) return PIPE_FORMAT_R32G32B32A32_FLOAT; case EMIT_4UB: return PIPE_FORMAT_R8G8B8A8_UNORM; + case EMIT_4UB_BGRA: + return PIPE_FORMAT_B8G8R8A8_UNORM; default: + assert(!"unexpected format"); return PIPE_FORMAT_NONE; } } +static INLINE unsigned draw_translate_vinfo_size(enum attrib_emit emit) +{ + switch (emit) { + case EMIT_OMIT: + return 0; + case EMIT_1F: + case EMIT_1F_PSIZE: + return 1 * sizeof(float); + case EMIT_2F: + return 2 * sizeof(float); + case EMIT_3F: + return 3 * sizeof(float); + case EMIT_4F: + return 4 * sizeof(float); + case EMIT_4UB: + return 4 * sizeof(unsigned char); + case EMIT_4UB_BGRA: + return 4 * sizeof(unsigned char); + default: + assert(!"unexpected format"); + return 0; + } +} #endif /* DRAW_VERTEX_H */ diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index ece1ddde0c..8f8bbe7cb8 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -401,13 +401,11 @@ static boolean emit_output( struct aos_compilation *cp, emit_store_R32G32B32A32(cp, ptr, dataXMM); break; case EMIT_4UB: - if (1) { - emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W)); - emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); - } - else { - emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); - } + emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); + break; + case EMIT_4UB_BGRA: + emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W)); + emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); break; default: AOS_ERROR(cp, "unhandled output format"); diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 5f7a645f5d..2a3b6b39b9 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -40,7 +40,7 @@ #include "tgsi/tgsi_parse.h" -#ifdef MESA_LLVM +#ifdef HAVE_LLVM struct draw_llvm_vertex_shader { struct draw_vertex_shader base; @@ -64,12 +64,8 @@ vs_llvm_run_linear( struct draw_vertex_shader *base, unsigned input_stride, unsigned output_stride ) { - struct draw_llvm_vertex_shader *shader = - (struct draw_llvm_vertex_shader *)base; } - - static void vs_llvm_delete( struct draw_vertex_shader *base ) { @@ -90,6 +86,7 @@ struct draw_vertex_shader * draw_create_vs_llvm(struct draw_context *draw, const struct pipe_shader_state *templ) { +#if 0 struct draw_llvm_vertex_shader *vs; vs = CALLOC_STRUCT( draw_llvm_vertex_shader ); @@ -113,6 +110,8 @@ draw_create_vs_llvm(struct draw_context *draw, vs->machine = draw->vs.machine; return &vs->base; +#endif + return NULL; } diff --git a/src/gallium/auxiliary/vl/vl_csc.h b/src/gallium/auxiliary/gallivm/lp_bld.h index 722ca35f33..2fa682f400 100644 --- a/src/gallium/auxiliary/vl/vl_csc.h +++ b/src/gallium/auxiliary/gallivm/lp_bld.h @@ -1,8 +1,8 @@ /************************************************************************** - * - * Copyright 2009 Younes Manton. + * + * Copyright 2010 VMware, Inc. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,44 +10,38 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ -#ifndef vl_csc_h -#define vl_csc_h +/** + * @file + * Wrapper for LLVM header file #includes. + */ -#include <pipe/p_compiler.h> -struct vl_procamp -{ - float brightness; - float contrast; - float saturation; - float hue; -}; +#ifndef LP_BLD_H +#define LP_BLD_H -enum VL_CSC_COLOR_STANDARD -{ - VL_CSC_COLOR_STANDARD_IDENTITY, - VL_CSC_COLOR_STANDARD_BT_601, - VL_CSC_COLOR_STANDARD_BT_709 -}; -void vl_csc_get_matrix(enum VL_CSC_COLOR_STANDARD cs, - struct vl_procamp *procamp, - bool full_range, - float *matrix); +#include <llvm-c/Core.h> -#endif /* vl_csc_h */ + +/** Ensure HAVE_LLVM is set to avoid #ifdef HAVE_LLVM everywhere */ +#ifndef HAVE_LLVM +#error "HAVE_LLVM should be set with LLVM's version number, e.g. (0x0207 for 2.7)" +#endif + + +#endif /* LP_BLD_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_alpha.h b/src/gallium/auxiliary/gallivm/lp_bld_alpha.h index 634575670d..0f99fec65e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_alpha.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_alpha.h @@ -35,7 +35,7 @@ #define LP_BLD_ALPHA_H -#include <llvm-c/Core.h> +#include "gallivm/lp_bld.h" struct pipe_alpha_state; struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 32f9e5201c..8e8fcccf56 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -232,6 +232,37 @@ lp_build_add(struct lp_build_context *bld, } +/** Return the sum of the elements of a */ +LLVMValueRef +lp_build_sum_vector(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMValueRef index, res; + int i; + + if (a == bld->zero) + return bld->zero; + if (a == bld->undef) + return bld->undef; + assert(type.length > 1); + + assert(!bld->type.norm); + + index = LLVMConstInt(LLVMInt32Type(), 0, 0); + res = LLVMBuildExtractElement(bld->builder, a, index, ""); + + for (i = 1; i < type.length; i++) { + index = LLVMConstInt(LLVMInt32Type(), i, 0); + res = LLVMBuildAdd(bld->builder, res, + LLVMBuildExtractElement(bld->builder, a, index, ""), + ""); + } + + return res; +} + + /** * Generate a - b */ @@ -330,12 +361,12 @@ lp_build_mul_u8n(LLVMBuilderRef builder, LLVMValueRef c8; LLVMValueRef ab; - c8 = lp_build_int_const_scalar(i16_type, 8); + c8 = lp_build_const_int_vec(i16_type, 8); #if 0 /* a*b/255 ~= (a*(b + 1)) >> 256 */ - b = LLVMBuildAdd(builder, b, lp_build_int_const_scalar(i16_type, 1), ""); + b = LLVMBuildAdd(builder, b, lp_build_const_int_vec(i16_type, 1), ""); ab = LLVMBuildMul(builder, a, b, ""); #else @@ -343,7 +374,7 @@ lp_build_mul_u8n(LLVMBuilderRef builder, /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */ ab = LLVMBuildMul(builder, a, b, ""); ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c8, ""), ""); - ab = LLVMBuildAdd(builder, ab, lp_build_int_const_scalar(i16_type, 0x80), ""); + ab = LLVMBuildAdd(builder, ab, lp_build_const_int_vec(i16_type, 0x80), ""); #endif @@ -398,7 +429,7 @@ lp_build_mul(struct lp_build_context *bld, } if(type.fixed) - shift = lp_build_int_const_scalar(type, type.width/2); + shift = lp_build_const_int_vec(type, type.width/2); else shift = NULL; @@ -460,7 +491,7 @@ lp_build_mul_imm(struct lp_build_context *bld, * for Inf and NaN. */ unsigned mantissa = lp_mantissa(bld->type); - factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa); + factor = lp_build_const_int_vec(bld->type, (unsigned long long)shift << mantissa); a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), ""); a = LLVMBuildAdd(bld->builder, a, factor, ""); a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), ""); @@ -468,12 +499,12 @@ lp_build_mul_imm(struct lp_build_context *bld, #endif } else { - factor = lp_build_const_scalar(bld->type, shift); + factor = lp_build_const_vec(bld->type, shift); return LLVMBuildShl(bld->builder, a, factor, ""); } } - factor = lp_build_const_scalar(bld->type, (double)b); + factor = lp_build_const_vec(bld->type, (double)b); return lp_build_mul(bld, a, factor); } @@ -536,7 +567,7 @@ lp_build_lerp(struct lp_build_context *bld, * but it will be wrong for other uses. Basically we need a more * powerful lp_type, capable of further distinguishing the values * interpretation from the value storage. */ - res = LLVMBuildAnd(bld->builder, res, lp_build_int_const_scalar(bld->type, (1 << bld->type.width/2) - 1), ""); + res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(bld->type, (1 << bld->type.width/2) - 1), ""); return res; } @@ -644,13 +675,26 @@ lp_build_abs(struct lp_build_context *bld, if(type.floating) { /* Mask out the sign bit */ - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - unsigned long long absMask = ~(1ULL << (type.width - 1)); - LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask)); - a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - a = LLVMBuildAnd(bld->builder, a, mask, ""); - a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); - return a; + if (type.length == 1) { + LLVMTypeRef int_type = LLVMIntType(type.width); + LLVMTypeRef float_type = LLVMFloatType(); + unsigned long long absMask = ~(1ULL << (type.width - 1)); + LLVMValueRef mask = LLVMConstInt(int_type, absMask, 0); + a = LLVMBuildBitCast(bld->builder, a, int_type, ""); + a = LLVMBuildAnd(bld->builder, a, mask, ""); + a = LLVMBuildBitCast(bld->builder, a, float_type, ""); + return a; + } + else { + /* vector of floats */ + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + unsigned long long absMask = ~(1ULL << (type.width - 1)); + LLVMValueRef mask = lp_build_const_int_vec(type, ((unsigned long long) absMask)); + a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + a = LLVMBuildAnd(bld->builder, a, mask, ""); + a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); + return a; + } } if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) { @@ -676,12 +720,12 @@ lp_build_negate(struct lp_build_context *bld, } +/** Return -1, 0 or +1 depending on the sign of a */ LLVMValueRef lp_build_sgn(struct lp_build_context *bld, LLVMValueRef a) { const struct lp_type type = bld->type; - LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMValueRef cond; LLVMValueRef res; @@ -691,27 +735,42 @@ lp_build_sgn(struct lp_build_context *bld, res = bld->one; } else if(type.floating) { - /* Take the sign bit and add it to 1 constant */ - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMTypeRef vec_type; + LLVMTypeRef int_type; + LLVMValueRef mask; LLVMValueRef sign; LLVMValueRef one; - sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + unsigned long long maskBit = (unsigned long long)1 << (type.width - 1); + + if (type.length == 1) { + int_type = lp_build_int_elem_type(type); + vec_type = lp_build_elem_type(type); + mask = LLVMConstInt(int_type, maskBit, 0); + } + else { + /* vector */ + int_type = lp_build_int_vec_type(type); + vec_type = lp_build_vec_type(type); + mask = lp_build_const_int_vec(type, maskBit); + } + + /* Take the sign bit and add it to 1 constant */ + sign = LLVMBuildBitCast(bld->builder, a, int_type, ""); sign = LLVMBuildAnd(bld->builder, sign, mask, ""); - one = LLVMConstBitCast(bld->one, int_vec_type); + one = LLVMConstBitCast(bld->one, int_type); res = LLVMBuildOr(bld->builder, sign, one, ""); res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); } else { - LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0); + LLVMValueRef minus_one = lp_build_const_vec(type, -1.0); cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero); res = lp_build_select(bld, cond, bld->one, minus_one); } /* Handle zero */ cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero); - res = lp_build_select(bld, cond, bld->zero, bld->one); + res = lp_build_select(bld, cond, bld->zero, res); return res; } @@ -730,8 +789,8 @@ lp_build_set_sign(struct lp_build_context *bld, const struct lp_type type = bld->type; LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMValueRef shift = lp_build_int_const_scalar(type, type.width - 1); - LLVMValueRef mask = lp_build_int_const_scalar(type, + LLVMValueRef shift = lp_build_const_int_vec(type, type.width - 1); + LLVMValueRef mask = lp_build_const_int_vec(type, ~((unsigned long long) 1 << (type.width - 1))); LLVMValueRef val, res; @@ -753,7 +812,7 @@ lp_build_set_sign(struct lp_build_context *bld, /** - * Convert vector of int to vector of float. + * Convert vector of (or scalar) int to vector of (or scalar) float. */ LLVMValueRef lp_build_int_to_float(struct lp_build_context *bld, @@ -764,7 +823,11 @@ lp_build_int_to_float(struct lp_build_context *bld, assert(type.floating); /*assert(lp_check_value(type, a));*/ - { + if (type.length == 1) { + LLVMTypeRef float_type = LLVMFloatType(); + return LLVMBuildSIToFP(bld->builder, a, float_type, ""); + } + else { LLVMTypeRef vec_type = lp_build_vec_type(type); /*LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);*/ LLVMValueRef res; @@ -866,6 +929,13 @@ lp_build_floor(struct lp_build_context *bld, assert(type.floating); + if (type.length == 1) { + LLVMValueRef res; + res = lp_build_ifloor(bld, a); + res = LLVMBuildSIToFP(bld->builder, res, LLVMFloatType(), ""); + return res; + } + if(util_cpu_caps.has_sse4_1) return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); else { @@ -921,15 +991,24 @@ lp_build_itrunc(struct lp_build_context *bld, LLVMValueRef a) { const struct lp_type type = bld->type; - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); assert(type.floating); - assert(lp_check_value(type, a)); - return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); + if (type.length == 1) { + LLVMTypeRef int_type = LLVMIntType(type.width); + return LLVMBuildFPToSI(bld->builder, a, int_type, ""); + } + else { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + assert(lp_check_value(type, a)); + return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); + } } +/** + * Convert float[] to int[] with round(). + */ LLVMValueRef lp_build_iround(struct lp_build_context *bld, LLVMValueRef a) @@ -939,6 +1018,15 @@ lp_build_iround(struct lp_build_context *bld, LLVMValueRef res; assert(type.floating); + + if (type.length == 1) { + /* scalar float to int */ + LLVMTypeRef int_type = LLVMIntType(type.width); + /* XXX we want rounding here! */ + res = LLVMBuildFPToSI(bld->builder, a, int_type, ""); + return res; + } + assert(lp_check_value(type, a)); if(util_cpu_caps.has_sse4_1) { @@ -946,7 +1034,7 @@ lp_build_iround(struct lp_build_context *bld, } else { LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); LLVMValueRef sign; LLVMValueRef half; @@ -955,7 +1043,7 @@ lp_build_iround(struct lp_build_context *bld, sign = LLVMBuildAnd(bld->builder, sign, mask, ""); /* sign * 0.5 */ - half = lp_build_const_scalar(type, 0.5); + half = lp_build_const_vec(type, 0.5); half = LLVMBuildBitCast(bld->builder, half, int_vec_type, ""); half = LLVMBuildOr(bld->builder, sign, half, ""); half = LLVMBuildBitCast(bld->builder, half, vec_type, ""); @@ -981,6 +1069,14 @@ lp_build_ifloor(struct lp_build_context *bld, LLVMValueRef res; assert(type.floating); + + if (type.length == 1) { + /* scalar float to int */ + LLVMTypeRef int_type = LLVMIntType(type.width); + res = LLVMBuildFPToSI(bld->builder, a, int_type, ""); + return res; + } + assert(lp_check_value(type, a)); if(util_cpu_caps.has_sse4_1) { @@ -990,18 +1086,18 @@ lp_build_ifloor(struct lp_build_context *bld, /* Take the sign bit and add it to 1 constant */ LLVMTypeRef vec_type = lp_build_vec_type(type); unsigned mantissa = lp_mantissa(type); - LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); LLVMValueRef sign; LLVMValueRef offset; /* sign = a < 0 ? ~0 : 0 */ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); sign = LLVMBuildAnd(bld->builder, sign, mask, ""); - sign = LLVMBuildAShr(bld->builder, sign, lp_build_int_const_scalar(type, type.width - 1), ""); + sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), ""); lp_build_name(sign, "floor.sign"); /* offset = -0.99999(9)f */ - offset = lp_build_const_scalar(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa)); + offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa)); offset = LLVMConstBitCast(offset, int_vec_type); /* offset = a < 0 ? -0.99999(9)f : 0.0f */ @@ -1172,7 +1268,7 @@ lp_build_exp(struct lp_build_context *bld, LLVMValueRef x) { /* log2(e) = 1/log(2) */ - LLVMValueRef log2e = lp_build_const_scalar(bld->type, 1.4426950408889634); + LLVMValueRef log2e = lp_build_const_vec(bld->type, 1.4426950408889634); return lp_build_mul(bld, log2e, lp_build_exp2(bld, x)); } @@ -1186,7 +1282,7 @@ lp_build_log(struct lp_build_context *bld, LLVMValueRef x) { /* log(2) */ - LLVMValueRef log2 = lp_build_const_scalar(bld->type, 0.69314718055994529); + LLVMValueRef log2 = lp_build_const_vec(bld->type, 0.69314718055994529); return lp_build_mul(bld, log2, lp_build_exp2(bld, x)); } @@ -1207,6 +1303,7 @@ lp_build_polynomial(struct lp_build_context *bld, unsigned num_coeffs) { const struct lp_type type = bld->type; + LLVMTypeRef float_type = LLVMFloatType(); LLVMValueRef res = NULL; unsigned i; @@ -1216,7 +1313,13 @@ lp_build_polynomial(struct lp_build_context *bld, __FUNCTION__); for (i = num_coeffs; i--; ) { - LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]); + LLVMValueRef coeff; + + if (type.length == 1) + coeff = LLVMConstReal(float_type, coeffs[i]); + else + coeff = lp_build_const_vec(type, coeffs[i]); + if(res) res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res)); else @@ -1272,11 +1375,11 @@ lp_build_exp2_approx(struct lp_build_context *bld, assert(type.floating && type.width == 32); - x = lp_build_min(bld, x, lp_build_const_scalar(type, 129.0)); - x = lp_build_max(bld, x, lp_build_const_scalar(type, -126.99999)); + x = lp_build_min(bld, x, lp_build_const_vec(type, 129.0)); + x = lp_build_max(bld, x, lp_build_const_vec(type, -126.99999)); /* ipart = int(x - 0.5) */ - ipart = LLVMBuildSub(bld->builder, x, lp_build_const_scalar(type, 0.5f), ""); + ipart = LLVMBuildSub(bld->builder, x, lp_build_const_vec(type, 0.5f), ""); ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, ""); /* fpart = x - ipart */ @@ -1286,8 +1389,8 @@ lp_build_exp2_approx(struct lp_build_context *bld, if(p_exp2_int_part || p_exp2) { /* expipart = (float) (1 << ipart) */ - expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_scalar(type, 127), ""); - expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_scalar(type, 23), ""); + expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_const_int_vec(type, 127), ""); + expipart = LLVMBuildShl(bld->builder, expipart, lp_build_const_int_vec(type, 23), ""); expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, ""); } @@ -1353,8 +1456,8 @@ lp_build_log2_approx(struct lp_build_context *bld, LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - LLVMValueRef expmask = lp_build_int_const_scalar(type, 0x7f800000); - LLVMValueRef mantmask = lp_build_int_const_scalar(type, 0x007fffff); + LLVMValueRef expmask = lp_build_const_int_vec(type, 0x7f800000); + LLVMValueRef mantmask = lp_build_const_int_vec(type, 0x007fffff); LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type); LLVMValueRef i = NULL; @@ -1379,8 +1482,8 @@ lp_build_log2_approx(struct lp_build_context *bld, } if(p_floor_log2 || p_log2) { - logexp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_scalar(type, 23), ""); - logexp = LLVMBuildSub(bld->builder, logexp, lp_build_int_const_scalar(type, 127), ""); + logexp = LLVMBuildLShr(bld->builder, exp, lp_build_const_int_vec(type, 23), ""); + logexp = LLVMBuildSub(bld->builder, logexp, lp_build_const_int_vec(type, 127), ""); logexp = LLVMBuildSIToFP(bld->builder, logexp, vec_type, ""); } @@ -1410,11 +1513,87 @@ lp_build_log2_approx(struct lp_build_context *bld, } +/** scalar version of above function */ +static void +lp_build_float_log2_approx(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef *p_exp, + LLVMValueRef *p_floor_log2, + LLVMValueRef *p_log2) +{ + const struct lp_type type = bld->type; + LLVMTypeRef float_type = LLVMFloatType(); + LLVMTypeRef int_type = LLVMIntType(type.width); + + LLVMValueRef expmask = LLVMConstInt(int_type, 0x7f800000, 0); + LLVMValueRef mantmask = LLVMConstInt(int_type, 0x007fffff, 0); + LLVMValueRef one = LLVMConstBitCast(bld->one, int_type); + + LLVMValueRef i = NULL; + LLVMValueRef exp = NULL; + LLVMValueRef mant = NULL; + LLVMValueRef logexp = NULL; + LLVMValueRef logmant = NULL; + LLVMValueRef res = NULL; + + if(p_exp || p_floor_log2 || p_log2) { + /* TODO: optimize the constant case */ + if(LLVMIsConstant(x)) + debug_printf("%s: inefficient/imprecise constant arithmetic\n", + __FUNCTION__); + + assert(type.floating && type.width == 32); + + i = LLVMBuildBitCast(bld->builder, x, int_type, ""); + + /* exp = (float) exponent(x) */ + exp = LLVMBuildAnd(bld->builder, i, expmask, ""); + } + + if(p_floor_log2 || p_log2) { + LLVMValueRef c23 = LLVMConstInt(int_type, 23, 0); + LLVMValueRef c127 = LLVMConstInt(int_type, 127, 0); + logexp = LLVMBuildLShr(bld->builder, exp, c23, ""); + logexp = LLVMBuildSub(bld->builder, logexp, c127, ""); + logexp = LLVMBuildSIToFP(bld->builder, logexp, float_type, ""); + } + + if(p_log2) { + /* mant = (float) mantissa(x) */ + mant = LLVMBuildAnd(bld->builder, i, mantmask, ""); + mant = LLVMBuildOr(bld->builder, mant, one, ""); + mant = LLVMBuildBitCast(bld->builder, mant, float_type, ""); + + logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial, + Elements(lp_build_log2_polynomial)); + + /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ + logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), ""); + + res = LLVMBuildAdd(bld->builder, logmant, logexp, ""); + } + + if(p_exp) + *p_exp = exp; + + if(p_floor_log2) + *p_floor_log2 = logexp; + + if(p_log2) + *p_log2 = res; +} + + LLVMValueRef lp_build_log2(struct lp_build_context *bld, LLVMValueRef x) { LLVMValueRef res; - lp_build_log2_approx(bld, x, NULL, NULL, &res); + if (bld->type.length == 1) { + lp_build_float_log2_approx(bld, x, NULL, NULL, &res); + } + else { + lp_build_log2_approx(bld, x, NULL, NULL, &res); + } return res; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index 55385e3a66..31efa9921c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -37,7 +37,7 @@ #define LP_BLD_ARIT_H -#include <llvm-c/Core.h> +#include "gallivm/lp_bld.h" struct lp_type; @@ -57,6 +57,10 @@ lp_build_add(struct lp_build_context *bld, LLVMValueRef b); LLVMValueRef +lp_build_sum_vector(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef lp_build_sub(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_blend.h b/src/gallium/auxiliary/gallivm/lp_bld_blend.h index da272e549f..ebbdb1a604 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_blend.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_blend.h @@ -40,7 +40,7 @@ * for a standalone example. */ -#include <llvm-c/Core.h> +#include "gallivm/lp_bld.h" #include "pipe/p_format.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c index c8eaa8c394..57843e9a60 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c @@ -221,8 +221,16 @@ lp_build_undef(struct lp_type type) LLVMValueRef lp_build_zero(struct lp_type type) { - LLVMTypeRef vec_type = lp_build_vec_type(type); - return LLVMConstNull(vec_type); + if (type.length == 1) { + if (type.floating) + return LLVMConstReal(LLVMFloatType(), 0.0); + else + return LLVMConstInt(LLVMIntType(type.width), 0, 0); + } + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + return LLVMConstNull(vec_type); + } } @@ -255,7 +263,7 @@ lp_build_one(struct lp_type type) if(type.sign) /* TODO: Unfortunately this caused "Tried to create a shift operation * on a non-integer type!" */ - vec = LLVMConstLShr(vec, lp_build_int_const_scalar(type, 1)); + vec = LLVMConstLShr(vec, lp_build_const_int_vec(type, 1)); #endif return vec; @@ -264,13 +272,19 @@ lp_build_one(struct lp_type type) for(i = 1; i < type.length; ++i) elems[i] = elems[0]; - return LLVMConstVector(elems, type.length); + if (type.length == 1) + return elems[0]; + else + return LLVMConstVector(elems, type.length); } +/** + * Build constant-valued vector from a scalar value. + */ LLVMValueRef -lp_build_const_scalar(struct lp_type type, - double val) +lp_build_const_vec(struct lp_type type, + double val) { LLVMTypeRef elem_type = lp_build_elem_type(type); LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; @@ -295,7 +309,7 @@ lp_build_const_scalar(struct lp_type type, LLVMValueRef -lp_build_int_const_scalar(struct lp_type type, +lp_build_const_int_vec(struct lp_type type, long long val) { LLVMTypeRef elem_type = lp_build_int_elem_type(type); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h index cb8e1c7b00..9ca2f0664e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h @@ -37,9 +37,9 @@ #define LP_BLD_CONST_H -#include <llvm-c/Core.h> +#include "pipe/p_compiler.h" +#include "gallivm/lp_bld.h" -#include <pipe/p_compiler.h> struct lp_type; @@ -85,13 +85,11 @@ lp_build_one(struct lp_type type); LLVMValueRef -lp_build_const_scalar(struct lp_type type, - double val); +lp_build_const_vec(struct lp_type type, double val); LLVMValueRef -lp_build_int_const_scalar(struct lp_type type, - long long val); +lp_build_const_int_vec(struct lp_type type, long long val); LLVMValueRef diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c index f77cf78721..3f7f2ebde9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -114,13 +114,13 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, scale = (double)mask/ubound; bias = (double)((unsigned long long)1 << (mantissa - n)); - res = LLVMBuildMul(builder, src, lp_build_const_scalar(src_type, scale), ""); - res = LLVMBuildAdd(builder, res, lp_build_const_scalar(src_type, bias), ""); + res = LLVMBuildMul(builder, src, lp_build_const_vec(src_type, scale), ""); + res = LLVMBuildAdd(builder, res, lp_build_const_vec(src_type, bias), ""); res = LLVMBuildBitCast(builder, res, int_vec_type, ""); if(dst_width > n) { int shift = dst_width - n; - res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), ""); + res = LLVMBuildShl(builder, res, lp_build_const_int_vec(src_type, shift), ""); /* TODO: Fill in the empty lower bits for additional precision? */ /* YES: this fixes progs/trivial/tri-z-eq.c. @@ -130,21 +130,21 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, #if 0 { LLVMValueRef msb; - msb = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, dst_width - 1), ""); - msb = LLVMBuildShl(builder, msb, lp_build_int_const_scalar(src_type, shift), ""); - msb = LLVMBuildSub(builder, msb, lp_build_int_const_scalar(src_type, 1), ""); + msb = LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, dst_width - 1), ""); + msb = LLVMBuildShl(builder, msb, lp_build_const_int_vec(src_type, shift), ""); + msb = LLVMBuildSub(builder, msb, lp_build_const_int_vec(src_type, 1), ""); res = LLVMBuildOr(builder, res, msb, ""); } #elif 0 while(shift > 0) { - res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, n), ""), ""); + res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, n), ""), ""); shift -= n; n *= 2; } #endif } else - res = LLVMBuildAnd(builder, res, lp_build_int_const_scalar(src_type, mask), ""); + res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), ""); return res; } @@ -183,10 +183,10 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, if(src_width > mantissa) { int shift = src_width - mantissa; - res = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(dst_type, shift), ""); + res = LLVMBuildLShr(builder, res, lp_build_const_int_vec(dst_type, shift), ""); } - bias_ = lp_build_const_scalar(dst_type, bias); + bias_ = lp_build_const_vec(dst_type, bias); res = LLVMBuildOr(builder, res, @@ -195,7 +195,7 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, res = LLVMBuildBitCast(builder, res, vec_type, ""); res = LLVMBuildSub(builder, res, bias_, ""); - res = LLVMBuildMul(builder, res, lp_build_const_scalar(dst_type, scale), ""); + res = LLVMBuildMul(builder, res, lp_build_const_vec(dst_type, scale), ""); return res; } @@ -251,7 +251,7 @@ lp_build_conv(LLVMBuilderRef builder, if(dst_min == 0.0) thres = bld.zero; else - thres = lp_build_const_scalar(src_type, dst_min); + thres = lp_build_const_vec(src_type, dst_min); for(i = 0; i < num_tmps; ++i) tmp[i] = lp_build_max(&bld, tmp[i], thres); } @@ -260,7 +260,7 @@ lp_build_conv(LLVMBuilderRef builder, if(dst_max == 1.0) thres = bld.one; else - thres = lp_build_const_scalar(src_type, dst_max); + thres = lp_build_const_vec(src_type, dst_max); for(i = 0; i < num_tmps; ++i) tmp[i] = lp_build_min(&bld, tmp[i], thres); } @@ -288,7 +288,7 @@ lp_build_conv(LLVMBuilderRef builder, LLVMTypeRef tmp_vec_type; if (dst_scale != 1.0) { - LLVMValueRef scale = lp_build_const_scalar(tmp_type, dst_scale); + LLVMValueRef scale = lp_build_const_vec(tmp_type, dst_scale); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildMul(builder, tmp[i], scale, ""); } @@ -315,7 +315,7 @@ lp_build_conv(LLVMBuilderRef builder, /* FIXME: compensate different offsets too */ if(src_shift > dst_shift) { - LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, src_shift - dst_shift); + LLVMValueRef shift = lp_build_const_int_vec(tmp_type, src_shift - dst_shift); for(i = 0; i < num_tmps; ++i) if(src_type.sign) tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, ""); @@ -388,7 +388,7 @@ lp_build_conv(LLVMBuilderRef builder, } if (src_scale != 1.0) { - LLVMValueRef scale = lp_build_const_scalar(tmp_type, 1.0/src_scale); + LLVMValueRef scale = lp_build_const_vec(tmp_type, 1.0/src_scale); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildMul(builder, tmp[i], scale, ""); } @@ -400,7 +400,7 @@ lp_build_conv(LLVMBuilderRef builder, /* FIXME: compensate different offsets too */ if(src_shift < dst_shift) { - LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, dst_shift - src_shift); + LLVMValueRef shift = lp_build_const_int_vec(tmp_type, dst_shift - src_shift); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildShl(builder, tmp[i], shift, ""); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.h b/src/gallium/auxiliary/gallivm/lp_bld_conv.h index 948e68fae4..628831c3ad 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.h @@ -37,7 +37,7 @@ #define LP_BLD_CONV_H -#include <llvm-c/Core.h> +#include "gallivm/lp_bld.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h index 583e6132b4..7b010cbdb0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h @@ -30,7 +30,7 @@ #define LP_BLD_DEBUG_H -#include <llvm-c/Core.h> +#include "gallivm/lp_bld.h" #include "pipe/p_compiler.h" #include "util/u_string.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_depth.c b/src/gallium/auxiliary/gallivm/lp_bld_depth.c index f08f8eb6d8..564ea2e318 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_depth.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_depth.c @@ -52,7 +52,14 @@ * Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ... * ... ... ... ... ... ... ... ... ... * - * FIXME: Code generate stencil test + * + * Stencil test: + * Two-sided stencil test is supported but probably not as efficient as + * it could be. Currently, we use if/then/else constructs to do the + * operations for front vs. back-facing polygons. We could probably do + * both the front and back arithmetic then use a Select() instruction to + * choose the result depending on polyon orientation. We'd have to + * measure performance both ways and see which is better. * * @author Jose Fonseca <jfonseca@vmware.com> */ @@ -61,11 +68,264 @@ #include "util/u_format.h" #include "lp_bld_type.h" +#include "lp_bld_arit.h" #include "lp_bld_const.h" #include "lp_bld_logic.h" #include "lp_bld_flow.h" #include "lp_bld_debug.h" #include "lp_bld_depth.h" +#include "lp_bld_swizzle.h" + + +/** Used to select fields from pipe_stencil_state */ +enum stencil_op { + S_FAIL_OP, + Z_FAIL_OP, + Z_PASS_OP +}; + + + +/** + * Do the stencil test comparison (compare FB stencil values against ref value). + * This will be used twice when generating two-sided stencil code. + * \param stencil the front/back stencil state + * \param stencilRef the stencil reference value, replicated as a vector + * \param stencilVals vector of stencil values from framebuffer + * \return vector mask of pass/fail values (~0 or 0) + */ +static LLVMValueRef +lp_build_stencil_test_single(struct lp_build_context *bld, + const struct pipe_stencil_state *stencil, + LLVMValueRef stencilRef, + LLVMValueRef stencilVals) +{ + const unsigned stencilMax = 255; /* XXX fix */ + struct lp_type type = bld->type; + LLVMValueRef res; + + assert(type.sign); + + assert(stencil->enabled); + + if (stencil->valuemask != stencilMax) { + /* compute stencilRef = stencilRef & valuemask */ + LLVMValueRef valuemask = lp_build_const_int_vec(type, stencil->valuemask); + stencilRef = LLVMBuildAnd(bld->builder, stencilRef, valuemask, ""); + /* compute stencilVals = stencilVals & valuemask */ + stencilVals = LLVMBuildAnd(bld->builder, stencilVals, valuemask, ""); + } + + res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals); + + return res; +} + + +/** + * Do the one or two-sided stencil test comparison. + * \sa lp_build_stencil_test_single + * \param face an integer indicating front (+) or back (-) facing polygon. + * If NULL, assume front-facing. + */ +static LLVMValueRef +lp_build_stencil_test(struct lp_build_context *bld, + const struct pipe_stencil_state stencil[2], + LLVMValueRef stencilRefs[2], + LLVMValueRef stencilVals, + LLVMValueRef face) +{ + LLVMValueRef res; + + assert(stencil[0].enabled); + + if (stencil[1].enabled && face) { + /* do two-sided test */ + struct lp_build_flow_context *flow_ctx; + struct lp_build_if_state if_ctx; + LLVMValueRef front_facing; + LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); + LLVMValueRef result = bld->undef; + + flow_ctx = lp_build_flow_create(bld->builder); + lp_build_flow_scope_begin(flow_ctx); + + lp_build_flow_scope_declare(flow_ctx, &result); + + /* front_facing = face > 0.0 */ + front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, ""); + + lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing); + { + result = lp_build_stencil_test_single(bld, &stencil[0], + stencilRefs[0], stencilVals); + } + lp_build_else(&if_ctx); + { + result = lp_build_stencil_test_single(bld, &stencil[1], + stencilRefs[1], stencilVals); + } + lp_build_endif(&if_ctx); + + lp_build_flow_scope_end(flow_ctx); + lp_build_flow_destroy(flow_ctx); + + res = result; + } + else { + /* do single-side test */ + res = lp_build_stencil_test_single(bld, &stencil[0], + stencilRefs[0], stencilVals); + } + + return res; +} + + +/** + * Apply the stencil operator (add/sub/keep/etc) to the given vector + * of stencil values. + * \return new stencil values vector + */ +static LLVMValueRef +lp_build_stencil_op_single(struct lp_build_context *bld, + const struct pipe_stencil_state *stencil, + enum stencil_op op, + LLVMValueRef stencilRef, + LLVMValueRef stencilVals, + LLVMValueRef mask) + +{ + const unsigned stencilMax = 255; /* XXX fix */ + struct lp_type type = bld->type; + LLVMValueRef res; + LLVMValueRef max = lp_build_const_int_vec(type, stencilMax); + unsigned stencil_op; + + assert(type.sign); + + switch (op) { + case S_FAIL_OP: + stencil_op = stencil->fail_op; + break; + case Z_FAIL_OP: + stencil_op = stencil->zfail_op; + break; + case Z_PASS_OP: + stencil_op = stencil->zpass_op; + break; + default: + assert(0 && "Invalid stencil_op mode"); + stencil_op = PIPE_STENCIL_OP_KEEP; + } + + switch (stencil_op) { + case PIPE_STENCIL_OP_KEEP: + res = stencilVals; + /* we can return early for this case */ + return res; + case PIPE_STENCIL_OP_ZERO: + res = bld->zero; + break; + case PIPE_STENCIL_OP_REPLACE: + res = stencilRef; + break; + case PIPE_STENCIL_OP_INCR: + res = lp_build_add(bld, stencilVals, bld->one); + res = lp_build_min(bld, res, max); + break; + case PIPE_STENCIL_OP_DECR: + res = lp_build_sub(bld, stencilVals, bld->one); + res = lp_build_max(bld, res, bld->zero); + break; + case PIPE_STENCIL_OP_INCR_WRAP: + res = lp_build_add(bld, stencilVals, bld->one); + res = LLVMBuildAnd(bld->builder, res, max, ""); + break; + case PIPE_STENCIL_OP_DECR_WRAP: + res = lp_build_sub(bld, stencilVals, bld->one); + res = LLVMBuildAnd(bld->builder, res, max, ""); + break; + case PIPE_STENCIL_OP_INVERT: + res = LLVMBuildNot(bld->builder, stencilVals, ""); + res = LLVMBuildAnd(bld->builder, res, max, ""); + break; + default: + assert(0 && "bad stencil op mode"); + res = NULL; + } + + if (stencil->writemask != stencilMax) { + /* compute res = (res & mask) | (stencilVals & ~mask) */ + LLVMValueRef mask = lp_build_const_int_vec(type, stencil->writemask); + LLVMValueRef cmask = LLVMBuildNot(bld->builder, mask, "notWritemask"); + LLVMValueRef t1 = LLVMBuildAnd(bld->builder, res, mask, "t1"); + LLVMValueRef t2 = LLVMBuildAnd(bld->builder, stencilVals, cmask, "t2"); + res = LLVMBuildOr(bld->builder, t1, t2, "t1_or_t2"); + } + + /* only the update the vector elements enabled by 'mask' */ + res = lp_build_select(bld, mask, res, stencilVals); + + return res; +} + + +/** + * Do the one or two-sided stencil test op/update. + */ +static LLVMValueRef +lp_build_stencil_op(struct lp_build_context *bld, + const struct pipe_stencil_state stencil[2], + enum stencil_op op, + LLVMValueRef stencilRefs[2], + LLVMValueRef stencilVals, + LLVMValueRef mask, + LLVMValueRef face) + +{ + assert(stencil[0].enabled); + + if (stencil[1].enabled && face) { + /* do two-sided op */ + struct lp_build_flow_context *flow_ctx; + struct lp_build_if_state if_ctx; + LLVMValueRef front_facing; + LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); + LLVMValueRef result = bld->undef; + + flow_ctx = lp_build_flow_create(bld->builder); + lp_build_flow_scope_begin(flow_ctx); + + lp_build_flow_scope_declare(flow_ctx, &result); + + /* front_facing = face > 0.0 */ + front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, ""); + + lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing); + { + result = lp_build_stencil_op_single(bld, &stencil[0], op, + stencilRefs[0], stencilVals, mask); + } + lp_build_else(&if_ctx); + { + result = lp_build_stencil_op_single(bld, &stencil[1], op, + stencilRefs[1], stencilVals, mask); + } + lp_build_endif(&if_ctx); + + lp_build_flow_scope_end(flow_ctx); + lp_build_flow_destroy(flow_ctx); + + return result; + } + else { + /* do single-sided op */ + return lp_build_stencil_op_single(bld, &stencil[0], op, + stencilRefs[0], stencilVals, mask); + } +} + /** @@ -109,105 +369,303 @@ lp_depth_type(const struct util_format_description *format_desc, /** - * Depth test. + * Compute bitmask and bit shift to apply to the incoming fragment Z values + * and the Z buffer values needed before doing the Z comparison. + * + * Note that we leave the Z bits in the position that we find them + * in the Z buffer (typically 0xffffff00 or 0x00ffffff). That lets us + * get by with fewer bit twiddling steps. */ -void -lp_build_depth_test(LLVMBuilderRef builder, - const struct pipe_depth_state *state, - struct lp_type type, - const struct util_format_description *format_desc, - struct lp_build_mask_context *mask, - LLVMValueRef src, - LLVMValueRef dst_ptr) +static boolean +get_z_shift_and_mask(const struct util_format_description *format_desc, + unsigned *shift, unsigned *mask) { - struct lp_build_context bld; + const unsigned total_bits = format_desc->block.bits; unsigned z_swizzle; - LLVMValueRef dst; - LLVMValueRef z_bitmask = NULL; - LLVMValueRef test; - - if(!state->enabled) - return; - + int chan; + unsigned padding_left, padding_right; + assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); assert(format_desc->block.width == 1); assert(format_desc->block.height == 1); z_swizzle = format_desc->swizzle[0]; - if(z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) - return; - /* Sanity checking */ - assert(z_swizzle < 4); - assert(format_desc->block.bits == type.width); - if(type.floating) { - assert(z_swizzle == 0); - assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT); - assert(format_desc->channel[z_swizzle].size == format_desc->block.bits); + if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) + return FALSE; + + padding_right = 0; + for (chan = 0; chan < z_swizzle; ++chan) + padding_right += format_desc->channel[chan].size; + + padding_left = + total_bits - (padding_right + format_desc->channel[z_swizzle].size); + + if (padding_left || padding_right) { + unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1; + unsigned long long mask_right = (1ULL << (padding_right)) - 1; + *mask = mask_left ^ mask_right; } else { - assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); - assert(format_desc->channel[z_swizzle].normalized); - assert(!type.fixed); - assert(!type.sign); - assert(type.norm); + *mask = 0xffffffff; + } + + *shift = padding_left; + + return TRUE; +} + + +/** + * Compute bitmask and bit shift to apply to the framebuffer pixel values + * to put the stencil bits in the least significant position. + * (i.e. 0x000000ff) + */ +static boolean +get_s_shift_and_mask(const struct util_format_description *format_desc, + unsigned *shift, unsigned *mask) +{ + unsigned s_swizzle; + int chan, sz; + + s_swizzle = format_desc->swizzle[1]; + + if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE) + return FALSE; + + *shift = 0; + for (chan = 0; chan < s_swizzle; chan++) + *shift += format_desc->channel[chan].size; + + sz = format_desc->channel[s_swizzle].size; + *mask = (1U << sz) - 1U; + + return TRUE; +} + + + +/** + * Generate code for performing depth and/or stencil tests. + * We operate on a vector of values (typically a 2x2 quad). + * + * \param depth the depth test state + * \param stencil the front/back stencil state + * \param type the data type of the fragment depth/stencil values + * \param format_desc description of the depth/stencil surface + * \param mask the alive/dead pixel mask for the quad (vector) + * \param stencil_refs the front/back stencil ref values (scalar) + * \param z_src the incoming depth/stencil values (a 2x2 quad) + * \param zs_dst_ptr pointer to depth/stencil values in framebuffer + * \param facing contains float value indicating front/back facing polygon + */ +void +lp_build_depth_stencil_test(LLVMBuilderRef builder, + const struct pipe_depth_state *depth, + const struct pipe_stencil_state stencil[2], + struct lp_type type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef stencil_refs[2], + LLVMValueRef z_src, + LLVMValueRef zs_dst_ptr, + LLVMValueRef face) +{ + struct lp_build_context bld; + struct lp_build_context sbld; + struct lp_type s_type; + LLVMValueRef zs_dst, z_dst = NULL; + LLVMValueRef stencil_vals = NULL; + LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; + LLVMValueRef z_pass = NULL, s_pass_mask = NULL; + LLVMValueRef orig_mask = mask->value; + + /* Sanity checking */ + { + const unsigned z_swizzle = format_desc->swizzle[0]; + const unsigned s_swizzle = format_desc->swizzle[1]; + + assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE || + s_swizzle != UTIL_FORMAT_SWIZZLE_NONE); + + assert(depth->enabled || stencil[0].enabled); + + assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + + if (stencil[0].enabled) { + assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED || + format_desc->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM); + } + + assert(z_swizzle < 4); + assert(format_desc->block.bits == type.width); + if (type.floating) { + assert(z_swizzle == 0); + assert(format_desc->channel[z_swizzle].type == + UTIL_FORMAT_TYPE_FLOAT); + assert(format_desc->channel[z_swizzle].size == + format_desc->block.bits); + } + else { + assert(format_desc->channel[z_swizzle].type == + UTIL_FORMAT_TYPE_UNSIGNED); + assert(format_desc->channel[z_swizzle].normalized); + assert(!type.fixed); + assert(!type.sign); + assert(type.norm); + } } - /* Setup build context */ + + /* Setup build context for Z vals */ lp_build_context_init(&bld, builder, type); - dst = LLVMBuildLoad(builder, dst_ptr, ""); + /* Setup build context for stencil vals */ + s_type = lp_type_int_vec(type.width); + lp_build_context_init(&sbld, builder, s_type); + + /* Load current z/stencil value from z/stencil buffer */ + zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, ""); + + lp_build_name(zs_dst, "zsbufval"); - lp_build_name(dst, "zsbuf"); - /* Align the source depth bits with the destination's, and mask out any - * stencil or padding bits from both */ - if(format_desc->channel[z_swizzle].size == format_desc->block.bits) { - assert(z_swizzle == 0); - /* nothing to do */ + /* Compute and apply the Z/stencil bitmasks and shifts. + */ + { + unsigned z_shift, z_mask; + unsigned s_shift, s_mask; + + if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) { + if (z_shift) { + LLVMValueRef shift = lp_build_const_int_vec(type, z_shift); + z_src = LLVMBuildLShr(builder, z_src, shift, ""); + } + + if (z_mask != 0xffffffff) { + LLVMValueRef mask = lp_build_const_int_vec(type, z_mask); + z_src = LLVMBuildAnd(builder, z_src, mask, ""); + z_dst = LLVMBuildAnd(builder, zs_dst, mask, ""); + z_bitmask = mask; /* used below */ + } + else { + z_dst = zs_dst; + } + + lp_build_name(z_dst, "zsbuf.z"); + } + + if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) { + if (s_shift) { + LLVMValueRef shift = lp_build_const_int_vec(type, s_shift); + stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, ""); + stencil_shift = shift; /* used below */ + } + else { + stencil_vals = zs_dst; + } + + if (s_mask != 0xffffffff) { + LLVMValueRef mask = lp_build_const_int_vec(type, s_mask); + stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, ""); + } + + lp_build_name(stencil_vals, "stencil"); + } } - else { - unsigned padding_left; - unsigned padding_right; - unsigned chan; - - assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); - assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); - assert(format_desc->channel[z_swizzle].size <= format_desc->block.bits); - assert(format_desc->channel[z_swizzle].normalized); - - padding_right = 0; - for(chan = 0; chan < z_swizzle; ++chan) - padding_right += format_desc->channel[chan].size; - padding_left = format_desc->block.bits - - (padding_right + format_desc->channel[z_swizzle].size); - - if(padding_left || padding_right) { - const unsigned long long mask_left = ((unsigned long long)1 << (format_desc->block.bits - padding_left)) - 1; - const unsigned long long mask_right = ((unsigned long long)1 << (padding_right)) - 1; - z_bitmask = lp_build_int_const_scalar(type, mask_left ^ mask_right); + + + if (stencil[0].enabled) { + /* convert scalar stencil refs into vectors */ + stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]); + stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]); + + s_pass_mask = lp_build_stencil_test(&sbld, stencil, + stencil_refs, stencil_vals, face); + + /* apply stencil-fail operator */ + { + LLVMValueRef s_fail_mask = lp_build_andc(&bld, orig_mask, s_pass_mask); + stencil_vals = lp_build_stencil_op(&sbld, stencil, S_FAIL_OP, + stencil_refs, stencil_vals, + s_fail_mask, face); + } + } + + if (depth->enabled) { + /* compare src Z to dst Z, returning 'pass' mask */ + z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst); + + if (!stencil[0].enabled) { + /* We can potentially skip all remaining operations here, but only + * if stencil is disabled because we still need to update the stencil + * buffer values. Don't need to update Z buffer values. + */ + lp_build_mask_update(mask, z_pass); + } + + if (depth->writemask) { + if(z_bitmask) + z_bitmask = LLVMBuildAnd(builder, mask->value, z_bitmask, ""); + else + z_bitmask = mask->value; + + z_dst = lp_build_select(&bld, z_bitmask, z_src, z_dst); } - if(padding_left) - src = LLVMBuildLShr(builder, src, lp_build_int_const_scalar(type, padding_left), ""); - if(padding_right) - src = LLVMBuildAnd(builder, src, z_bitmask, ""); - if(padding_left || padding_right) - dst = LLVMBuildAnd(builder, dst, z_bitmask, ""); + if (stencil[0].enabled) { + /* update stencil buffer values according to z pass/fail result */ + LLVMValueRef z_fail_mask, z_pass_mask; + + /* apply Z-fail operator */ + z_fail_mask = lp_build_andc(&bld, orig_mask, z_pass); + stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_FAIL_OP, + stencil_refs, stencil_vals, + z_fail_mask, face); + + /* apply Z-pass operator */ + z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, ""); + stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, + stencil_refs, stencil_vals, + z_pass_mask, face); + } + } + else { + /* No depth test: apply Z-pass operator to stencil buffer values which + * passed the stencil test. + */ + s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, ""); + stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, + stencil_refs, stencil_vals, + s_pass_mask, face); } - lp_build_name(dst, "zsbuf.z"); + /* The Z bits are already in the right place but we may need to shift the + * stencil bits before ORing Z with Stencil to make the final pixel value. + */ + if (stencil_vals && stencil_shift) + stencil_vals = LLVMBuildShl(bld.builder, stencil_vals, + stencil_shift, ""); - test = lp_build_cmp(&bld, state->func, src, dst); - lp_build_mask_update(mask, test); + /* Finally, merge/store the z/stencil values */ + if ((depth->enabled && depth->writemask) || + (stencil[0].enabled && stencil[0].writemask)) { - if(state->writemask) { - if(z_bitmask) - z_bitmask = LLVMBuildAnd(builder, mask->value, z_bitmask, ""); + if (z_dst && stencil_vals) + zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, ""); + else if (z_dst) + zs_dst = z_dst; else - z_bitmask = mask->value; + zs_dst = stencil_vals; - dst = lp_build_select(&bld, z_bitmask, src, dst); - LLVMBuildStore(builder, dst, dst_ptr); + LLVMBuildStore(builder, zs_dst, zs_dst_ptr); } + + if (s_pass_mask) + lp_build_mask_update(mask, s_pass_mask); + + if (depth->enabled && stencil[0].enabled) + lp_build_mask_update(mask, z_pass); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_depth.h b/src/gallium/auxiliary/gallivm/lp_bld_depth.h index 79d6981bb5..27dd46b625 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_depth.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_depth.h @@ -36,7 +36,7 @@ #define LP_BLD_DEPTH_H -#include <llvm-c/Core.h> +#include "gallivm/lp_bld.h" struct pipe_depth_state; @@ -51,13 +51,16 @@ lp_depth_type(const struct util_format_description *format_desc, void -lp_build_depth_test(LLVMBuilderRef builder, - const struct pipe_depth_state *state, - struct lp_type type, - const struct util_format_description *format_desc, - struct lp_build_mask_context *mask, - LLVMValueRef src, - LLVMValueRef dst_ptr); +lp_build_depth_stencil_test(LLVMBuilderRef builder, + const struct pipe_depth_state *depth, + const struct pipe_stencil_state stencil[2], + struct lp_type type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef stencil_refs[2], + LLVMValueRef zs_src, + LLVMValueRef zs_dst_ptr, + LLVMValueRef facing); #endif /* !LP_BLD_DEPTH_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index bc83138908..e60ab4f6ba 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -308,7 +308,7 @@ lp_build_flow_scope_end(struct lp_build_flow_context *flow) * Note: this function has no dependencies on the flow code and could * be used elsewhere. */ -static LLVMBasicBlockRef +LLVMBasicBlockRef lp_build_insert_new_block(LLVMBuilderRef builder, const char *name) { LLVMBasicBlockRef current_block; @@ -570,6 +570,35 @@ lp_build_loop_end(LLVMBuilderRef builder, LLVMPositionBuilderAtEnd(builder, after_block); } +void +lp_build_loop_end_cond(LLVMBuilderRef builder, + LLVMValueRef end, + LLVMValueRef step, + int llvm_cond, + struct lp_build_loop_state *state) +{ + LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(block); + LLVMValueRef next; + LLVMValueRef cond; + LLVMBasicBlockRef after_block; + + if (!step) + step = LLVMConstInt(LLVMTypeOf(end), 1, 0); + + next = LLVMBuildAdd(builder, state->counter, step, ""); + + cond = LLVMBuildICmp(builder, llvm_cond, next, end, ""); + + after_block = LLVMAppendBasicBlock(function, ""); + + LLVMBuildCondBr(builder, cond, after_block, state->block); + + LLVMAddIncoming(state->counter, &next, &block, 1); + + LLVMPositionBuilderAtEnd(builder, after_block); +} + /* @@ -648,7 +677,9 @@ lp_build_if(struct lp_build_if_state *ctx, ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); /* add add the initial value of the var from the entry block */ - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->entry_block, 1); + if (!LLVMIsUndef(*flow->variables[i])) + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], + &ifthen->entry_block, 1); } /* create/insert true_block before merge_block */ @@ -695,18 +726,21 @@ lp_build_endif(struct lp_build_if_state *ctx) { struct lp_build_flow_context *flow = ctx->flow; struct lp_build_flow_if *ifthen; + LLVMBasicBlockRef curBlock = LLVMGetInsertBlock(ctx->builder); unsigned i; ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen; assert(ifthen); + /* Insert branch to the merge block from current block */ + LLVMBuildBr(ctx->builder, ifthen->merge_block); + if (ifthen->false_block) { LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); /* for each variable, update the Phi node with a (variable, block) pair */ for (i = 0; i < flow->num_variables; i++) { assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->false_block, 1); - + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &curBlock, 1); /* replace the variable ref with the phi function */ *flow->variables[i] = ifthen->phi[i]; } @@ -742,15 +776,18 @@ lp_build_endif(struct lp_build_if_state *ctx) ifthen->true_block, ifthen->merge_block); } - /* Append an unconditional Br(anch) instruction on the true_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block); - LLVMBuildBr(ctx->builder, ifthen->merge_block); + /* Insert branch from end of true_block to merge_block */ if (ifthen->false_block) { - /* Append an unconditional Br(anch) instruction on the false_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block); + /* Append an unconditional Br(anch) instruction on the true_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block); LLVMBuildBr(ctx->builder, ifthen->merge_block); } - + else { + /* No else clause. + * Note that we've already inserted the branch at the end of + * true_block. See the very first LLVMBuildBr() call in this function. + */ + } /* Resume building code at end of the ifthen->merge_block */ LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index 4c225a0d4f..745838570c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -35,7 +35,7 @@ #define LP_BLD_FLOW_H -#include <llvm-c/Core.h> +#include "gallivm/lp_bld.h" struct lp_type; @@ -124,6 +124,13 @@ lp_build_loop_end(LLVMBuilderRef builder, LLVMValueRef step, struct lp_build_loop_state *state); +void +lp_build_loop_end_cond(LLVMBuilderRef builder, + LLVMValueRef end, + LLVMValueRef step, + int cond, /* LLVM condition */ + struct lp_build_loop_state *state); + @@ -145,7 +152,9 @@ lp_build_else(struct lp_build_if_state *ctx); void lp_build_endif(struct lp_build_if_state *ctx); - + +LLVMBasicBlockRef +lp_build_insert_new_block(LLVMBuilderRef builder, const char *name); #endif /* !LP_BLD_FLOW_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h index 970bee379f..bb1298ed3f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -34,7 +34,7 @@ * Pixel format helpers. */ -#include <llvm-c/Core.h> +#include "gallivm/lp_bld.h" #include "pipe/p_format.h" @@ -80,4 +80,15 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, LLVMValueRef *rgba); +void +lp_build_fetch_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + struct lp_type type, + LLVMValueRef base_ptr, + LLVMValueRef offsets, + LLVMValueRef i, + LLVMValueRef j, + LLVMValueRef *rgba); + + #endif /* !LP_BLD_FORMAT_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c deleted file mode 100644 index a07f7418f2..0000000000 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ /dev/null @@ -1,383 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * AoS pixel format manipulation. - * - * @author Jose Fonseca <jfonseca@vmware.com> - */ - - -#include "util/u_cpu_detect.h" -#include "util/u_format.h" - -#include "lp_bld_type.h" -#include "lp_bld_const.h" -#include "lp_bld_swizzle.h" -#include "lp_bld_format.h" - - -/** - * Unpack a single pixel into its RGBA components. - * - * @param packed integer. - * - * @return RGBA in a 4 floats vector. - * - * XXX: This is mostly for reference and testing -- operating a single pixel at - * a time is rarely if ever needed. - */ -LLVMValueRef -lp_build_unpack_rgba_aos(LLVMBuilderRef builder, - const struct util_format_description *desc, - LLVMValueRef packed) -{ - LLVMTypeRef type; - LLVMValueRef shifted, casted, scaled, masked; - LLVMValueRef shifts[4]; - LLVMValueRef masks[4]; - LLVMValueRef scales[4]; - LLVMValueRef swizzles[4]; - LLVMValueRef aux[4]; - bool normalized; - int empty_channel; - unsigned shift; - unsigned i; - - /* FIXME: Support more formats */ - assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); - assert(desc->block.width == 1); - assert(desc->block.height == 1); - assert(desc->block.bits <= 32); - - type = LLVMIntType(desc->block.bits); - - /* Do the intermediate integer computations with 32bit integers since it - * matches floating point size */ - if (desc->block.bits < 32) - packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), ""); - - /* Broadcast the packed value to all four channels */ - packed = LLVMBuildInsertElement(builder, - LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)), - packed, - LLVMConstNull(LLVMInt32Type()), - ""); - packed = LLVMBuildShuffleVector(builder, - packed, - LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)), - LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)), - ""); - - /* Initialize vector constants */ - normalized = FALSE; - empty_channel = -1; - shift = 0; - for (i = 0; i < 4; ++i) { - unsigned bits = desc->channel[i].size; - - if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { - shifts[i] = LLVMGetUndef(LLVMInt32Type()); - masks[i] = LLVMConstNull(LLVMInt32Type()); - scales[i] = LLVMConstNull(LLVMFloatType()); - empty_channel = i; - } - else { - unsigned mask = (1 << bits) - 1; - - assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); - assert(bits < 32); - - shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0); - masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0); - - if (desc->channel[i].normalized) { - scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask); - normalized = TRUE; - } - else - scales[i] = LLVMConstReal(LLVMFloatType(), 1.0); - } - - shift += bits; - } - - shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), ""); - masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), ""); - /* UIToFP can't be expressed in SSE2 */ - casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), ""); - - if (normalized) - scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), ""); - else - scaled = casted; - - for (i = 0; i < 4; ++i) - aux[i] = LLVMGetUndef(LLVMFloatType()); - - for (i = 0; i < 4; ++i) { - enum util_format_swizzle swizzle = desc->swizzle[i]; - - switch (swizzle) { - case UTIL_FORMAT_SWIZZLE_X: - case UTIL_FORMAT_SWIZZLE_Y: - case UTIL_FORMAT_SWIZZLE_Z: - case UTIL_FORMAT_SWIZZLE_W: - swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0); - break; - case UTIL_FORMAT_SWIZZLE_0: - assert(empty_channel >= 0); - swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0); - break; - case UTIL_FORMAT_SWIZZLE_1: - swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0); - aux[0] = LLVMConstReal(LLVMFloatType(), 1.0); - break; - case UTIL_FORMAT_SWIZZLE_NONE: - swizzles[i] = LLVMGetUndef(LLVMFloatType()); - assert(0); - break; - } - } - - return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4), LLVMConstVector(swizzles, 4), ""); -} - - -/** - * Take a vector with packed pixels and unpack into a rgba8 vector. - * - * Formats with bit depth smaller than 32bits are accepted, but they must be - * padded to 32bits. - */ -LLVMValueRef -lp_build_unpack_rgba8_aos(LLVMBuilderRef builder, - const struct util_format_description *desc, - struct lp_type type, - LLVMValueRef packed) -{ - struct lp_build_context bld; - bool rgba8; - LLVMValueRef res; - unsigned i; - - lp_build_context_init(&bld, builder, type); - - /* FIXME: Support more formats */ - assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); - assert(desc->block.width == 1); - assert(desc->block.height == 1); - assert(desc->block.bits <= 32); - - assert(!type.floating); - assert(!type.fixed); - assert(type.norm); - assert(type.width == 8); - assert(type.length % 4 == 0); - - rgba8 = TRUE; - for(i = 0; i < 4; ++i) { - assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED || - desc->channel[i].type == UTIL_FORMAT_TYPE_VOID); - if(desc->channel[0].size != 8) - rgba8 = FALSE; - } - - if(rgba8) { - /* - * The pixel is already in a rgba8 format variant. All it is necessary - * is to swizzle the channels. - */ - - unsigned char swizzles[4]; - boolean zeros[4]; /* bitwise AND mask */ - boolean ones[4]; /* bitwise OR mask */ - boolean swizzles_needed = FALSE; - boolean zeros_needed = FALSE; - boolean ones_needed = FALSE; - - for(i = 0; i < 4; ++i) { - enum util_format_swizzle swizzle = desc->swizzle[i]; - - /* Initialize with the no-op case */ - swizzles[i] = util_cpu_caps.little_endian ? 3 - i : i; - zeros[i] = TRUE; - ones[i] = FALSE; - - switch (swizzle) { - case UTIL_FORMAT_SWIZZLE_X: - case UTIL_FORMAT_SWIZZLE_Y: - case UTIL_FORMAT_SWIZZLE_Z: - case UTIL_FORMAT_SWIZZLE_W: - if(swizzle != swizzles[i]) { - swizzles[i] = swizzle; - swizzles_needed = TRUE; - } - break; - case UTIL_FORMAT_SWIZZLE_0: - zeros[i] = FALSE; - zeros_needed = TRUE; - break; - case UTIL_FORMAT_SWIZZLE_1: - ones[i] = TRUE; - ones_needed = TRUE; - break; - case UTIL_FORMAT_SWIZZLE_NONE: - assert(0); - break; - } - } - - res = packed; - - if(swizzles_needed) - res = lp_build_swizzle1_aos(&bld, res, swizzles); - - if(zeros_needed) { - /* Mask out zero channels */ - LLVMValueRef mask = lp_build_const_mask_aos(type, zeros); - res = LLVMBuildAnd(builder, res, mask, ""); - } - - if(ones_needed) { - /* Or one channels */ - LLVMValueRef mask = lp_build_const_mask_aos(type, ones); - res = LLVMBuildOr(builder, res, mask, ""); - } - } - else { - /* FIXME */ - assert(0); - res = lp_build_undef(type); - } - - return res; -} - - -/** - * Pack a single pixel. - * - * @param rgba 4 float vector with the unpacked components. - * - * XXX: This is mostly for reference and testing -- operating a single pixel at - * a time is rarely if ever needed. - */ -LLVMValueRef -lp_build_pack_rgba_aos(LLVMBuilderRef builder, - const struct util_format_description *desc, - LLVMValueRef rgba) -{ - LLVMTypeRef type; - LLVMValueRef packed = NULL; - LLVMValueRef swizzles[4]; - LLVMValueRef shifted, casted, scaled, unswizzled; - LLVMValueRef shifts[4]; - LLVMValueRef scales[4]; - bool normalized; - unsigned shift; - unsigned i, j; - - assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); - assert(desc->block.width == 1); - assert(desc->block.height == 1); - - type = LLVMIntType(desc->block.bits); - - /* Unswizzle the color components into the source vector. */ - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) { - if (desc->swizzle[j] == i) - break; - } - if (j < 4) - swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0); - else - swizzles[i] = LLVMGetUndef(LLVMInt32Type()); - } - - unswizzled = LLVMBuildShuffleVector(builder, rgba, - LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)), - LLVMConstVector(swizzles, 4), ""); - - normalized = FALSE; - shift = 0; - for (i = 0; i < 4; ++i) { - unsigned bits = desc->channel[i].size; - - if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { - shifts[i] = LLVMGetUndef(LLVMInt32Type()); - scales[i] = LLVMGetUndef(LLVMFloatType()); - } - else { - unsigned mask = (1 << bits) - 1; - - assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); - assert(bits < 32); - - shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0); - - if (desc->channel[i].normalized) { - scales[i] = LLVMConstReal(LLVMFloatType(), mask); - normalized = TRUE; - } - else - scales[i] = LLVMConstReal(LLVMFloatType(), 1.0); - } - - shift += bits; - } - - if (normalized) - scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), ""); - else - scaled = unswizzled; - - casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), ""); - - shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), ""); - - /* Bitwise or all components */ - for (i = 0; i < 4; ++i) { - if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { - LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), ""); - if (packed) - packed = LLVMBuildOr(builder, packed, component, ""); - else - packed = component; - } - } - - if (!packed) - packed = LLVMGetUndef(LLVMInt32Type()); - - if (desc->block.bits < 32) - packed = LLVMBuildTrunc(builder, packed, type, ""); - - return packed; -} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index abb27e4c32..2b66162eb4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -27,10 +27,14 @@ #include "util/u_format.h" +#include "util/u_memory.h" +#include "util/u_string.h" #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_conv.h" +#include "lp_bld_sample.h" /* for lp_build_gather */ +#include "lp_bld_init.h" #include "lp_bld_format.h" @@ -80,6 +84,24 @@ lp_build_format_swizzle_soa(const struct util_format_description *format_desc, } +/** + * Unpack several pixels in SoA. + * + * It takes a vector of packed pixels: + * + * packed = {P0, P1, P2, P3, ..., Pn} + * + * And will produce four vectors: + * + * red = {R0, R1, R2, R3, ..., Rn} + * green = {G0, G1, G2, G3, ..., Gn} + * blue = {B0, B1, B2, B3, ..., Bn} + * alpha = {A0, A1, A2, A3, ..., An} + * + * It requires that a packed pixel fits into an element of the output + * channels. The common case is when converting pixel with a depth of 32 bit or + * less into floats. + */ void lp_build_unpack_rgba_soa(LLVMBuilderRef builder, const struct util_format_description *format_desc, @@ -91,15 +113,17 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, unsigned start; unsigned chan; - /* FIXME: Support more formats */ assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(format_desc->block.width == 1); assert(format_desc->block.height == 1); - assert(format_desc->block.bits <= 32); + assert(format_desc->block.bits <= type.width); + /* FIXME: Support more output types */ + assert(type.floating); + assert(type.width == 32); /* Decode the input vector components */ start = 0; - for (chan = 0; chan < 4; ++chan) { + for (chan = 0; chan < format_desc->nr_channels; ++chan) { unsigned width = format_desc->channel[chan].size; unsigned stop = start + width; LLVMValueRef input; @@ -108,22 +132,106 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, switch(format_desc->channel[chan].type) { case UTIL_FORMAT_TYPE_VOID: - input = NULL; + input = lp_build_undef(type); break; case UTIL_FORMAT_TYPE_UNSIGNED: - if(type.floating) { - if(start) - input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(type, start), ""); - if(stop < format_desc->block.bits) { - unsigned mask = ((unsigned long long)1 << width) - 1; - input = LLVMBuildAnd(builder, input, lp_build_int_const_scalar(type, mask), ""); - } + /* + * Align the LSB + */ + + if (start) { + input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(type, start), ""); + } + /* + * Zero the MSBs + */ + + if (stop < format_desc->block.bits) { + unsigned mask = ((unsigned long long)1 << width) - 1; + input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(type, mask), ""); + } + + /* + * Type conversion + */ + + if (type.floating) { if(format_desc->channel[chan].normalized) input = lp_build_unsigned_norm_to_float(builder, width, type, input); else - input = LLVMBuildFPToSI(builder, input, lp_build_vec_type(type), ""); + input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), ""); + } + else { + /* FIXME */ + assert(0); + input = lp_build_undef(type); + } + + break; + + case UTIL_FORMAT_TYPE_SIGNED: + /* + * Align the sign bit first. + */ + + if (stop < type.width) { + unsigned bits = type.width - stop; + LLVMValueRef bits_val = lp_build_const_int_vec(type, bits); + input = LLVMBuildShl(builder, input, bits_val, ""); + } + + /* + * Align the LSB (with an arithmetic shift to preserve the sign) + */ + + if (format_desc->channel[chan].size < type.width) { + unsigned bits = type.width - format_desc->channel[chan].size; + LLVMValueRef bits_val = lp_build_const_int_vec(type, bits); + input = LLVMBuildAShr(builder, input, bits_val, ""); + } + + /* + * Type conversion + */ + + if (type.floating) { + input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), ""); + if (format_desc->channel[chan].normalized) { + double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1); + LLVMValueRef scale_val = lp_build_const_vec(type, scale); + input = LLVMBuildMul(builder, input, scale_val, ""); + } + } + else { + /* FIXME */ + assert(0); + input = lp_build_undef(type); + } + + break; + + case UTIL_FORMAT_TYPE_FLOAT: + if (type.floating) { + assert(start == 0); + assert(stop == 32); + assert(type.width == 32); + input = LLVMBuildBitCast(builder, input, lp_build_vec_type(type), ""); + } + else { + /* FIXME */ + assert(0); + input = lp_build_undef(type); + } + break; + + case UTIL_FORMAT_TYPE_FIXED: + if (type.floating) { + double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1); + LLVMValueRef scale_val = lp_build_const_vec(type, scale); + input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), ""); + input = LLVMBuildMul(builder, input, scale_val, ""); } else { /* FIXME */ @@ -133,7 +241,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, break; default: - /* fall through */ + assert(0); input = lp_build_undef(type); break; } @@ -145,3 +253,144 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, lp_build_format_swizzle_soa(format_desc, type, inputs, rgba); } + + +/** + * Fetch a pixel into a SoA. + * + * i and j are the sub-block pixel coordinates. + */ +void +lp_build_fetch_rgba_soa(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + struct lp_type type, + LLVMValueRef base_ptr, + LLVMValueRef offset, + LLVMValueRef i, + LLVMValueRef j, + LLVMValueRef *rgba) +{ + + if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && + (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || + format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && + format_desc->block.width == 1 && + format_desc->block.height == 1 && + format_desc->block.bits <= type.width && + (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT || + format_desc->channel[0].size == 32)) + { + /* + * The packed pixel fits into an element of the destination format. Put + * the packed pixels into a vector and estract each component for all + * vector elements in parallel. + */ + + LLVMValueRef packed; + + /* + * gather the texels from the texture + */ + packed = lp_build_gather(builder, + type.length, + format_desc->block.bits, + type.width, + base_ptr, offset); + + /* + * convert texels to float rgba + */ + lp_build_unpack_rgba_soa(builder, + format_desc, + type, + packed, rgba); + } + else { + /* + * Fallback to calling util_format_description::fetch_rgba_float for each + * pixel. + * + * This is definitely not the most efficient way of fetching pixels, as + * we miss the opportunity to do vectorization, but this it is a + * convenient for formats or scenarios for which there was no opportunity + * or incentive to optimize. + */ + + LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); + char name[256]; + LLVMValueRef function; + LLVMValueRef tmp; + unsigned k, chan; + + assert(type.floating); + + util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", format_desc->short_name); + + /* + * Declare and bind format_desc->fetch_rgba_float(). + */ + + function = LLVMGetNamedFunction(module, name); + if (!function) { + LLVMTypeRef ret_type; + LLVMTypeRef arg_types[4]; + LLVMTypeRef function_type; + + ret_type = LLVMVoidType(); + arg_types[0] = LLVMPointerType(LLVMFloatType(), 0); + arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0); + arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8); + function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); + function = LLVMAddFunction(module, name, function_type); + + LLVMSetFunctionCallConv(function, LLVMCCallConv); + LLVMSetLinkage(function, LLVMExternalLinkage); + + assert(LLVMIsDeclaration(function)); + + LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float); + } + + for (chan = 0; chan < 4; ++chan) { + rgba[chan] = lp_build_undef(type); + } + + tmp = LLVMBuildArrayAlloca(builder, + LLVMFloatType(), + LLVMConstInt(LLVMInt32Type(), 4, 0), + ""); + + /* + * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result + * in the SoA vectors. + */ + + for(k = 0; k < type.length; ++k) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0); + LLVMValueRef offset_elem; + LLVMValueRef ptr; + LLVMValueRef i_elem, j_elem; + LLVMValueRef args[4]; + + offset_elem = LLVMBuildExtractElement(builder, offset, index, ""); + ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, ""); + + i_elem = LLVMBuildExtractElement(builder, i, index, ""); + j_elem = LLVMBuildExtractElement(builder, j, index, ""); + + args[0] = tmp; + args[1] = ptr; + args[2] = i_elem; + args[3] = j_elem; + + LLVMBuildCall(builder, function, args, 4, ""); + + for (chan = 0; chan < 4; ++chan) { + LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0), + tmp_chan = LLVMBuildGEP(builder, tmp, &chan_val, 1, ""); + tmp_chan = LLVMBuildLoad(builder, tmp_chan, ""); + rgba[chan] = LLVMBuildInsertElement(builder, rgba[chan], tmp_chan, index, ""); + } + } + } +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.cpp b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 067397a520..de07c222a3 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -26,33 +26,42 @@ **************************************************************************/ -#include <llvm/Config/config.h> -#include <llvm/Target/TargetSelect.h> -#include <llvm/Target/TargetOptions.h> - -#include "pipe/p_config.h" - +#include "pipe/p_compiler.h" +#include "util/u_debug.h" #include "lp_bld_init.h" -extern "C" void LLVMLinkInJIT(); +LLVMModuleRef lp_build_module = NULL; +LLVMExecutionEngineRef lp_build_engine = NULL; +LLVMModuleProviderRef lp_build_provider = NULL; +LLVMTargetDataRef lp_build_target = NULL; -extern "C" void +void lp_build_init(void) { -#if defined(PIPE_OS_WINDOWS) && defined(PIPE_ARCH_X86) - /* - * This is mis-detected on some hardware / software combinations. - */ - llvm::StackAlignment = 4; - llvm::RealignStack = true; -#endif - - /* Same as LLVMInitializeNativeTarget(); */ - llvm::InitializeNativeTarget(); + LLVMInitializeNativeTarget(); LLVMLinkInJIT(); + + if (!lp_build_module) + lp_build_module = LLVMModuleCreateWithName("gallivm"); + + if (!lp_build_provider) + lp_build_provider = LLVMCreateModuleProviderForExistingModule(lp_build_module); + + if (!lp_build_engine) { + char *error = NULL; + + if (LLVMCreateJITCompiler(&lp_build_engine, lp_build_provider, 1, &error)) { + _debug_printf("%s\n", error); + LLVMDisposeMessage(error); + assert(0); + } + } + + if (!lp_build_target) + lp_build_target = LLVMGetExecutionEngineTargetData(lp_build_engine); } @@ -64,6 +73,6 @@ lp_build_init(void) */ #if defined(_MSC_VER) && defined(_DEBUG) #include <crtdefs.h> -extern "C" _CRTIMP void __cdecl +_CRTIMP void __cdecl _invalid_parameter_noinfo(void) {} #endif diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h index 07f50d1c43..0ec2afcd1b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h @@ -30,18 +30,18 @@ #define LP_BLD_INIT_H -#ifdef __cplusplus -extern "C" { -#endif +#include "lp_bld.h" +#include <llvm-c/ExecutionEngine.h> -void -lp_build_init(void); +extern LLVMModuleRef lp_build_module; +extern LLVMExecutionEngineRef lp_build_engine; +extern LLVMModuleProviderRef lp_build_provider; +extern LLVMTargetDataRef lp_build_target; -#ifdef __cplusplus -} -#endif +void +lp_build_init(void); #endif /* !LP_BLD_INIT_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_interp.c b/src/gallium/auxiliary/gallivm/lp_bld_interp.c index 2fc894017d..09efb16121 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_interp.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_interp.c @@ -289,17 +289,17 @@ pos_update(struct lp_build_interp_soa_context *bld, int quad_index) /* top-right or bottom-right quad in block */ /* build x += xstep */ x = lp_build_add(&bld->base, x, - lp_build_const_scalar(bld->base.type, xstep)); + lp_build_const_vec(bld->base.type, xstep)); } if (quad_index == 2) { /* bottom-left quad in block */ /* build y += ystep */ y = lp_build_add(&bld->base, y, - lp_build_const_scalar(bld->base.type, ystep)); + lp_build_const_vec(bld->base.type, ystep)); /* build x -= xstep */ x = lp_build_sub(&bld->base, x, - lp_build_const_scalar(bld->base.type, xstep)); + lp_build_const_vec(bld->base.type, xstep)); } lp_build_name(x, "pos.x"); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_interp.h b/src/gallium/auxiliary/gallivm/lp_bld_interp.h index ca958cdf34..a4937bbb04 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_interp.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_interp.h @@ -41,7 +41,7 @@ #define LP_BLD_INTERP_H -#include <llvm-c/Core.h> +#include "gallivm/lp_bld.h" #include "tgsi/tgsi_exec.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h index f813f27074..977f767322 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h @@ -37,7 +37,7 @@ #define LP_BLD_INTR_H -#include <llvm-c/Core.h> +#include "gallivm/lp_bld.h" /** diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index 2726747eae..a3b6970116 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -42,6 +42,26 @@ #include "lp_bld_logic.h" +/* + * XXX + * + * Selection with vector conditional like + * + * select <4 x i1> %C, %A, %B + * + * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not + * supported on any backend. + * + * Expanding the boolean vector to full SIMD register width, as in + * + * sext <4 x i1> %C to <4 x i32> + * + * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but + * it causes assertion failures in LLVM 2.6. It appears to work correctly on + * LLVM 2.7. + */ + + /** * Build code to compare two values 'a' and 'b' of 'type' using the given func. * \param func one of PIPE_FUNC_x @@ -54,13 +74,11 @@ lp_build_compare(LLVMBuilderRef builder, LLVMValueRef a, LLVMValueRef b) { - LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMValueRef zeros = LLVMConstNull(int_vec_type); LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); LLVMValueRef cond; LLVMValueRef res; - unsigned i; assert(func >= PIPE_FUNC_NEVER); assert(func <= PIPE_FUNC_ALWAYS); @@ -74,10 +92,12 @@ lp_build_compare(LLVMBuilderRef builder, /* XXX: It is not clear if we should use the ordered or unordered operators */ +#if HAVE_LLVM < 0x0207 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) if(type.width * type.length == 128) { if(type.floating && util_cpu_caps.has_sse) { /* float[4] comparison */ + LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMValueRef args[3]; unsigned cc; boolean swap; @@ -147,6 +167,7 @@ lp_build_compare(LLVMBuilderRef builder, const char *pcmpgt; LLVMValueRef args[2]; LLVMValueRef res; + LLVMTypeRef vec_type = lp_build_vec_type(type); switch (type.width) { case 8: @@ -172,7 +193,7 @@ lp_build_compare(LLVMBuilderRef builder, if(table[func].gt && ((type.width == 8 && type.sign) || (type.width != 8 && !type.sign))) { - LLVMValueRef msb = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); a = LLVMBuildXor(builder, a, msb, ""); b = LLVMBuildXor(builder, b, msb, ""); } @@ -198,8 +219,9 @@ lp_build_compare(LLVMBuilderRef builder, return res; } - } + } /* if (type.width * type.length == 128) */ #endif +#endif /* HAVE_LLVM < 0x0207 */ if(type.floating) { LLVMRealPredicate op; @@ -233,25 +255,33 @@ lp_build_compare(LLVMBuilderRef builder, return lp_build_undef(type); } -#if 0 - /* XXX: Although valid IR, no LLVM target currently support this */ +#if HAVE_LLVM >= 0x0207 cond = LLVMBuildFCmp(builder, op, a, b, ""); - res = LLVMBuildSelect(builder, cond, ones, zeros, ""); + res = LLVMBuildSExt(builder, cond, int_vec_type, ""); #else - debug_printf("%s: warning: using slow element-wise vector comparison\n", - __FUNCTION__); - res = LLVMGetUndef(int_vec_type); - for(i = 0; i < type.length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - cond = LLVMBuildFCmp(builder, op, - LLVMBuildExtractElement(builder, a, index, ""), - LLVMBuildExtractElement(builder, b, index, ""), - ""); - cond = LLVMBuildSelect(builder, cond, - LLVMConstExtractElement(ones, index), - LLVMConstExtractElement(zeros, index), - ""); - res = LLVMBuildInsertElement(builder, res, cond, index, ""); + if (type.length == 1) { + cond = LLVMBuildFCmp(builder, op, a, b, ""); + res = LLVMBuildSExt(builder, cond, int_vec_type, ""); + } + else { + unsigned i; + + res = LLVMGetUndef(int_vec_type); + + debug_printf("%s: warning: using slow element-wise float" + " vector comparison\n", __FUNCTION__); + for (i = 0; i < type.length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + cond = LLVMBuildFCmp(builder, op, + LLVMBuildExtractElement(builder, a, index, ""), + LLVMBuildExtractElement(builder, b, index, ""), + ""); + cond = LLVMBuildSelect(builder, cond, + LLVMConstExtractElement(ones, index), + LLVMConstExtractElement(zeros, index), + ""); + res = LLVMBuildInsertElement(builder, res, cond, index, ""); + } } #endif } @@ -281,25 +311,34 @@ lp_build_compare(LLVMBuilderRef builder, return lp_build_undef(type); } -#if 0 - /* XXX: Although valid IR, no LLVM target currently support this */ +#if HAVE_LLVM >= 0x0207 cond = LLVMBuildICmp(builder, op, a, b, ""); - res = LLVMBuildSelect(builder, cond, ones, zeros, ""); + res = LLVMBuildSExt(builder, cond, int_vec_type, ""); #else - debug_printf("%s: warning: using slow element-wise int vector comparison\n", - __FUNCTION__); - res = LLVMGetUndef(int_vec_type); - for(i = 0; i < type.length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - cond = LLVMBuildICmp(builder, op, - LLVMBuildExtractElement(builder, a, index, ""), - LLVMBuildExtractElement(builder, b, index, ""), - ""); - cond = LLVMBuildSelect(builder, cond, - LLVMConstExtractElement(ones, index), - LLVMConstExtractElement(zeros, index), - ""); - res = LLVMBuildInsertElement(builder, res, cond, index, ""); + if (type.length == 1) { + cond = LLVMBuildICmp(builder, op, a, b, ""); + res = LLVMBuildSExt(builder, cond, int_vec_type, ""); + } + else { + unsigned i; + + res = LLVMGetUndef(int_vec_type); + + debug_printf("%s: warning: using slow element-wise int" + " vector comparison\n", __FUNCTION__); + + for(i = 0; i < type.length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + cond = LLVMBuildICmp(builder, op, + LLVMBuildExtractElement(builder, a, index, ""), + LLVMBuildExtractElement(builder, b, index, ""), + ""); + cond = LLVMBuildSelect(builder, cond, + LLVMConstExtractElement(ones, index), + LLVMConstExtractElement(zeros, index), + ""); + res = LLVMBuildInsertElement(builder, res, cond, index, ""); + } } #endif } @@ -326,6 +365,8 @@ lp_build_cmp(struct lp_build_context *bld, /** * Return mask ? a : b; + * + * mask is a bitwise mask, composed of 0 or ~0 for each element. */ LLVMValueRef lp_build_select(struct lp_build_context *bld, @@ -339,26 +380,32 @@ lp_build_select(struct lp_build_context *bld, if(a == b) return a; - if(type.floating) { - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); + if (type.length == 1) { + mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), ""); + res = LLVMBuildSelect(bld->builder, mask, a, b, ""); } + else { + if(type.floating) { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); + } - a = LLVMBuildAnd(bld->builder, a, mask, ""); + a = LLVMBuildAnd(bld->builder, a, mask, ""); - /* This often gets translated to PANDN, but sometimes the NOT is - * pre-computed and stored in another constant. The best strategy depends - * on available registers, so it is not a big deal -- hopefully LLVM does - * the right decision attending the rest of the program. - */ - b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); + /* This often gets translated to PANDN, but sometimes the NOT is + * pre-computed and stored in another constant. The best strategy depends + * on available registers, so it is not a big deal -- hopefully LLVM does + * the right decision attending the rest of the program. + */ + b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); - res = LLVMBuildOr(bld->builder, a, b, ""); + res = LLVMBuildOr(bld->builder, a, b, ""); - if(type.floating) { - LLVMTypeRef vec_type = lp_build_vec_type(type); - res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + if(type.floating) { + LLVMTypeRef vec_type = lp_build_vec_type(type); + res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + } } return res; @@ -436,3 +483,13 @@ lp_build_alloca(struct lp_build_context *bld) return LLVMBuildAlloca(bld->builder, lp_build_elem_type(type), ""); } } + + +/** Return (a & ~b) */ +LLVMValueRef +lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) +{ + b = LLVMBuildNot(bld->builder, b, ""); + b = LLVMBuildAnd(bld->builder, a, b, ""); + return b; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h index a399ebf39e..00a8c75019 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h @@ -37,7 +37,7 @@ #define LP_BLD_LOGIC_H -#include <llvm-c/Core.h> +#include "gallivm/lp_bld.h" #include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */ @@ -79,4 +79,9 @@ lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef lp_build_alloca(struct lp_build_context *bld); + +LLVMValueRef +lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); + + #endif /* !LP_BLD_LOGIC_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index bc360ad77a..2daa8a3b58 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -164,7 +164,7 @@ lp_build_unpack2(LLVMBuilderRef builder, if(dst_type.sign && src_type.sign) { /* Replicate the sign bit in the most significant bits */ - msb = LLVMBuildAShr(builder, src, lp_build_int_const_scalar(src_type, src_type.width - 1), ""); + msb = LLVMBuildAShr(builder, src, lp_build_const_int_vec(src_type, src_type.width - 1), ""); } else /* Most significant bits always zero */ @@ -256,7 +256,9 @@ lp_build_pack2(LLVMBuilderRef builder, LLVMValueRef lo, LLVMValueRef hi) { +#if HAVE_LLVM < 0x0207 LLVMTypeRef src_vec_type = lp_build_vec_type(src_type); +#endif LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type); LLVMValueRef shuffle; LLVMValueRef res; @@ -272,11 +274,14 @@ lp_build_pack2(LLVMBuilderRef builder, switch(src_type.width) { case 32: if(dst_type.sign) { +#if HAVE_LLVM >= 0x0207 + res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", dst_vec_type, lo, hi); +#else res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi); +#endif } else { if (util_cpu_caps.has_sse4_1) { - /* PACKUSDW is the only instrinsic with a consistent signature */ return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi); } else { @@ -288,9 +293,17 @@ lp_build_pack2(LLVMBuilderRef builder, case 16: if(dst_type.sign) +#if HAVE_LLVM >= 0x0207 + res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", dst_vec_type, lo, hi); +#else res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi); +#endif else +#if HAVE_LLVM >= 0x0207 + res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", dst_vec_type, lo, hi); +#else res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi); +#endif break; default: @@ -348,7 +361,7 @@ lp_build_packs2(LLVMBuilderRef builder, if(clamp) { struct lp_build_context bld; unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width; - LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1); + LLVMValueRef dst_max = lp_build_const_int_vec(src_type, ((unsigned long long)1 << dst_bits) - 1); lp_build_context_init(&bld, builder, src_type); lo = lp_build_min(&bld, lo, dst_max); hi = lp_build_min(&bld, hi, dst_max); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h index fb2a34984a..41adeed220 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h @@ -37,7 +37,7 @@ #define LP_BLD_PACK_H -#include <llvm-c/Core.h> +#include "gallivm/lp_bld.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.c b/src/gallium/auxiliary/gallivm/lp_bld_printf.c new file mode 100644 index 0000000000..153ba5b15b --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.c @@ -0,0 +1,121 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <stdio.h> + +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "lp_bld_printf.h" + + +static int +lp_get_printf_arg_count(const char *fmt) +{ + int count =0; + const char *p = fmt; + int c; + + while ((c = *p++)) { + if (c != '%') + continue; + switch (*p) { + case '\0': + continue; + case '%': + p++; + continue; + case '.': + if (p[1] == '*' && p[2] == 's') { + count += 2; + p += 3; + continue; + } + /* fallthrough */ + default: + count ++; + } + } + return count; +} + +LLVMValueRef +lp_build_const_string_variable(LLVMModuleRef module, const char *str, int len) +{ + LLVMValueRef string = LLVMAddGlobal(module, LLVMArrayType(LLVMInt8Type(), len + 1), ""); + LLVMSetGlobalConstant(string, TRUE); + LLVMSetLinkage(string, LLVMInternalLinkage); + LLVMSetInitializer(string, LLVMConstString(str, len + 1, TRUE)); + return string; +} + + +/** + * lp_build_printf. + * + * Build printf call in LLVM IR. The output goes to stdout. + * The additional variable arguments need to have type + * LLVMValueRef. + */ +LLVMValueRef +lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...) +{ + va_list arglist; + int i = 0; + int argcount = lp_get_printf_arg_count(fmt); + LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); + LLVMValueRef params[50]; + LLVMValueRef fmtarg = lp_build_const_string_variable(module, fmt, strlen(fmt) + 1); + LLVMValueRef int0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef index[2]; + LLVMValueRef func_printf = LLVMGetNamedFunction(module, "printf"); + + assert(Elements(params) >= argcount + 1); + + index[0] = index[1] = int0; + + if (!func_printf) { + LLVMTypeRef printf_type = LLVMFunctionType(LLVMIntType(32), NULL, 0, 1); + func_printf = LLVMAddFunction(module, "printf", printf_type); + } + + params[0] = LLVMBuildGEP(builder, fmtarg, index, 2, ""); + + va_start(arglist, fmt); + for (i = 1; i <= argcount; i++) { + LLVMValueRef val = va_arg(arglist, LLVMValueRef); + LLVMTypeRef type = LLVMTypeOf(val); + /* printf wants doubles, so lets convert so that + * we can actually print them */ + if (LLVMGetTypeKind(type) == LLVMFloatTypeKind) + val = LLVMBuildFPExt(builder, val, LLVMDoubleType(), ""); + params[i] = val; + } + va_end(arglist); + + return LLVMBuildCall(builder, func_printf, params, argcount + 1, ""); +} + diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.h b/src/gallium/auxiliary/gallivm/lp_bld_printf.h new file mode 100644 index 0000000000..83bd8f1d55 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.h @@ -0,0 +1,39 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef LP_BLD_PRINTF_H +#define LP_BLD_PRINTF_H + + +#include "pipe/p_compiler.h" +#include "lp_bld.h" + +LLVMValueRef lp_build_const_string_variable(LLVMModuleRef module, const char *str, int len); +LLVMValueRef lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...); + +#endif + diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 6a026e468e..eb75b9b393 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -51,9 +51,11 @@ */ void lp_sampler_static_state(struct lp_sampler_static_state *state, - const struct pipe_texture *texture, + const struct pipe_sampler_view *view, const struct pipe_sampler_state *sampler) { + const struct pipe_resource *texture = view->texture; + memset(state, 0, sizeof *state); if(!texture) @@ -62,7 +64,19 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, if(!sampler) return; - state->format = texture->format; + /* + * We don't copy sampler state over unless it is actually enabled, to avoid + * spurious recompiles, as the sampler static state is part of the shader + * key. + * + * Ideally the state tracker or cso_cache module would make all state + * canonical, but until that happens it's better to be safe than sorry here. + * + * XXX: Actually there's much more than can be done here, especially + * regarding 1D/2D/3D/CUBE textures, wrap modes, etc. + */ + + state->format = view->format; state->target = texture->target; state->pot_width = util_is_pot(texture->width0); state->pot_height = util_is_pot(texture->height0); @@ -72,10 +86,18 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, state->wrap_t = sampler->wrap_t; state->wrap_r = sampler->wrap_r; state->min_img_filter = sampler->min_img_filter; - state->min_mip_filter = sampler->min_mip_filter; state->mag_img_filter = sampler->mag_img_filter; + if (texture->last_level) { + state->min_mip_filter = sampler->min_mip_filter; + } else { + state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + } + state->compare_mode = sampler->compare_mode; - state->compare_func = sampler->compare_func; + if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) { + state->compare_func = sampler->compare_func; + } + state->normalized_coords = sampler->normalized_coords; state->lod_bias = sampler->lod_bias; state->min_lod = sampler->min_lod; @@ -84,6 +106,10 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, state->border_color[1] = sampler->border_color[1]; state->border_color[2] = sampler->border_color[2]; state->border_color[3] = sampler->border_color[3]; + + /* + * FIXME: Handle the remainder of pipe_sampler_view. + */ } @@ -137,22 +163,24 @@ lp_build_gather(LLVMBuilderRef builder, /** - * Compute the offset of a pixel. + * Compute the offset of a pixel block. * - * x, y, y_stride are vectors + * x, y, z, y_stride, z_stride are vectors, and they refer to pixel blocks, as + * per format description, and not individual pixels. */ LLVMValueRef lp_build_sample_offset(struct lp_build_context *bld, const struct util_format_description *format_desc, LLVMValueRef x, LLVMValueRef y, + LLVMValueRef z, LLVMValueRef y_stride, - LLVMValueRef data_ptr) + LLVMValueRef z_stride) { LLVMValueRef x_stride; LLVMValueRef offset; - x_stride = lp_build_const_scalar(bld->type, format_desc->block.bits/8); + x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8); if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { LLVMValueRef x_lo, x_hi; @@ -163,6 +191,10 @@ lp_build_sample_offset(struct lp_build_context *bld, LLVMValueRef y_offset_lo, y_offset_hi; LLVMValueRef offset_lo, offset_hi; + /* XXX 1D & 3D addressing not done yet */ + assert(!z); + assert(!z_stride); + x_lo = LLVMBuildAnd(bld->builder, x, bld->one, ""); y_lo = LLVMBuildAnd(bld->builder, y, bld->one, ""); @@ -170,9 +202,9 @@ lp_build_sample_offset(struct lp_build_context *bld, y_hi = LLVMBuildLShr(bld->builder, y, bld->one, ""); x_stride_lo = x_stride; - y_stride_lo = lp_build_const_scalar(bld->type, 2*format_desc->block.bits/8); + y_stride_lo = lp_build_const_vec(bld->type, 2*format_desc->block.bits/8); - x_stride_hi = lp_build_const_scalar(bld->type, 4*format_desc->block.bits/8); + x_stride_hi = lp_build_const_vec(bld->type, 4*format_desc->block.bits/8); y_stride_hi = LLVMBuildShl(bld->builder, y_stride, bld->one, ""); x_offset_lo = lp_build_mul(bld, x_lo, x_stride_lo); @@ -186,13 +218,17 @@ lp_build_sample_offset(struct lp_build_context *bld, offset = lp_build_add(bld, offset_hi, offset_lo); } else { - LLVMValueRef x_offset; - LLVMValueRef y_offset; + offset = lp_build_mul(bld, x, x_stride); - x_offset = lp_build_mul(bld, x, x_stride); - y_offset = lp_build_mul(bld, y, y_stride); + if (y && y_stride) { + LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride); + offset = lp_build_add(bld, offset, y_offset); + } - offset = lp_build_add(bld, x_offset, y_offset); + if (z && z_stride) { + LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride); + offset = lp_build_add(bld, offset, z_offset); + } } return offset; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index 5ba0925bb6..8c1af95c50 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -36,9 +36,10 @@ #define LP_BLD_SAMPLE_H -#include <llvm-c/Core.h> +#include "gallivm/lp_bld.h" -struct pipe_texture; +struct pipe_resource; +struct pipe_sampler_view; struct pipe_sampler_state; struct util_format_description; struct lp_type; @@ -48,14 +49,14 @@ struct lp_build_context; /** * Sampler static state. * - * These are the bits of state from pipe_texture and pipe_sampler_state that + * These are the bits of state from pipe_resource and pipe_sampler_state that * are embedded in the generated code. */ struct lp_sampler_static_state { /* pipe_texture's state */ enum pipe_format format; - unsigned target:2; + unsigned target:3; unsigned pot_width:1; unsigned pot_height:1; unsigned pot_depth:1; @@ -78,7 +79,7 @@ struct lp_sampler_static_state /** * Sampler dynamic state. * - * These are the bits of state from pipe_texture and pipe_sampler_state that + * These are the bits of state from pipe_resource and pipe_sampler_state that * are computed in runtime. * * There are obtained through callbacks, as we don't want to tie the texture @@ -113,9 +114,9 @@ struct lp_sampler_dynamic_state unsigned unit); LLVMValueRef - (*stride)( struct lp_sampler_dynamic_state *state, - LLVMBuilderRef builder, - unsigned unit); + (*row_stride)( struct lp_sampler_dynamic_state *state, + LLVMBuilderRef builder, + unsigned unit); LLVMValueRef (*data_ptr)( struct lp_sampler_dynamic_state *state, @@ -130,7 +131,7 @@ struct lp_sampler_dynamic_state */ void lp_sampler_static_state(struct lp_sampler_static_state *state, - const struct pipe_texture *texture, + const struct pipe_sampler_view *view, const struct pipe_sampler_state *sampler); @@ -148,8 +149,9 @@ lp_build_sample_offset(struct lp_build_context *bld, const struct util_format_description *format_desc, LLVMValueRef x, LLVMValueRef y, + LLVMValueRef z, LLVMValueRef y_stride, - LLVMValueRef data_ptr); + LLVMValueRef z_stride); void diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 9058f76c1d..395eaaba26 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -48,6 +48,7 @@ #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" #include "lp_bld_pack.h" +#include "lp_bld_flow.h" #include "lp_bld_format.h" #include "lp_bld_sample.h" @@ -65,6 +66,14 @@ struct lp_build_sample_context const struct util_format_description *format_desc; + /** regular scalar float type */ + struct lp_type float_type; + struct lp_build_context float_bld; + + /** regular scalar float type */ + struct lp_type int_type; + struct lp_build_context int_bld; + /** Incoming coordinates type and build context */ struct lp_type coord_type; struct lp_build_context coord_bld; @@ -108,9 +117,78 @@ wrap_mode_uses_border_color(unsigned mode) } +static LLVMValueRef +lp_build_get_mipmap_level(struct lp_build_sample_context *bld, + LLVMValueRef data_array, LLVMValueRef level) +{ + LLVMValueRef indexes[2], data_ptr; + indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indexes[1] = level; + data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, ""); + data_ptr = LLVMBuildLoad(bld->builder, data_ptr, ""); + return data_ptr; +} + + +static LLVMValueRef +lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, + LLVMValueRef data_array, int level) +{ + LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0); + return lp_build_get_mipmap_level(bld, data_array, lvl); +} + + +/** + * Dereference stride_array[mipmap_level] array to get a stride. + * Return stride as a vector. + */ +static LLVMValueRef +lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, + LLVMValueRef stride_array, LLVMValueRef level) +{ + LLVMValueRef indexes[2], stride; + indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indexes[1] = level; + stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, ""); + stride = LLVMBuildLoad(bld->builder, stride, ""); + stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride); + return stride; +} + + +/** Dereference stride_array[0] array to get a stride (as vector). */ +static LLVMValueRef +lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld, + LLVMValueRef stride_array, int level) +{ + LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0); + return lp_build_get_level_stride_vec(bld, stride_array, lvl); +} + + +static int +texture_dims(enum pipe_texture_target tex) +{ + switch (tex) { + case PIPE_TEXTURE_1D: + return 1; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_CUBE: + return 2; + case PIPE_TEXTURE_3D: + return 3; + default: + assert(0 && "bad texture target in texture_dims()"); + return 2; + } +} + + /** - * Gen code to fetch a texel from a texture at int coords (x, y). + * Generate code to fetch a texel from a texture at int coords (x, y, z). + * The computation depends on whether the texture is 1D, 2D or 3D. * The result, texel, will be: * texel[0] = red values * texel[1] = green values @@ -121,15 +199,19 @@ static void lp_build_sample_texel_soa(struct lp_build_sample_context *bld, LLVMValueRef width, LLVMValueRef height, + LLVMValueRef depth, LLVMValueRef x, LLVMValueRef y, + LLVMValueRef z, LLVMValueRef y_stride, + LLVMValueRef z_stride, LLVMValueRef data_ptr, LLVMValueRef *texel) { + const int dims = texture_dims(bld->static_state->target); struct lp_build_context *int_coord_bld = &bld->int_coord_bld; LLVMValueRef offset; - LLVMValueRef packed; + LLVMValueRef i, j; LLVMValueRef use_border = NULL; /* use_border = x < 0 || x >= width || y < 0 || y >= height */ @@ -140,7 +222,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2"); } - if (wrap_mode_uses_border_color(bld->static_state->wrap_t)) { + if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) { LLVMValueRef b1, b2; b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero); b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height); @@ -153,6 +235,56 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, } } + if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) { + LLVMValueRef b1, b2; + b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero); + b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth); + if (use_border) { + use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1"); + use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2"); + } + else { + use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2"); + } + } + + /* + * Describe the coordinates in terms of pixel blocks. + * + * TODO: pixel blocks are power of two. LLVM should convert rem/div to + * bit arithmetic. Verify this. + */ + + if (bld->format_desc->block.width == 1) { + i = bld->uint_coord_bld.zero; + } + else { + LLVMValueRef block_width = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.width); + i = LLVMBuildURem(bld->builder, x, block_width, ""); + x = LLVMBuildUDiv(bld->builder, x, block_width, ""); + } + + if (bld->format_desc->block.height == 1) { + j = bld->uint_coord_bld.zero; + } + else { + LLVMValueRef block_height = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.height); + j = LLVMBuildURem(bld->builder, y, block_height, ""); + y = LLVMBuildUDiv(bld->builder, y, block_height, ""); + } + + /* convert x,y,z coords to linear offset from start of texture, in bytes */ + offset = lp_build_sample_offset(&bld->uint_coord_bld, + bld->format_desc, + x, y, z, y_stride, z_stride); + + lp_build_fetch_rgba_soa(bld->builder, + bld->format_desc, + bld->texel_type, + data_ptr, offset, + i, j, + texel); + /* * Note: if we find an app which frequently samples the texture border * we might want to implement a true conditional here to avoid sampling @@ -168,35 +300,12 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, * the texel color results with the border color. */ - /* convert x,y coords to linear offset from start of texture, in bytes */ - offset = lp_build_sample_offset(&bld->uint_coord_bld, - bld->format_desc, - x, y, y_stride, - data_ptr); - - assert(bld->format_desc->block.width == 1); - assert(bld->format_desc->block.height == 1); - assert(bld->format_desc->block.bits <= bld->texel_type.width); - - /* gather the texels from the texture */ - packed = lp_build_gather(bld->builder, - bld->texel_type.length, - bld->format_desc->block.bits, - bld->texel_type.width, - data_ptr, offset); - - /* convert texels to float rgba */ - lp_build_unpack_rgba_soa(bld->builder, - bld->format_desc, - bld->texel_type, - packed, texel); - if (use_border) { /* select texel color or border color depending on use_border */ int chan; for (chan = 0; chan < 4; chan++) { LLVMValueRef border_chan = - lp_build_const_scalar(bld->texel_type, + lp_build_const_vec(bld->texel_type, bld->static_state->border_color[chan]); texel[chan] = lp_build_select(&bld->texel_bld, use_border, border_chan, texel[chan]); @@ -210,19 +319,22 @@ lp_build_sample_packed(struct lp_build_sample_context *bld, LLVMValueRef x, LLVMValueRef y, LLVMValueRef y_stride, - LLVMValueRef data_ptr) + LLVMValueRef data_array) { LLVMValueRef offset; + LLVMValueRef data_ptr; offset = lp_build_sample_offset(&bld->uint_coord_bld, bld->format_desc, - x, y, y_stride, - data_ptr); + x, y, NULL, y_stride, NULL); assert(bld->format_desc->block.width == 1); assert(bld->format_desc->block.height == 1); assert(bld->format_desc->block.bits <= bld->texel_type.width); + /* get pointer to mipmap level 0 data */ + data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0); + return lp_build_gather(bld->builder, bld->texel_type.length, bld->format_desc->block.bits, @@ -358,8 +470,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, struct lp_build_context *coord_bld = &bld->coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; - LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0); - LLVMValueRef half = lp_build_const_scalar(coord_bld->type, 0.5); + LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0); + LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5); LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one); @@ -413,7 +525,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, else { LLVMValueRef min, max; /* clamp to [0.5, length - 0.5] */ - min = lp_build_const_scalar(coord_bld->type, 0.5F); + min = lp_build_const_vec(coord_bld->type, 0.5F); max = lp_build_sub(coord_bld, length_f, min); coord = lp_build_clamp(coord_bld, coord, min, max); } @@ -434,7 +546,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, if (bld->static_state->normalized_coords) { /* min = -1.0 / (2 * length) = -0.5 / length */ min = lp_build_mul(coord_bld, - lp_build_const_scalar(coord_bld->type, -0.5F), + lp_build_const_vec(coord_bld->type, -0.5F), lp_build_rcp(coord_bld, length_f)); /* max = 1.0 - min */ max = lp_build_sub(coord_bld, coord_bld->one, min); @@ -446,7 +558,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, } else { /* clamp to [-0.5, length + 0.5] */ - min = lp_build_const_scalar(coord_bld->type, -0.5F); + min = lp_build_const_vec(coord_bld->type, -0.5F); max = lp_build_sub(coord_bld, length_f, min); coord = lp_build_clamp(coord_bld, coord, min, max); coord = lp_build_sub(coord_bld, coord, half); @@ -521,7 +633,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, LLVMValueRef min, max; /* min = -1.0 / (2 * length) = -0.5 / length */ min = lp_build_mul(coord_bld, - lp_build_const_scalar(coord_bld->type, -0.5F), + lp_build_const_vec(coord_bld->type, -0.5F), lp_build_rcp(coord_bld, length_f)); /* max = 1.0 - min */ max = lp_build_sub(coord_bld, coord_bld->one, min); @@ -566,7 +678,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, struct lp_build_context *coord_bld = &bld->coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; - LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0); + LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0); LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one); @@ -609,7 +721,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, } else { /* clamp to [0.5, length - 0.5] */ - min = lp_build_const_scalar(coord_bld->type, 0.5F); + min = lp_build_const_vec(coord_bld->type, 0.5F); max = lp_build_sub(coord_bld, length_f, min); } /* coord = clamp(coord, min, max) */ @@ -625,7 +737,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, if (bld->static_state->normalized_coords) { /* min = -1.0 / (2 * length) = -0.5 / length */ min = lp_build_mul(coord_bld, - lp_build_const_scalar(coord_bld->type, -0.5F), + lp_build_const_vec(coord_bld->type, -0.5F), lp_build_rcp(coord_bld, length_f)); /* max = length - min */ max = lp_build_sub(coord_bld, length_f, min); @@ -634,7 +746,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, } else { /* clamp to [-0.5, length + 0.5] */ - min = lp_build_const_scalar(coord_bld->type, -0.5F); + min = lp_build_const_vec(coord_bld->type, -0.5F); max = lp_build_sub(coord_bld, length_f, min); } /* coord = clamp(coord, min, max) */ @@ -711,83 +823,905 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, /** - * Sample 2D texture with nearest filtering. + * Codegen equivalent for u_minify(). + * Return max(1, base_size >> level); + */ +static LLVMValueRef +lp_build_minify(struct lp_build_sample_context *bld, + LLVMValueRef base_size, + LLVMValueRef level) +{ + LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify"); + size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one); + return size; +} + + +/** + * Generate code to compute texture level of detail (lambda). + * \param s vector of texcoord s values + * \param t vector of texcoord t values + * \param r vector of texcoord r values + * \param width scalar int texture width + * \param height scalar int texture height + * \param depth scalar int texture depth + */ +static LLVMValueRef +lp_build_lod_selector(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef depth) + +{ + if (bld->static_state->min_lod == bld->static_state->max_lod) { + /* User is forcing sampling from a particular mipmap level. + * This is hit during mipmap generation. + */ + return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod); + } + else { + const int dims = texture_dims(bld->static_state->target); + struct lp_build_context *float_bld = &bld->float_bld; + LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), + bld->static_state->lod_bias); + LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), + bld->static_state->min_lod); + LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), + bld->static_state->max_lod); + + LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + + LLVMValueRef s0, s1, s2; + LLVMValueRef t0, t1, t2; + LLVMValueRef r0, r1, r2; + LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy; + LLVMValueRef rho, lod; + + /* + * dsdx = abs(s[1] - s[0]); + * dsdy = abs(s[2] - s[0]); + * dtdx = abs(t[1] - t[0]); + * dtdy = abs(t[2] - t[0]); + * drdx = abs(r[1] - r[0]); + * drdy = abs(r[2] - r[0]); + * XXX we're assuming a four-element quad in 2x2 layout here. + */ + s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0"); + s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1"); + s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2"); + dsdx = LLVMBuildSub(bld->builder, s1, s0, ""); + dsdx = lp_build_abs(float_bld, dsdx); + dsdy = LLVMBuildSub(bld->builder, s2, s0, ""); + dsdy = lp_build_abs(float_bld, dsdy); + if (dims > 1) { + t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0"); + t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1"); + t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2"); + dtdx = LLVMBuildSub(bld->builder, t1, t0, ""); + dtdx = lp_build_abs(float_bld, dtdx); + dtdy = LLVMBuildSub(bld->builder, t2, t0, ""); + dtdy = lp_build_abs(float_bld, dtdy); + if (dims > 2) { + r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0"); + r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1"); + r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2"); + drdx = LLVMBuildSub(bld->builder, r1, r0, ""); + drdx = lp_build_abs(float_bld, drdx); + drdy = LLVMBuildSub(bld->builder, r2, r0, ""); + drdy = lp_build_abs(float_bld, drdy); + } + } + + /* Compute rho = max of all partial derivatives scaled by texture size. + * XXX this could be vectorized somewhat + */ + rho = LLVMBuildMul(bld->builder, + lp_build_max(float_bld, dsdx, dsdy), + lp_build_int_to_float(float_bld, width), ""); + if (dims > 1) { + LLVMValueRef max; + max = LLVMBuildMul(bld->builder, + lp_build_max(float_bld, dtdx, dtdy), + lp_build_int_to_float(float_bld, height), ""); + rho = lp_build_max(float_bld, rho, max); + if (dims > 2) { + max = LLVMBuildMul(bld->builder, + lp_build_max(float_bld, drdx, drdy), + lp_build_int_to_float(float_bld, depth), ""); + rho = lp_build_max(float_bld, rho, max); + } + } + + /* compute lod = log2(rho) */ + lod = lp_build_log2(float_bld, rho); + + /* add lod bias */ + lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias"); + + /* clamp lod */ + lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); + + return lod; + } +} + + +/** + * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer + * mipmap level index. + * Note: this is all scalar code. + * \param lod scalar float texture level of detail + * \param level_out returns integer + */ +static void +lp_build_nearest_mip_level(struct lp_build_sample_context *bld, + unsigned unit, + LLVMValueRef lod, + LLVMValueRef *level_out) +{ + struct lp_build_context *float_bld = &bld->float_bld; + struct lp_build_context *int_bld = &bld->int_bld; + LLVMValueRef last_level, level; + + LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0); + + last_level = bld->dynamic_state->last_level(bld->dynamic_state, + bld->builder, unit); + + /* convert float lod to integer */ + level = lp_build_iround(float_bld, lod); + + /* clamp level to legal range of levels */ + *level_out = lp_build_clamp(int_bld, level, zero, last_level); +} + + +/** + * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to + * two (adjacent) mipmap level indexes. Later, we'll sample from those + * two mipmap levels and interpolate between them. */ static void -lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef stride, - LLVMValueRef data_ptr, - LLVMValueRef *texel) +lp_build_linear_mip_levels(struct lp_build_sample_context *bld, + unsigned unit, + LLVMValueRef lod, + LLVMValueRef *level0_out, + LLVMValueRef *level1_out, + LLVMValueRef *weight_out) { - LLVMValueRef x, y; + struct lp_build_context *float_bld = &bld->float_bld; + struct lp_build_context *int_bld = &bld->int_bld; + LLVMValueRef last_level, level; - x = lp_build_sample_wrap_nearest(bld, s, width, - bld->static_state->pot_width, - bld->static_state->wrap_s); - y = lp_build_sample_wrap_nearest(bld, t, height, - bld->static_state->pot_height, - bld->static_state->wrap_t); + last_level = bld->dynamic_state->last_level(bld->dynamic_state, + bld->builder, unit); - lp_build_name(x, "tex.x.wrapped"); - lp_build_name(y, "tex.y.wrapped"); + /* convert float lod to integer */ + level = lp_build_ifloor(float_bld, lod); - lp_build_sample_texel_soa(bld, width, height, x, y, stride, data_ptr, texel); + /* compute level 0 and clamp to legal range of levels */ + *level0_out = lp_build_clamp(int_bld, level, + int_bld->zero, + last_level); + /* compute level 1 and clamp to legal range of levels */ + *level1_out = lp_build_add(int_bld, *level0_out, int_bld->one); + *level1_out = lp_build_min(int_bld, *level1_out, last_level); + + *weight_out = lp_build_fract(float_bld, lod); } /** - * Sample 2D texture with bilinear filtering. + * Generate code to sample a mipmap level with nearest filtering. + * If sampling a cube texture, r = cube face in [0,5]. */ static void -lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, +lp_build_sample_image_nearest(struct lp_build_sample_context *bld, + LLVMValueRef width_vec, + LLVMValueRef height_vec, + LLVMValueRef depth_vec, + LLVMValueRef row_stride_vec, + LLVMValueRef img_stride_vec, + LLVMValueRef data_ptr, LLVMValueRef s, LLVMValueRef t, - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef stride, - LLVMValueRef data_ptr, - LLVMValueRef *texel) + LLVMValueRef r, + LLVMValueRef colors_out[4]) { - LLVMValueRef s_fpart; - LLVMValueRef t_fpart; - LLVMValueRef x0, x1; - LLVMValueRef y0, y1; + const int dims = texture_dims(bld->static_state->target); + LLVMValueRef x, y, z; + + /* + * Compute integer texcoords. + */ + x = lp_build_sample_wrap_nearest(bld, s, width_vec, + bld->static_state->pot_width, + bld->static_state->wrap_s); + lp_build_name(x, "tex.x.wrapped"); + + if (dims >= 2) { + y = lp_build_sample_wrap_nearest(bld, t, height_vec, + bld->static_state->pot_height, + bld->static_state->wrap_t); + lp_build_name(y, "tex.y.wrapped"); + + if (dims == 3) { + z = lp_build_sample_wrap_nearest(bld, r, depth_vec, + bld->static_state->pot_height, + bld->static_state->wrap_r); + lp_build_name(z, "tex.z.wrapped"); + } + else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { + z = r; + } + else { + z = NULL; + } + } + else { + y = z = NULL; + } + + /* + * Get texture colors. + */ + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x, y, z, + row_stride_vec, img_stride_vec, + data_ptr, colors_out); +} + + +/** + * Generate code to sample a mipmap level with linear filtering. + * If sampling a cube texture, r = cube face in [0,5]. + */ +static void +lp_build_sample_image_linear(struct lp_build_sample_context *bld, + LLVMValueRef width_vec, + LLVMValueRef height_vec, + LLVMValueRef depth_vec, + LLVMValueRef row_stride_vec, + LLVMValueRef img_stride_vec, + LLVMValueRef data_ptr, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef colors_out[4]) +{ + const int dims = texture_dims(bld->static_state->target); + LLVMValueRef x0, y0, z0, x1, y1, z1; + LLVMValueRef s_fpart, t_fpart, r_fpart; LLVMValueRef neighbors[2][2][4]; - unsigned chan; + int chan; + + /* + * Compute integer texcoords. + */ + lp_build_sample_wrap_linear(bld, s, width_vec, + bld->static_state->pot_width, + bld->static_state->wrap_s, + &x0, &x1, &s_fpart); + lp_build_name(x0, "tex.x0.wrapped"); + lp_build_name(x1, "tex.x1.wrapped"); + + if (dims >= 2) { + lp_build_sample_wrap_linear(bld, t, height_vec, + bld->static_state->pot_height, + bld->static_state->wrap_t, + &y0, &y1, &t_fpart); + lp_build_name(y0, "tex.y0.wrapped"); + lp_build_name(y1, "tex.y1.wrapped"); + + if (dims == 3) { + lp_build_sample_wrap_linear(bld, r, depth_vec, + bld->static_state->pot_depth, + bld->static_state->wrap_r, + &z0, &z1, &r_fpart); + lp_build_name(z0, "tex.z0.wrapped"); + lp_build_name(z1, "tex.z1.wrapped"); + } + else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { + z0 = z1 = r; /* cube face */ + r_fpart = NULL; + } + else { + z0 = z1 = NULL; + r_fpart = NULL; + } + } + else { + y0 = y1 = t_fpart = NULL; + z0 = z1 = r_fpart = NULL; + } + + /* + * Get texture colors. + */ + /* get x0/x1 texels */ + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x0, y0, z0, + row_stride_vec, img_stride_vec, + data_ptr, neighbors[0][0]); + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x1, y0, z0, + row_stride_vec, img_stride_vec, + data_ptr, neighbors[0][1]); + + if (dims == 1) { + /* Interpolate two samples from 1D image to produce one color */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart, + neighbors[0][0][chan], + neighbors[0][1][chan]); + } + } + else { + /* 2D/3D texture */ + LLVMValueRef colors0[4]; + + /* get x0/x1 texels at y1 */ + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x0, y1, z0, + row_stride_vec, img_stride_vec, + data_ptr, neighbors[1][0]); + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x1, y1, z0, + row_stride_vec, img_stride_vec, + data_ptr, neighbors[1][1]); + + /* Bilinear interpolate the four samples from the 2D image / 3D slice */ + for (chan = 0; chan < 4; chan++) { + colors0[chan] = lp_build_lerp_2d(&bld->texel_bld, + s_fpart, t_fpart, + neighbors[0][0][chan], + neighbors[0][1][chan], + neighbors[1][0][chan], + neighbors[1][1][chan]); + } - lp_build_sample_wrap_linear(bld, s, width, bld->static_state->pot_width, - bld->static_state->wrap_s, &x0, &x1, &s_fpart); - lp_build_sample_wrap_linear(bld, t, height, bld->static_state->pot_height, - bld->static_state->wrap_t, &y0, &y1, &t_fpart); + if (dims == 3) { + LLVMValueRef neighbors1[2][2][4]; + LLVMValueRef colors1[4]; + + /* get x0/x1/y0/y1 texels at z1 */ + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x0, y0, z1, + row_stride_vec, img_stride_vec, + data_ptr, neighbors1[0][0]); + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x1, y0, z1, + row_stride_vec, img_stride_vec, + data_ptr, neighbors1[0][1]); + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x0, y1, z1, + row_stride_vec, img_stride_vec, + data_ptr, neighbors1[1][0]); + lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, + x1, y1, z1, + row_stride_vec, img_stride_vec, + data_ptr, neighbors1[1][1]); + + /* Bilinear interpolate the four samples from the second Z slice */ + for (chan = 0; chan < 4; chan++) { + colors1[chan] = lp_build_lerp_2d(&bld->texel_bld, + s_fpart, t_fpart, + neighbors1[0][0][chan], + neighbors1[0][1][chan], + neighbors1[1][0][chan], + neighbors1[1][1][chan]); + } - lp_build_sample_texel_soa(bld, width, height, x0, y0, stride, data_ptr, neighbors[0][0]); - lp_build_sample_texel_soa(bld, width, height, x1, y0, stride, data_ptr, neighbors[0][1]); - lp_build_sample_texel_soa(bld, width, height, x0, y1, stride, data_ptr, neighbors[1][0]); - lp_build_sample_texel_soa(bld, width, height, x1, y1, stride, data_ptr, neighbors[1][1]); + /* Linearly interpolate the two samples from the two 3D slices */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = lp_build_lerp(&bld->texel_bld, + r_fpart, + colors0[chan], colors1[chan]); + } + } + else { + /* 2D tex */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = colors0[chan]; + } + } + } +} - /* TODO: Don't interpolate missing channels */ - for(chan = 0; chan < 4; ++chan) { - texel[chan] = lp_build_lerp_2d(&bld->texel_bld, - s_fpart, t_fpart, - neighbors[0][0][chan], - neighbors[0][1][chan], - neighbors[1][0][chan], - neighbors[1][1][chan]); + +/** Helper used by lp_build_cube_lookup() */ +static LLVMValueRef +lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord) +{ + /* ima = -0.5 / abs(coord); */ + LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5); + LLVMValueRef absCoord = lp_build_abs(coord_bld, coord); + LLVMValueRef ima = lp_build_mul(coord_bld, negHalf, + lp_build_rcp(coord_bld, absCoord)); + return ima; +} + + +/** + * Helper used by lp_build_cube_lookup() + * \param sign scalar +1 or -1 + * \param coord float vector + * \param ima float vector + */ +static LLVMValueRef +lp_build_cube_coord(struct lp_build_context *coord_bld, + LLVMValueRef sign, int negate_coord, + LLVMValueRef coord, LLVMValueRef ima) +{ + /* return negate(coord) * ima * sign + 0.5; */ + LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5); + LLVMValueRef res; + + assert(negate_coord == +1 || negate_coord == -1); + + if (negate_coord == -1) { + coord = lp_build_negate(coord_bld, coord); + } + + res = lp_build_mul(coord_bld, coord, ima); + if (sign) { + sign = lp_build_broadcast_scalar(coord_bld, sign); + res = lp_build_mul(coord_bld, res, sign); + } + res = lp_build_add(coord_bld, res, half); + + return res; +} + + +/** Helper used by lp_build_cube_lookup() + * Return (major_coord >= 0) ? pos_face : neg_face; + */ +static LLVMValueRef +lp_build_cube_face(struct lp_build_sample_context *bld, + LLVMValueRef major_coord, + unsigned pos_face, unsigned neg_face) +{ + LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE, + major_coord, + bld->float_bld.zero, ""); + LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0); + LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0); + LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, ""); + return res; +} + + + +/** + * Generate code to do cube face selection and per-face texcoords. + */ +static void +lp_build_cube_lookup(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef *face, + LLVMValueRef *face_s, + LLVMValueRef *face_t) +{ + struct lp_build_context *float_bld = &bld->float_bld; + struct lp_build_context *coord_bld = &bld->coord_bld; + LLVMValueRef rx, ry, rz; + LLVMValueRef arx, ary, arz; + LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25); + LLVMValueRef arx_ge_ary, arx_ge_arz; + LLVMValueRef ary_ge_arx, ary_ge_arz; + LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz; + LLVMValueRef rx_pos, ry_pos, rz_pos; + + assert(bld->coord_bld.type.length == 4); + + /* + * Use the average of the four pixel's texcoords to choose the face. + */ + rx = lp_build_mul(float_bld, c25, + lp_build_sum_vector(&bld->coord_bld, s)); + ry = lp_build_mul(float_bld, c25, + lp_build_sum_vector(&bld->coord_bld, t)); + rz = lp_build_mul(float_bld, c25, + lp_build_sum_vector(&bld->coord_bld, r)); + + arx = lp_build_abs(float_bld, rx); + ary = lp_build_abs(float_bld, ry); + arz = lp_build_abs(float_bld, rz); + + /* + * Compare sign/magnitude of rx,ry,rz to determine face + */ + arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, ""); + arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, ""); + ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, ""); + ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, ""); + + arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, ""); + ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); + + rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, ""); + ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, ""); + rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, ""); + + { + struct lp_build_flow_context *flow_ctx; + struct lp_build_if_state if_ctx; + + flow_ctx = lp_build_flow_create(bld->builder); + lp_build_flow_scope_begin(flow_ctx); + + *face_s = bld->coord_bld.undef; + *face_t = bld->coord_bld.undef; + *face = bld->int_bld.undef; + + lp_build_name(*face_s, "face_s"); + lp_build_name(*face_t, "face_t"); + lp_build_name(*face, "face"); + + lp_build_flow_scope_declare(flow_ctx, face_s); + lp_build_flow_scope_declare(flow_ctx, face_t); + lp_build_flow_scope_declare(flow_ctx, face); + + lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz); + { + /* +/- X face */ + LLVMValueRef sign = lp_build_sgn(float_bld, rx); + LLVMValueRef ima = lp_build_cube_ima(coord_bld, s); + *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima); + *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); + *face = lp_build_cube_face(bld, rx, + PIPE_TEX_FACE_POS_X, + PIPE_TEX_FACE_NEG_X); + } + lp_build_else(&if_ctx); + { + struct lp_build_flow_context *flow_ctx2; + struct lp_build_if_state if_ctx2; + + LLVMValueRef face_s2 = bld->coord_bld.undef; + LLVMValueRef face_t2 = bld->coord_bld.undef; + LLVMValueRef face2 = bld->int_bld.undef; + + flow_ctx2 = lp_build_flow_create(bld->builder); + lp_build_flow_scope_begin(flow_ctx2); + lp_build_flow_scope_declare(flow_ctx2, &face_s2); + lp_build_flow_scope_declare(flow_ctx2, &face_t2); + lp_build_flow_scope_declare(flow_ctx2, &face2); + + ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); + + lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz); + { + /* +/- Y face */ + LLVMValueRef sign = lp_build_sgn(float_bld, ry); + LLVMValueRef ima = lp_build_cube_ima(coord_bld, t); + face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima); + face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima); + face2 = lp_build_cube_face(bld, ry, + PIPE_TEX_FACE_POS_Y, + PIPE_TEX_FACE_NEG_Y); + } + lp_build_else(&if_ctx2); + { + /* +/- Z face */ + LLVMValueRef sign = lp_build_sgn(float_bld, rz); + LLVMValueRef ima = lp_build_cube_ima(coord_bld, r); + face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima); + face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); + face2 = lp_build_cube_face(bld, rz, + PIPE_TEX_FACE_POS_Z, + PIPE_TEX_FACE_NEG_Z); + } + lp_build_endif(&if_ctx2); + lp_build_flow_scope_end(flow_ctx2); + lp_build_flow_destroy(flow_ctx2); + + *face_s = face_s2; + *face_t = face_t2; + *face = face2; + } + + lp_build_endif(&if_ctx); + lp_build_flow_scope_end(flow_ctx); + lp_build_flow_destroy(flow_ctx); + } +} + + + +/** + * Sample the texture/mipmap using given image filter and mip filter. + * data0_ptr and data1_ptr point to the two mipmap levels to sample + * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes. + * If we're using nearest miplevel sampling the '1' values will be null/unused. + */ +static void +lp_build_sample_mipmap(struct lp_build_sample_context *bld, + unsigned img_filter, + unsigned mip_filter, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef lod_fpart, + LLVMValueRef width0_vec, + LLVMValueRef width1_vec, + LLVMValueRef height0_vec, + LLVMValueRef height1_vec, + LLVMValueRef depth0_vec, + LLVMValueRef depth1_vec, + LLVMValueRef row_stride0_vec, + LLVMValueRef row_stride1_vec, + LLVMValueRef img_stride0_vec, + LLVMValueRef img_stride1_vec, + LLVMValueRef data_ptr0, + LLVMValueRef data_ptr1, + LLVMValueRef *colors_out) +{ + LLVMValueRef colors0[4], colors1[4]; + int chan; + + if (img_filter == PIPE_TEX_FILTER_NEAREST) { + lp_build_sample_image_nearest(bld, + width0_vec, height0_vec, depth0_vec, + row_stride0_vec, img_stride0_vec, + data_ptr0, s, t, r, colors0); + + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* sample the second mipmap level, and interp */ + lp_build_sample_image_nearest(bld, + width1_vec, height1_vec, depth1_vec, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, colors1); + } + } + else { + assert(img_filter == PIPE_TEX_FILTER_LINEAR); + + lp_build_sample_image_linear(bld, + width0_vec, height0_vec, depth0_vec, + row_stride0_vec, img_stride0_vec, + data_ptr0, s, t, r, colors0); + + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* sample the second mipmap level, and interp */ + lp_build_sample_image_linear(bld, + width1_vec, height1_vec, depth1_vec, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, colors1); + } + } + + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* interpolate samples from the two mipmap levels */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, + colors0[chan], colors1[chan]); + } + } + else { + /* use first/only level's colors */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = colors0[chan]; + } + } +} + + + +/** + * General texture sampling codegen. + * This function handles texture sampling for all texture targets (1D, + * 2D, 3D, cube) and all filtering modes. + */ +static void +lp_build_sample_general(struct lp_build_sample_context *bld, + unsigned unit, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef depth, + LLVMValueRef width_vec, + LLVMValueRef height_vec, + LLVMValueRef depth_vec, + LLVMValueRef row_stride_array, + LLVMValueRef img_stride_vec, + LLVMValueRef data_array, + LLVMValueRef *colors_out) +{ + struct lp_build_context *float_bld = &bld->float_bld; + const unsigned mip_filter = bld->static_state->min_mip_filter; + const unsigned min_filter = bld->static_state->min_img_filter; + const unsigned mag_filter = bld->static_state->mag_img_filter; + const int dims = texture_dims(bld->static_state->target); + LLVMValueRef lod, lod_fpart; + LLVMValueRef ilevel0, ilevel1, ilevel0_vec, ilevel1_vec; + LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL; + LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL; + LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL; + LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL; + LLVMValueRef data_ptr0, data_ptr1; + + /* + printf("%s mip %d min %d mag %d\n", __FUNCTION__, + mip_filter, min_filter, mag_filter); + */ + + /* + * Compute the level of detail (float). + */ + if (min_filter != mag_filter || + mip_filter != PIPE_TEX_MIPFILTER_NONE) { + /* Need to compute lod either to choose mipmap levels or to + * distinguish between minification/magnification with one mipmap level. + */ + lod = lp_build_lod_selector(bld, s, t, r, width, height, depth); + } + + /* + * Compute integer mipmap level(s) to fetch texels from. + */ + if (mip_filter == PIPE_TEX_MIPFILTER_NONE) { + /* always use mip level 0 */ + ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + } + else { + if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { + lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); + } + else { + assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR); + lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1, + &lod_fpart); + lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart); + } + } + + /* + * Convert scalar integer mipmap levels into vectors. + */ + ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0); + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) + ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1); + + /* + * Compute width, height at mipmap level 'ilevel0' + */ + width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec); + if (dims >= 2) { + height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec); + row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array, + ilevel0); + if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { + img_stride0_vec = lp_build_mul(&bld->int_coord_bld, + row_stride0_vec, height0_vec); + if (dims == 3) { + depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec); + } + } + } + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* compute width, height, depth for second mipmap level at 'ilevel1' */ + width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec); + if (dims >= 2) { + height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec); + row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array, + ilevel1); + if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { + img_stride1_vec = lp_build_mul(&bld->int_coord_bld, + row_stride1_vec, height1_vec); + if (dims ==3) { + depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec); + } + } + } + } + + /* + * Choose cube face, recompute per-face texcoords. + */ + if (bld->static_state->target == PIPE_TEXTURE_CUBE) { + LLVMValueRef face, face_s, face_t; + lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t); + s = face_s; /* vec */ + t = face_t; /* vec */ + /* use 'r' to indicate cube face */ + r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */ + } + + /* + * Get pointer(s) to image data for mipmap level(s). + */ + data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0); + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1); + } + + /* + * Get/interpolate texture colors. + */ + if (min_filter == mag_filter) { + /* no need to distinquish between minification and magnification */ + lp_build_sample_mipmap(bld, min_filter, mip_filter, s, t, r, lod_fpart, + width0_vec, width1_vec, + height0_vec, height1_vec, + depth0_vec, depth1_vec, + row_stride0_vec, row_stride1_vec, + img_stride0_vec, img_stride1_vec, + data_ptr0, data_ptr1, + colors_out); + } + else { + /* Emit conditional to choose min image filter or mag image filter + * depending on the lod being >0 or <= 0, respectively. + */ + struct lp_build_flow_context *flow_ctx; + struct lp_build_if_state if_ctx; + LLVMValueRef minify; + + flow_ctx = lp_build_flow_create(bld->builder); + lp_build_flow_scope_begin(flow_ctx); + + lp_build_flow_scope_declare(flow_ctx, &colors_out[0]); + lp_build_flow_scope_declare(flow_ctx, &colors_out[1]); + lp_build_flow_scope_declare(flow_ctx, &colors_out[2]); + lp_build_flow_scope_declare(flow_ctx, &colors_out[3]); + + /* minify = lod > 0.0 */ + minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE, + lod, float_bld->zero, ""); + + lp_build_if(&if_ctx, flow_ctx, bld->builder, minify); + { + /* Use the minification filter */ + lp_build_sample_mipmap(bld, min_filter, mip_filter, + s, t, r, lod_fpart, + width0_vec, width1_vec, + height0_vec, height1_vec, + depth0_vec, depth1_vec, + row_stride0_vec, row_stride1_vec, + img_stride0_vec, img_stride1_vec, + data_ptr0, data_ptr1, + colors_out); + } + lp_build_else(&if_ctx); + { + /* Use the magnification filter */ + lp_build_sample_mipmap(bld, mag_filter, mip_filter, + s, t, r, lod_fpart, + width0_vec, width1_vec, + height0_vec, height1_vec, + depth0_vec, depth1_vec, + row_stride0_vec, row_stride1_vec, + img_stride0_vec, img_stride1_vec, + data_ptr0, data_ptr1, + colors_out); + } + lp_build_endif(&if_ctx); + + lp_build_flow_scope_end(flow_ctx); + lp_build_flow_destroy(flow_ctx); } } + static void lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, struct lp_type dst_type, LLVMValueRef packed, LLVMValueRef *rgba) { - LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff); + LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff); unsigned chan; /* Decode the input vector components */ @@ -799,7 +1733,7 @@ lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, input = packed; if(start) - input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), ""); + input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(dst_type, start), ""); if(stop < 32) input = LLVMBuildAnd(builder, input, mask, ""); @@ -817,8 +1751,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, LLVMValueRef t, LLVMValueRef width, LLVMValueRef height, - LLVMValueRef stride, - LLVMValueRef data_ptr, + LLVMValueRef stride_array, + LLVMValueRef data_array, LLVMValueRef *texel) { LLVMBuilderRef builder = bld->builder; @@ -834,8 +1768,9 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, LLVMValueRef neighbors_hi[2][2]; LLVMValueRef packed, packed_lo, packed_hi; LLVMValueRef unswizzled[4]; + LLVMValueRef stride; - lp_build_context_init(&i32, builder, lp_type_int(32)); + lp_build_context_init(&i32, builder, lp_type_int_vec(32)); lp_build_context_init(&h16, builder, lp_type_ufixed(16)); lp_build_context_init(&u8n, builder, lp_type_unorm(8)); @@ -860,17 +1795,17 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, t = LLVMBuildFPToSI(builder, t, i32_vec_type, ""); /* subtract 0.5 (add -128) */ - i32_c128 = lp_build_int_const_scalar(i32.type, -128); + i32_c128 = lp_build_const_int_vec(i32.type, -128); s = LLVMBuildAdd(builder, s, i32_c128, ""); t = LLVMBuildAdd(builder, t, i32_c128, ""); /* compute floor (shift right 8) */ - i32_c8 = lp_build_int_const_scalar(i32.type, 8); + i32_c8 = lp_build_const_int_vec(i32.type, 8); s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); /* compute fractional part (AND with 0xff) */ - i32_c255 = lp_build_int_const_scalar(i32.type, 255); + i32_c255 = lp_build_const_int_vec(i32.type, 255); s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); t_fpart = LLVMBuildAnd(builder, t, i32_c255, ""); @@ -941,6 +1876,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, ""); } + stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0); + /* * Fetch the pixels as 4 x 32bit (rgba order might differ): * @@ -958,10 +1895,10 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, * The higher 8 bits of the resulting elements will be zero. */ - neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_ptr); - neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_ptr); - neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_ptr); - neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_ptr); + neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array); + neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array); + neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array); + neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array); neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, ""); neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, ""); @@ -1035,7 +1972,7 @@ lp_build_sample_compare(struct lp_build_sample_context *bld, } assert(res); - res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25)); + res = lp_build_mul(texel_bld, res, lp_build_const_vec(texel_bld->type, 0.25)); /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ for(chan = 0; chan < 3; ++chan) @@ -1044,194 +1981,11 @@ lp_build_sample_compare(struct lp_build_sample_context *bld, } -static int -texture_dims(enum pipe_texture_target tex) -{ - switch (tex) { - case PIPE_TEXTURE_1D: - return 1; - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_CUBE: - return 2; - case PIPE_TEXTURE_3D: - return 3; - default: - assert(0 && "bad texture target in texture_dims()"); - return 2; - } -} - - -/** - * Generate code to compute texture level of detail (lambda). - * \param s vector of texcoord s values - * \param t vector of texcoord t values - * \param r vector of texcoord r values - * \param width scalar int texture width - * \param height scalar int texture height - * \param depth scalar int texture depth - */ -static LLVMValueRef -lp_build_lod_selector(struct lp_build_sample_context *bld, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef r, - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef depth) - -{ - const int dims = texture_dims(bld->static_state->target); - struct lp_build_context *coord_bld = &bld->coord_bld; - - LLVMValueRef lod_bias = lp_build_const_scalar(bld->coord_bld.type, - bld->static_state->lod_bias); - LLVMValueRef min_lod = lp_build_const_scalar(bld->coord_bld.type, - bld->static_state->min_lod); - LLVMValueRef max_lod = lp_build_const_scalar(bld->coord_bld.type, - bld->static_state->max_lod); - - LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); - LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0); - LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0); - - LLVMValueRef s0, s1, s2; - LLVMValueRef t0, t1, t2; - LLVMValueRef r0, r1, r2; - LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy; - LLVMValueRef rho, lod; - - /* - * dsdx = abs(s[1] - s[0]); - * dsdy = abs(s[2] - s[0]); - * dtdx = abs(t[1] - t[0]); - * dtdy = abs(t[2] - t[0]); - * drdx = abs(r[1] - r[0]); - * drdy = abs(r[2] - r[0]); - * XXX we're assuming a four-element quad in 2x2 layout here. - */ - s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0"); - s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1"); - s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2"); - dsdx = lp_build_abs(coord_bld, lp_build_sub(coord_bld, s1, s0)); - dsdy = lp_build_abs(coord_bld, lp_build_sub(coord_bld, s2, s0)); - if (dims > 1) { - t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0"); - t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1"); - t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2"); - dtdx = lp_build_abs(coord_bld, lp_build_sub(coord_bld, t1, t0)); - dtdy = lp_build_abs(coord_bld, lp_build_sub(coord_bld, t2, t0)); - if (dims > 2) { - r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0"); - r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1"); - r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2"); - drdx = lp_build_abs(coord_bld, lp_build_sub(coord_bld, r1, r0)); - drdy = lp_build_abs(coord_bld, lp_build_sub(coord_bld, r2, r0)); - } - } - - /* Compute rho = max of all partial derivatives scaled by texture size. - * XXX this can be vectorized somewhat - */ - rho = lp_build_mul(coord_bld, - lp_build_max(coord_bld, dsdx, dsdy), - lp_build_int_to_float(coord_bld, width)); - if (dims > 1) { - LLVMValueRef max; - max = lp_build_mul(coord_bld, - lp_build_max(coord_bld, dtdx, dtdy), - lp_build_int_to_float(coord_bld, height)); - rho = lp_build_max(coord_bld, rho, max); - if (dims > 2) { - max = lp_build_mul(coord_bld, - lp_build_max(coord_bld, drdx, drdy), - lp_build_int_to_float(coord_bld, depth)); - rho = lp_build_max(coord_bld, rho, max); - } - } - - /* compute lod = log2(rho) */ - lod = lp_build_log2(coord_bld, rho); - - /* add lod bias */ - lod = lp_build_add(coord_bld, lod, lod_bias); - - /* clamp lod */ - lod = lp_build_clamp(coord_bld, lod, min_lod, max_lod); - - return lod; -} - - -/** - * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer - * mipmap level index. - * \param lod scalar float texture level of detail - * \param level_out returns integer - */ -static void -lp_build_nearest_mip_level(struct lp_build_sample_context *bld, - unsigned unit, - LLVMValueRef lod, - LLVMValueRef *level_out) -{ - struct lp_build_context *coord_bld = &bld->coord_bld; - struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - LLVMValueRef last_level, level; - - last_level = bld->dynamic_state->last_level(bld->dynamic_state, - bld->builder, unit); - - /* convert float lod to integer */ - level = lp_build_iround(coord_bld, lod); - - /* clamp level to legal range of levels */ - *level_out = lp_build_clamp(int_coord_bld, level, - int_coord_bld->zero, - last_level); -} - - -/** - * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to - * two (adjacent) mipmap level indexes. Later, we'll sample from those - * two mipmap levels and interpolate between them. - */ -static void -lp_build_linear_mip_levels(struct lp_build_sample_context *bld, - unsigned unit, - LLVMValueRef lod, - LLVMValueRef *level0_out, - LLVMValueRef *level1_out, - LLVMValueRef *weight_out) -{ - struct lp_build_context *coord_bld = &bld->coord_bld; - struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - LLVMValueRef last_level, level; - - last_level = bld->dynamic_state->last_level(bld->dynamic_state, - bld->builder, unit); - - /* convert float lod to integer */ - level = lp_build_ifloor(coord_bld, lod); - - /* compute level 0 and clamp to legal range of levels */ - *level0_out = lp_build_clamp(int_coord_bld, level, - int_coord_bld->zero, - last_level); - /* compute level 1 and clamp to legal range of levels */ - *level1_out = lp_build_add(int_coord_bld, *level0_out, int_coord_bld->one); - *level1_out = lp_build_min(int_coord_bld, *level1_out, int_coord_bld->zero); - - *weight_out = lp_build_fract(coord_bld, lod); -} - - - /** * Build texture sampling code. * 'texel' will return a vector of four LLVMValueRefs corresponding to * R, G, B, A. + * \param type vector float type to use for coords, etc. */ void lp_build_sample_soa(LLVMBuilderRef builder, @@ -1245,10 +1999,11 @@ lp_build_sample_soa(LLVMBuilderRef builder, LLVMValueRef *texel) { struct lp_build_sample_context bld; - LLVMValueRef width; - LLVMValueRef height; - LLVMValueRef stride; - LLVMValueRef data_ptr; + LLVMValueRef width, width_vec; + LLVMValueRef height, height_vec; + LLVMValueRef depth, depth_vec; + LLVMValueRef stride_array; + LLVMValueRef data_array; LLVMValueRef s; LLVMValueRef t; LLVMValueRef r; @@ -1256,6 +2011,7 @@ lp_build_sample_soa(LLVMBuilderRef builder, (void) lp_build_lod_selector; /* temporary to silence warning */ (void) lp_build_nearest_mip_level; (void) lp_build_linear_mip_levels; + (void) lp_build_minify; /* Setup our build context */ memset(&bld, 0, sizeof bld); @@ -1263,10 +2019,16 @@ lp_build_sample_soa(LLVMBuilderRef builder, bld.static_state = static_state; bld.dynamic_state = dynamic_state; bld.format_desc = util_format_description(static_state->format); + + bld.float_type = lp_type_float(32); + bld.int_type = lp_type_int(32); bld.coord_type = type; bld.uint_coord_type = lp_uint_type(type); bld.int_coord_type = lp_int_type(type); bld.texel_type = type; + + lp_build_context_init(&bld.float_bld, builder, bld.float_type); + lp_build_context_init(&bld.int_bld, builder, bld.int_type); lp_build_context_init(&bld.coord_bld, builder, bld.coord_type); lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type); lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type); @@ -1275,41 +2037,37 @@ lp_build_sample_soa(LLVMBuilderRef builder, /* Get the dynamic state */ width = dynamic_state->width(dynamic_state, builder, unit); height = dynamic_state->height(dynamic_state, builder, unit); - stride = dynamic_state->stride(dynamic_state, builder, unit); - data_ptr = dynamic_state->data_ptr(dynamic_state, builder, unit); + depth = dynamic_state->depth(dynamic_state, builder, unit); + stride_array = dynamic_state->row_stride(dynamic_state, builder, unit); + data_array = dynamic_state->data_ptr(dynamic_state, builder, unit); + /* Note that data_array is an array[level] of pointers to texture images */ s = coords[0]; t = coords[1]; r = coords[2]; - width = lp_build_broadcast_scalar(&bld.uint_coord_bld, width); - height = lp_build_broadcast_scalar(&bld.uint_coord_bld, height); - stride = lp_build_broadcast_scalar(&bld.uint_coord_bld, stride); - - if(static_state->target == PIPE_TEXTURE_1D) - t = bld.coord_bld.zero; - - switch (static_state->min_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, - stride, data_ptr, texel); - break; - case PIPE_TEX_FILTER_LINEAR: - if(lp_format_is_rgba8(bld.format_desc) && - is_simple_wrap_mode(static_state->wrap_s) && - is_simple_wrap_mode(static_state->wrap_t)) - lp_build_sample_2d_linear_aos(&bld, s, t, width, height, - stride, data_ptr, texel); - else - lp_build_sample_2d_linear_soa(&bld, s, t, width, height, - stride, data_ptr, texel); - break; - default: - assert(0); + width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width); + height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height); + depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth); + + if (util_format_is_rgba8_variant(bld.format_desc) && + static_state->target == PIPE_TEXTURE_2D && + static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR && + static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR && + static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && + is_simple_wrap_mode(static_state->wrap_s) && + is_simple_wrap_mode(static_state->wrap_t)) { + /* special case */ + lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec, + stride_array, data_array, texel); + } + else { + lp_build_sample_general(&bld, unit, s, t, r, + width, height, depth, + width_vec, height_vec, depth_vec, + stride_array, NULL, data_array, + texel); } - - /* FIXME: respect static_state->min_mip_filter */; - /* FIXME: respect static_state->mag_img_filter */; lp_build_sample_compare(&bld, r, texel); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.h b/src/gallium/auxiliary/gallivm/lp_bld_struct.h index 740392f561..147336edb4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_struct.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.h @@ -37,7 +37,7 @@ #define LP_BLD_STRUCT_H -#include <llvm-c/Core.h> +#include "gallivm/lp_bld.h" #include <llvm-c/Target.h> #include "util/u_debug.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c index 64e81f7b1f..278c838eac 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c @@ -144,9 +144,9 @@ lp_build_broadcast_aos(struct lp_build_context *bld, #endif if(shift > 0) - tmp = LLVMBuildLShr(bld->builder, a, lp_build_int_const_scalar(type4, shift*type.width), ""); + tmp = LLVMBuildLShr(bld->builder, a, lp_build_const_int_vec(type4, shift*type.width), ""); if(shift < 0) - tmp = LLVMBuildShl(bld->builder, a, lp_build_int_const_scalar(type4, -shift*type.width), ""); + tmp = LLVMBuildShl(bld->builder, a, lp_build_const_int_vec(type4, -shift*type.width), ""); assert(tmp); if(tmp) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h index b9472127a6..138ca620e6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h @@ -37,7 +37,7 @@ #define LP_BLD_SWIZZLE_H -#include <llvm-c/Core.h> +#include "gallivm/lp_bld.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index eddb7a83fa..63b938bfa9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -35,7 +35,7 @@ #ifndef LP_BLD_TGSI_H #define LP_BLD_TGSI_H -#include <llvm-c/Core.h> +#include "gallivm/lp_bld.h" struct tgsi_token; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 5f2c2a54ee..8901e656ae 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -41,6 +41,7 @@ #include "util/u_debug.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" @@ -95,6 +96,19 @@ struct lp_exec_mask { int cond_stack_size; LLVMValueRef cond_mask; + LLVMValueRef break_stack[LP_TGSI_MAX_NESTING]; + int break_stack_size; + LLVMValueRef break_mask; + + LLVMValueRef cont_stack[LP_TGSI_MAX_NESTING]; + int cont_stack_size; + LLVMValueRef cont_mask; + + LLVMBasicBlockRef loop_stack[LP_TGSI_MAX_NESTING]; + int loop_stack_size; + LLVMBasicBlockRef loop_block; + + LLVMValueRef exec_mask; }; @@ -145,15 +159,33 @@ static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context mask->bld = bld; mask->has_mask = FALSE; mask->cond_stack_size = 0; + mask->loop_stack_size = 0; + mask->break_stack_size = 0; + mask->cont_stack_size = 0; mask->int_vec_type = lp_build_int_vec_type(mask->bld->type); } static void lp_exec_mask_update(struct lp_exec_mask *mask) { - mask->exec_mask = mask->cond_mask; - if (mask->cond_stack_size > 0) - mask->has_mask = TRUE; + if (mask->loop_stack_size) { + /*for loops we need to update the entire mask at + * runtime */ + LLVMValueRef tmp; + tmp = LLVMBuildAnd(mask->bld->builder, + mask->cont_mask, + mask->break_mask, + "maskcb"); + mask->exec_mask = LLVMBuildAnd(mask->bld->builder, + mask->cond_mask, + tmp, + "maskfull"); + } else + mask->exec_mask = mask->cond_mask; + + + mask->has_mask = (mask->cond_stack_size > 0 || + mask->loop_stack_size > 0); } static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, @@ -190,6 +222,89 @@ static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask) lp_exec_mask_update(mask); } +static void lp_exec_bgnloop(struct lp_exec_mask *mask) +{ + + if (mask->cont_stack_size == 0) + mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type); + if (mask->cont_stack_size == 0) + mask->break_mask = LLVMConstAllOnes(mask->int_vec_type); + if (mask->cond_stack_size == 0) + mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type); + mask->loop_stack[mask->loop_stack_size++] = mask->loop_block; + mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop"); + LLVMBuildBr(mask->bld->builder, mask->loop_block); + LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block); + + lp_exec_mask_update(mask); +} + +static void lp_exec_break(struct lp_exec_mask *mask) +{ + LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, + mask->exec_mask, + "break"); + + mask->break_stack[mask->break_stack_size++] = mask->break_mask; + if (mask->break_stack_size > 1) { + mask->break_mask = LLVMBuildAnd(mask->bld->builder, + mask->break_mask, + exec_mask, "break_full"); + } else + mask->break_mask = exec_mask; + + lp_exec_mask_update(mask); +} + +static void lp_exec_continue(struct lp_exec_mask *mask) +{ + LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, + mask->exec_mask, + ""); + + mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask; + if (mask->cont_stack_size > 1) { + mask->cont_mask = LLVMBuildAnd(mask->bld->builder, + mask->cont_mask, + exec_mask, ""); + } else + mask->cont_mask = exec_mask; + + lp_exec_mask_update(mask); +} + + +static void lp_exec_endloop(struct lp_exec_mask *mask) +{ + LLVMBasicBlockRef endloop; + LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width* + mask->bld->type.length); + /* i1cond = (mask == 0) */ + LLVMValueRef i1cond = LLVMBuildICmp( + mask->bld->builder, + LLVMIntNE, + LLVMBuildBitCast(mask->bld->builder, mask->break_mask, reg_type, ""), + LLVMConstNull(reg_type), ""); + + endloop = lp_build_insert_new_block(mask->bld->builder, "endloop"); + + LLVMBuildCondBr(mask->bld->builder, + i1cond, mask->loop_block, endloop); + + LLVMPositionBuilderAtEnd(mask->bld->builder, endloop); + + mask->loop_block = mask->loop_stack[--mask->loop_stack_size]; + /* pop the break mask */ + if (mask->cont_stack_size) { + mask->cont_mask = mask->cont_stack[--mask->cont_stack_size]; + } + if (mask->break_stack_size) { + mask->break_mask = mask->cont_stack[--mask->break_stack_size]; + } + + lp_exec_mask_update(mask); +} + static void lp_exec_mask_store(struct lp_exec_mask *mask, LLVMValueRef val, LLVMValueRef dst) @@ -360,7 +475,7 @@ emit_store( break; case TGSI_SAT_MINUS_PLUS_ONE: - value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0)); + value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0)); value = lp_build_min(&bld->base, value, bld->base.one); break; @@ -384,6 +499,11 @@ emit_store( assert(0); break; + case TGSI_FILE_PREDICATE: + /* FIXME */ + assert(0); + break; + default: assert( 0 ); } @@ -531,32 +651,40 @@ emit_declaration( unsigned first = decl->Range.First; unsigned last = decl->Range.Last; unsigned idx, i; + LLVMBasicBlockRef current_block = + LLVMGetInsertBlock(bld->base.builder); + LLVMBasicBlockRef first_block = + LLVMGetEntryBasicBlock( + LLVMGetBasicBlockParent(current_block)); + LLVMValueRef first_inst = + LLVMGetFirstInstruction(first_block); + + /* we want alloca's to be the first instruction + * in the function so we need to rewind the builder + * to the very beginning */ + LLVMPositionBuilderBefore(bld->base.builder, + first_inst); for (idx = first; idx <= last; ++idx) { - boolean ok; - switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: for (i = 0; i < NUM_CHANNELS; i++) bld->temps[idx][i] = lp_build_alloca(&bld->base); - ok = TRUE; break; case TGSI_FILE_OUTPUT: for (i = 0; i < NUM_CHANNELS; i++) bld->outputs[idx][i] = lp_build_alloca(&bld->base); - ok = TRUE; break; default: /* don't need to declare other vars */ - ok = TRUE; + break; } - - if (!ok) - return FALSE; } + LLVMPositionBuilderAtEnd(bld->base.builder, + current_block); return TRUE; } @@ -581,6 +709,17 @@ emit_instruction( if (indirect_temp_reference(inst)) return FALSE; + /* + * Stores and write masks are handled in a general fashion after the long + * instruction opcode switch statement. + * + * Although not stricitly necessary, we avoid generating instructions for + * channels which won't be stored, in cases where's that easy. For some + * complex instructions, like texture sampling, it is more convenient to + * assume a full writemask and then let LLVM optimization passes eliminate + * redundant code. + */ + assert(info->num_dst <= 1); if(info->num_dst) { FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { @@ -865,7 +1004,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); src2 = emit_fetch( bld, inst, 2, chan_index ); - tmp1 = lp_build_const_scalar(bld->base.type, 0.5); + tmp1 = lp_build_const_vec(bld->base.type, 0.5); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); } @@ -1126,7 +1265,6 @@ emit_instruction( break; case TGSI_OPCODE_TEX: - /* XXX what about dst0 writemask? */ emit_tex( bld, inst, FALSE, FALSE, dst0 ); break; @@ -1349,14 +1487,15 @@ emit_instruction( case TGSI_OPCODE_TXP: emit_tex( bld, inst, FALSE, TRUE, dst0 ); break; - + case TGSI_OPCODE_BRK: - /* FIXME */ - return 0; + lp_exec_break(&bld->exec_mask); break; case TGSI_OPCODE_IF: tmp0 = emit_fetch(bld, inst, 0, CHAN_X); + tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, + tmp0, bld->base.zero); lp_exec_mask_cond_push(&bld->exec_mask, tmp0); break; @@ -1366,6 +1505,10 @@ emit_instruction( return 0; break; + case TGSI_OPCODE_BGNLOOP: + lp_exec_bgnloop(&bld->exec_mask); + break; + case TGSI_OPCODE_REP: /* deprecated */ assert(0); @@ -1386,6 +1529,10 @@ emit_instruction( return 0; break; + case TGSI_OPCODE_ENDLOOP: + lp_exec_endloop(&bld->exec_mask); + break; + case TGSI_OPCODE_ENDREP: /* deprecated */ assert(0); @@ -1485,8 +1632,7 @@ emit_instruction( break; case TGSI_OPCODE_CONT: - /* FIXME */ - return 0; + lp_exec_continue(&bld->exec_mask); break; case TGSI_OPCODE_EMIT: @@ -1575,7 +1721,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, assert(num_immediates < LP_MAX_IMMEDIATES); for( i = 0; i < size; ++i ) bld.immediates[num_immediates][i] = - lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float); + lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float); for( i = size; i < 4; ++i ) bld.immediates[num_immediates][i] = bld.base.undef; num_immediates++; @@ -1589,7 +1735,14 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, assert( 0 ); } } - + if (0) { + LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(block); + debug_printf("11111111111111111111111111111 \n"); + tgsi_dump(tokens, 0); + LLVMDumpValue(function); + debug_printf("2222222222222222222222222222 \n"); + } tgsi_parse_free( &parse ); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.c b/src/gallium/auxiliary/gallivm/lp_bld_type.c index c327ba045a..796af88caa 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.c @@ -58,7 +58,10 @@ LLVMTypeRef lp_build_vec_type(struct lp_type type) { LLVMTypeRef elem_type = lp_build_elem_type(type); - return LLVMVectorType(elem_type, type.length); + if (type.length == 1) + return elem_type; + else + return LLVMVectorType(elem_type, type.length); } @@ -115,6 +118,9 @@ lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type) if(!vec_type) return FALSE; + if (type.length == 1) + return lp_check_elem_type(type, vec_type); + if(LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) return FALSE; @@ -153,7 +159,10 @@ LLVMTypeRef lp_build_int_vec_type(struct lp_type type) { LLVMTypeRef elem_type = lp_build_int_elem_type(type); - return LLVMVectorType(elem_type, type.length); + if (type.length == 1) + return elem_type; + else + return LLVMVectorType(elem_type, type.length); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h index 16946cc28a..cd59d2faa6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h @@ -37,9 +37,9 @@ #define LP_BLD_TYPE_H -#include <llvm-c/Core.h> +#include "pipe/p_compiler.h" +#include "gallivm/lp_bld.h" -#include <pipe/p_compiler.h> /** @@ -103,7 +103,7 @@ struct lp_type { unsigned width:14; /** - * Vector length. + * Vector length. If length==1, this is a scalar (float/int) type. * * width*length should be a power of two greater or equal to eight. * @@ -139,6 +139,7 @@ struct lp_build_context }; +/** Create scalar float type */ static INLINE struct lp_type lp_type_float(unsigned width) { @@ -148,12 +149,29 @@ lp_type_float(unsigned width) res_type.floating = TRUE; res_type.sign = TRUE; res_type.width = width; + res_type.length = 1; + + return res_type; +} + + +/** Create vector of float type */ +static INLINE struct lp_type +lp_type_float_vec(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.floating = TRUE; + res_type.sign = TRUE; + res_type.width = width; res_type.length = LP_NATIVE_VECTOR_WIDTH / width; return res_type; } +/** Create scalar int type */ static INLINE struct lp_type lp_type_int(unsigned width) { @@ -162,12 +180,28 @@ lp_type_int(unsigned width) memset(&res_type, 0, sizeof res_type); res_type.sign = TRUE; res_type.width = width; + res_type.length = 1; + + return res_type; +} + + +/** Create vector int type */ +static INLINE struct lp_type +lp_type_int_vec(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.sign = TRUE; + res_type.width = width; res_type.length = LP_NATIVE_VECTOR_WIDTH / width; return res_type; } +/** Create scalar uint type */ static INLINE struct lp_type lp_type_uint(unsigned width) { @@ -175,6 +209,20 @@ lp_type_uint(unsigned width) memset(&res_type, 0, sizeof res_type); res_type.width = width; + res_type.length = 1; + + return res_type; +} + + +/** Create vector uint type */ +static INLINE struct lp_type +lp_type_uint_vec(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.width = width; res_type.length = LP_NATIVE_VECTOR_WIDTH / width; return res_type; diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h index a04df4106f..004dad6b63 100644 --- a/src/gallium/auxiliary/os/os_thread.h +++ b/src/gallium/auxiliary/os/os_thread.h @@ -40,7 +40,7 @@ #include "util/u_debug.h" /* for assert */ -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED) #include <pthread.h> /* POSIX threads headers */ #include <stdio.h> /* for perror() */ @@ -257,7 +257,7 @@ typedef unsigned pipe_condvar; * pipe_barrier */ -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED) typedef pthread_barrier_t pipe_barrier; @@ -377,7 +377,7 @@ pipe_semaphore_wait(pipe_semaphore *sema) */ typedef struct { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED) pthread_key_t key; #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) DWORD key; @@ -392,7 +392,7 @@ typedef struct { static INLINE void pipe_tsd_init(pipe_tsd *tsd) { -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED) if (pthread_key_create(&tsd->key, NULL/*free*/) != 0) { perror("pthread_key_create(): failed to allocate key for thread specific data"); exit(-1); @@ -409,7 +409,7 @@ pipe_tsd_get(pipe_tsd *tsd) if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) { pipe_tsd_init(tsd); } -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED) return pthread_getspecific(tsd->key); #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) assert(0); @@ -426,7 +426,7 @@ pipe_tsd_set(pipe_tsd *tsd, void *value) if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) { pipe_tsd_init(tsd); } -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_EMBEDDED) if (pthread_setspecific(tsd->key, value) != 0) { perror("pthread_set_specific() failed"); exit(-1); diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index 34b1b77df4..a6c50dcf0c 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -48,7 +48,6 @@ #include "util/u_debug.h" #include "util/u_inlines.h" #include "pipe/p_defines.h" -#include "pipe/p_state.h" #ifdef __cplusplus @@ -58,8 +57,23 @@ extern "C" { struct pb_vtbl; struct pb_validate; +struct pipe_fence_handle; +#define PB_USAGE_CPU_READ (1 << 0) +#define PB_USAGE_CPU_WRITE (1 << 1) +#define PB_USAGE_GPU_READ (1 << 2) +#define PB_USAGE_GPU_WRITE (1 << 3) +#define PB_USAGE_UNSYNCHRONIZED (1 << 10) +#define PB_USAGE_DONTBLOCK (1 << 9) + +#define PB_USAGE_CPU_READ_WRITE \ + ( PB_USAGE_CPU_READ | PB_USAGE_CPU_WRITE ) +#define PB_USAGE_GPU_READ_WRITE \ + ( PB_USAGE_GPU_READ | PB_USAGE_GPU_WRITE ) +#define PB_USAGE_WRITE \ + ( PB_USAGE_CPU_WRITE | PB_USAGE_GPU_WRITE ) + /** * Buffer description. * @@ -83,7 +97,14 @@ typedef unsigned pb_size; */ struct pb_buffer { - struct pipe_buffer base; + /* This used to be a pipe_buffer struct: + */ + struct { + struct pipe_reference reference; + unsigned size; + unsigned alignment; + unsigned usage; + } base; /** * Pointer to the virtual function table. @@ -106,7 +127,7 @@ struct pb_vtbl /** * Map the entire data store of a buffer object into the client's address. - * flags is bitmask of PIPE_BUFFER_FLAG_READ/WRITE. + * flags is bitmask of PB_USAGE_CPU_READ/WRITE. */ void *(*map)( struct pb_buffer *buf, unsigned flags ); @@ -138,23 +159,6 @@ struct pb_vtbl }; -static INLINE struct pipe_buffer * -pb_pipe_buffer( struct pb_buffer *pbuf ) -{ - assert(pbuf); - return &pbuf->base; -} - - -static INLINE struct pb_buffer * -pb_buffer( struct pipe_buffer *buf ) -{ - assert(buf); - /* Could add a magic cookie check on debug builds. - */ - return (struct pb_buffer *)buf; -} - /* Accessor functions for pb->vtbl: */ diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index d97f749b6e..d6cf640582 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -135,7 +135,7 @@ struct fenced_buffer void *data; /** - * A bitmask of PIPE_BUFFER_USAGE_CPU/GPU_READ/WRITE describing the current + * A bitmask of PB_USAGE_CPU/GPU_READ/WRITE describing the current * buffer usage. */ unsigned flags; @@ -270,7 +270,7 @@ fenced_buffer_add_locked(struct fenced_manager *fenced_mgr, struct fenced_buffer *fenced_buf) { assert(pipe_is_referenced(&fenced_buf->base.base.reference)); - assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); + assert(fenced_buf->flags & PB_USAGE_GPU_READ_WRITE); assert(fenced_buf->fence); p_atomic_inc(&fenced_buf->base.base.reference.count); @@ -299,7 +299,7 @@ fenced_buffer_remove_locked(struct fenced_manager *fenced_mgr, assert(fenced_buf->mgr == fenced_mgr); ops->fence_reference(ops, &fenced_buf->fence, NULL); - fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; + fenced_buf->flags &= ~PB_USAGE_GPU_READ_WRITE; assert(fenced_buf->head.prev); assert(fenced_buf->head.next); @@ -377,7 +377,7 @@ fenced_buffer_finish_locked(struct fenced_manager *fenced_mgr, assert(!destroyed); - fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; + fenced_buf->flags &= ~PB_USAGE_GPU_READ_WRITE; ret = PIPE_OK; } @@ -624,7 +624,7 @@ fenced_buffer_copy_storage_to_gpu_locked(struct fenced_buffer *fenced_buf) assert(fenced_buf->data); assert(fenced_buf->buffer); - map = pb_map(fenced_buf->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_WRITE); if(!map) return PIPE_ERROR; @@ -644,7 +644,7 @@ fenced_buffer_copy_storage_to_cpu_locked(struct fenced_buffer *fenced_buf) assert(fenced_buf->data); assert(fenced_buf->buffer); - map = pb_map(fenced_buf->buffer, PIPE_BUFFER_USAGE_CPU_READ); + map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_READ); if(!map) return PIPE_ERROR; @@ -683,24 +683,24 @@ fenced_buffer_map(struct pb_buffer *buf, pipe_mutex_lock(fenced_mgr->mutex); - assert(!(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE)); + assert(!(flags & PB_USAGE_GPU_READ_WRITE)); /* * Serialize writes. */ - while((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_WRITE) || - ((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ) && - (flags & PIPE_BUFFER_USAGE_CPU_WRITE))) { + while((fenced_buf->flags & PB_USAGE_GPU_WRITE) || + ((fenced_buf->flags & PB_USAGE_GPU_READ) && + (flags & PB_USAGE_CPU_WRITE))) { /* * Don't wait for the GPU to finish accessing it, if blocking is forbidden. */ - if((flags & PIPE_BUFFER_USAGE_DONTBLOCK) && + if((flags & PB_USAGE_DONTBLOCK) && ops->fence_signalled(ops, fenced_buf->fence, 0) != 0) { goto done; } - if (flags & PIPE_BUFFER_USAGE_UNSYNCHRONIZED) { + if (flags & PB_USAGE_UNSYNCHRONIZED) { break; } @@ -721,7 +721,7 @@ fenced_buffer_map(struct pb_buffer *buf, if(map) { ++fenced_buf->mapcount; - fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE; + fenced_buf->flags |= flags & PB_USAGE_CPU_READ_WRITE; } done: @@ -745,7 +745,7 @@ fenced_buffer_unmap(struct pb_buffer *buf) pb_unmap(fenced_buf->buffer); --fenced_buf->mapcount; if(!fenced_buf->mapcount) - fenced_buf->flags &= ~PIPE_BUFFER_USAGE_CPU_READ_WRITE; + fenced_buf->flags &= ~PB_USAGE_CPU_READ_WRITE; } pipe_mutex_unlock(fenced_mgr->mutex); @@ -771,9 +771,9 @@ fenced_buffer_validate(struct pb_buffer *buf, goto done; } - assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); - assert(!(flags & ~PIPE_BUFFER_USAGE_GPU_READ_WRITE)); - flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE; + assert(flags & PB_USAGE_GPU_READ_WRITE); + assert(!(flags & ~PB_USAGE_GPU_READ_WRITE)); + flags &= PB_USAGE_GPU_READ_WRITE; /* Buffer cannot be validated in two different lists */ if(fenced_buf->vl && fenced_buf->vl != vl) { diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h index 0372f81d0a..004c2b939a 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h @@ -59,7 +59,6 @@ extern "C" { #endif -struct pipe_buffer; struct pipe_fence_handle; diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c index 6bdce5fcb0..b706f429be 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c @@ -135,9 +135,9 @@ pb_malloc_buffer_create(pb_size size, return NULL; pipe_reference_init(&buf->base.base.reference, 1); - buf->base.base.alignment = desc->alignment; buf->base.base.usage = desc->usage; buf->base.base.size = size; + buf->base.base.alignment = desc->alignment; buf->base.vtbl = &malloc_buffer_vtbl; buf->data = align_malloc(size, desc->alignment < sizeof(void*) ? sizeof(void*) : desc->alignment); diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 06669917ff..cec2524da2 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -60,7 +60,6 @@ extern "C" { struct pb_desc; -struct pipe_buffer; /** diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index 86f9266c95..88501e8d72 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -227,6 +227,8 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf, pb_size size, const struct pb_desc *desc) { + void *map; + if(buf->base.base.size < size) return FALSE; @@ -239,6 +241,13 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf, if(!pb_check_usage(desc->usage, buf->base.base.usage)) return FALSE; + + map = pb_map(buf->buffer, PB_USAGE_DONTBLOCK); + if (!map) { + return FALSE; + } + + pb_unmap(buf->buffer); return TRUE; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index a5dbded2bc..0dc5b31a75 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -158,7 +158,7 @@ pb_debug_buffer_fill(struct pb_debug_buffer *buf) { uint8_t *map; - map = pb_map(buf->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + map = pb_map(buf->buffer, PB_USAGE_CPU_WRITE); assert(map); if(map) { fill_random_pattern(map, buf->underflow_size); @@ -180,8 +180,8 @@ pb_debug_buffer_check(struct pb_debug_buffer *buf) uint8_t *map; map = pb_map(buf->buffer, - PIPE_BUFFER_USAGE_CPU_READ | - PIPE_BUFFER_USAGE_UNSYNCHRONIZED); + PB_USAGE_CPU_READ | + PB_USAGE_UNSYNCHRONIZED); assert(map); if(map) { boolean underflow, overflow; @@ -382,8 +382,8 @@ pb_debug_manager_create_buffer(struct pb_manager *_mgr, real_size = mgr->underflow_size + size + mgr->overflow_size; real_desc = *desc; - real_desc.usage |= PIPE_BUFFER_USAGE_CPU_WRITE; - real_desc.usage |= PIPE_BUFFER_USAGE_CPU_READ; + real_desc.usage |= PB_USAGE_CPU_WRITE; + real_desc.usage |= PB_USAGE_CPU_READ; buf->buffer = mgr->provider->create_buffer(mgr->provider, real_size, diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index 63195715d6..faf7c35267 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -268,8 +268,8 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer, mm->buffer = buffer; mm->map = pb_map(mm->buffer, - PIPE_BUFFER_USAGE_CPU_READ | - PIPE_BUFFER_USAGE_CPU_WRITE); + PB_USAGE_CPU_READ | + PB_USAGE_CPU_WRITE); if(!mm->map) goto failure; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c index cb32d25136..31f1ebbeb7 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c @@ -150,7 +150,7 @@ pb_ondemand_buffer_instantiate(struct pb_ondemand_buffer *buf) if(!buf->buffer) return PIPE_ERROR_OUT_OF_MEMORY; - map = pb_map(buf->buffer, PIPE_BUFFER_USAGE_CPU_READ); + map = pb_map(buf->buffer, PB_USAGE_CPU_READ); if(!map) { pb_reference(&buf->buffer, NULL); return PIPE_ERROR; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c index fea234ae8c..fdcce42878 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -284,8 +284,8 @@ pool_bufmgr_create(struct pb_manager *provider, goto failure; pool->map = pb_map(pool->buffer, - PIPE_BUFFER_USAGE_CPU_READ | - PIPE_BUFFER_USAGE_CPU_WRITE); + PB_USAGE_CPU_READ | + PB_USAGE_CPU_WRITE); if(!pool->map) goto failure; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index 24e2820f88..7a3305aaf3 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -315,8 +315,8 @@ pb_slab_create(struct pb_slab_manager *mgr) /* Note down the slab virtual address. All mappings are accessed directly * through this address so it is required that the buffer is pinned. */ slab->virtual = pb_map(slab->bo, - PIPE_BUFFER_USAGE_CPU_READ | - PIPE_BUFFER_USAGE_CPU_WRITE); + PB_USAGE_CPU_READ | + PB_USAGE_CPU_WRITE); if(!slab->virtual) { ret = PIPE_ERROR_OUT_OF_MEMORY; goto out_err1; diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c index 903afc749d..b585422460 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.c +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c @@ -69,9 +69,9 @@ pb_validate_add_buffer(struct pb_validate *vl, if(!buf) return PIPE_ERROR; - assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); - assert(!(flags & ~PIPE_BUFFER_USAGE_GPU_READ_WRITE)); - flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE; + assert(flags & PB_USAGE_GPU_READ_WRITE); + assert(!(flags & ~PB_USAGE_GPU_READ_WRITE)); + flags &= PB_USAGE_GPU_READ_WRITE; /* We only need to store one reference for each buffer, so avoid storing * consecutive references for the same buffer. It might not be the most diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index f675427d98..7595214bdf 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -87,7 +87,7 @@ void x86_print_reg( struct x86_reg reg ) foo++; \ if (*foo) \ foo++; \ - debug_printf( "\n% 4x% 15s ", p->csr - p->store, foo ); \ + debug_printf( "\n%4x %14s ", p->csr - p->store, foo ); \ } while (0) #define DUMP_I( I ) do { \ diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index f7612d416a..319b836ffb 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -102,7 +102,7 @@ enum sse_cc { #define cc_Z cc_E #define cc_NZ cc_NE -/* Begin/end/retreive function creation: +/* Begin/end/retrieve function creation: */ @@ -311,8 +311,8 @@ void x87_fucom( struct x86_function *p, struct x86_reg arg ); -/* Retreive a reference to one of the function arguments, taking into - * account any push/pop activity. Note - doesn't track explict +/* Retrieve a reference to one of the function arguments, taking into + * account any push/pop activity. Note - doesn't track explicit * manipulation of ESP by other instructions. */ struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg ); diff --git a/src/gallium/auxiliary/util/u_timed_winsys.h b/src/gallium/auxiliary/target-helpers/wrap_screen.c index 542365112d..5fe3013938 100644 --- a/src/gallium/auxiliary/util/u_timed_winsys.h +++ b/src/gallium/auxiliary/target-helpers/wrap_screen.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,17 +25,41 @@ * * **************************************************************************/ + /* - * Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com> + * Authors: + * Keith Whitwell + */ + +#include "target-helpers/wrap_screen.h" +#include "trace/tr_public.h" +#include "identity/id_public.h" +#include "util/u_debug.h" + + +/* Centralized code to inject common wrapping layers: */ +struct pipe_screen * +gallium_wrap_screen( struct pipe_screen *screen ) +{ + /* Screen wrapping functions are required not to fail. If it is + * impossible to wrap a screen, the unwrapped screen should be + * returned instead. Any failure condition should be returned in + * an OUT argument. + * + * Otherwise it is really messy trying to clean up in this code. + */ + if (debug_get_bool_option("GALLIUM_WRAP", FALSE)) { + screen = identity_screen_create(screen); + } + if (debug_get_bool_option("GALLIUM_TRACE", FALSE)) { + screen = trace_screen_create( screen ); + } -#ifndef U_TIMED_WINSYS_H -#define U_TIMED_WINSYS_H + return screen; +} -struct pipe_winsys; -struct pipe_winsys *u_timed_winsys_create( struct pipe_winsys *backend ); -#endif diff --git a/src/gallium/auxiliary/target-helpers/wrap_screen.h b/src/gallium/auxiliary/target-helpers/wrap_screen.h new file mode 100644 index 0000000000..7e76beb7c5 --- /dev/null +++ b/src/gallium/auxiliary/target-helpers/wrap_screen.h @@ -0,0 +1,16 @@ +#ifndef WRAP_SCREEN_HELPER_H +#define WRAP_SCREEN_HELPER_H + +#include "pipe/p_compiler.h" + +struct pipe_screen; + +/* Centralized code to inject common wrapping layers. Other layers + * can be introduced by specific targets, but these are the generally + * helpful ones we probably want everywhere. + */ +struct pipe_screen * +gallium_wrap_screen( struct pipe_screen *screen ); + + +#endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index f918151daa..0b468a9184 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -55,7 +55,7 @@ static boolean is_digit_alpha_underscore( const char *cur ) return is_digit( cur ) || is_alpha_underscore( cur ); } -static boolean uprcase( char c ) +static char uprcase( char c ) { if (c >= 'a' && c <= 'z') return c += 'A' - 'a'; @@ -76,7 +76,7 @@ streq_nocase_uprcase(const char *str1, str1++; str2++; } - return TRUE; + return *str1 == 0 && *str2 == 0; } static boolean str_match_no_case( const char **pcur, const char *str ) diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index c9ec2b32bf..c3ec9ae3f4 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -393,10 +393,10 @@ static fetch_func get_fetch_func( enum pipe_format format ) return &fetch_R8G8B8A8_SSCALED; case PIPE_FORMAT_B8G8R8A8_UNORM: - return &fetch_A8R8G8B8_UNORM; + return &fetch_B8G8R8A8_UNORM; case PIPE_FORMAT_A8R8G8B8_UNORM: - return &fetch_B8G8R8A8_UNORM; + return &fetch_A8R8G8B8_UNORM; case PIPE_FORMAT_R32_FIXED: return &fetch_R32_FIXED; @@ -552,10 +552,10 @@ static emit_func get_emit_func( enum pipe_format format ) return &emit_R8G8B8A8_SSCALED; case PIPE_FORMAT_B8G8R8A8_UNORM: - return &emit_A8R8G8B8_UNORM; + return &emit_B8G8R8A8_UNORM; case PIPE_FORMAT_A8R8G8B8_UNORM: - return &emit_B8G8R8A8_UNORM; + return &emit_A8R8G8B8_UNORM; default: assert(0); diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index 03e093c11e..c13e742738 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -336,7 +336,7 @@ static boolean translate_attr( struct translate_sse *p, case PIPE_FORMAT_R32G32B32A32_FLOAT: emit_load_R32G32B32A32(p, dataXMM, srcECX); break; - case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX); emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W)); break; @@ -360,7 +360,7 @@ static boolean translate_attr( struct translate_sse *p, case PIPE_FORMAT_R32G32B32A32_FLOAT: emit_store_R32G32B32A32(p, dstEAX, dataXMM); break; - case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W)); emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM); break; diff --git a/src/gallium/auxiliary/util/.gitignore b/src/gallium/auxiliary/util/.gitignore index 448d2f304f..da74de623d 100644 --- a/src/gallium/auxiliary/util/.gitignore +++ b/src/gallium/auxiliary/util/.gitignore @@ -1,3 +1,2 @@ -u_format_access.c +u_format_srgb.c u_format_table.c -u_format_pack.h diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 0b263a9db5..7850f81e58 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -45,6 +45,7 @@ #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_sampler.h" #include "util/u_simple_shaders.h" #include "util/u_surface.h" #include "util/u_rect.h" @@ -63,11 +64,12 @@ struct blit_state struct pipe_sampler_state sampler; struct pipe_viewport_state viewport; struct pipe_clip_state clip; + struct pipe_vertex_element velem[2]; void *vs; void *fs[TGSI_WRITEMASK_XYZW + 1]; - struct pipe_buffer *vbuf; /**< quad vertices */ + struct pipe_resource *vbuf; /**< quad vertices */ unsigned vbuf_slot; float vertices[4][2][4]; /**< vertex/texcoords for quad */ @@ -114,6 +116,15 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) ctx->sampler.mag_img_filter = 0; /* set later */ ctx->sampler.normalized_coords = 1; + /* vertex elements state */ + memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2); + for (i = 0; i < 2; i++) { + ctx->velem[i].src_offset = i * 4 * sizeof(float); + ctx->velem[i].instance_divisor = 0; + ctx->velem[i].vertex_buffer_index = 0; + ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + } + /* vertex shader - still required to provide the linkage between * fragment shader input semantics and vertex_element/buffers. */ @@ -156,7 +167,7 @@ util_destroy_blit(struct blit_state *ctx) if (ctx->fs[i]) pipe->delete_fs_state(pipe, ctx->fs[i]); - pipe_buffer_reference(&ctx->vbuf, NULL); + pipe_resource_reference(&ctx->vbuf, NULL); FREE(ctx); } @@ -175,8 +186,7 @@ get_next_slot( struct blit_state *ctx ) if (!ctx->vbuf) { ctx->vbuf = pipe_buffer_create(ctx->pipe->screen, - 32, - PIPE_BUFFER_USAGE_VERTEX, + PIPE_BIND_VERTEX_BUFFER, max_slots * sizeof ctx->vertices); } @@ -225,7 +235,7 @@ setup_vertex_data_tex(struct blit_state *ctx, offset = get_next_slot( ctx ); - pipe_buffer_write_nooverlap(ctx->pipe->screen, ctx->vbuf, + pipe_buffer_write_nooverlap(ctx->pipe, ctx->vbuf, offset, sizeof(ctx->vertices), ctx->vertices); return offset; @@ -270,6 +280,7 @@ regions_overlap(int srcX0, int srcY0, void util_blit_pixels_writemask(struct blit_state *ctx, struct pipe_surface *src, + struct pipe_sampler_view *src_sampler_view, int srcX0, int srcY0, int srcX1, int srcY1, struct pipe_surface *dst, @@ -280,7 +291,7 @@ util_blit_pixels_writemask(struct blit_state *ctx, { struct pipe_context *pipe = ctx->pipe; struct pipe_screen *screen = pipe->screen; - struct pipe_texture *tex = NULL; + struct pipe_sampler_view *sampler_view = NULL; struct pipe_framebuffer_state fb; const int srcW = abs(srcX1 - srcX0); const int srcH = abs(srcY1 - srcY0); @@ -292,9 +303,9 @@ util_blit_pixels_writemask(struct blit_state *ctx, filter == PIPE_TEX_MIPFILTER_LINEAR); assert(screen->is_format_supported(screen, src->format, PIPE_TEXTURE_2D, - PIPE_TEXTURE_USAGE_SAMPLER, 0)); + PIPE_BIND_SAMPLER_VIEW, 0)); assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D, - PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)); + PIPE_BIND_RENDER_TARGET, 0)); /* do the regions overlap? */ overlap = util_same_surface(src, dst) && @@ -323,7 +334,7 @@ util_blit_pixels_writemask(struct blit_state *ctx, } assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D, - PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)); + PIPE_BIND_RENDER_TARGET, 0)); /* Create a temporary texture when src and dest alias or when src * is anything other than a single-level 2d texture. @@ -334,7 +345,9 @@ util_blit_pixels_writemask(struct blit_state *ctx, src->texture->target != PIPE_TEXTURE_2D || src->texture->last_level != 0) { - struct pipe_texture texTemp; + struct pipe_resource texTemp; + struct pipe_resource *tex; + struct pipe_sampler_view sv_templ; struct pipe_surface *texSurf; const int srcLeft = MIN2(srcX0, srcX1); const int srcTop = MIN2(srcY0, srcY1); @@ -362,12 +375,20 @@ util_blit_pixels_writemask(struct blit_state *ctx, texTemp.height0 = srcH; texTemp.depth0 = 1; - tex = screen->texture_create(screen, &texTemp); + tex = screen->resource_create(screen, &texTemp); if (!tex) return; + u_sampler_view_default_template(&sv_templ, tex, tex->format); + + sampler_view = ctx->pipe->create_sampler_view(ctx->pipe, tex, &sv_templ); + if (!sampler_view) { + pipe_resource_reference(&tex, NULL); + return; + } + texSurf = screen->get_tex_surface(screen, tex, 0, 0, 0, - PIPE_BUFFER_USAGE_GPU_WRITE); + PIPE_BIND_BLIT_DESTINATION); /* load temp texture */ if (pipe->surface_copy) { @@ -389,33 +410,38 @@ util_blit_pixels_writemask(struct blit_state *ctx, s1 = 1.0f; t0 = 0.0f; t1 = 1.0f; + + pipe_resource_reference(&tex, NULL); } else { - pipe_texture_reference(&tex, src->texture); - s0 = srcX0 / (float)tex->width0; - s1 = srcX1 / (float)tex->width0; - t0 = srcY0 / (float)tex->height0; - t1 = srcY1 / (float)tex->height0; + pipe_sampler_view_reference(&sampler_view, src_sampler_view); + s0 = srcX0 / (float)src->texture->width0; + s1 = srcX1 / (float)src->texture->width0; + t0 = srcY0 / (float)src->texture->height0; + t1 = srcY1 / (float)src->texture->height0; } + /* save state (restored below) */ cso_save_blend(ctx->cso); cso_save_depth_stencil_alpha(ctx->cso); cso_save_rasterizer(ctx->cso); cso_save_samplers(ctx->cso); - cso_save_sampler_textures(ctx->cso); + cso_save_fragment_sampler_views(ctx->cso); cso_save_viewport(ctx->cso); cso_save_framebuffer(ctx->cso); cso_save_fragment_shader(ctx->cso); cso_save_vertex_shader(ctx->cso); cso_save_clip(ctx->cso); + cso_save_vertex_elements(ctx->cso); /* set misc state we care about */ cso_set_blend(ctx->cso, &ctx->blend); cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil); cso_set_rasterizer(ctx->cso, &ctx->rasterizer); cso_set_clip(ctx->cso, &ctx->clip); + cso_set_vertex_elements(ctx->cso, 2, ctx->velem); /* sampler */ ctx->sampler.min_img_filter = filter; @@ -435,7 +461,7 @@ util_blit_pixels_writemask(struct blit_state *ctx, cso_set_viewport(ctx->cso, &ctx->viewport); /* texture */ - cso_set_sampler_textures(ctx->cso, 1, &tex); + cso_set_fragment_sampler_views(ctx->cso, 1, &sampler_view); if (ctx->fs[writemask] == NULL) ctx->fs[writemask] = @@ -474,20 +500,22 @@ util_blit_pixels_writemask(struct blit_state *ctx, cso_restore_depth_stencil_alpha(ctx->cso); cso_restore_rasterizer(ctx->cso); cso_restore_samplers(ctx->cso); - cso_restore_sampler_textures(ctx->cso); + cso_restore_fragment_sampler_views(ctx->cso); cso_restore_viewport(ctx->cso); cso_restore_framebuffer(ctx->cso); cso_restore_fragment_shader(ctx->cso); cso_restore_vertex_shader(ctx->cso); cso_restore_clip(ctx->cso); + cso_restore_vertex_elements(ctx->cso); - pipe_texture_reference(&tex, NULL); + pipe_sampler_view_reference(&sampler_view, NULL); } void util_blit_pixels(struct blit_state *ctx, struct pipe_surface *src, + struct pipe_sampler_view *src_sampler_view, int srcX0, int srcY0, int srcX1, int srcY1, struct pipe_surface *dst, @@ -495,7 +523,7 @@ util_blit_pixels(struct blit_state *ctx, int dstX1, int dstY1, float z, uint filter ) { - util_blit_pixels_writemask( ctx, src, + util_blit_pixels_writemask( ctx, src, src_sampler_view, srcX0, srcY0, srcX1, srcY1, dst, @@ -511,7 +539,7 @@ util_blit_pixels(struct blit_state *ctx, */ void util_blit_flush( struct blit_state *ctx ) { - pipe_buffer_reference(&ctx->vbuf, NULL); + pipe_resource_reference(&ctx->vbuf, NULL); ctx->vbuf_slot = 0; } @@ -526,21 +554,23 @@ void util_blit_flush( struct blit_state *ctx ) */ void util_blit_pixels_tex(struct blit_state *ctx, - struct pipe_texture *tex, - int srcX0, int srcY0, - int srcX1, int srcY1, - struct pipe_surface *dst, - int dstX0, int dstY0, - int dstX1, int dstY1, - float z, uint filter) + struct pipe_sampler_view *src_sampler_view, + int srcX0, int srcY0, + int srcX1, int srcY1, + struct pipe_surface *dst, + int dstX0, int dstY0, + int dstX1, int dstY1, + float z, uint filter) { struct pipe_framebuffer_state fb; float s0, t0, s1, t1; unsigned offset; + struct pipe_resource *tex = src_sampler_view->texture; assert(filter == PIPE_TEX_MIPFILTER_NEAREST || filter == PIPE_TEX_MIPFILTER_LINEAR); + assert(tex); assert(tex->width0 != 0); assert(tex->height0 != 0); @@ -551,7 +581,7 @@ util_blit_pixels_tex(struct blit_state *ctx, assert(ctx->pipe->screen->is_format_supported(ctx->pipe->screen, dst->format, PIPE_TEXTURE_2D, - PIPE_TEXTURE_USAGE_RENDER_TARGET, + PIPE_BIND_RENDER_TARGET, 0)); /* save state (restored below) */ @@ -559,17 +589,19 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_save_depth_stencil_alpha(ctx->cso); cso_save_rasterizer(ctx->cso); cso_save_samplers(ctx->cso); - cso_save_sampler_textures(ctx->cso); + cso_save_fragment_sampler_views(ctx->cso); cso_save_framebuffer(ctx->cso); cso_save_fragment_shader(ctx->cso); cso_save_vertex_shader(ctx->cso); cso_save_clip(ctx->cso); + cso_save_vertex_elements(ctx->cso); /* set misc state we care about */ cso_set_blend(ctx->cso, &ctx->blend); cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil); cso_set_rasterizer(ctx->cso, &ctx->rasterizer); cso_set_clip(ctx->cso, &ctx->clip); + cso_set_vertex_elements(ctx->cso, 2, ctx->velem); /* sampler */ ctx->sampler.min_img_filter = filter; @@ -589,7 +621,7 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_set_viewport(ctx->cso, &ctx->viewport); /* texture */ - cso_set_sampler_textures(ctx->cso, 1, &tex); + cso_set_fragment_sampler_views(ctx->cso, 1, &src_sampler_view); /* shaders */ cso_set_fragment_shader_handle(ctx->cso, ctx->fs[TGSI_WRITEMASK_XYZW]); @@ -623,9 +655,10 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_restore_depth_stencil_alpha(ctx->cso); cso_restore_rasterizer(ctx->cso); cso_restore_samplers(ctx->cso); - cso_restore_sampler_textures(ctx->cso); + cso_restore_fragment_sampler_views(ctx->cso); cso_restore_framebuffer(ctx->cso); cso_restore_fragment_shader(ctx->cso); cso_restore_vertex_shader(ctx->cso); cso_restore_clip(ctx->cso); + cso_restore_vertex_elements(ctx->cso); } diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h index a102021529..464ff9aace 100644 --- a/src/gallium/auxiliary/util/u_blit.h +++ b/src/gallium/auxiliary/util/u_blit.h @@ -37,7 +37,7 @@ extern "C" { struct pipe_context; struct pipe_surface; -struct pipe_texture; +struct pipe_resource; struct cso_context; @@ -53,6 +53,7 @@ util_destroy_blit(struct blit_state *ctx); extern void util_blit_pixels(struct blit_state *ctx, struct pipe_surface *src, + struct pipe_sampler_view *src_sampler_view, int srcX0, int srcY0, int srcX1, int srcY1, struct pipe_surface *dst, @@ -63,6 +64,7 @@ util_blit_pixels(struct blit_state *ctx, void util_blit_pixels_writemask(struct blit_state *ctx, struct pipe_surface *src, + struct pipe_sampler_view *src_sampler_view, int srcX0, int srcY0, int srcX1, int srcY1, struct pipe_surface *dst, @@ -73,7 +75,7 @@ util_blit_pixels_writemask(struct blit_state *ctx, extern void util_blit_pixels_tex(struct blit_state *ctx, - struct pipe_texture *tex, + struct pipe_sampler_view *src_sampler_view, int srcX0, int srcY0, int srcX1, int srcY1, struct pipe_surface *dst, diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 0ba09d33bf..104cbf7f6c 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -45,6 +45,7 @@ #include "util/u_draw_quad.h" #include "util/u_pack_color.h" #include "util/u_rect.h" +#include "util/u_sampler.h" #include "util/u_simple_shaders.h" #include "util/u_texture.h" @@ -55,7 +56,7 @@ struct blitter_context_priv struct blitter_context blitter; struct pipe_context *pipe; /**< pipe context */ - struct pipe_buffer *vbuf; /**< quad */ + struct pipe_resource *vbuf; /**< quad */ float vertices[4][2][4]; /**< {pos, color} or {pos, texcoord} */ @@ -88,12 +89,16 @@ struct blitter_context_priv void *dsa_write_depth_keep_stencil; void *dsa_keep_depth_stencil; + void *velem_state; + /* Sampler state for clamping to a miplevel. */ void *sampler_state[PIPE_MAX_TEXTURE_LEVELS]; /* Rasterizer state. */ void *rs_state; + struct pipe_sampler_view *sampler_view; + /* Viewport state. */ struct pipe_viewport_state viewport; @@ -104,10 +109,11 @@ struct blitter_context_priv struct blitter_context *util_blitter_create(struct pipe_context *pipe) { struct blitter_context_priv *ctx; - struct pipe_blend_state blend = { 0 }; - struct pipe_depth_stencil_alpha_state dsa = { { 0 } }; - struct pipe_rasterizer_state rs_state = { 0 }; + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state dsa; + struct pipe_rasterizer_state rs_state; struct pipe_sampler_state *sampler_state; + struct pipe_vertex_element velem[2]; unsigned i; ctx = CALLOC_STRUCT(blitter_context_priv); @@ -122,17 +128,20 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) ctx->blitter.saved_rs_state = INVALID_PTR; ctx->blitter.saved_fs = INVALID_PTR; ctx->blitter.saved_vs = INVALID_PTR; + ctx->blitter.saved_velem_state = INVALID_PTR; ctx->blitter.saved_fb_state.nr_cbufs = ~0; - ctx->blitter.saved_num_textures = ~0; + ctx->blitter.saved_num_sampler_views = ~0; ctx->blitter.saved_num_sampler_states = ~0; /* blend state objects */ + memset(&blend, 0, sizeof(blend)); ctx->blend_keep_color = pipe->create_blend_state(pipe, &blend); blend.rt[0].colormask = PIPE_MASK_RGBA; ctx->blend_write_color = pipe->create_blend_state(pipe, &blend); /* depth stencil alpha state objects */ + memset(&dsa, 0, sizeof(dsa)); ctx->dsa_keep_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &dsa); @@ -170,6 +179,16 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) rs_state.flatshade = 1; ctx->rs_state = pipe->create_rasterizer_state(pipe, &rs_state); + /* vertex elements state */ + memset(&velem[0], 0, sizeof(velem[0]) * 2); + for (i = 0; i < 2; i++) { + velem[i].src_offset = i * 4 * sizeof(float); + velem[i].instance_divisor = 0; + velem[i].vertex_buffer_index = 0; + velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + } + ctx->velem_state = pipe->create_vertex_elements_state(pipe, 2, &velem[0]); + /* fragment shaders are created on-demand */ /* vertex shaders */ @@ -196,8 +215,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) /* create the vertex buffer */ ctx->vbuf = pipe_buffer_create(ctx->pipe->screen, - 32, - PIPE_BUFFER_USAGE_VERTEX, + PIPE_BIND_VERTEX_BUFFER, sizeof(ctx->vertices)); return &ctx->blitter; @@ -219,6 +237,7 @@ void util_blitter_destroy(struct blitter_context *blitter) pipe->delete_rasterizer_state(pipe, ctx->rs_state); pipe->delete_vs_state(pipe, ctx->vs_col); pipe->delete_vs_state(pipe, ctx->vs_tex); + pipe->delete_vertex_elements_state(pipe, ctx->velem_state); for (i = 0; i < PIPE_MAX_TEXTURE_TYPES; i++) { if (ctx->fs_texfetch_col[i]) @@ -235,7 +254,11 @@ void util_blitter_destroy(struct blitter_context *blitter) if (ctx->sampler_state[i]) pipe->delete_sampler_state(pipe, ctx->sampler_state[i]); - pipe_buffer_reference(&ctx->vbuf, NULL); + if (ctx->sampler_view) { + pipe_sampler_view_reference(&ctx->sampler_view, NULL); + } + + pipe_resource_reference(&ctx->vbuf, NULL); FREE(ctx); } @@ -246,7 +269,8 @@ static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx) ctx->blitter.saved_dsa_state != INVALID_PTR && ctx->blitter.saved_rs_state != INVALID_PTR && ctx->blitter.saved_fs != INVALID_PTR && - ctx->blitter.saved_vs != INVALID_PTR); + ctx->blitter.saved_vs != INVALID_PTR && + ctx->blitter.saved_velem_state != INVALID_PTR); } static void blitter_restore_CSOs(struct blitter_context_priv *ctx) @@ -259,12 +283,14 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx) pipe->bind_rasterizer_state(pipe, ctx->blitter.saved_rs_state); pipe->bind_fs_state(pipe, ctx->blitter.saved_fs); pipe->bind_vs_state(pipe, ctx->blitter.saved_vs); + pipe->bind_vertex_elements_state(pipe, ctx->blitter.saved_velem_state); ctx->blitter.saved_blend_state = INVALID_PTR; ctx->blitter.saved_dsa_state = INVALID_PTR; ctx->blitter.saved_rs_state = INVALID_PTR; ctx->blitter.saved_fs = INVALID_PTR; ctx->blitter.saved_vs = INVALID_PTR; + ctx->blitter.saved_velem_state = INVALID_PTR; pipe->set_stencil_ref(pipe, &ctx->blitter.saved_stencil_ref); @@ -285,11 +311,11 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx) ctx->blitter.saved_num_sampler_states = ~0; } - if (ctx->blitter.saved_num_textures != ~0) { - pipe->set_fragment_sampler_textures(pipe, - ctx->blitter.saved_num_textures, - ctx->blitter.saved_textures); - ctx->blitter.saved_num_textures = ~0; + if (ctx->blitter.saved_num_sampler_views != ~0) { + pipe->set_fragment_sampler_views(pipe, + ctx->blitter.saved_num_sampler_views, + ctx->blitter.saved_sampler_views); + ctx->blitter.saved_num_sampler_views = ~0; } } @@ -424,7 +450,7 @@ static void blitter_draw_quad(struct blitter_context_priv *ctx) struct pipe_context *pipe = ctx->pipe; /* write vertices and draw them */ - pipe_buffer_write(pipe->screen, ctx->vbuf, + pipe_buffer_write(pipe, ctx->vbuf, 0, sizeof(ctx->vertices), ctx->vertices); util_draw_vertex_buffer(pipe, ctx->vbuf, 0, PIPE_PRIM_TRIANGLE_FAN, @@ -569,6 +595,7 @@ void util_blitter_clear(struct blitter_context *blitter, pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); pipe->bind_rasterizer_state(pipe, ctx->rs_state); + pipe->bind_vertex_elements_state(pipe, ctx->velem_state); pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, num_cbufs)); pipe->bind_vs_state(pipe, ctx->vs_col); @@ -600,9 +627,10 @@ static void util_blitter_do_copy(struct blitter_context *blitter, struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; struct pipe_context *pipe = ctx->pipe; struct pipe_framebuffer_state fb_state; + struct pipe_sampler_view viewTempl, *view; assert(blitter->saved_fb_state.nr_cbufs != ~0); - assert(blitter->saved_num_textures != ~0); + assert(blitter->saved_num_sampler_views != ~0); assert(blitter->saved_num_sampler_states != ~0); assert(src->texture->target < PIPE_MAX_TEXTURE_TYPES); @@ -630,11 +658,24 @@ static void util_blitter_do_copy(struct blitter_context *blitter, fb_state.zsbuf = 0; } + u_sampler_view_default_template(&viewTempl, + src->texture, + src->texture->format); + view = pipe->create_sampler_view(pipe, + src->texture, + &viewTempl); + + if (ctx->sampler_view) { + pipe_sampler_view_reference(&ctx->sampler_view, NULL); + } + ctx->sampler_view = view; + pipe->bind_rasterizer_state(pipe, ctx->rs_state); pipe->bind_vs_state(pipe, ctx->vs_tex); pipe->bind_fragment_sampler_states(pipe, 1, blitter_get_sampler_state(ctx, src->level)); - pipe->set_fragment_sampler_textures(pipe, 1, &src->texture); + pipe->bind_vertex_elements_state(pipe, ctx->velem_state); + pipe->set_fragment_sampler_views(pipe, 1, &view); pipe->set_framebuffer_state(pipe, &fb_state); /* set texture coordinates */ @@ -672,8 +713,8 @@ static void util_blitter_overlap_copy(struct blitter_context *blitter, struct pipe_context *pipe = ctx->pipe; struct pipe_screen *screen = pipe->screen; - struct pipe_texture texTemp; - struct pipe_texture *texture; + struct pipe_resource texTemp; + struct pipe_resource *texture; struct pipe_surface *tex_surf; /* check whether the states are properly saved */ @@ -687,13 +728,13 @@ static void util_blitter_overlap_copy(struct blitter_context *blitter, texTemp.height0 = height; texTemp.depth0 = 1; - texture = screen->texture_create(screen, &texTemp); + texture = screen->resource_create(screen, &texTemp); if (!texture) return; tex_surf = screen->get_tex_surface(screen, texture, 0, 0, 0, - PIPE_BUFFER_USAGE_GPU_READ | - PIPE_BUFFER_USAGE_GPU_WRITE); + PIPE_BIND_BLIT_SOURCE | + PIPE_BIND_BLIT_DESTINATION); /* blit from the src to the temp */ util_blitter_do_copy(blitter, tex_surf, 0, 0, @@ -705,7 +746,7 @@ static void util_blitter_overlap_copy(struct blitter_context *blitter, width, height, FALSE); pipe_surface_reference(&tex_surf, NULL); - pipe_texture_reference(&texture, NULL); + pipe_resource_reference(&texture, NULL); blitter_restore_CSOs(ctx); } @@ -739,8 +780,8 @@ void util_blitter_copy(struct blitter_context *blitter, is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; is_stencil = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 1) != 0; - dst_tex_usage = is_depth || is_stencil ? PIPE_TEXTURE_USAGE_DEPTH_STENCIL : - PIPE_TEXTURE_USAGE_RENDER_TARGET; + dst_tex_usage = is_depth || is_stencil ? PIPE_BIND_DEPTH_STENCIL : + PIPE_BIND_RENDER_TARGET; /* check if we can sample from and render to the surfaces */ /* (assuming copying a stencil buffer is not possible) */ @@ -748,7 +789,7 @@ void util_blitter_copy(struct blitter_context *blitter, !screen->is_format_supported(screen, dst->format, dst->texture->target, dst_tex_usage, 0) || !screen->is_format_supported(screen, src->format, src->texture->target, - PIPE_TEXTURE_USAGE_SAMPLER, 0)) { + PIPE_BIND_SAMPLER_VIEW, 0)) { util_surface_copy(pipe, FALSE, dst, dstx, dsty, src, srcx, srcy, width, height); return; @@ -785,7 +826,7 @@ void util_blitter_fill(struct blitter_context *blitter, /* check if we can render to the surface */ if (util_format_is_depth_or_stencil(dst->format) || /* unlikely, but you never know */ !screen->is_format_supported(screen, dst->format, dst->texture->target, - PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) { + PIPE_BIND_RENDER_TARGET, 0)) { util_surface_fill(pipe, dst, dstx, dsty, width, height, value); return; } @@ -807,6 +848,7 @@ void util_blitter_fill(struct blitter_context *blitter, pipe->bind_rasterizer_state(pipe, ctx->rs_state); pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 1)); pipe->bind_vs_state(pipe, ctx->vs_col); + pipe->bind_vertex_elements_state(pipe, ctx->velem_state); /* set a framebuffer state */ fb_state.width = dst->width; diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index 92008fce99..2ad7201a29 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -43,6 +43,7 @@ struct blitter_context /* Private members, really. */ void *saved_blend_state; /**< blend state */ void *saved_dsa_state; /**< depth stencil alpha state */ + void *saved_velem_state; /**< vertex elements state */ void *saved_rs_state; /**< rasterizer state */ void *saved_fs, *saved_vs; /**< fragment shader, vertex shader */ @@ -52,10 +53,10 @@ struct blitter_context struct pipe_clip_state saved_clip; int saved_num_sampler_states; - void *saved_sampler_states[32]; + void *saved_sampler_states[PIPE_MAX_SAMPLERS]; - int saved_num_textures; - struct pipe_texture *saved_textures[32]; /* is 32 enough? */ + int saved_num_sampler_views; + struct pipe_sampler_view *saved_sampler_views[PIPE_MAX_SAMPLERS]; }; /** @@ -173,6 +174,13 @@ void util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter, } static INLINE +void util_blitter_save_vertex_elements(struct blitter_context *blitter, + void *state) +{ + blitter->saved_velem_state = state; +} + +static INLINE void util_blitter_save_stencil_ref(struct blitter_context *blitter, const struct pipe_stencil_ref *state) { @@ -234,17 +242,17 @@ void util_blitter_save_fragment_sampler_states( num_sampler_states * sizeof(void *)); } -static INLINE -void util_blitter_save_fragment_sampler_textures( - struct blitter_context *blitter, - int num_textures, - struct pipe_texture **textures) +static INLINE void +util_blitter_save_fragment_sampler_views(struct blitter_context *blitter, + int num_views, + struct pipe_sampler_view **views) { - assert(num_textures <= Elements(blitter->saved_textures)); + assert(num_views <= Elements(blitter->saved_sampler_views)); - blitter->saved_num_textures = num_textures; - memcpy(blitter->saved_textures, textures, - num_textures * sizeof(struct pipe_texture *)); + blitter->saved_num_sampler_views = num_views; + memcpy(blitter->saved_sampler_views, + views, + num_views * sizeof(struct pipe_sampler_view *)); } #ifdef __cplusplus diff --git a/src/gallium/auxiliary/util/u_box.h b/src/gallium/auxiliary/util/u_box.h new file mode 100644 index 0000000000..919967b55a --- /dev/null +++ b/src/gallium/auxiliary/util/u_box.h @@ -0,0 +1,73 @@ +#ifndef UTIL_BOX_INLINES_H +#define UTIL_BOX_INLINES_H + +#include "pipe/p_state.h" + +static INLINE +void u_box_1d( unsigned x, + unsigned w, + struct pipe_box *box ) +{ + box->x = x; + box->y = 0; + box->z = 0; + box->width = w; + box->height = 1; + box->depth = 1; +} + +static INLINE +void u_box_2d( unsigned x, + unsigned y, + unsigned w, + unsigned h, + struct pipe_box *box ) +{ + box->x = x; + box->y = y; + box->z = 0; + box->width = w; + box->height = h; + box->depth = 1; +} + +static INLINE +void u_box_origin_2d( unsigned w, + unsigned h, + struct pipe_box *box ) +{ + box->x = 0; + box->y = 0; + box->z = 0; + box->width = w; + box->height = h; + box->depth = 1; +} + +static INLINE +void u_box_2d_zslice( unsigned x, + unsigned y, + unsigned z, + unsigned w, + unsigned h, + struct pipe_box *box ) +{ + box->x = x; + box->y = y; + box->z = z; + box->width = w; + box->height = h; + box->depth = 1; +} + +static INLINE +struct pipe_subresource u_subresource( unsigned face, + unsigned level ) +{ + struct pipe_subresource subresource; + subresource.face = face; + subresource.level = level; + return subresource; +} + +#endif diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 94be682c4b..dd044973f9 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -421,45 +421,51 @@ void debug_dump_image(const char *prefix, #endif } -void debug_dump_surface(const char *prefix, +void debug_dump_surface(struct pipe_context *pipe, + const char *prefix, struct pipe_surface *surface) { - struct pipe_texture *texture; - struct pipe_screen *screen; + struct pipe_resource *texture; struct pipe_transfer *transfer; void *data; if (!surface) return; + /* XXX: this doesn't necessarily work, as the driver may be using + * temporary storage for the surface which hasn't been propagated + * back into the texture. Need to nail down the semantics of views + * and transfers a bit better before we can say if extra work needs + * to be done here: + */ texture = surface->texture; - screen = texture->screen; - transfer = screen->get_tex_transfer(screen, texture, surface->face, - surface->level, surface->zslice, - PIPE_TRANSFER_READ, 0, 0, surface->width, - surface->height); + transfer = pipe_get_transfer(pipe, texture, surface->face, + surface->level, surface->zslice, + PIPE_TRANSFER_READ, 0, 0, surface->width, + surface->height); - data = screen->transfer_map(screen, transfer); + data = pipe->transfer_map(pipe, transfer); if(!data) goto error; debug_dump_image(prefix, texture->format, util_format_get_blocksize(texture->format), - util_format_get_nblocksx(texture->format, transfer->width), - util_format_get_nblocksy(texture->format, transfer->height), + util_format_get_nblocksx(texture->format, surface->width), + util_format_get_nblocksy(texture->format, surface->height), transfer->stride, data); - screen->transfer_unmap(screen, transfer); + pipe->transfer_unmap(pipe, transfer); error: - screen->tex_transfer_destroy(transfer); + pipe->transfer_destroy(pipe, transfer); } -void debug_dump_texture(const char *prefix, - struct pipe_texture *texture) +void debug_dump_texture(struct pipe_context *pipe, + const char *prefix, + struct pipe_resource *texture) { struct pipe_surface *surface; struct pipe_screen *screen; @@ -471,9 +477,9 @@ void debug_dump_texture(const char *prefix, /* XXX for now, just dump image for face=0, level=0 */ surface = screen->get_tex_surface(screen, texture, 0, 0, 0, - PIPE_TEXTURE_USAGE_SAMPLER); + PIPE_BIND_SAMPLER_VIEW); if (surface) { - debug_dump_surface(prefix, surface); + debug_dump_surface(pipe, prefix, surface); screen->tex_surface_destroy(surface); } } @@ -511,27 +517,28 @@ struct bmp_rgb_quad { }; void -debug_dump_surface_bmp(const char *filename, +debug_dump_surface_bmp(struct pipe_context *pipe, + const char *filename, struct pipe_surface *surface) { #ifndef PIPE_SUBSYSTEM_WINDOWS_MINIPORT struct pipe_transfer *transfer; - struct pipe_texture *texture = surface->texture; - struct pipe_screen *screen = texture->screen; + struct pipe_resource *texture = surface->texture; - transfer = screen->get_tex_transfer(screen, texture, surface->face, - surface->level, surface->zslice, - PIPE_TRANSFER_READ, 0, 0, surface->width, - surface->height); + transfer = pipe_get_transfer(pipe, texture, surface->face, + surface->level, surface->zslice, + PIPE_TRANSFER_READ, 0, 0, surface->width, + surface->height); - debug_dump_transfer_bmp(filename, transfer); + debug_dump_transfer_bmp(pipe, filename, transfer); - screen->tex_transfer_destroy(transfer); + pipe->transfer_destroy(pipe, transfer); #endif } void -debug_dump_transfer_bmp(const char *filename, +debug_dump_transfer_bmp(struct pipe_context *pipe, + const char *filename, struct pipe_transfer *transfer) { #ifndef PIPE_SUBSYSTEM_WINDOWS_MINIPORT @@ -540,17 +547,20 @@ debug_dump_transfer_bmp(const char *filename, if (!transfer) goto error1; - rgba = MALLOC(transfer->width*transfer->height*4*sizeof(float)); + rgba = MALLOC(transfer->box.width * + transfer->box.height * + transfer->box.depth * + 4*sizeof(float)); if(!rgba) goto error1; - pipe_get_tile_rgba(transfer, 0, 0, - transfer->width, transfer->height, + pipe_get_tile_rgba(pipe, transfer, 0, 0, + transfer->box.width, transfer->box.height, rgba); debug_dump_float_rgba_bmp(filename, - transfer->width, transfer->height, - rgba, transfer->width); + transfer->box.width, transfer->box.height, + rgba, transfer->box.width); FREE(rgba); error1: diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h index 0f4768f344..b6d0b508e3 100644 --- a/src/gallium/auxiliary/util/u_debug.h +++ b/src/gallium/auxiliary/util/u_debug.h @@ -312,30 +312,35 @@ debug_memory_end(unsigned long beginning); #ifdef DEBUG +struct pipe_context; struct pipe_surface; struct pipe_transfer; -struct pipe_texture; +struct pipe_resource; void debug_dump_image(const char *prefix, unsigned format, unsigned cpp, unsigned width, unsigned height, unsigned stride, const void *data); -void debug_dump_surface(const char *prefix, +void debug_dump_surface(struct pipe_context *pipe, + const char *prefix, struct pipe_surface *surface); -void debug_dump_texture(const char *prefix, - struct pipe_texture *texture); -void debug_dump_surface_bmp(const char *filename, +void debug_dump_texture(struct pipe_context *pipe, + const char *prefix, + struct pipe_resource *texture); +void debug_dump_surface_bmp(struct pipe_context *pipe, + const char *filename, struct pipe_surface *surface); -void debug_dump_transfer_bmp(const char *filename, +void debug_dump_transfer_bmp(struct pipe_context *pipe, + const char *filename, struct pipe_transfer *transfer); void debug_dump_float_rgba_bmp(const char *filename, unsigned width, unsigned height, float *rgba, unsigned stride); #else #define debug_dump_image(prefix, format, cpp, width, height, stride, data) ((void)0) -#define debug_dump_surface(prefix, surface) ((void)0) -#define debug_dump_surface_bmp(filename, surface) ((void)0) +#define debug_dump_surface(pipe, prefix, surface) ((void)0) +#define debug_dump_surface_bmp(pipe, filename, surface) ((void)0) #define debug_dump_transfer_bmp(filename, transfer) ((void)0) #define debug_dump_float_rgba_bmp(filename, width, height, rgba, stride) ((void)0) #endif diff --git a/src/gallium/auxiliary/util/u_dl.c b/src/gallium/auxiliary/util/u_dl.c index 37ed789f95..220860ebf4 100644 --- a/src/gallium/auxiliary/util/u_dl.c +++ b/src/gallium/auxiliary/util/u_dl.c @@ -78,3 +78,16 @@ util_dl_close(struct util_dl_library *library) (void)library; #endif } + + +const char * +util_dl_error(void) +{ +#if defined(PIPE_OS_UNIX) + return dlerror(); +#elif defined(PIPE_OS_WINDOWS) + return "unknown error"; +#else + return "unknown error"; +#endif +} diff --git a/src/gallium/auxiliary/util/u_dl.h b/src/gallium/auxiliary/util/u_dl.h index 85296c58af..2853b447c6 100644 --- a/src/gallium/auxiliary/util/u_dl.h +++ b/src/gallium/auxiliary/util/u_dl.h @@ -70,4 +70,11 @@ void util_dl_close(struct util_dl_library *library); +/** + * Return most recent error message. + */ +const char * +util_dl_error(void); + + #endif /* U_DL_H_ */ diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index 14506e8451..b37b48b5ae 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -30,6 +30,7 @@ #include "pipe/p_defines.h" #include "util/u_inlines.h" #include "util/u_draw_quad.h" +#include "util/u_memory.h" /** @@ -38,15 +39,13 @@ */ void util_draw_vertex_buffer(struct pipe_context *pipe, - struct pipe_buffer *vbuf, + struct pipe_resource *vbuf, uint offset, uint prim_type, uint num_verts, uint num_attribs) { struct pipe_vertex_buffer vbuffer; - struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; - uint i; assert(num_attribs <= PIPE_MAX_ATTRIBS); @@ -58,15 +57,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe, vbuffer.max_index = num_verts - 1; pipe->set_vertex_buffers(pipe, 1, &vbuffer); - /* tell pipe about the vertex attributes */ - for (i = 0; i < num_attribs; i++) { - velements[i].src_offset = i * 4 * sizeof(float); - velements[i].instance_divisor = 0; - velements[i].vertex_buffer_index = 0; - velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - velements[i].nr_components = 4; - } - pipe->set_vertex_elements(pipe, num_attribs, velements); + /* note: vertex elements already set by caller */ /* draw */ pipe->draw_arrays(pipe, prim_type, 0, num_verts); @@ -76,60 +67,63 @@ util_draw_vertex_buffer(struct pipe_context *pipe, /** * Draw screen-aligned textured quad. - * Note: this function allocs/destroys a vertex buffer and isn't especially - * efficient. + * Note: this isn't especially efficient. */ void util_draw_texquad(struct pipe_context *pipe, float x0, float y0, float x1, float y1, float z) { - struct pipe_buffer *vbuf; - uint numAttribs = 2, vertexBytes, i, j; - - vertexBytes = 4 * (4 * numAttribs * sizeof(float)); - - /* XXX create one-time */ - vbuf = pipe_buffer_create(pipe->screen, 32, - PIPE_BUFFER_USAGE_VERTEX, vertexBytes); - if (vbuf) { - float *v = (float *) pipe_buffer_map(pipe->screen, vbuf, - PIPE_BUFFER_USAGE_CPU_WRITE); - if (v) { - /* - * Load vertex buffer - */ - for (i = j = 0; i < 4; i++) { - v[j + 2] = z; /* z */ - v[j + 3] = 1.0; /* w */ - v[j + 6] = 0.0; /* r */ - v[j + 7] = 1.0; /* q */ - j += 8; - } - - v[0] = x0; - v[1] = y0; - v[4] = 0.0; /*s*/ - v[5] = 0.0; /*t*/ - - v[8] = x1; - v[9] = y0; - v[12] = 1.0; - v[13] = 0.0; - - v[16] = x1; - v[17] = y1; - v[20] = 1.0; - v[21] = 1.0; - - v[24] = x0; - v[25] = y1; - v[28] = 0.0; - v[29] = 1.0; - - pipe_buffer_unmap(pipe->screen, vbuf); - util_draw_vertex_buffer(pipe, vbuf, 0, PIPE_PRIM_TRIANGLE_FAN, 4, 2); - } - - pipe_buffer_reference(&vbuf, NULL); + uint numAttribs = 2, i, j; + uint vertexBytes = 4 * (4 * numAttribs * sizeof(float)); + struct pipe_resource *vbuf = NULL; + uint *v = NULL; + + v = MALLOC(vertexBytes); + if (v == NULL) + goto out; + + /* + * Load vertex buffer + */ + for (i = j = 0; i < 4; i++) { + v[j + 2] = z; /* z */ + v[j + 3] = 1.0; /* w */ + v[j + 6] = 0.0; /* r */ + v[j + 7] = 1.0; /* q */ + j += 8; } + + v[0] = x0; + v[1] = y0; + v[4] = 0.0; /*s*/ + v[5] = 0.0; /*t*/ + + v[8] = x1; + v[9] = y0; + v[12] = 1.0; + v[13] = 0.0; + + v[16] = x1; + v[17] = y1; + v[20] = 1.0; + v[21] = 1.0; + + v[24] = x0; + v[25] = y1; + v[28] = 0.0; + v[29] = 1.0; + + vbuf = pipe_user_buffer_create(pipe->screen, v, vertexBytes, + PIPE_BIND_VERTEX_BUFFER); + if (!vbuf) + goto out; + + util_draw_vertex_buffer(pipe, vbuf, 0, PIPE_PRIM_TRIANGLE_FAN, 4, 2); + +out: + if (vbuf) + pipe_resource_reference(&vbuf, NULL); + + if (v) + FREE(v); } diff --git a/src/gallium/auxiliary/util/u_draw_quad.h b/src/gallium/auxiliary/util/u_draw_quad.h index 00d3f5b715..42eb184428 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.h +++ b/src/gallium/auxiliary/util/u_draw_quad.h @@ -33,11 +33,11 @@ extern "C" { #endif -struct pipe_buffer; +struct pipe_resource; extern void util_draw_vertex_buffer(struct pipe_context *pipe, - struct pipe_buffer *vbuf, uint offset, + struct pipe_resource *vbuf, uint offset, uint num_attribs, uint num_verts, uint prim_type); diff --git a/src/gallium/auxiliary/util/u_dump.h b/src/gallium/auxiliary/util/u_dump.h index 379f18ef38..bdc73ac47d 100644 --- a/src/gallium/auxiliary/util/u_dump.h +++ b/src/gallium/auxiliary/util/u_dump.h @@ -92,7 +92,7 @@ util_dump_tex_filter(unsigned value, boolean shortened); void util_dump_template(struct os_stream *stream, - const struct pipe_texture *templat); + const struct pipe_resource *templat); void util_dump_rasterizer_state(struct os_stream *stream, diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c index ae7afd7311..79fd38ef5c 100644 --- a/src/gallium/auxiliary/util/u_dump_state.c +++ b/src/gallium/auxiliary/util/u_dump_state.c @@ -255,14 +255,14 @@ util_dump_enum_func(struct os_stream *stream, unsigned value) void -util_dump_template(struct os_stream *stream, const struct pipe_texture *templat) +util_dump_template(struct os_stream *stream, const struct pipe_resource *templat) { if(!templat) { util_dump_null(stream); return; } - util_dump_struct_begin(stream, "pipe_texture"); + util_dump_struct_begin(stream, "pipe_resource"); util_dump_member(stream, int, templat, target); util_dump_member(stream, format, templat, format); @@ -280,7 +280,9 @@ util_dump_template(struct os_stream *stream, const struct pipe_texture *templat) util_dump_member_end(stream); util_dump_member(stream, uint, templat, last_level); - util_dump_member(stream, uint, templat, tex_usage); + util_dump_member(stream, uint, templat, _usage); + util_dump_member(stream, uint, templat, bind); + util_dump_member(stream, uint, templat, flags); util_dump_struct_end(stream); } @@ -653,16 +655,13 @@ util_dump_transfer(struct os_stream *stream, const struct pipe_transfer *state) util_dump_struct_begin(stream, "pipe_transfer"); - util_dump_member(stream, uint, state, width); - util_dump_member(stream, uint, state, height); + util_dump_member(stream, ptr, state, resource); +// util_dump_member(stream, uint, state, box); util_dump_member(stream, uint, state, stride); - util_dump_member(stream, uint, state, usage); + util_dump_member(stream, uint, state, slice_stride); - util_dump_member(stream, ptr, state, texture); - util_dump_member(stream, uint, state, face); - util_dump_member(stream, uint, state, level); - util_dump_member(stream, uint, state, zslice); +// util_dump_member(stream, ptr, state, data); util_dump_struct_end(stream); } @@ -700,7 +699,6 @@ util_dump_vertex_element(struct os_stream *stream, const struct pipe_vertex_elem util_dump_member(stream, uint, state, src_offset); util_dump_member(stream, uint, state, vertex_buffer_index); - util_dump_member(stream, uint, state, nr_components); util_dump_member(stream, format, state, src_format); diff --git a/src/gallium/auxiliary/util/u_dynarray.h b/src/gallium/auxiliary/util/u_dynarray.h new file mode 100644 index 0000000000..9d1c1713a7 --- /dev/null +++ b/src/gallium/auxiliary/util/u_dynarray.h @@ -0,0 +1,111 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_DYNARRAY_H +#define U_DYNARRAY_H + +#include "pipe/p_compiler.h" +#include "util/u_memory.h" + +/* A zero-initialized version of this is guaranteed to represent an + * empty array. + * + * Also, size <= capacity and data != 0 if and only if capacity != 0 + * capacity will always be the allocation size of data + */ +struct util_dynarray +{ + void *data; + unsigned size; + unsigned capacity; +}; + +static INLINE void +util_dynarray_init(struct util_dynarray *buf) +{ + memset(buf, 0, sizeof(*buf)); +} + +static INLINE void +util_dynarray_fini(struct util_dynarray *buf) +{ + if(buf->data) + { + FREE(buf->data); + util_dynarray_init(buf); + } +} + +/* use util_dynarray_trim to reduce the allocated storage */ +static INLINE void * +util_dynarray_resize(struct util_dynarray *buf, unsigned newsize) +{ + char *p; + if(newsize > buf->capacity) + { + unsigned newcap = buf->capacity << 1; + if(newsize > newcap) + newcap = newsize; + buf->data = REALLOC(buf->data, buf->capacity, newcap); + buf->capacity = newcap; + } + + p = (char *)buf->data + buf->size; + buf->size = newsize; + return p; +} + +static INLINE void * +util_dynarray_grow(struct util_dynarray *buf, int diff) +{ + return util_dynarray_resize(buf, buf->size + diff); +} + +static INLINE void +util_dynarray_trim(struct util_dynarray *buf) +{ + if (buf->size != buf->capacity) { + if (buf->size) { + buf->data = REALLOC(buf->data, buf->capacity, buf->size); + buf->capacity = buf->size; + } + else { + FREE(buf->data); + buf->data = 0; + buf->capacity = 0; + } + } +} + +#define util_dynarray_append(buf, type, v) do {type __v = (v); memcpy(util_dynarray_grow((buf), sizeof(type)), &__v, sizeof(type));} while(0) +#define util_dynarray_top_ptr(buf, type) (type*)((char*)(buf)->data + (buf)->size - sizeof(type)) +#define util_dynarray_top(buf, type) *util_dynarray_top_ptr(buf, type) +#define util_dynarray_pop_ptr(buf, type) (type*)((char*)(buf)->data + ((buf)->size -= sizeof(type))) +#define util_dynarray_pop(buf, type) *util_dynarray_pop_ptr(buf, type) +#define util_dynarray_contains(buf, type) ((buf)->size >= sizeof(type)) + +#endif /* U_DYNARRAY_H */ + diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c new file mode 100644 index 0000000000..c50c807eb8 --- /dev/null +++ b/src/gallium/auxiliary/util/u_format.c @@ -0,0 +1,286 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Pixel format accessor functions. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#include "u_math.h" +#include "u_memory.h" +#include "u_rect.h" +#include "u_format.h" + + +void +util_format_read_4f(enum pipe_format format, + float *dst, unsigned dst_stride, + const void *src, unsigned src_stride, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + const struct util_format_description *format_desc; + const uint8_t *src_row; + float *dst_row; + + format_desc = util_format_description(format); + + assert(x % format_desc->block.width == 0); + assert(y % format_desc->block.height == 0); + + src_row = (const uint8_t *)src + y*src_stride + x*(format_desc->block.bits/8); + dst_row = dst; + + format_desc->unpack_rgba_float(dst_row, dst_stride, src_row, src_stride, w, h); +} + + +void +util_format_write_4f(enum pipe_format format, + const float *src, unsigned src_stride, + void *dst, unsigned dst_stride, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + const struct util_format_description *format_desc; + uint8_t *dst_row; + const float *src_row; + + format_desc = util_format_description(format); + + assert(x % format_desc->block.width == 0); + assert(y % format_desc->block.height == 0); + + dst_row = (uint8_t *)dst + y*dst_stride + x*(format_desc->block.bits/8); + src_row = src; + + format_desc->pack_rgba_float(dst_row, dst_stride, src_row, src_stride, w, h); +} + + +void +util_format_read_4ub(enum pipe_format format, uint8_t *dst, unsigned dst_stride, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h) +{ + const struct util_format_description *format_desc; + const uint8_t *src_row; + uint8_t *dst_row; + + format_desc = util_format_description(format); + + assert(x % format_desc->block.width == 0); + assert(y % format_desc->block.height == 0); + + src_row = (const uint8_t *)src + y*src_stride + x*(format_desc->block.bits/8); + dst_row = dst; + + format_desc->unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, w, h); +} + + +void +util_format_write_4ub(enum pipe_format format, const uint8_t *src, unsigned src_stride, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h) +{ + const struct util_format_description *format_desc; + uint8_t *dst_row; + const uint8_t *src_row; + + format_desc = util_format_description(format); + + assert(x % format_desc->block.width == 0); + assert(y % format_desc->block.height == 0); + + dst_row = (uint8_t *)dst + y*dst_stride + x*(format_desc->block.bits/8); + src_row = src; + + format_desc->pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, w, h); +} + + +static INLINE boolean +util_format_fits_8unorm(const struct util_format_description *format_desc) +{ + unsigned chan; + + switch (format_desc->layout) { + + case UTIL_FORMAT_LAYOUT_S3TC: + case UTIL_FORMAT_LAYOUT_RGTC: + /* + * These are straight forward. + */ + + return TRUE; + + case UTIL_FORMAT_LAYOUT_PLAIN: + /* + * For these we can find a generic rule. + */ + + for (chan = 0; chan < format_desc->nr_channels; ++chan) { + switch (format_desc->channel[chan].type) { + case UTIL_FORMAT_TYPE_VOID: + break; + case UTIL_FORMAT_TYPE_UNSIGNED: + if (!format_desc->channel[chan].normalized || + format_desc->channel[chan].size > 8) { + return FALSE; + } + break; + default: + return FALSE; + } + } + return TRUE; + + default: + /* + * Handle all others on a case by case basis. + */ + + switch (format_desc->format) { + case PIPE_FORMAT_R1_UNORM: + case PIPE_FORMAT_UYVY: + case PIPE_FORMAT_YUYV: + case PIPE_FORMAT_R8G8_B8G8_UNORM: + case PIPE_FORMAT_G8R8_G8B8_UNORM: + return TRUE; + + default: + return FALSE; + } + } +} + + +void +util_format_translate(enum pipe_format dst_format, + void *dst, unsigned dst_stride, + unsigned dst_x, unsigned dst_y, + enum pipe_format src_format, + const void *src, unsigned src_stride, + unsigned src_x, unsigned src_y, + unsigned width, unsigned height) +{ + const struct util_format_description *dst_format_desc; + const struct util_format_description *src_format_desc; + uint8_t *dst_row; + const uint8_t *src_row; + unsigned y_step; + unsigned dst_step; + unsigned src_step; + + if (dst_format == src_format) { + /* + * Trivial case. + */ + + util_copy_rect(dst, dst_format, dst_stride, dst_x, dst_y, + width, height, src, (int)src_stride, + src_x, src_y); + return; + } + + dst_format_desc = util_format_description(dst_format); + src_format_desc = util_format_description(src_format); + + assert(dst_x % dst_format_desc->block.width == 0); + assert(dst_y % dst_format_desc->block.height == 0); + assert(src_x % src_format_desc->block.width == 0); + assert(src_y % src_format_desc->block.height == 0); + + dst_row = (uint8_t *)dst + dst_y*dst_stride + dst_x*(dst_format_desc->block.bits/8); + src_row = (const uint8_t *)src + src_y*src_stride + src_x*(src_format_desc->block.bits/8); + + /* + * This works because all pixel formats have pixel blocks with power of two + * sizes. + */ + + y_step = MAX2(dst_format_desc->block.height, src_format_desc->block.height); + assert(y_step % dst_format_desc->block.height == 0); + assert(y_step % src_format_desc->block.height == 0); + + dst_step = y_step / dst_format_desc->block.height * dst_stride; + src_step = y_step / src_format_desc->block.height * src_stride; + + /* + * TODO: double formats will loose precision + * TODO: Add a special case for formats that are mere swizzles of each other + */ + + if (util_format_fits_8unorm(src_format_desc) || + util_format_fits_8unorm(dst_format_desc)) { + unsigned tmp_stride; + uint8_t *tmp_row; + + tmp_stride = width * 4 * sizeof *tmp_row; + tmp_row = MALLOC(y_step * tmp_stride); + if (!tmp_row) + return; + + while (height >= y_step) { + src_format_desc->unpack_rgba_8unorm(tmp_row, tmp_stride, src_row, src_stride, width, y_step); + dst_format_desc->pack_rgba_8unorm(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step); + + dst_row += dst_step; + src_row += src_step; + height -= y_step; + } + + if (height) { + src_format_desc->unpack_rgba_8unorm(tmp_row, tmp_stride, src_row, src_stride, width, height); + dst_format_desc->pack_rgba_8unorm(dst_row, dst_stride, tmp_row, tmp_stride, width, height); + } + + FREE(tmp_row); + } + else { + unsigned tmp_stride; + float *tmp_row; + + tmp_stride = width * 4 * sizeof *tmp_row; + tmp_row = MALLOC(y_step * tmp_stride); + if (!tmp_row) + return; + + while (height >= y_step) { + src_format_desc->unpack_rgba_float(tmp_row, tmp_stride, src_row, src_stride, width, y_step); + dst_format_desc->pack_rgba_float(dst_row, dst_stride, tmp_row, tmp_stride, width, y_step); + + dst_row += dst_step; + src_row += src_step; + height -= y_step; + } + + if (height) { + src_format_desc->unpack_rgba_float(tmp_row, tmp_stride, src_row, src_stride, width, height); + dst_format_desc->pack_rgba_float(dst_row, dst_stride, tmp_row, tmp_stride, width, height); + } + + FREE(tmp_row); + } +} diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 96a0fa6550..016e73c4a1 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -47,31 +47,41 @@ # - color space: rgb, yub, sz # # See also: -# - http://msdn.microsoft.com/en-us/library/ee416489.aspx (D3D9) -# - http://msdn.microsoft.com/en-us/library/ee415668.aspx (D3D9 -> D3D10) -# - http://msdn.microsoft.com/en-us/library/ee418116.aspx (D3D10) +# - http://msdn.microsoft.com/en-us/library/bb172558.aspx (D3D9) +# - http://msdn.microsoft.com/en-us/library/bb205073.aspx#mapping_texture_formats (D3D9 -> D3D10) +# - http://msdn.microsoft.com/en-us/library/bb173059.aspx (D3D10) # # Note that GL doesn't really specify the layout of internal formats. See # OpenGL 2.1 specification, Table 3.16, on the "Correspondence of sized # internal formats to base in- ternal formats, and desired component # resolutions for each sized internal format." +# None +# Described as regular uint_8 bytes, i.e. PIPE_FORMAT_R8_USCALED +PIPE_FORMAT_NONE , plain, 1, 1, u8 , , , , x001, rgb + # Typical rendertarget formats PIPE_FORMAT_B8G8R8A8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , zyxw, rgb -PIPE_FORMAT_B8G8R8X8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , zyx1, rgb +PIPE_FORMAT_B8G8R8X8_UNORM , plain, 1, 1, un8 , un8 , un8 , x8 , zyx1, rgb PIPE_FORMAT_A8R8G8B8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , yzwx, rgb -PIPE_FORMAT_X8R8G8B8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , yzw1, rgb +PIPE_FORMAT_X8R8G8B8_UNORM , plain, 1, 1, x8 , un8 , un8 , un8 , yzw1, rgb PIPE_FORMAT_A8B8G8R8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , wzyx, rgb -PIPE_FORMAT_X8B8G8R8_UNORM , plain, 1, 1, un8 , un8 , un8 , un8 , wzy1, rgb +PIPE_FORMAT_X8B8G8R8_UNORM , plain, 1, 1, x8 , un8 , un8 , un8 , wzy1, rgb +# PIPE_FORMAT_R8G8B8A8_UNORM is below +PIPE_FORMAT_R8G8B8X8_UNORM , plain, 1, 1, un8 , un8 , un8 , x8 , xyz1, rgb +PIPE_FORMAT_B5G5R5X1_UNORM , plain, 1, 1, un5 , un5 , un5 , x1 , zyx1, rgb PIPE_FORMAT_B5G5R5A1_UNORM , plain, 1, 1, un5 , un5 , un5 , un1 , zyxw, rgb PIPE_FORMAT_B4G4R4A4_UNORM , plain, 1, 1, un4 , un4 , un4 , un4 , zyxw, rgb +PIPE_FORMAT_B4G4R4X4_UNORM , plain, 1, 1, un4 , un4 , un4 , x4 , zyx1, rgb PIPE_FORMAT_B5G6R5_UNORM , plain, 1, 1, un5 , un6 , un5 , , zyx1, rgb PIPE_FORMAT_R10G10B10A2_UNORM , plain, 1, 1, un10, un10, un10, un2 , xyzw, rgb +PIPE_FORMAT_B10G10R10A2_UNORM , plain, 1, 1, un10, un10, un10, un2 , zyxw, rgb # Luminance/Intensity/Alpha formats PIPE_FORMAT_L8_UNORM , plain, 1, 1, un8 , , , , xxx1, rgb PIPE_FORMAT_A8_UNORM , plain, 1, 1, un8 , , , , 000x, rgb PIPE_FORMAT_I8_UNORM , plain, 1, 1, un8 , , , , xxxx, rgb +PIPE_FORMAT_L4A4_UNORM , plain, 1, 1, un4 , un4 , , , xxxy, rgb PIPE_FORMAT_L8A8_UNORM , plain, 1, 1, un8 , un8 , , , xxxy, rgb PIPE_FORMAT_L16_UNORM , plain, 1, 1, un16, , , , xxx1, rgb @@ -79,44 +89,65 @@ PIPE_FORMAT_L16_UNORM , plain, 1, 1, un16, , , , xxx1, r PIPE_FORMAT_L8_SRGB , plain, 1, 1, un8 , , , , xxx1, srgb PIPE_FORMAT_L8A8_SRGB , plain, 1, 1, un8 , un8 , , , xxxy, srgb PIPE_FORMAT_R8G8B8_SRGB , plain, 1, 1, un8 , un8 , un8 , , xyz1, srgb +PIPE_FORMAT_R8G8B8A8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , xyzw, srgb PIPE_FORMAT_A8B8G8R8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , wzyx, srgb -PIPE_FORMAT_X8B8G8R8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , wzy1, srgb +PIPE_FORMAT_X8B8G8R8_SRGB , plain, 1, 1, x8 , un8 , un8 , un8 , wzy1, srgb PIPE_FORMAT_B8G8R8A8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , zyxw, srgb -PIPE_FORMAT_B8G8R8X8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , zyx1, srgb +PIPE_FORMAT_B8G8R8X8_SRGB , plain, 1, 1, un8 , un8 , un8 , x8 , zyx1, srgb PIPE_FORMAT_A8R8G8B8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , yzwx, srgb -PIPE_FORMAT_X8R8G8B8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , yzw1, srgb +PIPE_FORMAT_X8R8G8B8_SRGB , plain, 1, 1, x8 , un8 , un8 , un8 , yzw1, srgb # Mixed-sign formats (typically used for bump map textures) PIPE_FORMAT_R8SG8SB8UX8U_NORM , plain, 1, 1, sn8 , sn8 , un8 , x8 , xyz1, rgb +PIPE_FORMAT_R10SG10SB10SA2U_NORM , plain, 1, 1, sn10, sn10, sn10, un2 , xyzw, rgb PIPE_FORMAT_R5SG5SB6U_NORM , plain, 1, 1, sn5 , sn5 , un6 , , xyz1, rgb # Depth-stencil formats -PIPE_FORMAT_S8_UNORM , plain, 1, 1, un8 , , , , _x__, zs -PIPE_FORMAT_Z16_UNORM , plain, 1, 1, un16, , , , x___, zs -PIPE_FORMAT_Z32_UNORM , plain, 1, 1, un32, , , , x___, zs -PIPE_FORMAT_Z32_FLOAT , plain, 1, 1, f32 , , , , x___, zs -PIPE_FORMAT_Z24S8_UNORM , plain, 1, 1, un24, un8 , , , xy__, zs -PIPE_FORMAT_S8Z24_UNORM , plain, 1, 1, un8 , un24, , , yx__, zs -PIPE_FORMAT_Z24X8_UNORM , plain, 1, 1, un24, un8 , , , x___, zs -PIPE_FORMAT_X8Z24_UNORM , plain, 1, 1, un8 , un24, , , y___, zs +PIPE_FORMAT_S8_USCALED , plain, 1, 1, u8 , , , , _x__, zs +PIPE_FORMAT_Z16_UNORM , plain, 1, 1, un16, , , , x___, zs +PIPE_FORMAT_Z32_UNORM , plain, 1, 1, un32, , , , x___, zs +PIPE_FORMAT_Z32_FLOAT , plain, 1, 1, f32 , , , , x___, zs +PIPE_FORMAT_Z24_UNORM_S8_USCALED , plain, 1, 1, un24, u8 , , , xy__, zs +PIPE_FORMAT_S8_USCALED_Z24_UNORM , plain, 1, 1, u8 , un24, , , yx__, zs +PIPE_FORMAT_Z24X8_UNORM , plain, 1, 1, un24, x8 , , , x___, zs +PIPE_FORMAT_X8Z24_UNORM , plain, 1, 1, x8 , un24, , , y___, zs +PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED , plain, 1, 1, f32, u8 , x24 , , xy__, zs # YUV formats # http://www.fourcc.org/yuv.php#UYVY PIPE_FORMAT_UYVY , subsampled, 2, 1, x32 , , , , xyz1, yuv # http://www.fourcc.org/yuv.php#YUYV (a.k.a http://www.fourcc.org/yuv.php#YUY2) -# XXX: u_tile.c's ycbcr_get_tile_rgba actually interprets it as VYUY but the -# intent should be to match D3DFMT_YUY2 PIPE_FORMAT_YUYV , subsampled, 2, 1, x32 , , , , xyz1, yuv +# same subsampling but with rgb channels +PIPE_FORMAT_R8G8_B8G8_UNORM , subsampled, 2, 1, x32 , , , , xyz1, rgb +PIPE_FORMAT_G8R8_G8B8_UNORM , subsampled, 2, 1, x32 , , , , xyz1, rgb + +# some special formats not fitting anywhere else +PIPE_FORMAT_R10G10B10A2_USCALED , plain, 1, 1, u10 , u10 , u10 , u2 , xyzw, rgb +PIPE_FORMAT_R11G11B10_FLOAT , plain, 1, 1, f11 , f11 , f10 , , xyz1, rgb +PIPE_FORMAT_R9G9B9E5_FLOAT , other, 1, 1, x32 , , , , xyz1, rgb +PIPE_FORMAT_R1_UNORM , other, 8, 1, x8 , , , , x001, rgb +# A.k.a. D3DFMT_CxV8U8 +PIPE_FORMAT_R8G8Bx_SNORM , other, 1, 1, sn8 , sn8 , , , xyz1, rgb # Compressed formats -PIPE_FORMAT_DXT1_RGB , compressed, 4, 4, x64 , , , , xyz1, rgb -PIPE_FORMAT_DXT1_RGBA , compressed, 4, 4, x64 , , , , xyzw, rgb -PIPE_FORMAT_DXT3_RGBA , compressed, 4, 4, x128, , , , xyzw, rgb -PIPE_FORMAT_DXT5_RGBA , compressed, 4, 4, x128, , , , xyzw, rgb -PIPE_FORMAT_DXT1_SRGB , compressed, 4, 4, x64 , , , , xyz1, srgb -PIPE_FORMAT_DXT1_SRGBA , compressed, 4, 4, x64 , , , , xyzw, srgb -PIPE_FORMAT_DXT3_SRGBA , compressed, 4, 4, x128, , , , xyzw, srgb -PIPE_FORMAT_DXT5_SRGBA , compressed, 4, 4, x128, , , , xyzw, srgb +# - http://en.wikipedia.org/wiki/S3_Texture_Compression +# - http://www.opengl.org/registry/specs/EXT/texture_compression_s3tc.txt +# - http://www.opengl.org/registry/specs/ARB/texture_compression_rgtc.txt +# - http://msdn.microsoft.com/en-us/library/bb694531.aspx +PIPE_FORMAT_DXT1_RGB , s3tc, 4, 4, x64 , , , , xyz1, rgb +PIPE_FORMAT_DXT1_RGBA , s3tc, 4, 4, x64 , , , , xyzw, rgb +PIPE_FORMAT_DXT3_RGBA , s3tc, 4, 4, x128, , , , xyzw, rgb +PIPE_FORMAT_DXT5_RGBA , s3tc, 4, 4, x128, , , , xyzw, rgb +PIPE_FORMAT_DXT1_SRGB , s3tc, 4, 4, x64 , , , , xyz1, srgb +PIPE_FORMAT_DXT1_SRGBA , s3tc, 4, 4, x64 , , , , xyzw, srgb +PIPE_FORMAT_DXT3_SRGBA , s3tc, 4, 4, x128, , , , xyzw, srgb +PIPE_FORMAT_DXT5_SRGBA , s3tc, 4, 4, x128, , , , xyzw, srgb + +PIPE_FORMAT_RGTC1_UNORM , rgtc, 4, 4, x64, , , , x001, rgb +PIPE_FORMAT_RGTC1_SNORM , rgtc, 4, 4, x64, , , , x001, rgb +PIPE_FORMAT_RGTC2_UNORM , rgtc, 4, 4, x128, , , , xy01, rgb +PIPE_FORMAT_RGTC2_SNORM , rgtc, 4, 4, x128, , , , xy01, rgb # Straightforward D3D10-like formats (also used for # vertex buffer element description) @@ -148,10 +179,10 @@ PIPE_FORMAT_R32_SSCALED , plain, 1, 1, s32 , , , , x001, r PIPE_FORMAT_R32G32_SSCALED , plain, 1, 1, s32 , s32 , , , xy01, rgb PIPE_FORMAT_R32G32B32_SSCALED , plain, 1, 1, s32 , s32 , s32 , , xyz1, rgb PIPE_FORMAT_R32G32B32A32_SSCALED , plain, 1, 1, s32 , s32 , s32 , s32 , xyzw, rgb -PIPE_FORMAT_R32_FIXED , plain, 1, 1, h32 , , , , x001, rgb -PIPE_FORMAT_R32G32_FIXED , plain, 1, 1, h32 , h32 , , , xy01, rgb -PIPE_FORMAT_R32G32B32_FIXED , plain, 1, 1, h32 , h32 , h32 , , xyz1, rgb -PIPE_FORMAT_R32G32B32A32_FIXED , plain, 1, 1, h32 , h32 , h32 , h32 , xyzw, rgb +PIPE_FORMAT_R16_FLOAT , plain, 1, 1, f16 , , , , x001, rgb +PIPE_FORMAT_R16G16_FLOAT , plain, 1, 1, f16 , f16 , , , xy01, rgb +PIPE_FORMAT_R16G16B16_FLOAT , plain, 1, 1, f16 , f16 , f16 , , xyz1, rgb +PIPE_FORMAT_R16G16B16A16_FLOAT , plain, 1, 1, f16 , f16 , f16 , f16 , xyzw, rgb PIPE_FORMAT_R16_UNORM , plain, 1, 1, un16, , , , x001, rgb PIPE_FORMAT_R16G16_UNORM , plain, 1, 1, un16, un16, , , xy01, rgb PIPE_FORMAT_R16G16B16_UNORM , plain, 1, 1, un16, un16, un16, , xyz1, rgb @@ -184,3 +215,18 @@ PIPE_FORMAT_R8_SSCALED , plain, 1, 1, s8 , , , , x001, r PIPE_FORMAT_R8G8_SSCALED , plain, 1, 1, s8 , s8 , , , xy01, rgb PIPE_FORMAT_R8G8B8_SSCALED , plain, 1, 1, s8 , s8 , s8 , , xyz1, rgb PIPE_FORMAT_R8G8B8A8_SSCALED , plain, 1, 1, s8 , s8 , s8 , s8 , xyzw, rgb + +# GL-specific vertex buffer element formats +# A.k.a. GL_FIXED +PIPE_FORMAT_R32_FIXED , plain, 1, 1, h32 , , , , x001, rgb +PIPE_FORMAT_R32G32_FIXED , plain, 1, 1, h32 , h32 , , , xy01, rgb +PIPE_FORMAT_R32G32B32_FIXED , plain, 1, 1, h32 , h32 , h32 , , xyz1, rgb +PIPE_FORMAT_R32G32B32A32_FIXED , plain, 1, 1, h32 , h32 , h32 , h32 , xyzw, rgb + +# D3D9-specific vertex buffer element formats +# See also: +# - http://msdn.microsoft.com/en-us/library/bb172533.aspx +# A.k.a. D3DDECLTYPE_UDEC3 +PIPE_FORMAT_R10G10B10X2_USCALED , plain, 1, 1, u10 , u10 , u10 , x2 , xyz1, rgb +# A.k.a. D3DDECLTYPE_DEC3N +PIPE_FORMAT_R10G10B10X2_SNORM , plain, 1, 1, sn10, sn10, sn10 , x2 , xyz1, rgb diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index e8fa0022b5..5e3dc694be 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -56,15 +56,23 @@ enum util_format_layout { * * This is for formats like YV12 where there is less than one sample per * pixel. - * - * XXX: This could actually b */ UTIL_FORMAT_LAYOUT_SUBSAMPLED = 3, /** - * An unspecified compression algorithm. + * S3 Texture Compression formats. + */ + UTIL_FORMAT_LAYOUT_S3TC = 4, + + /** + * Red-Green Texture Compression formats. + */ + UTIL_FORMAT_LAYOUT_RGTC = 5, + + /** + * Everything else that doesn't fit in any of the above layouts. */ - UTIL_FORMAT_LAYOUT_COMPRESSED = 4 + UTIL_FORMAT_LAYOUT_OTHER = 6 }; @@ -120,9 +128,15 @@ struct util_format_channel_description struct util_format_description { enum pipe_format format; + const char *name; /** + * Short name, striped of the prefix, lower case. + */ + const char *short_name; + + /** * Pixel block dimensions. */ struct util_format_block block; @@ -140,6 +154,15 @@ struct util_format_description unsigned is_array:1; /** + * Whether the pixel format can be described as a bitfield structure. + * + * In particular: + * - pixel depth must be 8, 16, or 32 bits; + * - all channels must be unsigned, signed, or void + */ + unsigned is_bitmask:1; + + /** * Whether channels have mixed types (ignoring UTIL_FORMAT_TYPE_VOID). */ unsigned is_mixed:1; @@ -166,6 +189,117 @@ struct util_format_description * Colorspace transformation. */ enum util_format_colorspace colorspace; + + /** + * Unpack pixel blocks to R8G8B8A8_UNORM. + * + * Only defined for non-depth-stencil formats. + */ + void + (*unpack_rgba_8unorm)(uint8_t *dst, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height); + + /** + * Pack pixel blocks from R8G8B8A8_UNORM. + * + * Only defined for non-depth-stencil formats. + */ + void + (*pack_rgba_8unorm)(uint8_t *dst, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height); + + /** + * Unpack pixel blocks to R32G32B32A32_FLOAT. + * + * Only defined for non-depth-stencil formats. + */ + void + (*unpack_rgba_float)(float *dst, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height); + + /** + * Pack pixel blocks from R32G32B32A32_FLOAT. + * + * Only defined for non-depth-stencil formats. + */ + void + (*pack_rgba_float)(uint8_t *dst, unsigned dst_stride, + const float *src, unsigned src_stride, + unsigned width, unsigned height); + + /** + * Fetch a single pixel (i, j) from a block. + * + * Only defined for non-depth-stencil formats. + */ + void + (*fetch_rgba_float)(float *dst, + const uint8_t *src, + unsigned i, unsigned j); + + /** + * Unpack pixels to Z32_UNORM. + * + * Only defined for depth formats. + */ + void + (*unpack_z_32unorm)(uint32_t *dst, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height); + + /** + * Pack pixels from Z32_FLOAT. + * + * Only defined for depth formats. + */ + void + (*pack_z_32unorm)(uint8_t *dst, unsigned dst_stride, + const uint32_t *src, unsigned src_stride, + unsigned width, unsigned height); + + /** + * Unpack pixels to Z32_FLOAT. + * + * Only defined for depth formats. + */ + void + (*unpack_z_float)(float *dst, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height); + + /** + * Pack pixels from Z32_FLOAT. + * + * Only defined for depth formats. + */ + void + (*pack_z_float)(uint8_t *dst, unsigned dst_stride, + const float *src, unsigned src_stride, + unsigned width, unsigned height); + + /** + * Unpack pixels to S8_USCALED. + * + * Only defined for stencil formats. + */ + void + (*unpack_s_8uscaled)(uint8_t *dst, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height); + + /** + * Pack pixels from S8_USCALED. + * + * Only defined for stencil formats. + */ + void + (*pack_s_8uscaled)(uint8_t *dst, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height); + }; @@ -186,8 +320,8 @@ util_format_name(enum pipe_format format) { const struct util_format_description *desc = util_format_description(format); - assert(format); - if (!format) { + assert(desc); + if (!desc) { return "???"; } @@ -195,16 +329,16 @@ util_format_name(enum pipe_format format) } static INLINE boolean -util_format_is_compressed(enum pipe_format format) +util_format_is_s3tc(enum pipe_format format) { const struct util_format_description *desc = util_format_description(format); - assert(format); - if (!format) { + assert(desc); + if (!desc) { return FALSE; } - return desc->layout == UTIL_FORMAT_LAYOUT_COMPRESSED ? TRUE : FALSE; + return desc->layout == UTIL_FORMAT_LAYOUT_S3TC ? TRUE : FALSE; } static INLINE boolean @@ -212,8 +346,8 @@ util_format_is_depth_or_stencil(enum pipe_format format) { const struct util_format_description *desc = util_format_description(format); - assert(format); - if (!format) { + assert(desc); + if (!desc) { return FALSE; } @@ -225,8 +359,8 @@ util_format_is_depth_and_stencil(enum pipe_format format) { const struct util_format_description *desc = util_format_description(format); - assert(format); - if (!format) { + assert(desc); + if (!desc) { return FALSE; } @@ -238,6 +372,34 @@ util_format_is_depth_and_stencil(enum pipe_format format) desc->swizzle[1] != UTIL_FORMAT_SWIZZLE_NONE) ? TRUE : FALSE; } +/** + * Whether this format is a rgab8 variant. + * + * That is, any format that matches the + * + * PIPE_FORMAT_?8?8?8?8_UNORM + */ +static INLINE boolean +util_format_is_rgba8_variant(const struct util_format_description *desc) +{ + unsigned chan; + + if(desc->block.width != 1 || + desc->block.height != 1 || + desc->block.bits != 32) + return FALSE; + + for(chan = 0; chan < 4; ++chan) { + if(desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED && + desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) + return FALSE; + if(desc->channel[chan].size != 8) + return FALSE; + } + + return TRUE; +} + /** * Return total bits needed for the pixel format per block. @@ -247,8 +409,8 @@ util_format_get_blocksizebits(enum pipe_format format) { const struct util_format_description *desc = util_format_description(format); - assert(format); - if (!format) { + assert(desc); + if (!desc) { return 0; } @@ -273,8 +435,8 @@ util_format_get_blockwidth(enum pipe_format format) { const struct util_format_description *desc = util_format_description(format); - assert(format); - if (!format) { + assert(desc); + if (!desc) { return 1; } @@ -286,8 +448,8 @@ util_format_get_blockheight(enum pipe_format format) { const struct util_format_description *desc = util_format_description(format); - assert(format); - if (!format) { + assert(desc); + if (!desc) { return 1; } @@ -400,6 +562,16 @@ util_format_has_alpha(enum pipe_format format) } } +/** + * Return the number of components stored. + * Formats with block size != 1x1 will always have 1 component (the block). + */ +static INLINE unsigned +util_format_get_nr_components(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + return desc->nr_channels; +} /* * Format access functions. @@ -429,6 +601,19 @@ util_format_write_4ub(enum pipe_format format, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h); +/* + * Generic format conversion; + */ + +void +util_format_translate(enum pipe_format dst_format, + void *dst, unsigned dst_stride, + unsigned dst_x, unsigned dst_y, + enum pipe_format src_format, + const void *src, unsigned src_stride, + unsigned src_x, unsigned src_y, + unsigned width, unsigned height); + #ifdef __cplusplus } // extern "C" { #endif diff --git a/src/gallium/auxiliary/util/u_format_access.py b/src/gallium/auxiliary/util/u_format_access.py deleted file mode 100644 index 00424779d2..0000000000 --- a/src/gallium/auxiliary/util/u_format_access.py +++ /dev/null @@ -1,341 +0,0 @@ -#!/usr/bin/env python - -''' -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Pixel format accessor functions. - * - * @author Jose Fonseca <jfonseca@vmware.com> - */ -''' - - -import math -import sys - -from u_format_pack import * - - -def is_format_supported(format): - '''Determines whether we actually have the plumbing necessary to generate the - to read/write to/from this format.''' - - # FIXME: Ideally we would support any format combination here. - - # XXX: It should be straightforward to support srgb - if format.colorspace not in ('rgb', 'zs'): - return False - - if format.layout != PLAIN: - return False - - for i in range(4): - channel = format.channels[i] - if channel.type not in (VOID, UNSIGNED, FLOAT): - return False - - # We can only read a color from a depth/stencil format if the depth channel is present - if format.colorspace == 'zs' and format.swizzles[0] == SWIZZLE_NONE: - return False - - return True - - -def native_type(format): - '''Get the native appropriate for a format.''' - - if format.layout == PLAIN: - if not format.is_array(): - # For arithmetic pixel formats return the integer type that matches the whole pixel - return 'uint%u_t' % format.block_size() - else: - # For array pixel formats return the integer type that matches the color channel - channel = format.channels[0] - if channel.type == UNSIGNED: - return 'uint%u_t' % channel.size - elif channel.type == SIGNED: - return 'int%u_t' % channel.size - elif channel.type == FLOAT: - if channel.size == 32: - return 'float' - elif channel.size == 64: - return 'double' - else: - assert False - else: - assert False - else: - assert False - - -def generate_srgb_tables(): - print 'static ubyte srgb_to_linear[256] = {' - for i in range(256): - print ' %s,' % (int(math.pow((i / 255.0 + 0.055) / 1.055, 2.4) * 255)) - print '};' - print - print 'static ubyte linear_to_srgb[256] = {' - print ' 0,' - for i in range(1, 256): - print ' %s,' % (int((1.055 * math.pow(i / 255.0, 0.41666) - 0.055) * 255)) - print '};' - print - - -def generate_format_read(format, dst_channel, dst_native_type, dst_suffix): - '''Generate the function to read pixels from a particular format''' - - name = format.short_name() - - src_native_type = native_type(format) - - print 'static void' - print 'util_format_%s_read_%s(%s *dst, unsigned dst_stride, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, dst_suffix, dst_native_type) - print '{' - print ' unsigned x, y;' - print ' const uint8_t *src_row = src + y0*src_stride;' - print ' %s *dst_row = dst;' % dst_native_type - print ' for (y = 0; y < h; ++y) {' - print ' const %s *src_pixel = (const %s *)(src_row + x0*%u);' % (src_native_type, src_native_type, format.stride()) - print ' %s *dst_pixel = dst_row;' %dst_native_type - print ' for (x = 0; x < w; ++x) {' - - names = ['']*4 - if format.colorspace == 'rgb': - for i in range(4): - swizzle = format.swizzles[i] - if swizzle < 4: - names[swizzle] += 'rgba'[i] - elif format.colorspace == 'zs': - swizzle = format.swizzles[0] - if swizzle < 4: - names[swizzle] = 'z' - else: - assert False - else: - assert False - - if format.layout == PLAIN: - if not format.is_array(): - print ' %s pixel = *src_pixel++;' % src_native_type - shift = 0; - for i in range(4): - src_channel = format.channels[i] - width = src_channel.size - if names[i]: - value = 'pixel' - mask = (1 << width) - 1 - if shift: - value = '(%s >> %u)' % (value, shift) - if shift + width < format.block_size(): - value = '(%s & 0x%x)' % (value, mask) - value = conversion_expr(src_channel, dst_channel, dst_native_type, value) - print ' %s %s = %s;' % (dst_native_type, names[i], value) - shift += width - else: - for i in range(4): - src_channel = format.channels[i] - if names[i]: - value = 'src_pixel[%u]' % i - value = conversion_expr(src_channel, dst_channel, dst_native_type, value) - print ' %s %s = %s;' % (dst_native_type, names[i], value) - print ' src_pixel += %u;' % (format.nr_channels()) - else: - assert False - - for i in range(4): - if format.colorspace == 'rgb': - swizzle = format.swizzles[i] - if swizzle < 4: - value = names[swizzle] - elif swizzle == SWIZZLE_0: - value = '0' - elif swizzle == SWIZZLE_1: - value = get_one(dst_channel) - else: - assert False - elif format.colorspace == 'zs': - if i < 3: - value = 'z' - else: - value = get_one(dst_channel) - else: - assert False - print ' *dst_pixel++ = %s; /* %s */' % (value, 'rgba'[i]) - - print ' }' - print ' src_row += src_stride;' - print ' dst_row += dst_stride/sizeof(*dst_row);' - print ' }' - print '}' - print - - -def generate_format_write(format, src_channel, src_native_type, src_suffix): - '''Generate the function to write pixels to a particular format''' - - name = format.short_name() - - dst_native_type = native_type(format) - - print 'static void' - print 'util_format_%s_write_%s(const %s *src, unsigned src_stride, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type) - print '{' - print ' unsigned x, y;' - print ' uint8_t *dst_row = dst + y0*dst_stride;' - print ' const %s *src_row = src;' % src_native_type - print ' for (y = 0; y < h; ++y) {' - print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride()) - print ' const %s *src_pixel = src_row;' %src_native_type - print ' for (x = 0; x < w; ++x) {' - - inv_swizzle = format.inv_swizzles() - - if format.layout == PLAIN: - if not format.is_array(): - print ' %s pixel = 0;' % dst_native_type - shift = 0; - for i in range(4): - dst_channel = format.channels[i] - width = dst_channel.size - if inv_swizzle[i] is not None: - value = 'src_pixel[%u]' % inv_swizzle[i] - value = conversion_expr(src_channel, dst_channel, dst_native_type, value) - if shift: - value = '(%s << %u)' % (value, shift) - print ' pixel |= %s;' % value - shift += width - print ' *dst_pixel++ = pixel;' - else: - for i in range(4): - dst_channel = format.channels[i] - if inv_swizzle[i] is not None: - value = 'src_pixel[%u]' % inv_swizzle[i] - value = conversion_expr(src_channel, dst_channel, dst_native_type, value) - print ' *dst_pixel++ = %s;' % value - else: - assert False - print ' src_pixel += 4;' - - print ' }' - print ' dst_row += dst_stride;' - print ' src_row += src_stride/sizeof(*src_row);' - print ' }' - print '}' - print - - -def generate_read(formats, dst_channel, dst_native_type, dst_suffix): - '''Generate the dispatch function to read pixels from any format''' - - for format in formats: - if is_format_supported(format): - generate_format_read(format, dst_channel, dst_native_type, dst_suffix) - - print 'void' - print 'util_format_read_%s(enum pipe_format format, %s *dst, unsigned dst_stride, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type) - print '{' - print ' void (*func)(%s *dst, unsigned dst_stride, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % dst_native_type - print ' switch(format) {' - for format in formats: - if is_format_supported(format): - print ' case %s:' % format.name - print ' func = &util_format_%s_read_%s;' % (format.short_name(), dst_suffix) - print ' break;' - print ' default:' - print ' debug_printf("unsupported format\\n");' - print ' return;' - print ' }' - print ' func(dst, dst_stride, (const uint8_t *)src, src_stride, x, y, w, h);' - print '}' - print - - -def generate_write(formats, src_channel, src_native_type, src_suffix): - '''Generate the dispatch function to write pixels to any format''' - - for format in formats: - if is_format_supported(format): - generate_format_write(format, src_channel, src_native_type, src_suffix) - - print 'void' - print 'util_format_write_%s(enum pipe_format format, const %s *src, unsigned src_stride, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type) - - print '{' - print ' void (*func)(const %s *src, unsigned src_stride, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % src_native_type - print ' switch(format) {' - for format in formats: - if is_format_supported(format): - print ' case %s:' % format.name - print ' func = &util_format_%s_write_%s;' % (format.short_name(), src_suffix) - print ' break;' - print ' default:' - print ' debug_printf("unsupported format\\n");' - print ' return;' - print ' }' - print ' func(src, src_stride, (uint8_t *)dst, dst_stride, x, y, w, h);' - print '}' - print - - -def main(): - formats = [] - for arg in sys.argv[1:]: - formats.extend(parse(arg)) - - print '/* This file is autogenerated by u_format_access.py from u_format.csv. Do not edit directly. */' - print - # This will print the copyright message on the top of this file - print __doc__.strip() - print - print '#include "pipe/p_compiler.h"' - print '#include "u_math.h"' - print '#include "u_format_pack.h"' - print - - generate_srgb_tables() - - type = Channel(FLOAT, False, 32) - native_type = 'float' - suffix = '4f' - - generate_read(formats, type, native_type, suffix) - generate_write(formats, type, native_type, suffix) - - type = Channel(UNSIGNED, True, 8) - native_type = 'uint8_t' - suffix = '4ub' - - generate_read(formats, type, native_type, suffix) - generate_write(formats, type, native_type, suffix) - - -if __name__ == '__main__': - main() diff --git a/src/gallium/auxiliary/util/u_format_other.c b/src/gallium/auxiliary/util/u_format_other.c new file mode 100644 index 0000000000..723fa8c3bf --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_other.c @@ -0,0 +1,267 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#include "u_math.h" +#include "u_format_other.h" + + +void +util_format_r9g9b9e5_float_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + +} + +void +util_format_r9g9b9e5_float_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + +} + +void +util_format_r9g9b9e5_float_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) +{ + +} + + +void +util_format_r9g9b9e5_float_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + +} + + +void +util_format_r9g9b9e5_float_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + +} + + +void +util_format_r1_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + +} + + +void +util_format_r1_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + +} + + +void +util_format_r1_unorm_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) +{ + +} + + +void +util_format_r1_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + +} + + +void +util_format_r1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ +} + + +/* + * PIPE_FORMAT_R8G8Bx_SNORM + * + * A.k.a. D3DFMT_CxV8U8 + */ + + +void +util_format_r8g8bx_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for(y = 0; y < height; y += 1) { + float *dst = dst_row; + const uint16_t *src = (const uint16_t *)src_row; + for(x = 0; x < width; x += 1) { + uint16_t value = *src++; + int16_t r, g; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + r = ((int16_t)(value << 8)) >> 8; + g = ((int16_t)(value << 0)) >> 8; + + dst[0] = (float)(r * (1.0f/0x7f)); /* r */ + dst[1] = (float)(g * (1.0f/0x7f)); /* g */ + dst[2] = sqrtf(1.0f - dst[0] * dst[0] - dst[1] * dst[1]); /* b */ + dst[3] = 1.0f; /* a */ + dst += 4; + } + src_row += src_stride; + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; y += 1) { + uint8_t *dst = dst_row; + const uint16_t *src = (const uint16_t *)src_row; + for(x = 0; x < width; x += 1) { + uint16_t value = *src++; + int16_t r, g; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + r = ((int16_t)(value << 8)) >> 8; + g = ((int16_t)(value << 0)) >> 8; + + dst[0] = (uint8_t)(((uint16_t)MAX2(r, 0)) * 0xff / 0x7f); /* r */ + dst[1] = (uint8_t)(((uint16_t)MAX2(g, 0)) * 0xff / 0x7f); /* g */ + dst[2] = (uint8_t)sqrtf(0x7f*0x7f - r * r - g * g) * 0xff / 0x7f; /* b */ + dst[3] = 255; /* a */ + dst += 4; + } + src_row += src_stride; + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_r8g8bx_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; y += 1) { + const float *src = src_row; + uint16_t *dst = (uint16_t *)dst_row; + for(x = 0; x < width; x += 1) { + uint16_t value = 0; + + value |= (uint16_t)(((int8_t)(CLAMP(src[0], -1, 1) * 0x7f)) & 0xff) ; + value |= (uint16_t)((((int8_t)(CLAMP(src[1], -1, 1) * 0x7f)) & 0xff) << 8) ; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst++ = value; + + src += 4; + } + dst_row += dst_stride; + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_r8g8bx_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for(y = 0; y < height; y += 1) { + const uint8_t *src = src_row; + uint16_t *dst = (uint16_t *)dst_row; + for(x = 0; x < width; x += 1) { + uint16_t value = 0; + + value |= src[0] >> 1; + value |= (src[1] >> 1) << 8; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst++ = value; + + src += 4; + } + dst_row += dst_stride; + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_r8g8bx_snorm_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) +{ + uint16_t value = *(const uint16_t *)src; + int16_t r, g; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + r = ((int16_t)(value << 8)) >> 8; + g = ((int16_t)(value << 0)) >> 8; + + dst[0] = r * (1.0f/0x7f); /* r */ + dst[1] = g * (1.0f/0x7f); /* g */ + dst[2] = sqrtf(1.0f - dst[0] * dst[0] - dst[1] * dst[1]); /* b */ + dst[3] = 1.0f; /* a */ +} diff --git a/src/gallium/auxiliary/util/u_format_other.h b/src/gallium/auxiliary/util/u_format_other.h new file mode 100644 index 0000000000..98c97be33f --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_other.h @@ -0,0 +1,108 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#ifndef U_FORMAT_OTHER_H_ +#define U_FORMAT_OTHER_H_ + + +#include "pipe/p_compiler.h" + + +void +util_format_r9g9b9e5_float_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_r9g9b9e5_float_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_r9g9b9e5_float_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j); + +void +util_format_r9g9b9e5_float_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_r9g9b9e5_float_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_r1_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_r1_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_r1_unorm_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j); + +void +util_format_r1_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_r1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_r8g8bx_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_r8g8bx_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_r8g8bx_snorm_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j); + +void +util_format_r8g8bx_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_r8g8bx_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); + +#endif /* U_FORMAT_OTHER_H_ */ diff --git a/src/gallium/auxiliary/util/u_format_pack.py b/src/gallium/auxiliary/util/u_format_pack.py index 3f33f7cc02..0c1bbc84c1 100644 --- a/src/gallium/auxiliary/util/u_format_pack.py +++ b/src/gallium/auxiliary/util/u_format_pack.py @@ -3,7 +3,7 @@ ''' /************************************************************************** * - * Copyright 2009 VMware, Inc. + * Copyright 2009-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -38,6 +38,7 @@ import sys +import math from u_format_parse import * @@ -45,19 +46,35 @@ from u_format_parse import * def generate_format_type(format): '''Generate a structure that describes the format.''' + assert format.layout == PLAIN + print 'union util_format_%s {' % format.short_name() - if format.is_bitmask(): + + if format.block_size() in (8, 16, 32, 64): print ' uint%u_t value;' % (format.block_size(),) + + use_bitfields = False + for channel in format.channels: + if channel.size % 8 or not is_pot(channel.size): + use_bitfields = True + print ' struct {' for channel in format.channels: - if format.is_bitmask() and not format.is_array(): + if use_bitfields: if channel.type == VOID: if channel.size: print ' unsigned %s:%u;' % (channel.name, channel.size) elif channel.type == UNSIGNED: print ' unsigned %s:%u;' % (channel.name, channel.size) - elif channel.type == SIGNED: + elif channel.type in (SIGNED, FIXED): print ' int %s:%u;' % (channel.name, channel.size) + elif channel.type == FLOAT: + if channel.size == 64: + print ' double %s;' % (channel.name) + elif channel.size == 32: + print ' float %s;' % (channel.name) + else: + print ' unsigned %s:%u;' % (channel.name, channel.size) else: assert 0 else: @@ -88,7 +105,7 @@ def generate_format_type(format): def bswap_format(format): '''Generate a structure that describes the format.''' - if format.is_bitmask() and not format.is_array(): + if format.is_bitmask() and not format.is_array() and format.block_size() > 8: print '#ifdef PIPE_ARCH_BIG_ENDIAN' print ' pixel.value = util_bswap%u(pixel.value);' % format.block_size() print '#endif' @@ -105,12 +122,10 @@ def is_format_supported(format): for i in range(4): channel = format.channels[i] - if channel.type not in (VOID, UNSIGNED, SIGNED, FLOAT): + if channel.type not in (VOID, UNSIGNED, SIGNED, FLOAT, FIXED): + return False + if channel.type == FLOAT and channel.size not in (16, 32, 64): return False - - # We can only read a color from a depth/stencil format if the depth channel is present - if format.colorspace == 'zs' and format.swizzles[0] == SWIZZLE_NONE: - return False return True @@ -124,15 +139,17 @@ def native_type(format): return 'uint%u_t' % format.block_size() else: # For array pixel formats return the integer type that matches the color channel - type = format.channels[0] - if type.type == UNSIGNED: - return 'uint%u_t' % type.size - elif type.type == SIGNED: - return 'int%u_t' % type.size - elif type.type == FLOAT: - if type.size == 32: + channel = format.channels[0] + if channel.type in (UNSIGNED, VOID): + return 'uint%u_t' % channel.size + elif channel.type in (SIGNED, FIXED): + return 'int%u_t' % channel.size + elif channel.type == FLOAT: + if channel.size == 16: + return 'uint16_t' + elif channel.size == 32: return 'float' - elif type.size == 64: + elif channel.size == 64: return 'double' else: assert False @@ -171,37 +188,35 @@ def get_one_shift(type): assert False -def get_one(type): +def value_to_native(type, value): '''Get the value of unity for this type.''' - if type.type == 'FLOAT' or not type.norm: - return 1 + if type.type == FLOAT: + return value + if type.type == FIXED: + return int(value * (1 << (type.size/2))) + if not type.norm: + return int(value) + if type.type == UNSIGNED: + return int(value * ((1 << type.size) - 1)) + if type.type == SIGNED: + return int(value * ((1 << (type.size - 1)) - 1)) + assert False + + +def native_to_constant(type, value): + '''Get the value of unity for this type.''' + if type.type == FLOAT: + if type.size <= 32: + return "%ff" % value + else: + return "%ff" % value else: - return (1 << get_one_shift(type)) - 1 - - -def generate_clamp(): - '''Code generate the clamping functions for each type. - - We don't use a macro so that arguments with side effects, - like *src_pixel++ are correctly handled. - ''' - - for suffix, native_type in [ - ('', 'double'), - ('f', 'float'), - ('ui', 'unsigned int'), - ('si', 'int'), - ]: - print 'static INLINE %s' % native_type - print 'clamp%s(%s value, %s lbound, %s ubound)' % (suffix, native_type, native_type, native_type) - print '{' - print ' if(value < lbound)' - print ' return lbound;' - print ' if(value > ubound)' - print ' return ubound;' - print ' return value;' - print '}' - print + return str(int(value)) + + +def get_one(type): + '''Get the value of unity for this type.''' + return value_to_native(type, 1) def clamp_expr(src_channel, dst_channel, dst_native_type, value): @@ -211,274 +226,448 @@ def clamp_expr(src_channel, dst_channel, dst_native_type, value): if src_channel == dst_channel: return value - # Pick the approriate clamp function - if src_channel.type == FLOAT: - if src_channel.size == 32: - func = 'clampf' - elif src_channel.size == 64: - func = 'clamp' - else: - assert False - elif src_channel.type == UNSIGNED: - func = 'clampui' - elif src_channel.type == SIGNED: - func = 'clampsi' - else: - assert False - src_min = src_channel.min() src_max = src_channel.max() dst_min = dst_channel.min() dst_max = dst_channel.max() + + # Translate the destination range to the src native value + dst_min_native = value_to_native(src_channel, dst_min) + dst_max_native = value_to_native(src_channel, dst_max) if src_min < dst_min and src_max > dst_max: - return 'CLAMP(%s, %s, %s)' % (value, dst_min, dst_max) + return 'CLAMP(%s, %s, %s)' % (value, dst_min_native, dst_max_native) if src_max > dst_max: - return 'MIN2(%s, %s)' % (value, dst_max) + return 'MIN2(%s, %s)' % (value, dst_max_native) if src_min < dst_min: - return 'MAX2(%s, %s)' % (value, dst_min) + return 'MAX2(%s, %s)' % (value, dst_min_native) return value -def conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=True): +def conversion_expr(src_channel, + dst_channel, dst_native_type, + value, + clamp=True, + src_colorspace = RGB, + dst_colorspace = RGB): '''Generate the expression to convert a value between two types.''' + if src_colorspace != dst_colorspace: + if src_colorspace == SRGB: + assert src_channel.type == UNSIGNED + assert src_channel.norm + assert src_channel.size == 8 + assert dst_colorspace == RGB + if dst_channel.type == FLOAT: + return 'util_format_srgb_8unorm_to_linear_float(%s)' % value + else: + assert dst_channel.type == UNSIGNED + assert dst_channel.norm + assert dst_channel.size == 8 + return 'util_format_srgb_to_linear_8unorm(%s)' % value + elif dst_colorspace == SRGB: + assert dst_channel.type == UNSIGNED + assert dst_channel.norm + assert dst_channel.size == 8 + assert src_colorspace == RGB + if src_channel.type == FLOAT: + return 'util_format_linear_float_to_srgb_8unorm(%s)' % value + else: + assert src_channel.type == UNSIGNED + assert src_channel.norm + assert src_channel.size == 8 + return 'util_format_linear_to_srgb_8unorm(%s)' % value + elif src_colorspace == ZS: + pass + elif dst_colorspace == ZS: + pass + else: + assert 0 + if src_channel == dst_channel: return value - if src_channel.type == FLOAT and dst_channel.type == FLOAT: - return '(%s)%s' % (dst_native_type, value) - - if not src_channel.norm and not dst_channel.norm: - return '(%s)%s' % (dst_native_type, value) + src_type = src_channel.type + src_size = src_channel.size + src_norm = src_channel.norm - if clamp: - value = clamp_expr(src_channel, dst_channel, dst_native_type, value) + # Promote half to float + if src_type == FLOAT and src_size == 16: + value = 'util_half_to_float(%s)' % value + src_size = 32 - if dst_channel.type == FLOAT: - if src_channel.norm: - one = get_one(src_channel) - if src_channel.size <= 23: - scale = '(1.0f/0x%x)' % one - else: - # bigger than single precision mantissa, use double - scale = '(1.0/0x%x)' % one - value = '(%s * %s)' % (value, scale) - return '(%s)%s' % (dst_native_type, value) + # Special case for float <-> ubytes for more accurate results + # Done before clamping since these functions already take care of that + if src_type == UNSIGNED and src_norm and src_size == 8 and dst_channel.type == FLOAT and dst_channel.size == 32: + return 'ubyte_to_float(%s)' % value + if src_type == FLOAT and src_size == 32 and dst_channel.type == UNSIGNED and dst_channel.norm and dst_channel.size == 8: + return 'float_to_ubyte(%s)' % value - if src_channel.type == FLOAT: - if dst_channel.norm: - dst_one = get_one(dst_channel) - if dst_channel.size <= 23: - scale = '0x%x' % dst_one - else: - # bigger than single precision mantissa, use double - scale = '(double)0x%x' % dst_one - value = '(%s * %s)' % (value, scale) - return '(%s)%s' % (dst_native_type, value) + if clamp: + if dst_channel.type != FLOAT or src_type != FLOAT: + value = clamp_expr(src_channel, dst_channel, dst_native_type, value) - if not src_channel.norm and not dst_channel.norm: - # neither is normalized -- just cast - return '(%s)%s' % (dst_native_type, value) + if src_type in (SIGNED, UNSIGNED) and dst_channel.type in (SIGNED, UNSIGNED): + if not src_norm and not dst_channel.norm: + # neither is normalized -- just cast + return '(%s)%s' % (dst_native_type, value) - if src_channel.type in (SIGNED, UNSIGNED) and dst_channel.type in (SIGNED, UNSIGNED): src_one = get_one(src_channel) dst_one = get_one(dst_channel) - if src_one > dst_one and src_channel.norm: + if src_one > dst_one and src_norm and dst_channel.norm: # We can just bitshift src_shift = get_one_shift(src_channel) dst_shift = get_one_shift(dst_channel) value = '(%s >> %s)' % (value, src_shift - dst_shift) else: # We need to rescale using an intermediate type big enough to hold the multiplication of both - tmp_native_type = intermediate_native_type(src_channel.size + dst_channel.size, src_channel.sign and dst_channel.sign) - value = '(%s)%s' % (tmp_native_type, value) + tmp_native_type = intermediate_native_type(src_size + dst_channel.size, src_channel.sign and dst_channel.sign) + value = '((%s)%s)' % (tmp_native_type, value) value = '(%s * 0x%x / 0x%x)' % (value, dst_one, src_one) value = '(%s)%s' % (dst_native_type, value) return value - assert False + # Promote to either float or double + if src_type != FLOAT: + if src_norm or src_type == FIXED: + one = get_one(src_channel) + if src_size <= 23: + value = '(%s * (1.0f/0x%x))' % (value, one) + if dst_channel.size <= 32: + value = '(float)%s' % value + src_size = 32 + else: + # bigger than single precision mantissa, use double + value = '(%s * (1.0/0x%x))' % (value, one) + src_size = 64 + src_norm = False + else: + if src_size <= 23 or dst_channel.size <= 32: + value = '(float)%s' % value + src_size = 32 + else: + # bigger than single precision mantissa, use double + value = '(double)%s' % value + src_size = 64 + src_type = FLOAT + # Convert double or float to non-float + if dst_channel.type != FLOAT: + if dst_channel.norm or dst_channel.type == FIXED: + dst_one = get_one(dst_channel) + if dst_channel.size <= 23: + value = '(%s * 0x%x)' % (value, dst_one) + else: + # bigger than single precision mantissa, use double + value = '(%s * (double)0x%x)' % (value, dst_one) + value = '(%s)%s' % (dst_native_type, value) + else: + # Cast double to float when converting to either half or float + if dst_channel.size <= 32 and src_size > 32: + value = '(float)%s' % value + src_size = 32 -def generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix): - '''Generate the function to unpack pixels from a particular format''' + if dst_channel.size == 16: + value = 'util_float_to_half(%s)' % value + elif dst_channel.size == 64 and src_size < 64: + value = '(double)%s' % value - name = format.short_name() + return value - src_native_type = native_type(format) - print 'static INLINE void' - print 'util_format_%s_unpack_%s(%s *dst, const void *src)' % (name, dst_suffix, dst_native_type) - print '{' - print ' union util_format_%s pixel;' % format.short_name() - print ' memcpy(&pixel, src, sizeof pixel);' - bswap_format(format) +def generate_unpack_kernel(format, dst_channel, dst_native_type): + if not is_format_supported(format): + return + assert format.layout == PLAIN - for i in range(4): - swizzle = format.swizzles[i] - if swizzle < 4: - src_channel = format.channels[swizzle] - value = 'pixel.chan.%s' % src_channel.name - value = conversion_expr(src_channel, dst_channel, dst_native_type, value) - elif swizzle == SWIZZLE_0: - value = '0' - elif swizzle == SWIZZLE_1: - value = get_one(dst_channel) - elif swizzle == SWIZZLE_NONE: - value = '0' - else: - assert False - if format.colorspace == ZS: - if i == 3: - value = get_one(dst_channel) - elif i >= 1: - value = 'dst[0]' - print ' dst[%u] = %s; /* %s */' % (i, value, 'rgba'[i]) + src_native_type = native_type(format) - print '}' - print + if format.is_bitmask(): + depth = format.block_size() + print ' uint%u_t value = *(const uint%u_t *)src;' % (depth, depth) + + # Declare the intermediate variables + for i in range(format.nr_channels()): + src_channel = format.channels[i] + if src_channel.type == UNSIGNED: + print ' uint%u_t %s;' % (depth, src_channel.name) + elif src_channel.type == SIGNED: + print ' int%u_t %s;' % (depth, src_channel.name) + + if depth > 8: + print '#ifdef PIPE_ARCH_BIG_ENDIAN' + print ' value = util_bswap%u(value);' % depth + print '#endif' + + # Compute the intermediate unshifted values + shift = 0 + for i in range(format.nr_channels()): + src_channel = format.channels[i] + value = 'value' + if src_channel.type == UNSIGNED: + if shift: + value = '%s >> %u' % (value, shift) + if shift + src_channel.size < depth: + value = '(%s) & 0x%x' % (value, (1 << src_channel.size) - 1) + elif src_channel.type == SIGNED: + if shift + src_channel.size < depth: + # Align the sign bit + lshift = depth - (shift + src_channel.size) + value = '%s << %u' % (value, lshift) + # Cast to signed + value = '(int%u_t)(%s) ' % (depth, value) + if src_channel.size < depth: + # Align the LSB bit + rshift = depth - src_channel.size + value = '(%s) >> %u' % (value, rshift) + else: + value = None + + if value is not None: + print ' %s = %s;' % (src_channel.name, value) + + shift += src_channel.size + + # Convert, swizzle, and store final values + for i in range(4): + swizzle = format.swizzles[i] + if swizzle < 4: + src_channel = format.channels[swizzle] + src_colorspace = format.colorspace + if src_colorspace == SRGB and i == 3: + # Alpha channel is linear + src_colorspace = RGB + value = src_channel.name + value = conversion_expr(src_channel, + dst_channel, dst_native_type, + value, + src_colorspace = src_colorspace) + elif swizzle == SWIZZLE_0: + value = '0' + elif swizzle == SWIZZLE_1: + value = get_one(dst_channel) + elif swizzle == SWIZZLE_NONE: + value = '0' + else: + assert False + print ' dst[%u] = %s; /* %s */' % (i, value, 'rgba'[i]) + + else: + print ' union util_format_%s pixel;' % format.short_name() + print ' memcpy(&pixel, src, sizeof pixel);' + bswap_format(format) + + for i in range(4): + swizzle = format.swizzles[i] + if swizzle < 4: + src_channel = format.channels[swizzle] + src_colorspace = format.colorspace + if src_colorspace == SRGB and i == 3: + # Alpha channel is linear + src_colorspace = RGB + value = 'pixel.chan.%s' % src_channel.name + value = conversion_expr(src_channel, + dst_channel, dst_native_type, + value, + src_colorspace = src_colorspace) + elif swizzle == SWIZZLE_0: + value = '0' + elif swizzle == SWIZZLE_1: + value = get_one(dst_channel) + elif swizzle == SWIZZLE_NONE: + value = '0' + else: + assert False + print ' dst[%u] = %s; /* %s */' % (i, value, 'rgba'[i]) -def generate_format_pack(format, src_channel, src_native_type, src_suffix): - '''Generate the function to pack pixels to a particular format''' - - name = format.short_name() +def generate_pack_kernel(format, src_channel, src_native_type): + if not is_format_supported(format): + return + dst_native_type = native_type(format) - print 'static INLINE void' - print 'util_format_%s_pack_%s(void *dst, %s r, %s g, %s b, %s a)' % (name, src_suffix, src_native_type, src_native_type, src_native_type, src_native_type) - print '{' - print ' union util_format_%s pixel;' % format.short_name() - assert format.layout == PLAIN inv_swizzle = format.inv_swizzles() - for i in range(4): - dst_channel = format.channels[i] - width = dst_channel.size - if inv_swizzle[i] is None: - continue - value = 'rgba'[inv_swizzle[i]] - value = conversion_expr(src_channel, dst_channel, dst_native_type, value) - if format.colorspace == ZS: - if i == 3: - value = get_one(dst_channel) - elif i >= 1: - value = '0' - print ' pixel.chan.%s = %s;' % (dst_channel.name, value) + if format.is_bitmask(): + depth = format.block_size() + print ' uint%u_t value = 0;' % depth + + shift = 0 + for i in range(4): + dst_channel = format.channels[i] + if inv_swizzle[i] is not None: + value ='src[%u]' % inv_swizzle[i] + dst_colorspace = format.colorspace + if dst_colorspace == SRGB and inv_swizzle[i] == 3: + # Alpha channel is linear + dst_colorspace = RGB + value = conversion_expr(src_channel, + dst_channel, dst_native_type, + value, + dst_colorspace = dst_colorspace) + if dst_channel.type in (UNSIGNED, SIGNED): + if shift + dst_channel.size < depth: + value = '(%s) & 0x%x' % (value, (1 << dst_channel.size) - 1) + if shift: + value = '(%s) << %u' % (value, shift) + if dst_channel.type == SIGNED: + # Cast to unsigned + value = '(uint%u_t)(%s) ' % (depth, value) + else: + value = None + if value is not None: + print ' value |= %s;' % (value) + + shift += dst_channel.size + + if depth > 8: + print '#ifdef PIPE_ARCH_BIG_ENDIAN' + print ' value = util_bswap%u(value);' % depth + print '#endif' + + print ' *(uint%u_t *)dst = value;' % depth + + else: + print ' union util_format_%s pixel;' % format.short_name() + + for i in range(4): + dst_channel = format.channels[i] + width = dst_channel.size + if inv_swizzle[i] is None: + continue + dst_colorspace = format.colorspace + if dst_colorspace == SRGB and inv_swizzle[i] == 3: + # Alpha channel is linear + dst_colorspace = RGB + value ='src[%u]' % inv_swizzle[i] + value = conversion_expr(src_channel, + dst_channel, dst_native_type, + value, + dst_colorspace = dst_colorspace) + print ' pixel.chan.%s = %s;' % (dst_channel.name, value) + + bswap_format(format) + print ' memcpy(dst, &pixel, sizeof pixel);' + + +def generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix): + '''Generate the function to unpack pixels from a particular format''' + + name = format.short_name() + + print 'static INLINE void' + print 'util_format_%s_unpack_%s(%s *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)' % (name, dst_suffix, dst_native_type) + print '{' + + if is_format_supported(format): + print ' unsigned x, y;' + print ' for(y = 0; y < height; y += %u) {' % (format.block_height,) + print ' %s *dst = dst_row;' % (dst_native_type) + print ' const uint8_t *src = src_row;' + print ' for(x = 0; x < width; x += %u) {' % (format.block_width,) + + generate_unpack_kernel(format, dst_channel, dst_native_type) + + print ' src += %u;' % (format.block_size() / 8,) + print ' dst += 4;' + print ' }' + print ' src_row += src_stride;' + print ' dst_row += dst_stride/sizeof(*dst_row);' + print ' }' - bswap_format(format) - print ' memcpy(dst, &pixel, sizeof pixel);' print '}' print -def generate_unpack(formats, dst_channel, dst_native_type, dst_suffix): - '''Generate the dispatch function to unpack pixels from any format''' +def generate_format_pack(format, src_channel, src_native_type, src_suffix): + '''Generate the function to pack pixels to a particular format''' - for format in formats: - if is_format_supported(format): - generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix) + name = format.short_name() print 'static INLINE void' - print 'util_format_unpack_%s(enum pipe_format format, %s *dst, const void *src)' % (dst_suffix, dst_native_type) + print 'util_format_%s_pack_%s(uint8_t *dst_row, unsigned dst_stride, const %s *src_row, unsigned src_stride, unsigned width, unsigned height)' % (name, src_suffix, src_native_type) print '{' - print ' void (*func)(%s *dst, const void *src);' % dst_native_type - print ' switch(format) {' - for format in formats: - if is_format_supported(format): - print ' case %s:' % format.name - print ' func = &util_format_%s_unpack_%s;' % (format.short_name(), dst_suffix) - print ' break;' - print ' default:' - print ' debug_printf("unsupported format\\n");' - print ' return;' - print ' }' - print ' func(dst, src);' + + if is_format_supported(format): + print ' unsigned x, y;' + print ' for(y = 0; y < height; y += %u) {' % (format.block_height,) + print ' const %s *src = src_row;' % (src_native_type) + print ' uint8_t *dst = dst_row;' + print ' for(x = 0; x < width; x += %u) {' % (format.block_width,) + + generate_pack_kernel(format, src_channel, src_native_type) + + print ' src += 4;' + print ' dst += %u;' % (format.block_size() / 8,) + print ' }' + print ' dst_row += dst_stride;' + print ' src_row += src_stride/sizeof(*src_row);' + print ' }' + print '}' print + +def generate_format_fetch(format, dst_channel, dst_native_type, dst_suffix): + '''Generate the function to unpack pixels from a particular format''' -def generate_pack(formats, src_channel, src_native_type, src_suffix): - '''Generate the dispatch function to pack pixels to any format''' - - for format in formats: - if is_format_supported(format): - generate_format_pack(format, src_channel, src_native_type, src_suffix) + name = format.short_name() print 'static INLINE void' - print 'util_format_pack_%s(enum pipe_format format, void *dst, %s r, %s g, %s b, %s a)' % (src_suffix, src_native_type, src_native_type, src_native_type, src_native_type) + print 'util_format_%s_fetch_%s(%s *dst, const uint8_t *src, unsigned i, unsigned j)' % (name, dst_suffix, dst_native_type) print '{' - print ' void (*func)(void *dst, %s r, %s g, %s b, %s a);' % (src_native_type, src_native_type, src_native_type, src_native_type) - print ' switch(format) {' - for format in formats: - if is_format_supported(format): - print ' case %s:' % format.name - print ' func = &util_format_%s_pack_%s;' % (format.short_name(), src_suffix) - print ' break;' - print ' default:' - print ' debug_printf("%s: unsupported format\\n", __FUNCTION__);' - print ' return;' - print ' }' - print ' func(dst, r, g, b, a);' + + if is_format_supported(format): + generate_unpack_kernel(format, dst_channel, dst_native_type) + print '}' print -def main(): - formats = [] - for arg in sys.argv[1:]: - formats.extend(parse(arg)) +def is_format_hand_written(format): + return format.layout in ('s3tc', 'subsampled', 'other') or format.colorspace == ZS - print '/* This file is autogenerated by u_format_pack.py from u_format.csv. Do not edit directly. */' - print - # This will print the copyright message on the top of this file - print __doc__.strip() - print - print '#ifndef U_FORMAT_PACK_H' - print '#define U_FORMAT_PACK_H' +def generate(formats): print print '#include "pipe/p_compiler.h"' print '#include "u_math.h"' + print '#include "u_half.h"' print '#include "u_format.h"' + print '#include "u_format_other.h"' + print '#include "u_format_srgb.h"' + print '#include "u_format_yuv.h"' + print '#include "u_format_zs.h"' print - generate_clamp() - for format in formats: - if format.layout == PLAIN: - generate_format_type(format) - - channel = Channel(FLOAT, False, 32) - native_type = 'float' - suffix = '4f' + if not is_format_hand_written(format): + + if is_format_supported(format): + generate_format_type(format) - generate_unpack(formats, channel, native_type, suffix) - generate_pack(formats, channel, native_type, suffix) + channel = Channel(FLOAT, False, 32) + native_type = 'float' + suffix = 'rgba_float' - channel = Channel(UNSIGNED, True, 8) - native_type = 'uint8_t' - suffix = '4ub' + generate_format_unpack(format, channel, native_type, suffix) + generate_format_pack(format, channel, native_type, suffix) + generate_format_fetch(format, channel, native_type, suffix) - generate_unpack(formats, channel, native_type, suffix) - generate_pack(formats, channel, native_type, suffix) - - print - print '#ifdef __cplusplus' - print '}' - print '#endif' - print - print '#endif /* ! U_FORMAT_PACK_H */' + channel = Channel(UNSIGNED, True, 8) + native_type = 'uint8_t' + suffix = 'rgba_8unorm' + generate_format_unpack(format, channel, native_type, suffix) + generate_format_pack(format, channel, native_type, suffix) -if __name__ == '__main__': - main() diff --git a/src/gallium/auxiliary/util/u_format_parse.py b/src/gallium/auxiliary/util/u_format_parse.py index 250926418e..7076c676aa 100755 --- a/src/gallium/auxiliary/util/u_format_parse.py +++ b/src/gallium/auxiliary/util/u_format_parse.py @@ -73,18 +73,22 @@ class Channel: '''Maximum representable number.''' if self.type == FLOAT: return VERY_LARGE + if self.type == FIXED: + return (1 << (self.size/2)) - 1 if self.norm: return 1 if self.type == UNSIGNED: return (1 << self.size) - 1 if self.type == SIGNED: - return self.size - 1 + return (1 << (self.size - 1)) - 1 assert False def min(self): '''Minimum representable number.''' if self.type == FLOAT: return -VERY_LARGE + if self.type == FIXED: + return -(1 << (self.size/2)) if self.type == UNSIGNED: return 0 if self.norm: @@ -134,6 +138,8 @@ class Format: return nr_channels def is_array(self): + if self.layout != PLAIN: + return False ref_channel = self.channels[0] for channel in self.channels[1:]: if channel.size and (channel.size != ref_channel.size or channel.size % 8): @@ -141,7 +147,11 @@ class Format: return True def is_mixed(self): + if self.layout != PLAIN: + return False ref_channel = self.channels[0] + if ref_channel.type == VOID: + ref_channel = self.channels[1] for channel in self.channels[1:]: if channel.type != VOID: if channel.type != ref_channel.type: @@ -154,29 +164,29 @@ class Format: return is_pot(self.block_size()) def is_int(self): + if self.layout != PLAIN: + return False for channel in self.channels: if channel.type not in (VOID, UNSIGNED, SIGNED): return False return True def is_float(self): + if self.layout != PLAIN: + return False for channel in self.channels: if channel.type not in (VOID, FLOAT): return False return True def is_bitmask(self): - if self.block_size() > 32: + if self.layout != PLAIN: return False - if not self.is_pot(): + if self.block_size() not in (8, 16, 32): return False for channel in self.channels: - if not is_pot(channel.size): - return True if channel.type not in (VOID, UNSIGNED, SIGNED): return False - if channel.size >= 32: - return False return True def inv_swizzles(self): diff --git a/src/gallium/auxiliary/util/u_format_s3tc.c b/src/gallium/auxiliary/util/u_format_s3tc.c new file mode 100644 index 0000000000..79dee2b423 --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_s3tc.c @@ -0,0 +1,763 @@ +/************************************************************************** + * + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * Copyright (c) 2008 VMware, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "u_dl.h" +#include "u_math.h" +#include "u_format.h" +#include "u_format_s3tc.h" + + +#if defined(_WIN32) || defined(WIN32) +#define DXTN_LIBNAME "dxtn.dll" +#elif defined(__APPLE__) +#define DXTN_LIBNAME "libtxc_dxtn.dylib" +#else +#define DXTN_LIBNAME "libtxc_dxtn.so" +#endif + + +static void +util_format_dxt1_rgb_fetch_stub(int src_stride, + const uint8_t *src, + int col, int row, + uint8_t *dst) +{ + assert(0); +} + + +static void +util_format_dxt1_rgba_fetch_stub(int src_stride, + const uint8_t *src, + int col, int row, + uint8_t *dst ) +{ + assert(0); +} + + +static void +util_format_dxt3_rgba_fetch_stub(int src_stride, + const uint8_t *src, + int col, int row, + uint8_t *dst ) +{ + assert(0); +} + + +static void +util_format_dxt5_rgba_fetch_stub(int src_stride, + const uint8_t *src, + int col, int row, + uint8_t *dst ) +{ + assert(0); +} + + +static void +util_format_dxtn_pack_stub(int src_comps, + int width, int height, + const uint8_t *src, + enum util_format_dxtn dst_format, + uint8_t *dst, + int dst_stride) +{ + assert(0); +} + + +boolean util_format_s3tc_enabled = FALSE; + +util_format_dxtn_fetch_t util_format_dxt1_rgb_fetch = util_format_dxt1_rgb_fetch_stub; +util_format_dxtn_fetch_t util_format_dxt1_rgba_fetch = util_format_dxt1_rgba_fetch_stub; +util_format_dxtn_fetch_t util_format_dxt3_rgba_fetch = util_format_dxt3_rgba_fetch_stub; +util_format_dxtn_fetch_t util_format_dxt5_rgba_fetch = util_format_dxt5_rgba_fetch_stub; + +util_format_dxtn_pack_t util_format_dxtn_pack = util_format_dxtn_pack_stub; + + +void +util_format_s3tc_init(void) +{ + static boolean first_time = TRUE; + struct util_dl_library *library = NULL; + util_dl_proc fetch_2d_texel_rgb_dxt1; + util_dl_proc fetch_2d_texel_rgba_dxt1; + util_dl_proc fetch_2d_texel_rgba_dxt3; + util_dl_proc fetch_2d_texel_rgba_dxt5; + util_dl_proc tx_compress_dxtn; + + if (!first_time) + return; + first_time = FALSE; + + if (util_format_s3tc_enabled) + return; + + library = util_dl_open(DXTN_LIBNAME); + if (!library) { + debug_printf("couldn't open " DXTN_LIBNAME ", software DXTn " + "compression/decompression unavailable"); + return; + } + + fetch_2d_texel_rgb_dxt1 = + util_dl_get_proc_address(library, "fetch_2d_texel_rgb_dxt1"); + fetch_2d_texel_rgba_dxt1 = + util_dl_get_proc_address(library, "fetch_2d_texel_rgba_dxt1"); + fetch_2d_texel_rgba_dxt3 = + util_dl_get_proc_address(library, "fetch_2d_texel_rgba_dxt3"); + fetch_2d_texel_rgba_dxt5 = + util_dl_get_proc_address(library, "fetch_2d_texel_rgba_dxt5"); + tx_compress_dxtn = + util_dl_get_proc_address(library, "tx_compress_dxtn"); + + if (!util_format_dxt1_rgb_fetch || + !util_format_dxt1_rgba_fetch || + !util_format_dxt3_rgba_fetch || + !util_format_dxt5_rgba_fetch || + !util_format_dxtn_pack) { + debug_printf("couldn't reference all symbols in " DXTN_LIBNAME + ", software DXTn compression/decompression " + "unavailable"); + util_dl_close(library); + return; + } + + util_format_dxt1_rgb_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgb_dxt1; + util_format_dxt1_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt1; + util_format_dxt3_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt3; + util_format_dxt5_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt5; + util_format_dxtn_pack = (util_format_dxtn_pack_t)tx_compress_dxtn; + util_format_s3tc_enabled = TRUE; +} + + +/* + * Pixel fetch. + */ + +void +util_format_dxt1_rgb_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + util_format_dxt1_rgb_fetch(0, src, i, j, dst); +} + +void +util_format_dxt1_rgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + util_format_dxt1_rgba_fetch(0, src, i, j, dst); +} + +void +util_format_dxt3_rgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + util_format_dxt3_rgba_fetch(0, src, i, j, dst); +} + +void +util_format_dxt5_rgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + util_format_dxt5_rgba_fetch(0, src, i, j, dst); +} + +void +util_format_dxt1_rgb_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp[4]; + util_format_dxt1_rgb_fetch(0, src, i, j, tmp); + dst[0] = ubyte_to_float(tmp[0]); + dst[1] = ubyte_to_float(tmp[1]); + dst[2] = ubyte_to_float(tmp[2]); + dst[3] = 1.0; +} + +void +util_format_dxt1_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp[4]; + util_format_dxt1_rgba_fetch(0, src, i, j, tmp); + dst[0] = ubyte_to_float(tmp[0]); + dst[1] = ubyte_to_float(tmp[1]); + dst[2] = ubyte_to_float(tmp[2]); + dst[3] = ubyte_to_float(tmp[3]); +} + +void +util_format_dxt3_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp[4]; + util_format_dxt3_rgba_fetch(0, src, i, j, tmp); + dst[0] = ubyte_to_float(tmp[0]); + dst[1] = ubyte_to_float(tmp[1]); + dst[2] = ubyte_to_float(tmp[2]); + dst[3] = ubyte_to_float(tmp[3]); +} + +void +util_format_dxt5_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp[4]; + util_format_dxt5_rgba_fetch(0, src, i, j, tmp); + dst[0] = ubyte_to_float(tmp[0]); + dst[1] = ubyte_to_float(tmp[1]); + dst[2] = ubyte_to_float(tmp[2]); + dst[3] = ubyte_to_float(tmp[3]); +} + + +/* + * Block decompression. + */ + +void +util_format_dxt1_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j; + for(y = 0; y < height; y += 4) { + const uint8_t *src = src_row; + for(x = 0; x < width; x += 4) { + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + util_format_dxt1_rgb_fetch(0, src, i, j, dst); + } + } + src += 8; + } + src_row += src_stride; + } +} + +void +util_format_dxt1_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j; + for(y = 0; y < height; y += 4) { + const uint8_t *src = src_row; + for(x = 0; x < width; x += 4) { + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + util_format_dxt1_rgba_fetch(0, src, i, j, dst); + } + } + src += 8; + } + src_row += src_stride; + } +} + +void +util_format_dxt3_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j; + for(y = 0; y < height; y += 4) { + const uint8_t *src = src_row; + for(x = 0; x < width; x += 4) { + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + util_format_dxt3_rgba_fetch(0, src, i, j, dst); + } + } + src += 16; + } + src_row += src_stride; + } +} + +void +util_format_dxt5_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j; + for(y = 0; y < height; y += 4) { + const uint8_t *src = src_row; + for(x = 0; x < width; x += 4) { + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + util_format_dxt5_rgba_fetch(0, src, i, j, dst); + } + } + src += 16; + } + src_row += src_stride; + } +} + +void +util_format_dxt1_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j; + for(y = 0; y < height; y += 4) { + const uint8_t *src = src_row; + for(x = 0; x < width; x += 4) { + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + uint8_t tmp[4]; + util_format_dxt1_rgb_fetch(0, src, i, j, tmp); + dst[0] = ubyte_to_float(tmp[0]); + dst[1] = ubyte_to_float(tmp[1]); + dst[2] = ubyte_to_float(tmp[2]); + dst[3] = 1.0; + } + } + src += 8; + } + src_row += src_stride; + } +} + +void +util_format_dxt1_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j; + for(y = 0; y < height; y += 4) { + const uint8_t *src = src_row; + for(x = 0; x < width; x += 4) { + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + uint8_t tmp[4]; + util_format_dxt1_rgba_fetch(0, src, i, j, tmp); + dst[0] = ubyte_to_float(tmp[0]); + dst[1] = ubyte_to_float(tmp[1]); + dst[2] = ubyte_to_float(tmp[2]); + dst[3] = ubyte_to_float(tmp[3]); + } + } + src += 8; + } + src_row += src_stride; + } +} + +void +util_format_dxt3_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j; + for(y = 0; y < height; y += 4) { + const uint8_t *src = src_row; + for(x = 0; x < width; x += 4) { + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + uint8_t tmp[4]; + util_format_dxt3_rgba_fetch(0, src, i, j, tmp); + dst[0] = ubyte_to_float(tmp[0]); + dst[1] = ubyte_to_float(tmp[1]); + dst[2] = ubyte_to_float(tmp[2]); + dst[3] = ubyte_to_float(tmp[3]); + } + } + src += 16; + } + src_row += src_stride; + } +} + +void +util_format_dxt5_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j; + for(y = 0; y < height; y += 4) { + const uint8_t *src = src_row; + for(x = 0; x < width; x += 4) { + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + uint8_t tmp[4]; + util_format_dxt5_rgba_fetch(0, src, i, j, tmp); + dst[0] = ubyte_to_float(tmp[0]); + dst[1] = ubyte_to_float(tmp[1]); + dst[2] = ubyte_to_float(tmp[2]); + dst[3] = ubyte_to_float(tmp[3]); + } + } + src += 16; + } + src_row += src_stride; + } +} + + +/* + * Block compression. + */ + +void +util_format_dxt1_rgb_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j, k; + for(y = 0; y < height; y += 4) { + const uint8_t *src = src_row; + uint8_t *dst = dst_row; + for(x = 0; x < width; x += 4) { + uint8_t tmp[4][4][3]; + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + for(k = 0; k < 3; ++k) { + tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k]; + } + } + } + util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, dst_stride); + src += 4*4; + dst += 8; + } + src_row += src_stride; + dst_row += 4*dst_stride/sizeof(*dst_row); + } +} + +void +util_format_dxt1_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j, k; + for(y = 0; y < height; y += 4) { + const uint8_t *src = src_row; + uint8_t *dst = dst_row; + for(x = 0; x < width; x += 4) { + uint8_t tmp[4][4][4]; + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + for(k = 0; k < 4; ++k) { + tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k]; + } + } + } + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, dst_stride); + src += 4*4; + dst += 8; + } + src_row += src_stride; + dst_row += 4*dst_stride/sizeof(*dst_row); + } +} + +void +util_format_dxt3_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j, k; + for(y = 0; y < height; y += 4) { + const uint8_t *src = src_row; + uint8_t *dst = dst_row; + for(x = 0; x < width; x += 4) { + uint8_t tmp[4][4][4]; + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + for(k = 0; k < 4; ++k) { + tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k]; + } + } + } + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, dst_stride); + src += 4*4; + dst += 16; + } + src_row += src_stride; + dst_row += 4*dst_stride/sizeof(*dst_row); + } +} + +void +util_format_dxt5_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j, k; + for(y = 0; y < height; y += 4) { + const uint8_t *src = src_row; + uint8_t *dst = dst_row; + for(x = 0; x < width; x += 4) { + uint8_t tmp[4][4][4]; + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + for(k = 0; k < 4; ++k) { + tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k]; + } + } + } + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, dst_stride); + src += 4*4; + dst += 16; + } + src_row += src_stride; + dst_row += 4*dst_stride/sizeof(*dst_row); + } +} + +void +util_format_dxt1_rgb_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j, k; + for(y = 0; y < height; y += 4) { + const float *src = src_row; + uint8_t *dst = dst_row; + for(x = 0; x < width; x += 4) { + uint8_t tmp[4][4][3]; + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + for(k = 0; k < 3; ++k) { + tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]); + } + } + } + util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, dst_stride); + src += 4*4; + dst += 8; + } + src_row += src_stride; + dst_row += 4*dst_stride/sizeof(*dst_row); + } +} + +void +util_format_dxt1_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j, k; + for(y = 0; y < height; y += 4) { + const float *src = src_row; + uint8_t *dst = dst_row; + for(x = 0; x < width; x += 4) { + uint8_t tmp[4][4][4]; + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + for(k = 0; k < 4; ++k) { + tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]); + } + } + } + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, dst_stride); + src += 4*4; + dst += 8; + } + src_row += src_stride; + dst_row += 4*dst_stride/sizeof(*dst_row); + } +} + +void +util_format_dxt3_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j, k; + for(y = 0; y < height; y += 4) { + const float *src = src_row; + uint8_t *dst = dst_row; + for(x = 0; x < width; x += 4) { + uint8_t tmp[4][4][4]; + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + for(k = 0; k < 4; ++k) { + tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]); + } + } + } + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, dst_stride); + src += 4*4; + dst += 16; + } + src_row += src_stride; + dst_row += 4*dst_stride/sizeof(*dst_row); + } +} + +void +util_format_dxt5_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j, k; + for(y = 0; y < height; y += 4) { + const float *src = src_row; + uint8_t *dst = dst_row; + for(x = 0; x < width; x += 4) { + uint8_t tmp[4][4][4]; + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + for(k = 0; k < 4; ++k) { + tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]); + } + } + } + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, dst_stride); + src += 4*4; + dst += 16; + } + src_row += src_stride; + dst_row += 4*dst_stride/sizeof(*dst_row); + } +} + + +/* + * SRGB variants. + * + * FIXME: shunts to RGB for now + */ + +void +util_format_dxt1_srgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxt1_rgb_unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_dxt1_srgb_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxt1_rgb_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_dxt1_srgb_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + util_format_dxt1_rgb_fetch_rgba_8unorm(dst, src, i, j); +} + +void +util_format_dxt1_srgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxt1_rgba_unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_dxt1_srgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxt1_rgba_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_dxt1_srgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + util_format_dxt1_rgba_fetch_rgba_8unorm(dst, src, i, j); +} + +void +util_format_dxt3_srgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxt3_rgba_unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_dxt3_srgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxt3_rgba_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_dxt3_srgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + util_format_dxt3_rgba_fetch_rgba_8unorm(dst, src, i, j); +} + +void +util_format_dxt5_srgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxt5_rgba_unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_dxt5_srgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxt5_rgba_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_dxt5_srgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + util_format_dxt5_rgba_fetch_rgba_8unorm(dst, src, i, j); +} + +void +util_format_dxt1_srgb_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxt1_rgb_unpack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_dxt1_srgb_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxt1_rgb_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_dxt1_srgb_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + util_format_dxt1_rgb_fetch_rgba_float(dst, src, i, j); +} + +void +util_format_dxt1_srgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxt1_rgba_unpack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_dxt1_srgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxt1_rgba_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_dxt1_srgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + util_format_dxt1_rgba_fetch_rgba_float(dst, src, i, j); +} + +void +util_format_dxt3_srgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxt3_rgba_unpack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_dxt3_srgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxt3_rgba_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_dxt3_srgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + util_format_dxt3_rgba_fetch_rgba_float(dst, src, i, j); +} + +void +util_format_dxt5_srgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxt5_rgba_unpack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_dxt5_srgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_dxt5_rgba_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_dxt5_srgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + util_format_dxt5_rgba_fetch_rgba_float(dst, src, i, j); +} + diff --git a/src/gallium/auxiliary/util/u_format_s3tc.h b/src/gallium/auxiliary/util/u_format_s3tc.h new file mode 100644 index 0000000000..97770abd42 --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_s3tc.h @@ -0,0 +1,218 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#ifndef U_FORMAT_S3TC_H_ +#define U_FORMAT_S3TC_H_ + + +#include "pipe/p_compiler.h" + + +enum util_format_dxtn { + UTIL_FORMAT_DXT1_RGB = 0x83F0, + UTIL_FORMAT_DXT1_RGBA = 0x83F1, + UTIL_FORMAT_DXT3_RGBA = 0x83F2, + UTIL_FORMAT_DXT5_RGBA = 0x83F3 +}; + + +typedef void +(*util_format_dxtn_fetch_t)( int src_stride, + const uint8_t *src, + int col, int row, + uint8_t *dst ); + +typedef void +(*util_format_dxtn_pack_t)( int src_comps, + int width, int height, + const uint8_t *src, + enum util_format_dxtn dst_format, + uint8_t *dst, + int dst_stride); + +extern boolean util_format_s3tc_enabled; + +extern util_format_dxtn_fetch_t util_format_dxt1_rgb_fetch; +extern util_format_dxtn_fetch_t util_format_dxt1_rgba_fetch; +extern util_format_dxtn_fetch_t util_format_dxt3_rgba_fetch; +extern util_format_dxtn_fetch_t util_format_dxt5_rgba_fetch; + +extern util_format_dxtn_pack_t util_format_dxtn_pack; + + +void +util_format_s3tc_init(void); + + +void +util_format_dxt1_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt1_rgb_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt1_rgb_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_dxt1_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt1_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt1_rgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_dxt3_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt3_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt3_rgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_dxt5_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt5_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt5_rgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_dxt1_srgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt1_srgb_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt1_srgb_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_dxt1_srgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt1_srgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt1_srgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_dxt3_srgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt3_srgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt3_srgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_dxt5_srgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt5_srgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt5_srgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); + + +void +util_format_dxt1_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt1_rgb_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt1_rgb_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_dxt1_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt1_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt1_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_dxt3_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt3_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt3_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_dxt5_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt5_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt5_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_dxt1_srgb_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt1_srgb_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt1_srgb_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_dxt1_srgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt1_srgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt1_srgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_dxt3_srgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt3_srgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt3_srgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_dxt5_srgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt5_srgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_dxt5_srgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); + + +#endif /* U_FORMAT_S3TC_H_ */ diff --git a/src/gallium/auxiliary/util/u_format_srgb.h b/src/gallium/auxiliary/util/u_format_srgb.h new file mode 100644 index 0000000000..43213fbeb3 --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_srgb.h @@ -0,0 +1,106 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + +/** + * @file + * SRGB translation. + * + * @author Brian Paul <brianp@vmware.com> + * @author Michal Krol <michal@vmware.com> + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#ifndef U_FORMAT_SRGB_H_ +#define U_FORMAT_SRGB_H_ + + +#include "pipe/p_compiler.h" +#include "u_math.h" + + +extern const float +util_format_srgb_8unorm_to_linear_float_table[256]; + +extern const uint8_t +util_format_srgb_to_linear_8unorm_table[256]; + +extern const uint8_t +util_format_linear_to_srgb_8unorm_table[256]; + + +/** + * Convert a unclamped linear float to srgb value in the [0,255]. + * XXX this hasn't been tested (render to srgb surface). + * XXX this needs optimization. + */ +static INLINE uint8_t +util_format_linear_float_to_srgb_8unorm(float x) +{ + if (x >= 1.0f) + return 255; + else if (x >= 0.0031308f) + return float_to_ubyte(1.055f * powf(x, 0.41666f) - 0.055f); + else if (x > 0.0f) + return float_to_ubyte(12.92f * x); + else + return 0; +} + + +/** + * Convert an 8-bit sRGB value from non-linear space to a + * linear RGB value in [0, 1]. + * Implemented with a 256-entry lookup table. + */ +static INLINE float +util_format_srgb_8unorm_to_linear_float(uint8_t x) +{ + return util_format_srgb_8unorm_to_linear_float_table[x]; +} + + +/** + * Convert a 8bit normalized value from linear to srgb. + */ +static INLINE uint8_t +util_format_linear_to_srgb_8unorm(uint8_t x) +{ + return util_format_linear_to_srgb_8unorm_table[x]; +} + + +/** + * Convert a 8bit normalized value from srgb to linear. + */ +static INLINE uint8_t +util_format_srgb_to_linear_8unorm(uint8_t x) +{ + return util_format_srgb_to_linear_8unorm_table[x]; +} + + +#endif /* U_FORMAT_SRGB_H_ */ diff --git a/src/gallium/auxiliary/util/u_format_srgb.py b/src/gallium/auxiliary/util/u_format_srgb.py new file mode 100644 index 0000000000..a4c76dc00b --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_srgb.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python + +''' +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * SRGB translation. + * + * @author Brian Paul <brianp@vmware.com> + * @author Michal Krol <michal@vmware.com> + * @author Jose Fonseca <jfonseca@vmware.com> + */ +''' + + +import sys +import math + + +def srgb_to_linear(x): + if x <= 0.04045: + return x / 12.92 + else: + return math.pow((x + 0.055) / 1.055, 2.4) + + +def linear_to_srgb(x): + if x >= 0.0031308: + return 1.055 * math.pow(x, 0.41666) - 0.055 + else: + return 12.92 * x + +def generate_srgb_tables(): + print 'const float' + print 'util_format_srgb_8unorm_to_linear_float_table[256] = {' + for j in range(0, 256, 4): + print ' ', + for i in range(j, j + 4): + print '%.7e,' % (srgb_to_linear(i / 255.0),), + print + print '};' + print + print 'const uint8_t' + print 'util_format_srgb_to_linear_8unorm_table[256] = {' + for j in range(0, 256, 16): + print ' ', + for i in range(j, j + 16): + print '%3u,' % (int(srgb_to_linear(i / 255.0) * 255.0 + 0.5),), + print + print '};' + print + print 'const uint8_t' + print 'util_format_linear_to_srgb_8unorm_table[256] = {' + for j in range(0, 256, 16): + print ' ', + for i in range(j, j + 16): + print '%3u,' % (int(linear_to_srgb(i / 255.0) * 255.0 + 0.5),), + print + print '};' + print + + +def main(): + print '/* This file is autogenerated by u_format_srgb.py. Do not edit directly. */' + print + # This will print the copyright message on the top of this file + print __doc__.strip() + print + print '#include "u_format_srgb.h"' + print + generate_srgb_tables() + + +if __name__ == '__main__': + main() diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py index 4e29d15f3b..ae9a598197 100755 --- a/src/gallium/auxiliary/util/u_format_table.py +++ b/src/gallium/auxiliary/util/u_format_table.py @@ -3,7 +3,7 @@ ''' /************************************************************************** * - * Copyright 2009 VMware, Inc. + * Copyright 2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -33,6 +33,7 @@ import sys from u_format_parse import * +import u_format_pack def layout_map(layout): @@ -85,30 +86,22 @@ def write_format_table(formats): print __doc__.strip() print print '#include "u_format.h"' + print '#include "u_format_s3tc.h"' print - print 'const struct util_format_description' - print 'util_format_none_description = {' - print " PIPE_FORMAT_NONE," - print " \"PIPE_FORMAT_NONE\"," - print " {0, 0, 0}," - print " 0," - print " 0," - print " 0," - print " 0," - print " {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}}," - print " {0, 0, 0, 0}," - print " 0" - print "};" - print + + u_format_pack.generate(formats) + for format in formats: print 'const struct util_format_description' print 'util_format_%s_description = {' % (format.short_name(),) print " %s," % (format.name,) print " \"%s\"," % (format.name,) + print " \"%s\"," % (format.short_name(),) print " {%u, %u, %u},\t/* block */" % (format.block_width, format.block_height, format.block_size()) print " %s," % (layout_map(format.layout),) print " %u,\t/* nr_channels */" % (format.nr_channels(),) print " %s,\t/* is_array */" % (bool_map(format.is_array()),) + print " %s,\t/* is_bitmask */" % (bool_map(format.is_bitmask()),) print " %s,\t/* is_mixed */" % (bool_map(format.is_mixed()),) print " {" for i in range(4): @@ -136,8 +129,37 @@ def write_format_table(formats): print " %s%s\t/* %s */" % (swizzle_map[swizzle], sep, comment) print " }," print " %s," % (colorspace_map(format.colorspace),) + if format.colorspace != ZS: + print " &util_format_%s_unpack_rgba_8unorm," % format.short_name() + print " &util_format_%s_pack_rgba_8unorm," % format.short_name() + print " &util_format_%s_unpack_rgba_float," % format.short_name() + print " &util_format_%s_pack_rgba_float," % format.short_name() + print " &util_format_%s_fetch_rgba_float," % format.short_name() + else: + print " NULL, /* unpack_rgba_8unorm */" + print " NULL, /* pack_rgba_8unorm */" + print " NULL, /* unpack_rgba_float */" + print " NULL, /* pack_rgba_float */" + print " NULL, /* fetch_rgba_float */" + if format.colorspace == ZS and format.swizzles[0] != SWIZZLE_NONE: + print " &util_format_%s_unpack_z_32unorm," % format.short_name() + print " &util_format_%s_pack_z_32unorm," % format.short_name() + print " &util_format_%s_unpack_z_float," % format.short_name() + print " &util_format_%s_pack_z_float," % format.short_name() + else: + print " NULL, /* unpack_z_32unorm */" + print " NULL, /* pack_z_32unorm */" + print " NULL, /* unpack_z_float */" + print " NULL, /* pack_z_float */" + if format.colorspace == ZS and format.swizzles[1] != SWIZZLE_NONE: + print " &util_format_%s_unpack_s_8uscaled," % format.short_name() + print " &util_format_%s_pack_s_8uscaled" % format.short_name() + else: + print " NULL, /* unpack_s_8uscaled */" + print " NULL /* pack_s_8uscaled */" print "};" print + print "const struct util_format_description *" print "util_format_description(enum pipe_format format)" print "{" @@ -146,13 +168,10 @@ def write_format_table(formats): print " }" print print " switch (format) {" - print " case PIPE_FORMAT_NONE:" - print " return &util_format_none_description;" for format in formats: print " case %s:" % format.name print " return &util_format_%s_description;" % (format.short_name(),) print " default:" - print " assert(0);" print " return NULL;" print " }" print "}" diff --git a/src/gallium/auxiliary/util/u_format_tests.c b/src/gallium/auxiliary/util/u_format_tests.c new file mode 100644 index 0000000000..b1df6c49c4 --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_tests.c @@ -0,0 +1,970 @@ +/************************************************************************** + * + * Copyright 2009-2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "u_memory.h" +#include "u_format_tests.h" + + +/* + * Helper macros to create the packed bytes for longer words. + */ + +#define PACKED_1x8(x) {x, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} +#define PACKED_2x8(x, y) {x, y, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} +#define PACKED_3x8(x, y, z) {x, y, z, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} +#define PACKED_4x8(x, y, z, w) {x, y, z, w, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} +#define PACKED_8x8(a, b, c, d, e, f, g, h) {a, b, c, d, e, f, g, h, 0, 0, 0, 0, 0, 0, 0, 0} + +#define PACKED_1x16(x) {(x) & 0xff, (x) >> 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} +#define PACKED_2x16(x, y) {(x) & 0xff, (x) >> 8, (y) & 0xff, (y) >> 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} +#define PACKED_3x16(x, y, z) {(x) & 0xff, (x) >> 8, (y) & 0xff, (y) >> 8, (z) & 0xff, (z) >> 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} +#define PACKED_4x16(x, y, z, w) {(x) & 0xff, (x) >> 8, (y) & 0xff, (y) >> 8, (z) & 0xff, (z) >> 8, (w) & 0xff, (w) >> 8, 0, 0, 0, 0, 0, 0, 0, 0} + +#define PACKED_1x32(x) {(x) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, (x) >> 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} +#define PACKED_2x32(x, y) {(x) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, (x) >> 24, (y) & 0xff, ((y) >> 8) & 0xff, ((y) >> 16) & 0xff, (y) >> 24, 0, 0, 0, 0, 0, 0, 0, 0} +#define PACKED_3x32(x, y, z) {(x) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, (x) >> 24, (y) & 0xff, ((y) >> 8) & 0xff, ((y) >> 16) & 0xff, (y) >> 24, (z) & 0xff, ((z) >> 8) & 0xff, ((z) >> 16) & 0xff, (z) >> 24, 0, 0, 0, 0} +#define PACKED_4x32(x, y, z, w) {(x) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, (x) >> 24, (y) & 0xff, ((y) >> 8) & 0xff, ((y) >> 16) & 0xff, (y) >> 24, (z) & 0xff, ((z) >> 8) & 0xff, ((z) >> 16) & 0xff, (z) >> 24, (w) & 0xff, ((w) >> 8) & 0xff, ((w) >> 16) & 0xff, (w) >> 24} + +#define UNPACKED_1x1(r, g, b, a) \ + {{{r, g, b, a}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}, \ + {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}, \ + {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}, \ + {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}} + +#define UNPACKED_2x1(r0, g0, b0, a0, r1, g1, b1, a1) \ + {{{r0, g0, b0, a0}, {r1, g1, b1, a1}, {0, 0, 0, 0}, {0, 0, 0, 0}}, \ + {{ 0, 0, 0, 0}, { 0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}, \ + {{ 0, 0, 0, 0}, { 0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}, \ + {{ 0, 0, 0, 0}, { 0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}} + + +/** + * Test cases. + * + * These were manually entered. We could generate these + * + * To keep this to a we cover only the corner cases, which should produce + * good enough coverage since that pixel format transformations are afine for + * non SRGB formats. + */ +const struct util_format_test_case +util_format_test_cases[] = +{ + + /* + * 32-bit rendertarget formats + */ + + {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000ff00), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00ff0000), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_B8G8R8A8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x0000ff00), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00ff0000), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0xff000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_B8G8R8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000ff00), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00ff0000), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_A8R8G8B8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x0000ff00), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x00ff0000), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0xff000000), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_X8R8G8B8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000ff00), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00ff0000), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_A8B8G8R8_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x0000ff00), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x00ff0000), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0xff000000), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_X8B8G8R8_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_R8G8B8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x000000ff), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x0000ff00), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00ff0000), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_R8G8B8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0xff000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000003ff), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000ffc00), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x3ff00000), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xc0000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_B10G10R10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_B10G10R10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000003ff), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_B10G10R10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000ffc00), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_B10G10R10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x3ff00000), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_B10G10R10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xc0000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_B10G10R10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + /* + * 16-bit rendertarget formats + */ + + {PIPE_FORMAT_B5G5R5X1_UNORM, PACKED_1x16(0x7fff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_B5G5R5X1_UNORM, PACKED_1x16(0x7fff), PACKED_1x16(0x001f), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_B5G5R5X1_UNORM, PACKED_1x16(0x7fff), PACKED_1x16(0x03e0), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_B5G5R5X1_UNORM, PACKED_1x16(0x7fff), PACKED_1x16(0x7c00), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_B5G5R5X1_UNORM, PACKED_1x16(0x7fff), PACKED_1x16(0x7fff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x001f), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x03e0), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x7c00), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x8000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_B4G4R4X4_UNORM, PACKED_1x16(0x0fff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_B4G4R4X4_UNORM, PACKED_1x16(0x0fff), PACKED_1x16(0x000f), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_B4G4R4X4_UNORM, PACKED_1x16(0x0fff), PACKED_1x16(0x00f0), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_B4G4R4X4_UNORM, PACKED_1x16(0x0fff), PACKED_1x16(0x0f00), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_B4G4R4X4_UNORM, PACKED_1x16(0x0fff), PACKED_1x16(0x0fff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x000f), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x00f0), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0f00), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xf000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_B4G4R4A4_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_B5G6R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_B5G6R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x001f), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_B5G6R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x07e0), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_B5G6R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xf800), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_B5G6R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + /* + * Luminance/intensity/alpha formats + */ + + {PIPE_FORMAT_L8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_L8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0xff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_A8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_A8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0xff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + + {PIPE_FORMAT_I8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_I8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0xff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_L4A4_UNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_L4A4_UNORM, PACKED_1x8(0xff), PACKED_1x8(0x0f), UNPACKED_1x1(1.0, 1.0, 1.0, 0.0)}, + {PIPE_FORMAT_L4A4_UNORM, PACKED_1x8(0xff), PACKED_1x8(0xf0), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_L4A4_UNORM, PACKED_1x8(0xff), PACKED_1x8(0xff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_L8A8_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_L8A8_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x00ff), UNPACKED_1x1(1.0, 1.0, 1.0, 0.0)}, + {PIPE_FORMAT_L8A8_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xff00), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_L8A8_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_L16_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_L16_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + /* + * SRGB formats + */ + + {PIPE_FORMAT_L8_SRGB, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_L8_SRGB, PACKED_1x8(0xff), PACKED_1x8(0xbc), UNPACKED_1x1(0.502886458033, 0.502886458033, 0.502886458033, 1.0)}, + {PIPE_FORMAT_L8_SRGB, PACKED_1x8(0xff), PACKED_1x8(0xff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_L8A8_SRGB, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_L8A8_SRGB, PACKED_1x16(0xffff), PACKED_1x16(0x00bc), UNPACKED_1x1(0.502886458033, 0.502886458033, 0.502886458033, 0.0)}, + {PIPE_FORMAT_L8A8_SRGB, PACKED_1x16(0xffff), PACKED_1x16(0x00ff), UNPACKED_1x1(1.0, 1.0, 1.0, 0.0)}, + {PIPE_FORMAT_L8A8_SRGB, PACKED_1x16(0xffff), PACKED_1x16(0xcc00), UNPACKED_1x1(0.0, 0.0, 0.0, 0.8)}, + {PIPE_FORMAT_L8A8_SRGB, PACKED_1x16(0xffff), PACKED_1x16(0xff00), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_L8A8_SRGB, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_R8G8B8_SRGB, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_SRGB, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xbc, 0x00, 0x00), UNPACKED_1x1(0.502886458033, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_SRGB, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xff, 0x00, 0x00), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_SRGB, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0xbc, 0x00), UNPACKED_1x1(0.0, 0.502886458033, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_SRGB, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0xff, 0x00), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_SRGB, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0xbc), UNPACKED_1x1(0.0, 0.0, 0.502886458033, 1.0)}, + {PIPE_FORMAT_R8G8B8_SRGB, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0xff), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_SRGB, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xff, 0xff, 0xff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_R8G8B8A8_SRGB, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_SRGB, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xbc, 0x00, 0x00, 0x00), UNPACKED_1x1(0.502886458033, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_SRGB, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0x00, 0x00, 0x00), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_SRGB, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0xbc, 0x00, 0x00), UNPACKED_1x1(0.0, 0.502886458033, 0.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_SRGB, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0xff, 0x00, 0x00), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_SRGB, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0xbc, 0x00), UNPACKED_1x1(0.0, 0.0, 0.502886458033, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_SRGB, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0xff, 0x00), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_SRGB, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0xcc), UNPACKED_1x1(0.0, 0.0, 0.0, 0.8)}, + {PIPE_FORMAT_R8G8B8A8_SRGB, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0xff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8A8_SRGB, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0xff, 0xff, 0xff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_B8G8R8A8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_B8G8R8A8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000bc), UNPACKED_1x1(0.0, 0.0, 0.502886458033, 0.0)}, + {PIPE_FORMAT_B8G8R8A8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_B8G8R8A8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000bc00), UNPACKED_1x1(0.0, 0.502886458033, 0.0, 0.0)}, + {PIPE_FORMAT_B8G8R8A8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000ff00), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_B8G8R8A8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x00bc0000), UNPACKED_1x1(0.502886458033, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_B8G8R8A8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x00ff0000), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_B8G8R8A8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0xcc000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.8)}, + {PIPE_FORMAT_B8G8R8A8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_B8G8R8A8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_B8G8R8X8_SRGB, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_B8G8R8X8_SRGB, PACKED_1x32(0x00ffffff), PACKED_1x32(0x000000bc), UNPACKED_1x1(0.0, 0.0, 0.502886458033, 1.0)}, + {PIPE_FORMAT_B8G8R8X8_SRGB, PACKED_1x32(0x00ffffff), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_B8G8R8X8_SRGB, PACKED_1x32(0x00ffffff), PACKED_1x32(0x0000bc00), UNPACKED_1x1(0.0, 0.502886458033, 0.0, 1.0)}, + {PIPE_FORMAT_B8G8R8X8_SRGB, PACKED_1x32(0x00ffffff), PACKED_1x32(0x0000ff00), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_B8G8R8X8_SRGB, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00bc0000), UNPACKED_1x1(0.502886458033, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_B8G8R8X8_SRGB, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00ff0000), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_B8G8R8X8_SRGB, PACKED_1x32(0x00ffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_A8R8G8B8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_A8R8G8B8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000cc), UNPACKED_1x1(0.0, 0.0, 0.0, 0.8)}, + {PIPE_FORMAT_A8R8G8B8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_A8R8G8B8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000bc00), UNPACKED_1x1(0.502886458033, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_A8R8G8B8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000ff00), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_A8R8G8B8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x00bc0000), UNPACKED_1x1(0.0, 0.502886458033, 0.0, 0.0)}, + {PIPE_FORMAT_A8R8G8B8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x00ff0000), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_A8R8G8B8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0xbc000000), UNPACKED_1x1(0.0, 0.0, 0.502886458033, 0.0)}, + {PIPE_FORMAT_A8R8G8B8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_A8R8G8B8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_X8R8G8B8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_X8R8G8B8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0x0000bc00), UNPACKED_1x1(0.502886458033, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_X8R8G8B8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0x0000ff00), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_X8R8G8B8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0x00bc0000), UNPACKED_1x1(0.0, 0.502886458033, 0.0, 1.0)}, + {PIPE_FORMAT_X8R8G8B8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0x00ff0000), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_X8R8G8B8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0xbc000000), UNPACKED_1x1(0.0, 0.0, 0.502886458033, 1.0)}, + {PIPE_FORMAT_X8R8G8B8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0xff000000), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_X8R8G8B8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_A8B8G8R8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_A8B8G8R8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000cc), UNPACKED_1x1(0.0, 0.0, 0.0, 0.8)}, + {PIPE_FORMAT_A8B8G8R8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_A8B8G8R8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000bc00), UNPACKED_1x1(0.0, 0.0, 0.502886458033, 0.0)}, + {PIPE_FORMAT_A8B8G8R8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x0000ff00), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_A8B8G8R8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x00bc0000), UNPACKED_1x1(0.0, 0.502886458033, 0.0, 0.0)}, + {PIPE_FORMAT_A8B8G8R8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0x00ff0000), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_A8B8G8R8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0xbc000000), UNPACKED_1x1(0.502886458033, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_A8B8G8R8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_A8B8G8R8_SRGB, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_X8B8G8R8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_X8B8G8R8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0x0000bc00), UNPACKED_1x1(0.0, 0.0, 0.502886458033, 1.0)}, + {PIPE_FORMAT_X8B8G8R8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0x0000ff00), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_X8B8G8R8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0x00bc0000), UNPACKED_1x1(0.0, 0.502886458033, 0.0, 1.0)}, + {PIPE_FORMAT_X8B8G8R8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0x00ff0000), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_X8B8G8R8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0xbc000000), UNPACKED_1x1(0.502886458033, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_X8B8G8R8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0xff000000), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_X8B8G8R8_SRGB, PACKED_1x32(0xffffff00), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + /* + * Mixed-signed formats + */ + + {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x00, 0x00, 0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x7f, 0x00, 0x00, 0x00), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x81, 0x00, 0x00, 0x00), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x00, 0x7f, 0x00, 0x00), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x00, 0x81, 0x00, 0x00), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8SG8SB8UX8U_NORM, PACKED_4x8(0xff, 0xff, 0xff, 0x00), PACKED_4x8(0x00, 0x00, 0xff, 0x00), UNPACKED_1x1( 0.0, 0.0, 1.0, 1.0)}, + + {PIPE_FORMAT_R10SG10SB10SA2U_NORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R10SG10SB10SA2U_NORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000001ff), UNPACKED_1x1( 1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R10SG10SB10SA2U_NORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000201), UNPACKED_1x1(-1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R10SG10SB10SA2U_NORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x0007fc00), UNPACKED_1x1( 0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_R10SG10SB10SA2U_NORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00080400), UNPACKED_1x1( 0.0, -1.0, 0.0, 0.0)}, + {PIPE_FORMAT_R10SG10SB10SA2U_NORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x1ff00000), UNPACKED_1x1( 0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_R10SG10SB10SA2U_NORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x20100000), UNPACKED_1x1( 0.0, 0.0, -1.0, 0.0)}, + {PIPE_FORMAT_R10SG10SB10SA2U_NORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xc0000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0x000f), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0x0011), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0x01e0), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0x0220), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R5SG5SB6U_NORM, PACKED_1x16(0xffff), PACKED_1x16(0xfc00), UNPACKED_1x1( 0.0, 0.0, 1.0, 1.0)}, + + {PIPE_FORMAT_R8G8Bx_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_R8G8Bx_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x7f, 0x00), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8Bx_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x81, 0x00), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8Bx_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x7f), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8Bx_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x81), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)}, + + /* + * Depth-stencil formats + */ + + {PIPE_FORMAT_S8_USCALED, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_S8_USCALED, PACKED_1x8(0xff), PACKED_1x8(0xff), UNPACKED_1x1(0.0, 255.0, 0.0, 0.0)}, + + {PIPE_FORMAT_Z16_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_Z16_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + + {PIPE_FORMAT_Z32_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_Z32_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + + {PIPE_FORMAT_Z32_FLOAT, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_Z32_FLOAT, PACKED_1x32(0xffffffff), PACKED_1x32(0x3f800000), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + + {PIPE_FORMAT_Z24_UNORM_S8_USCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_Z24_UNORM_S8_USCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0x00ffffff), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_Z24_UNORM_S8_USCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), UNPACKED_1x1(0.0, 255.0, 0.0, 0.0)}, + {PIPE_FORMAT_Z24_UNORM_S8_USCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 255.0, 0.0, 0.0)}, + + {PIPE_FORMAT_S8_USCALED_Z24_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_S8_USCALED_Z24_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffff00), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_S8_USCALED_Z24_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000000ff), UNPACKED_1x1(0.0, 255.0, 0.0, 0.0)}, + {PIPE_FORMAT_S8_USCALED_Z24_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 255.0, 0.0, 0.0)}, + + {PIPE_FORMAT_Z24X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_Z24X8_UNORM, PACKED_1x32(0x00ffffff), PACKED_1x32(0x00ffffff), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + + {PIPE_FORMAT_X8Z24_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_X8Z24_UNORM, PACKED_1x32(0xffffff00), PACKED_1x32(0xffffff00), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + + {PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED, PACKED_2x32(0xffffffff, 0x000000ff), PACKED_2x32(0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED, PACKED_2x32(0xffffffff, 0x000000ff), PACKED_2x32(0x3f800000, 0x00000000), UNPACKED_1x1( 1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED, PACKED_2x32(0xffffffff, 0x000000ff), PACKED_2x32(0x00000000, 0x000000ff), UNPACKED_1x1( 0.0, 255.0, 0.0, 0.0)}, + + /* + * YUV formats + */ + + {PIPE_FORMAT_R8G8_B8G8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), UNPACKED_2x1(0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8_B8G8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0x00, 0x00, 0x00), UNPACKED_2x1(1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8_B8G8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0xff, 0x00, 0x00), UNPACKED_2x1(0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8_B8G8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0xff, 0x00), UNPACKED_2x1(0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_R8G8_B8G8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0xff), UNPACKED_2x1(0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8_B8G8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0xff, 0xff, 0xff), UNPACKED_2x1(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_G8R8_G8B8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), UNPACKED_2x1(0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_G8R8_G8B8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0x00, 0x00, 0x00), UNPACKED_2x1(0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_G8R8_G8B8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0xff, 0x00, 0x00), UNPACKED_2x1(1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_G8R8_G8B8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0xff, 0x00), UNPACKED_2x1(0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_G8R8_G8B8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0xff), UNPACKED_2x1(0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_G8R8_G8B8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0xff, 0xff, 0xff), UNPACKED_2x1(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0)}, + + /* + * TODO: Exercise the UV channels as well. + */ + {PIPE_FORMAT_UYVY, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x80, 0x10, 0x80, 0x10), UNPACKED_2x1(0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_UYVY, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x80, 0xeb, 0x80, 0x10), UNPACKED_2x1(1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_UYVY, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x80, 0x10, 0x80, 0xeb), UNPACKED_2x1(0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_YUYV, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x10, 0x80, 0x10, 0x80), UNPACKED_2x1(0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_YUYV, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xeb, 0x80, 0x10, 0x80), UNPACKED_2x1(1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_YUYV, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x10, 0x80, 0xeb, 0x80), UNPACKED_2x1(0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0)}, + + /* + * Compressed formats + */ + + { + PIPE_FORMAT_DXT1_RGB, + PACKED_8x8(0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff), + PACKED_8x8(0xf2, 0xd7, 0xb0, 0x20, 0xae, 0x2c, 0x6f, 0x97), + { + { + {0x99/255.0, 0xb0/255.0, 0x8e/255.0, 0xff/255.0}, + {0x5d/255.0, 0x62/255.0, 0x89/255.0, 0xff/255.0}, + {0x99/255.0, 0xb0/255.0, 0x8e/255.0, 0xff/255.0}, + {0x99/255.0, 0xb0/255.0, 0x8e/255.0, 0xff/255.0} + }, + { + {0xd6/255.0, 0xff/255.0, 0x94/255.0, 0xff/255.0}, + {0x5d/255.0, 0x62/255.0, 0x89/255.0, 0xff/255.0}, + {0x99/255.0, 0xb0/255.0, 0x8e/255.0, 0xff/255.0}, + {0xd6/255.0, 0xff/255.0, 0x94/255.0, 0xff/255.0} + }, + { + {0x5d/255.0, 0x62/255.0, 0x89/255.0, 0xff/255.0}, + {0x5d/255.0, 0x62/255.0, 0x89/255.0, 0xff/255.0}, + {0x99/255.0, 0xb0/255.0, 0x8e/255.0, 0xff/255.0}, + {0x21/255.0, 0x14/255.0, 0x84/255.0, 0xff/255.0} + }, + { + {0x5d/255.0, 0x62/255.0, 0x89/255.0, 0xff/255.0}, + {0x21/255.0, 0x14/255.0, 0x84/255.0, 0xff/255.0}, + {0x21/255.0, 0x14/255.0, 0x84/255.0, 0xff/255.0}, + {0x99/255.0, 0xb0/255.0, 0x8e/255.0, 0xff/255.0} + } + } + }, + { + PIPE_FORMAT_DXT1_RGBA, + PACKED_8x8(0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff), + PACKED_8x8(0xff, 0x2f, 0xa4, 0x72, 0xeb, 0xb2, 0xbd, 0xbe), + { + { + {0x00/255.0, 0x00/255.0, 0x00/255.0, 0x00/255.0}, + {0x4e/255.0, 0xaa/255.0, 0x90/255.0, 0xff/255.0}, + {0x4e/255.0, 0xaa/255.0, 0x90/255.0, 0xff/255.0}, + {0x00/255.0, 0x00/255.0, 0x00/255.0, 0x00/255.0} + }, + { + {0x4e/255.0, 0xaa/255.0, 0x90/255.0, 0xff/255.0}, + {0x29/255.0, 0xff/255.0, 0xff/255.0, 0xff/255.0}, + {0x00/255.0, 0x00/255.0, 0x00/255.0, 0x00/255.0}, + {0x4e/255.0, 0xaa/255.0, 0x90/255.0, 0xff/255.0} + }, + { + {0x73/255.0, 0x55/255.0, 0x21/255.0, 0xff/255.0}, + {0x00/255.0, 0x00/255.0, 0x00/255.0, 0x00/255.0}, + {0x00/255.0, 0x00/255.0, 0x00/255.0, 0x00/255.0}, + {0x4e/255.0, 0xaa/255.0, 0x90/255.0, 0xff/255.0} + }, + { + {0x4e/255.0, 0xaa/255.0, 0x90/255.0, 0xff/255.0}, + {0x00/255.0, 0x00/255.0, 0x00/255.0, 0x00/255.0}, + {0x00/255.0, 0x00/255.0, 0x00/255.0, 0x00/255.0}, + {0x4e/255.0, 0xaa/255.0, 0x90/255.0, 0xff/255.0} + } + } + }, + { + PIPE_FORMAT_DXT3_RGBA, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xe7, 0x4a, 0x8f, 0x96, 0x5b, 0xc1, 0x1c, 0x84, 0xf6, 0x8f, 0xab, 0x32, 0x2a, 0x9a, 0x95, 0x5a}, + { + { + {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0x77/255.0}, + {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0xee/255.0}, + {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0xaa/255.0}, + {0x8c/255.0, 0xff/255.0, 0xb5/255.0, 0x44/255.0} + }, + { + {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0xff/255.0}, + {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0x88/255.0}, + {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0x66/255.0}, + {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0x99/255.0} + }, + { + {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0xbb/255.0}, + {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0x55/255.0}, + {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0x11/255.0}, + {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0xcc/255.0} + }, + { + {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0xcc/255.0}, + {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0x11/255.0}, + {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0x44/255.0}, + {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0x88/255.0} + } + } + }, + { + PIPE_FORMAT_DXT5_RGBA, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xf8, 0x11, 0xc5, 0x0c, 0x9a, 0x73, 0xb4, 0x9c, 0xf6, 0x8f, 0xab, 0x32, 0x2a, 0x9a, 0x95, 0x5a}, + { + { + {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0x74/255.0}, + {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0xf8/255.0}, + {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0xb6/255.0}, + {0x8c/255.0, 0xff/255.0, 0xb5/255.0, 0x53/255.0} + }, + { + {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0xf8/255.0}, + {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0x95/255.0}, + {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0x53/255.0}, + {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0x95/255.0} + }, + { + {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0xb6/255.0}, + {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0x53/255.0}, + {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0x11/255.0}, + {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0xd7/255.0} + }, + { + {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0xb6/255.0}, + {0x6d/255.0, 0xc6/255.0, 0x96/255.0, 0x11/255.0}, + {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0x32/255.0}, + {0x31/255.0, 0x55/255.0, 0x5a/255.0, 0x95/255.0} + } + } + }, + + + /* + * Standard 8-bit integer formats + */ + + {PIPE_FORMAT_R8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8_UNORM, PACKED_1x8(0xff), PACKED_1x8(0xff), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R8G8_UNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8_UNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0xff, 0x00), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8_UNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0xff), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8_UNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0xff, 0xff), UNPACKED_1x1(1.0, 1.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R8G8B8_UNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_UNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xff, 0x00, 0x00), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_UNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0xff, 0x00), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_UNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0xff), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_UNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xff, 0xff, 0xff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0x00, 0x00, 0x00), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0xff, 0x00, 0x00), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0xff, 0x00), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0xff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8A8_UNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0xff, 0xff, 0xff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_R8_USCALED, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8_USCALED, PACKED_1x8(0xff), PACKED_1x8(0xff), UNPACKED_1x1(255.0, 0.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R8G8_USCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8_USCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0xff, 0x00), UNPACKED_1x1(255.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8_USCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0xff), UNPACKED_1x1( 0.0, 255.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8_USCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0xff, 0xff), UNPACKED_1x1(255.0, 255.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R8G8B8_USCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_USCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xff, 0x00, 0x00), UNPACKED_1x1(255.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_USCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0xff, 0x00), UNPACKED_1x1( 0.0, 255.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_USCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0xff), UNPACKED_1x1( 0.0, 0.0, 255.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_USCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0xff, 0xff, 0xff), UNPACKED_1x1(255.0, 255.0, 255.0, 1.0)}, + + {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0x00, 0x00, 0x00), UNPACKED_1x1(255.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0xff, 0x00, 0x00), UNPACKED_1x1( 0.0, 255.0, 0.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0xff, 0x00), UNPACKED_1x1( 0.0, 0.0, 255.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0xff), UNPACKED_1x1( 0.0, 0.0, 0.0, 255.0)}, + {PIPE_FORMAT_R8G8B8A8_USCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0xff, 0xff, 0xff, 0xff), UNPACKED_1x1(255.0, 255.0, 255.0, 255.0)}, + + {PIPE_FORMAT_R8_SNORM, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8_SNORM, PACKED_1x8(0xff), PACKED_1x8(0x7f), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8_SNORM, PACKED_1x8(0xff), PACKED_1x8(0x81), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R8G8_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x7f, 0x00), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x81, 0x00), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x7f), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8_SNORM, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x81), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x7f, 0x00, 0x00), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x81, 0x00, 0x00), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x7f, 0x00), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x81, 0x00), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x7f), UNPACKED_1x1( 0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_SNORM, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x81), UNPACKED_1x1( 0.0, 0.0, -1.0, 1.0)}, + + {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x7f, 0x00, 0x00, 0x00), UNPACKED_1x1( 1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x81, 0x00, 0x00, 0x00), UNPACKED_1x1(-1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x7f, 0x00, 0x00), UNPACKED_1x1( 0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x81, 0x00, 0x00), UNPACKED_1x1( 0.0, -1.0, 0.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x7f, 0x00), UNPACKED_1x1( 0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x81, 0x00), UNPACKED_1x1( 0.0, 0.0, -1.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x7f), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8A8_SNORM, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x81), UNPACKED_1x1( 0.0, 0.0, 0.0, -1.0)}, + + {PIPE_FORMAT_R8_SSCALED, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8_SSCALED, PACKED_1x8(0xff), PACKED_1x8(0x7f), UNPACKED_1x1( 127.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8_SSCALED, PACKED_1x8(0xff), PACKED_1x8(0x80), UNPACKED_1x1(-128.0, 0.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R8G8_SSCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8_SSCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x7f, 0x00), UNPACKED_1x1( 127.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8_SSCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x80, 0x00), UNPACKED_1x1(-128.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8_SSCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x7f), UNPACKED_1x1( 0.0, 127.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8_SSCALED, PACKED_2x8(0xff, 0xff), PACKED_2x8(0x00, 0x80), UNPACKED_1x1( 0.0, -128.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x7f, 0x00, 0x00), UNPACKED_1x1( 127.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x80, 0x00, 0x00), UNPACKED_1x1(-128.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x7f, 0x00), UNPACKED_1x1( 0.0, 127.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x80, 0x00), UNPACKED_1x1( 0.0, -128.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x7f), UNPACKED_1x1( 0.0, 0.0, 127.0, 1.0)}, + {PIPE_FORMAT_R8G8B8_SSCALED, PACKED_3x8(0xff, 0xff, 0xff), PACKED_3x8(0x00, 0x00, 0x80), UNPACKED_1x1( 0.0, 0.0, -128.0, 1.0)}, + + {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x00), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x7f, 0x00, 0x00, 0x00), UNPACKED_1x1( 127.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x80, 0x00, 0x00, 0x00), UNPACKED_1x1(-128.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x7f, 0x00, 0x00), UNPACKED_1x1( 0.0, 127.0, 0.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x80, 0x00, 0x00), UNPACKED_1x1( 0.0, -128.0, 0.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x7f, 0x00), UNPACKED_1x1( 0.0, 0.0, 127.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x80, 0x00), UNPACKED_1x1( 0.0, 0.0, -128.0, 0.0)}, + {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x7f), UNPACKED_1x1( 0.0, 0.0, 0.0, 127.0)}, + {PIPE_FORMAT_R8G8B8A8_SSCALED, PACKED_4x8(0xff, 0xff, 0xff, 0xff), PACKED_4x8(0x00, 0x00, 0x00, 0x80), UNPACKED_1x1( 0.0, 0.0, 0.0, -128.0)}, + + /* + * Standard 16-bit integer formats + */ + + {PIPE_FORMAT_R16_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R16G16_UNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16_UNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0xffff, 0x0000), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16_UNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0xffff), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16_UNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0xffff, 0xffff), UNPACKED_1x1(1.0, 1.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R16G16B16_UNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_UNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0xffff, 0x0000, 0x0000), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_UNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0xffff, 0x0000), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_UNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0xffff), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_UNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0xffff, 0xffff, 0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0xffff, 0x0000, 0x0000, 0x0000), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0xffff, 0x0000, 0x0000), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0xffff, 0x0000), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0xffff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16A16_UNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_R16_USCALED, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16_USCALED, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(65535.0, 0.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R16G16_USCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16_USCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0xffff, 0x0000), UNPACKED_1x1(65535.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16_USCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0xffff), UNPACKED_1x1( 0.0, 65535.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16_USCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0xffff, 0xffff), UNPACKED_1x1(65535.0, 65535.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R16G16B16_USCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_USCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0xffff, 0x0000, 0x0000), UNPACKED_1x1(65535.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_USCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0xffff, 0x0000), UNPACKED_1x1( 0.0, 65535.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_USCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0xffff), UNPACKED_1x1( 0.0, 0.0, 65535.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_USCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0xffff, 0xffff, 0xffff), UNPACKED_1x1(65535.0, 65535.0, 65535.0, 1.0)}, + + {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0xffff, 0x0000, 0x0000, 0x0000), UNPACKED_1x1(65535.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0xffff, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 65535.0, 0.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0xffff, 0x0000), UNPACKED_1x1( 0.0, 0.0, 65535.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0xffff), UNPACKED_1x1( 0.0, 0.0, 0.0, 65535.0)}, + {PIPE_FORMAT_R16G16B16A16_USCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), UNPACKED_1x1(65535.0, 65535.0, 65535.0, 65535.0)}, + + {PIPE_FORMAT_R16_SNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16_SNORM, PACKED_1x16(0xffff), PACKED_1x16(0x7fff), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16_SNORM, PACKED_1x16(0xffff), PACKED_1x16(0x8001), UNPACKED_1x1( -1.0, 0.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R16G16_SNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16_SNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x7fff, 0x0000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16_SNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x8001, 0x0000), UNPACKED_1x1( -1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16_SNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x7fff), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16_SNORM, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x8001), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x7fff, 0x0000, 0x0000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x8001, 0x0000, 0x0000), UNPACKED_1x1( -1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x7fff, 0x0000), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x8001, 0x0000), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x7fff), UNPACKED_1x1( 0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_SNORM, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x8001), UNPACKED_1x1( 0.0, 0.0, -1.0, 1.0)}, + + {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x7fff, 0x0000, 0x0000, 0x0000), UNPACKED_1x1( 1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x8001, 0x0000, 0x0000, 0x0000), UNPACKED_1x1( -1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x7fff, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x8001, 0x0000, 0x0000), UNPACKED_1x1( 0.0, -1.0, 0.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x7fff, 0x0000), UNPACKED_1x1( 0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x8001, 0x0000), UNPACKED_1x1( 0.0, 0.0, -1.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x7fff), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16A16_SNORM, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x8001), UNPACKED_1x1( 0.0, 0.0, 0.0, -1.0)}, + + {PIPE_FORMAT_R16_SSCALED, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16_SSCALED, PACKED_1x16(0xffff), PACKED_1x16(0x7fff), UNPACKED_1x1( 32767.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16_SSCALED, PACKED_1x16(0xffff), PACKED_1x16(0x8000), UNPACKED_1x1(-32768.0, 0.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R16G16_SSCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16_SSCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x7fff, 0x0000), UNPACKED_1x1( 32767.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16_SSCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x8000, 0x0000), UNPACKED_1x1(-32768.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16_SSCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x7fff), UNPACKED_1x1( 0.0, 32767.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16_SSCALED, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x8000), UNPACKED_1x1( 0.0, -32768.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x7fff, 0x0000, 0x0000), UNPACKED_1x1( 32767.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x8000, 0x0000, 0x0000), UNPACKED_1x1(-32768.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x7fff, 0x0000), UNPACKED_1x1( 0.0, 32767.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x8000, 0x0000), UNPACKED_1x1( 0.0, -32768.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x7fff), UNPACKED_1x1( 0.0, 0.0, 32767.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_SSCALED, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x8000), UNPACKED_1x1( 0.0, 0.0, -32768.0, 1.0)}, + + {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x7fff, 0x0000, 0x0000, 0x0000), UNPACKED_1x1( 32767.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x8000, 0x0000, 0x0000, 0x0000), UNPACKED_1x1(-32768.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x7fff, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 32767.0, 0.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x8000, 0x0000, 0x0000), UNPACKED_1x1( 0.0, -32768.0, 0.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x7fff, 0x0000), UNPACKED_1x1( 0.0, 0.0, 32767.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x8000, 0x0000), UNPACKED_1x1( 0.0, 0.0, -32768.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x7fff), UNPACKED_1x1( 0.0, 0.0, 0.0, 32767.0)}, + {PIPE_FORMAT_R16G16B16A16_SSCALED, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x8000), UNPACKED_1x1( 0.0, 0.0, 0.0, -32768.0)}, + + /* + * Standard 32-bit integer formats + * + * NOTE: We can't accurately represent integers larger than +/-0x1000000 + * with single precision floats, so that's as far as we test. + */ + + {PIPE_FORMAT_R32_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R32G32_UNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_UNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0xffffffff, 0x00000000), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_UNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0xffffffff), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_UNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0xffffffff, 0xffffffff), UNPACKED_1x1(1.0, 1.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R32G32B32_UNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_UNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0xffffffff, 0x00000000, 0x00000000), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_UNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0xffffffff, 0x00000000), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_UNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0xffffffff), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_UNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0xffffffff, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0xffffffff, 0x00000000, 0x00000000), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0xffffffff, 0x00000000), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0xffffffff), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32A32_UNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_R32_USCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32_USCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0x01000000), UNPACKED_1x1(16777216.0, 0.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R32G32_USCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_USCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x01000000, 0x00000000), UNPACKED_1x1(16777216.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_USCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x01000000), UNPACKED_1x1( 0.0, 16777216.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_USCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x01000000, 0x01000000), UNPACKED_1x1(16777216.0, 16777216.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R32G32B32_USCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_USCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x01000000, 0x00000000, 0x00000000), UNPACKED_1x1(16777216.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_USCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x01000000, 0x00000000), UNPACKED_1x1( 0.0, 16777216.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_USCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x01000000), UNPACKED_1x1( 0.0, 0.0, 16777216.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_USCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x01000000, 0x01000000, 0x01000000), UNPACKED_1x1(16777216.0, 16777216.0, 16777216.0, 1.0)}, + + {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x01000000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1(16777216.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x01000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 16777216.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x01000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 16777216.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x01000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 16777216.0)}, + {PIPE_FORMAT_R32G32B32A32_USCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x01000000, 0x01000000, 0x01000000, 0x01000000), UNPACKED_1x1(16777216.0, 16777216.0, 16777216.0, 16777216.0)}, + + {PIPE_FORMAT_R32_SNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32_SNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x7fffffff), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32_SNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x80000001), UNPACKED_1x1( -1.0, 0.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R32G32_SNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_SNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x7fffffff, 0x00000000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_SNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x80000001, 0x00000000), UNPACKED_1x1( -1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_SNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x7fffffff), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_SNORM, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x80000001), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x7fffffff, 0x00000000, 0x00000000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x80000001, 0x00000000, 0x00000000), UNPACKED_1x1( -1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x7fffffff, 0x00000000), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x80000001, 0x00000000), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x7fffffff), UNPACKED_1x1( 0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_SNORM, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x80000001), UNPACKED_1x1( 0.0, 0.0, -1.0, 1.0)}, + + {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x7fffffff, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x80000001, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( -1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x7fffffff, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x80000001, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, -1.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x7fffffff, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x80000001, 0x00000000), UNPACKED_1x1( 0.0, 0.0, -1.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x7fffffff), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32A32_SNORM, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x80000001), UNPACKED_1x1( 0.0, 0.0, 0.0, -1.0)}, + + {PIPE_FORMAT_R32_SSCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32_SSCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0x01000000), UNPACKED_1x1( 16777216.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32_SSCALED, PACKED_1x32(0xffffffff), PACKED_1x32(0xff000000), UNPACKED_1x1(-16777216.0, 0.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R32G32_SSCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_SSCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x01000000, 0x00000000), UNPACKED_1x1( 16777216.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_SSCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0xff000000, 0x00000000), UNPACKED_1x1(-16777216.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_SSCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x01000000), UNPACKED_1x1( 0.0, 16777216.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_SSCALED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0xff000000), UNPACKED_1x1( 0.0, -16777216.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x01000000, 0x00000000, 0x00000000), UNPACKED_1x1( 16777216.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0xff000000, 0x00000000, 0x00000000), UNPACKED_1x1(-16777216.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x01000000, 0x00000000), UNPACKED_1x1( 0.0, 16777216.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0xff000000, 0x00000000), UNPACKED_1x1( 0.0, -16777216.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x01000000), UNPACKED_1x1( 0.0, 0.0, 16777216.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_SSCALED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0xff000000), UNPACKED_1x1( 0.0, 0.0, -16777216.0, 1.0)}, + + {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x01000000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 16777216.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0xff000000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1(-16777216.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x01000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 16777216.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0xff000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, -16777216.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x01000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 16777216.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0xff000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, -16777216.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x01000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 16777216.0)}, + {PIPE_FORMAT_R32G32B32A32_SSCALED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0xff000000), UNPACKED_1x1( 0.0, 0.0, 0.0, -16777216.0)}, + + /* + * Standard 32-bit float formats + */ + + {PIPE_FORMAT_R32_FLOAT, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32_FLOAT, PACKED_1x32(0xffffffff), PACKED_1x32(0x3f800000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32_FLOAT, PACKED_1x32(0xffffffff), PACKED_1x32(0xbf800000), UNPACKED_1x1( -1.0, 0.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x3f800000, 0x00000000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0xbf800000, 0x00000000), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x3f800000), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0xbf800000), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_FLOAT, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x3f800000, 0x3f800000), UNPACKED_1x1( 1.0, 1.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x3f800000, 0x00000000, 0x00000000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0xbf800000, 0x00000000, 0x00000000), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x3f800000, 0x00000000), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0xbf800000, 0x00000000), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x3f800000), UNPACKED_1x1( 0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0xbf800000), UNPACKED_1x1( 0.0, 0.0, -1.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_FLOAT, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x3f800000, 0x3f800000, 0x3f800000), UNPACKED_1x1( 1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x3f800000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0xbf800000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1(-1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x3f800000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0xbf800000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, -1.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x3f800000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0xbf800000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, -1.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x3f800000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0xbf800000), UNPACKED_1x1( 0.0, 0.0, 0.0, -1.0)}, + {PIPE_FORMAT_R32G32B32A32_FLOAT, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000), UNPACKED_1x1( 1.0, 1.0, 1.0, 1.0)}, + + /* + * Half float formats + */ + + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x3c00), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0xbc00), UNPACKED_1x1( -1.0, 0.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R16G16_FLOAT, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16_FLOAT, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x3c00, 0x0000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16_FLOAT, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0xbc00, 0x0000), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16_FLOAT, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0x3c00), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16_FLOAT, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x0000, 0xbc00), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16_FLOAT, PACKED_2x16(0xffff, 0xffff), PACKED_2x16(0x3c00, 0x3c00), UNPACKED_1x1( 1.0, 1.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R16G16B16_FLOAT, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_FLOAT, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x3c00, 0x0000, 0x0000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_FLOAT, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0xbc00, 0x0000, 0x0000), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_FLOAT, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x3c00, 0x0000), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_FLOAT, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0xbc00, 0x0000), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_FLOAT, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0x3c00), UNPACKED_1x1( 0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_FLOAT, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x0000, 0x0000, 0xbc00), UNPACKED_1x1( 0.0, 0.0, -1.0, 1.0)}, + {PIPE_FORMAT_R16G16B16_FLOAT, PACKED_3x16(0xffff, 0xffff, 0xffff), PACKED_3x16(0x3c00, 0x3c00, 0x3c00), UNPACKED_1x1( 1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_R16G16B16A16_FLOAT, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_FLOAT, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x3c00, 0x0000, 0x0000, 0x0000), UNPACKED_1x1( 1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_FLOAT, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0xbc00, 0x0000, 0x0000, 0x0000), UNPACKED_1x1(-1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_FLOAT, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x3c00, 0x0000, 0x0000), UNPACKED_1x1( 0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_FLOAT, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0xbc00, 0x0000, 0x0000), UNPACKED_1x1( 0.0, -1.0, 0.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_FLOAT, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x3c00, 0x0000), UNPACKED_1x1( 0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_FLOAT, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0xbc00, 0x0000), UNPACKED_1x1( 0.0, 0.0, -1.0, 0.0)}, + {PIPE_FORMAT_R16G16B16A16_FLOAT, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0x3c00), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16G16B16A16_FLOAT, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x0000, 0x0000, 0x0000, 0xbc00), UNPACKED_1x1( 0.0, 0.0, 0.0, -1.0)}, + {PIPE_FORMAT_R16G16B16A16_FLOAT, PACKED_4x16(0xffff, 0xffff, 0xffff, 0xffff), PACKED_4x16(0x3c00, 0x3c00, 0x3c00, 0x3c00), UNPACKED_1x1( 1.0, 1.0, 1.0, 1.0)}, + + /* + * 32-bit fixed point formats + */ + + {PIPE_FORMAT_R32_FIXED, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32_FIXED, PACKED_1x32(0xffffffff), PACKED_1x32(0x00010000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32_FIXED, PACKED_1x32(0xffffffff), PACKED_1x32(0xffff0000), UNPACKED_1x1( -1.0, 0.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R32G32_FIXED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_FIXED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00010000, 0x00000000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_FIXED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0xffff0000, 0x00000000), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_FIXED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0x00010000), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_FIXED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00000000, 0xffff0000), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32_FIXED, PACKED_2x32(0xffffffff, 0xffffffff), PACKED_2x32(0x00010000, 0x00010000), UNPACKED_1x1( 1.0, 1.0, 0.0, 1.0)}, + + {PIPE_FORMAT_R32G32B32_FIXED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_FIXED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00010000, 0x00000000, 0x00000000), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_FIXED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0xffff0000, 0x00000000, 0x00000000), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_FIXED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00010000, 0x00000000), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_FIXED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0xffff0000, 0x00000000), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_FIXED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0x00010000), UNPACKED_1x1( 0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_FIXED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00000000, 0x00000000, 0xffff0000), UNPACKED_1x1( 0.0, 0.0, -1.0, 1.0)}, + {PIPE_FORMAT_R32G32B32_FIXED, PACKED_3x32(0xffffffff, 0xffffffff, 0xffffffff), PACKED_3x32(0x00010000, 0x00010000, 0x00010000), UNPACKED_1x1( 1.0, 1.0, 1.0, 1.0)}, + + {PIPE_FORMAT_R32G32B32A32_FIXED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_FIXED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00010000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1( 1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_FIXED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0xffff0000, 0x00000000, 0x00000000, 0x00000000), UNPACKED_1x1(-1.0, 0.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_FIXED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00010000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, 1.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_FIXED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0xffff0000, 0x00000000, 0x00000000), UNPACKED_1x1( 0.0, -1.0, 0.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_FIXED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00010000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, 1.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_FIXED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0xffff0000, 0x00000000), UNPACKED_1x1( 0.0, 0.0, -1.0, 0.0)}, + {PIPE_FORMAT_R32G32B32A32_FIXED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0x00010000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R32G32B32A32_FIXED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00000000, 0x00000000, 0x00000000, 0xffff0000), UNPACKED_1x1( 0.0, 0.0, 0.0, -1.0)}, + {PIPE_FORMAT_R32G32B32A32_FIXED, PACKED_4x32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), PACKED_4x32(0x00010000, 0x00010000, 0x00010000, 0x00010000), UNPACKED_1x1( 1.0, 1.0, 1.0, 1.0)}, + + /* + * D3D9 specific vertex formats + */ + + {PIPE_FORMAT_R10G10B10X2_USCALED, PACKED_1x32(0x3fffffff), PACKED_1x32(0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R10G10B10X2_USCALED, PACKED_1x32(0x3fffffff), PACKED_1x32(0x000003ff), UNPACKED_1x1(1023.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R10G10B10X2_USCALED, PACKED_1x32(0x3fffffff), PACKED_1x32(0x000ffc00), UNPACKED_1x1( 0.0, 1023.0, 0.0, 1.0)}, + {PIPE_FORMAT_R10G10B10X2_USCALED, PACKED_1x32(0x3fffffff), PACKED_1x32(0x3ff00000), UNPACKED_1x1( 0.0, 0.0, 1023.0, 1.0)}, + {PIPE_FORMAT_R10G10B10X2_USCALED, PACKED_1x32(0x3fffffff), PACKED_1x32(0x3fffffff), UNPACKED_1x1(1023.0, 1023.0, 1023.0, 1.0)}, + + {PIPE_FORMAT_R10G10B10X2_SNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x00000000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R10G10B10X2_SNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x000001ff), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R10G10B10X2_SNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x00000201), UNPACKED_1x1(-1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R10G10B10X2_SNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x0007fc00), UNPACKED_1x1( 0.0, 1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R10G10B10X2_SNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x00080400), UNPACKED_1x1( 0.0, -1.0, 0.0, 1.0)}, + {PIPE_FORMAT_R10G10B10X2_SNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x1ff00000), UNPACKED_1x1( 0.0, 0.0, 1.0, 1.0)}, + {PIPE_FORMAT_R10G10B10X2_SNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x20100000), UNPACKED_1x1( 0.0, 0.0, -1.0, 1.0)}, + + /* + * Special formats that not fit anywhere else + */ + +}; + + +const unsigned util_format_nr_test_cases = Elements(util_format_test_cases); diff --git a/src/gallium/auxiliary/util/u_format_tests.h b/src/gallium/auxiliary/util/u_format_tests.h new file mode 100644 index 0000000000..f59563f4f4 --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_tests.h @@ -0,0 +1,71 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#ifndef U_FORMAT_TESTS_H_ +#define U_FORMAT_TESTS_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_format.h" + + +#define UTIL_FORMAT_MAX_PACKED_BYTES 16 +#define UTIL_FORMAT_MAX_UNPACKED_WIDTH 4 +#define UTIL_FORMAT_MAX_UNPACKED_HEIGHT 4 + + +/** + * A (packed, unpacked) color pair. + */ +struct util_format_test_case +{ + enum pipe_format format; + + /** + * Mask of the bits that actually meaningful data. Used to mask out the + * "X" channels. + */ + uint8_t mask[UTIL_FORMAT_MAX_PACKED_BYTES]; + + uint8_t packed[UTIL_FORMAT_MAX_PACKED_BYTES]; + + /** + * RGBA. + */ + double unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4]; +}; + + +extern const struct util_format_test_case +util_format_test_cases[]; + + +extern const unsigned util_format_nr_test_cases; + + +#endif /* U_FORMAT_TESTS_H_ */ diff --git a/src/gallium/auxiliary/util/u_format_yuv.c b/src/gallium/auxiliary/util/u_format_yuv.c new file mode 100644 index 0000000000..ab8bf29c97 --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_yuv.c @@ -0,0 +1,1047 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +/** + * @file + * YUV and RGB subsampled formats conversion. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + + +#include "util/u_format_yuv.h" + + +void +util_format_r8g8_b8g8_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + float *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + uint32_t value; + float r, g0, g1, b; + + for (x = 0; x + 1 < width; x += 2) { + value = *src++; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + r = ubyte_to_float((value >> 0) & 0xff); + g0 = ubyte_to_float((value >> 8) & 0xff); + b = ubyte_to_float((value >> 16) & 0xff); + g1 = ubyte_to_float((value >> 24) & 0xff); + + dst[0] = r; /* r */ + dst[1] = g0; /* g */ + dst[2] = b; /* b */ + dst[3] = 1.0f; /* a */ + dst += 4; + + dst[0] = r; /* r */ + dst[1] = g1; /* g */ + dst[2] = b; /* b */ + dst[3] = 1.0f; /* a */ + dst += 4; + } + + if (x < width) { + value = *src; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + r = ubyte_to_float((value >> 0) & 0xff); + g0 = ubyte_to_float((value >> 8) & 0xff); + b = ubyte_to_float((value >> 16) & 0xff); + g1 = ubyte_to_float((value >> 24) & 0xff); + + dst[0] = r; /* r */ + dst[1] = g0; /* g */ + dst[2] = b; /* b */ + dst[3] = 1.0f; /* a */ + } + + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_r8g8_b8g8_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + uint8_t *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + uint32_t value; + uint8_t r, g0, g1, b; + + for (x = 0; x + 1 < width; x += 2) { + value = *src++; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + r = (value >> 0) & 0xff; + g0 = (value >> 8) & 0xff; + b = (value >> 16) & 0xff; + g1 = (value >> 24) & 0xff; + + dst[0] = r; /* r */ + dst[1] = g0; /* g */ + dst[2] = b; /* b */ + dst[3] = 0xff; /* a */ + dst += 4; + + dst[0] = r; /* r */ + dst[1] = g1; /* g */ + dst[2] = b; /* b */ + dst[3] = 0xff; /* a */ + dst += 4; + } + + if (x < width) { + value = *src; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + r = (value >> 0) & 0xff; + g0 = (value >> 8) & 0xff; + b = (value >> 16) & 0xff; + g1 = (value >> 24) & 0xff; + + dst[0] = r; /* r */ + dst[1] = g0; /* g */ + dst[2] = b; /* b */ + dst[3] = 0xff; /* a */ + } + + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_r8g8_b8g8_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + const float *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + float r, g0, g1, b; + uint32_t value; + + for (x = 0; x + 1 < width; x += 2) { + r = 0.5f*(src[0] + src[4]); + g0 = src[1]; + g1 = src[5]; + b = 0.5f*(src[2] + src[6]); + + value = float_to_ubyte(r); + value |= float_to_ubyte(g0) << 8; + value |= float_to_ubyte(b) << 16; + value |= float_to_ubyte(g1) << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst++ = value; + + src += 8; + } + + if (x < width) { + r = src[0]; + g0 = src[1]; + g1 = 0; + b = src[2]; + + value = float_to_ubyte(r); + value |= float_to_ubyte(g0) << 8; + value |= float_to_ubyte(b) << 16; + value |= float_to_ubyte(g1) << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst = value; + } + + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_r8g8_b8g8_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + const uint8_t *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + uint32_t r, g0, g1, b; + uint32_t value; + + for (x = 0; x + 1 < width; x += 2) { + r = (src[0] + src[4] + 1) >> 1; + g0 = src[1]; + g1 = src[5]; + b = (src[2] + src[6] + 1) >> 1; + + value = r; + value |= g0 << 8; + value |= b << 16; + value |= g1 << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst++ = value; + + src += 8; + } + + if (x < width) { + r = src[0]; + g0 = src[1]; + g1 = 0; + b = src[2]; + + value = r; + value |= g0 << 8; + value |= b << 16; + value |= g1 << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst = value; + } + + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_r8g8_b8g8_unorm_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) +{ + assert(i < 2); + assert(j < 1); + + dst[0] = ubyte_to_float(src[0]); /* r */ + dst[1] = ubyte_to_float(src[1 + 2*i]); /* g */ + dst[2] = ubyte_to_float(src[2]); /* b */ + dst[3] = 1.0f; /* a */ +} + + +void +util_format_g8r8_g8b8_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + float *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + uint32_t value; + float r, g0, g1, b; + + for (x = 0; x + 1 < width; x += 2) { + value = *src++; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + g0 = ubyte_to_float((value >> 0) & 0xff); + r = ubyte_to_float((value >> 8) & 0xff); + g1 = ubyte_to_float((value >> 16) & 0xff); + b = ubyte_to_float((value >> 24) & 0xff); + + dst[0] = r; /* r */ + dst[1] = g0; /* g */ + dst[2] = b; /* b */ + dst[3] = 1.0f; /* a */ + dst += 4; + + dst[0] = r; /* r */ + dst[1] = g1; /* g */ + dst[2] = b; /* b */ + dst[3] = 1.0f; /* a */ + dst += 4; + } + + if (x < width) { + value = *src; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + g0 = ubyte_to_float((value >> 0) & 0xff); + r = ubyte_to_float((value >> 8) & 0xff); + g1 = ubyte_to_float((value >> 16) & 0xff); + b = ubyte_to_float((value >> 24) & 0xff); + + dst[0] = r; /* r */ + dst[1] = g0; /* g */ + dst[2] = b; /* b */ + dst[3] = 1.0f; /* a */ + } + + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_g8r8_g8b8_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + uint8_t *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + uint32_t value; + uint8_t r, g0, g1, b; + + for (x = 0; x + 1 < width; x += 2) { + value = *src++; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + g0 = (value >> 0) & 0xff; + r = (value >> 8) & 0xff; + g1 = (value >> 16) & 0xff; + b = (value >> 24) & 0xff; + + dst[0] = r; /* r */ + dst[1] = g0; /* g */ + dst[2] = b; /* b */ + dst[3] = 0xff; /* a */ + dst += 4; + + dst[0] = r; /* r */ + dst[1] = g1; /* g */ + dst[2] = b; /* b */ + dst[3] = 0xff; /* a */ + dst += 4; + } + + if (x < width) { + value = *src; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + g0 = (value >> 0) & 0xff; + r = (value >> 8) & 0xff; + g1 = (value >> 16) & 0xff; + b = (value >> 24) & 0xff; + + dst[0] = r; /* r */ + dst[1] = g0; /* g */ + dst[2] = b; /* b */ + dst[3] = 0xff; /* a */ + } + + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_g8r8_g8b8_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + const float *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + float r, g0, g1, b; + uint32_t value; + + for (x = 0; x + 1 < width; x += 2) { + r = 0.5f*(src[0] + src[4]); + g0 = src[1]; + g1 = src[5]; + b = 0.5f*(src[2] + src[6]); + + value = float_to_ubyte(g0); + value |= float_to_ubyte(r) << 8; + value |= float_to_ubyte(g1) << 16; + value |= float_to_ubyte(b) << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst++ = value; + + src += 8; + } + + if (x < width) { + r = src[0]; + g0 = src[1]; + g1 = 0; + b = src[2]; + + value = float_to_ubyte(g0); + value |= float_to_ubyte(r) << 8; + value |= float_to_ubyte(g1) << 16; + value |= float_to_ubyte(b) << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst = value; + } + + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_g8r8_g8b8_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + const uint8_t *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + uint32_t r, g0, g1, b; + uint32_t value; + + for (x = 0; x + 1 < width; x += 2) { + r = (src[0] + src[4] + 1) >> 1; + g0 = src[1]; + g1 = src[5]; + b = (src[2] + src[6] + 1) >> 1; + + value = g0; + value |= r << 8; + value |= g1 << 16; + value |= b << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst++ = value; + + src += 8; + } + + if (x < width) { + r = src[0]; + g0 = src[1]; + g1 = 0; + b = src[2]; + + value = g0; + value |= r << 8; + value |= g1 << 16; + value |= b << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst = value; + } + + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_g8r8_g8b8_unorm_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) +{ + assert(i < 2); + assert(j < 1); + + dst[0] = ubyte_to_float(src[1]); /* r */ + dst[1] = ubyte_to_float(src[0 + 2*i]); /* g */ + dst[2] = ubyte_to_float(src[3]); /* b */ + dst[3] = 1.0f; /* a */ +} + + +void +util_format_uyvy_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + float *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + uint32_t value; + uint8_t y0, y1, u, v; + + for (x = 0; x + 1 < width; x += 2) { + value = *src++; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + u = (value >> 0) & 0xff; + y0 = (value >> 8) & 0xff; + v = (value >> 16) & 0xff; + y1 = (value >> 24) & 0xff; + + util_format_yuv_to_rgb_float(y0, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 1.0f; /* a */ + dst += 4; + + util_format_yuv_to_rgb_float(y1, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 1.0f; /* a */ + dst += 4; + } + + if (x < width) { + value = *src; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + u = (value >> 0) & 0xff; + y0 = (value >> 8) & 0xff; + v = (value >> 16) & 0xff; + y1 = (value >> 24) & 0xff; + + util_format_yuv_to_rgb_float(y0, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 1.0f; /* a */ + } + + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_uyvy_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + uint8_t *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + uint32_t value; + uint8_t y0, y1, u, v; + + for (x = 0; x + 1 < width; x += 2) { + value = *src++; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + u = (value >> 0) & 0xff; + y0 = (value >> 8) & 0xff; + v = (value >> 16) & 0xff; + y1 = (value >> 24) & 0xff; + + util_format_yuv_to_rgb_8unorm(y0, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 0xff; /* a */ + dst += 4; + + util_format_yuv_to_rgb_8unorm(y1, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 0xff; /* a */ + dst += 4; + } + + if (x < width) { + value = *src; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + u = (value >> 0) & 0xff; + y0 = (value >> 8) & 0xff; + v = (value >> 16) & 0xff; + y1 = (value >> 24) & 0xff; + + util_format_yuv_to_rgb_8unorm(y0, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 0xff; /* a */ + } + + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_uyvy_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + const float *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + uint8_t y0, y1, u, v; + uint32_t value; + + for (x = 0; x + 1 < width; x += 2) { + uint8_t y0, y1, u0, u1, v0, v1, u, v; + + util_format_rgb_float_to_yuv(src[0], src[1], src[2], + &y0, &u0, &v0); + util_format_rgb_float_to_yuv(src[4], src[5], src[6], + &y1, &u1, &v1); + + u = (u0 + u1 + 1) >> 1; + v = (v0 + v1 + 1) >> 1; + + value = u; + value |= y0 << 8; + value |= v << 16; + value |= y1 << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst++ = value; + + src += 8; + } + + if (x < width) { + util_format_rgb_float_to_yuv(src[0], src[1], src[2], + &y0, &u, &v); + y1 = 0; + + value = u; + value |= y0 << 8; + value |= v << 16; + value |= y1 << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst = value; + } + + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_uyvy_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + const uint8_t *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + uint8_t y0, y1, u, v; + uint32_t value; + + for (x = 0; x + 1 < width; x += 2) { + uint8_t y0, y1, u0, u1, v0, v1, u, v; + + util_format_rgb_8unorm_to_yuv(src[0], src[1], src[2], + &y0, &u0, &v0); + util_format_rgb_8unorm_to_yuv(src[4], src[5], src[6], + &y1, &u1, &v1); + + u = (u0 + u1 + 1) >> 1; + v = (v0 + v1 + 1) >> 1; + + value = u; + value |= y0 << 8; + value |= v << 16; + value |= y1 << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst++ = value; + + src += 8; + } + + if (x < width) { + util_format_rgb_8unorm_to_yuv(src[0], src[1], src[2], + &y0, &u, &v); + y1 = 0; + + value = u; + value |= y0 << 8; + value |= v << 16; + value |= y1 << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst = value; + } + + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_uyvy_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) +{ + uint8_t y, u, v; + + assert(i < 2); + assert(j < 1); + + y = src[1 + i*2]; + u = src[0]; + v = src[2]; + + util_format_yuv_to_rgb_float(y, u, v, &dst[0], &dst[1], &dst[2]); + + dst[3] = 1.0f; +} + + +void +util_format_yuyv_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + float *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + uint32_t value; + uint8_t y0, y1, u, v; + + for (x = 0; x + 1 < width; x += 2) { + value = *src++; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + y0 = (value >> 0) & 0xff; + u = (value >> 8) & 0xff; + y1 = (value >> 16) & 0xff; + v = (value >> 24) & 0xff; + + util_format_yuv_to_rgb_float(y0, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 1.0f; /* a */ + dst += 4; + + util_format_yuv_to_rgb_float(y1, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 1.0f; /* a */ + dst += 4; + } + + if (x < width) { + value = *src; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + y0 = (value >> 0) & 0xff; + u = (value >> 8) & 0xff; + y1 = (value >> 16) & 0xff; + v = (value >> 24) & 0xff; + + util_format_yuv_to_rgb_float(y0, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 1.0f; /* a */ + } + + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_yuyv_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + uint8_t *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + uint32_t value; + uint8_t y0, y1, u, v; + + for (x = 0; x + 1 < width; x += 2) { + value = *src++; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + y0 = (value >> 0) & 0xff; + u = (value >> 8) & 0xff; + y1 = (value >> 16) & 0xff; + v = (value >> 24) & 0xff; + + util_format_yuv_to_rgb_8unorm(y0, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 0xff; /* a */ + dst += 4; + + util_format_yuv_to_rgb_8unorm(y1, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 0xff; /* a */ + dst += 4; + } + + if (x < width) { + value = *src; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + y0 = (value >> 0) & 0xff; + u = (value >> 8) & 0xff; + y1 = (value >> 16) & 0xff; + v = (value >> 24) & 0xff; + + util_format_yuv_to_rgb_8unorm(y0, u, v, &dst[0], &dst[1], &dst[2]); + dst[3] = 0xff; /* a */ + } + + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + + +void +util_format_yuyv_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + const float *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + uint8_t y0, y1, u, v; + uint32_t value; + + for (x = 0; x + 1 < width; x += 2) { + uint8_t y0, y1, u0, u1, v0, v1, u, v; + + util_format_rgb_float_to_yuv(src[0], src[1], src[2], + &y0, &u0, &v0); + util_format_rgb_float_to_yuv(src[4], src[5], src[6], + &y1, &u1, &v1); + + u = (u0 + u1 + 1) >> 1; + v = (v0 + v1 + 1) >> 1; + + value = y0; + value |= u << 8; + value |= y1 << 16; + value |= v << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst++ = value; + + src += 8; + } + + if (x < width) { + util_format_rgb_float_to_yuv(src[0], src[1], src[2], + &y0, &u, &v); + y1 = 0; + + value = y0; + value |= u << 8; + value |= y1 << 16; + value |= v << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst = value; + } + + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_yuyv_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + + for (y = 0; y < height; y += 1) { + const uint8_t *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + uint8_t y0, y1, u, v; + uint32_t value; + + for (x = 0; x + 1 < width; x += 2) { + uint8_t y0, y1, u0, u1, v0, v1, u, v; + + util_format_rgb_8unorm_to_yuv(src[0], src[1], src[2], + &y0, &u0, &v0); + util_format_rgb_8unorm_to_yuv(src[4], src[5], src[6], + &y1, &u1, &v1); + + u = (u0 + u1 + 1) >> 1; + v = (v0 + v1 + 1) >> 1; + + value = y0; + value |= u << 8; + value |= y1 << 16; + value |= v << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst++ = value; + + src += 8; + } + + if (x < width) { + util_format_rgb_8unorm_to_yuv(src[0], src[1], src[2], + &y0, &u, &v); + y1 = 0; + + value = y0; + value |= u << 8; + value |= y1 << 16; + value |= v << 24; + +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + + *dst = value; + } + + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + + +void +util_format_yuyv_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j) +{ + uint8_t y, u, v; + + assert(i < 2); + assert(j < 1); + + y = src[0 + i*2]; + u = src[1]; + v = src[3]; + + util_format_yuv_to_rgb_float(y, u, v, &dst[0], &dst[1], &dst[2]); + + dst[3] = 1.0f; +} diff --git a/src/gallium/auxiliary/util/u_format_yuv.h b/src/gallium/auxiliary/util/u_format_yuv.h new file mode 100644 index 0000000000..dc9632346d --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_yuv.h @@ -0,0 +1,223 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +/** + * @file + * YUV colorspace conversion. + * + * @author Brian Paul <brianp@vmware.com> + * @author Michal Krol <michal@vmware.com> + * @author Jose Fonseca <jfonseca@vmware.com> + * + * See also: + * - http://www.fourcc.org/fccyvrgb.php + * - http://msdn.microsoft.com/en-us/library/ms893078 + * - http://en.wikipedia.org/wiki/YUV + */ + + +#ifndef U_FORMAT_YUV_H_ +#define U_FORMAT_YUV_H_ + + +#include "pipe/p_compiler.h" +#include "u_math.h" + + +/* + * TODO: Ensure we use consistent and right floating formulas, with enough + * precision in the coefficients. + */ + +static INLINE void +util_format_rgb_float_to_yuv(float r, float g, float b, + uint8_t *y, uint8_t *u, uint8_t *v) +{ + const float _r = CLAMP(r, 0.0f, 1.0f); + const float _g = CLAMP(g, 0.0f, 1.0f); + const float _b = CLAMP(b, 0.0f, 1.0f); + + const float scale = 255.0f; + + const int _y = scale * ( (0.257f * _r) + (0.504f * _g) + (0.098f * _b)); + const int _u = scale * (-(0.148f * _r) - (0.291f * _g) + (0.439f * _b)); + const int _v = scale * ( (0.439f * _r) - (0.368f * _g) - (0.071f * _b)); + + *y = _y + 16; + *u = _u + 128; + *v = _v + 128; +} + + +static INLINE void +util_format_yuv_to_rgb_float(uint8_t y, uint8_t u, uint8_t v, + float *r, float *g, float *b) +{ + const int _y = y - 16; + const int _u = u - 128; + const int _v = v - 128; + + const float y_factor = 255.0f / 219.0f; + + const float scale = 1.0f / 255.0f; + + *r = scale * (y_factor * _y + 1.596f * _v); + *g = scale * (y_factor * _y - 0.391f * _u - 0.813f * _v); + *b = scale * (y_factor * _y + 2.018f * _u ); +} + + +static INLINE void +util_format_rgb_8unorm_to_yuv(uint8_t r, uint8_t g, uint8_t b, + uint8_t *y, uint8_t *u, uint8_t *v) +{ + *y = (( 66 * r + 129 * g + 25 * b + 128) >> 8) + 16; + *u = (( -38 * r - 74 * g + 112 * b + 128) >> 8) + 128; + *v = (( 112 * r - 94 * g - 18 * b + 128) >> 8) + 128; +} + + +static INLINE void +util_format_yuv_to_rgb_8unorm(uint8_t y, uint8_t u, uint8_t v, + uint8_t *r, uint8_t *g, uint8_t *b) +{ + const int _y = y - 16; + const int _u = u - 128; + const int _v = v - 128; + + const int _r = (298 * _y + 409 * _v + 128) >> 8; + const int _g = (298 * _y - 100 * _u - 208 * _v + 128) >> 8; + const int _b = (298 * _y + 516 * _u + 128) >> 8; + + *r = CLAMP(_r, 0, 255); + *g = CLAMP(_g, 0, 255); + *b = CLAMP(_b, 0, 255); +} + + + +void +util_format_uyvy_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_uyvy_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_uyvy_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_uyvy_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_uyvy_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j); + +void +util_format_yuyv_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_yuyv_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_yuyv_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_yuyv_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_yuyv_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j); + + +void +util_format_r8g8_b8g8_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_r8g8_b8g8_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_r8g8_b8g8_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_r8g8_b8g8_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_r8g8_b8g8_unorm_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j); + +void +util_format_g8r8_g8b8_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_g8r8_g8b8_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_g8r8_g8b8_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_g8r8_g8b8_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height); + +void +util_format_g8r8_g8b8_unorm_fetch_rgba_float(float *dst, const uint8_t *src, + unsigned i, unsigned j); + + + +#endif /* U_FORMAT_YUV_H_ */ diff --git a/src/gallium/auxiliary/util/u_format_zs.c b/src/gallium/auxiliary/util/u_format_zs.c new file mode 100644 index 0000000000..792d69c214 --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_zs.c @@ -0,0 +1,920 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#include "u_debug.h" +#include "u_math.h" +#include "u_format_zs.h" + + +/* + * z32_unorm conversion functions + */ + +static INLINE uint16_t +z32_unorm_to_z16_unorm(uint32_t z) +{ + /* z * 0xffff / 0xffffffff */ + return z >> 16; +} + +static INLINE uint32_t +z16_unorm_to_z32_unorm(uint16_t z) +{ + /* z * 0xffffffff / 0xffff */ + return (z << 16) | z; +} + +static INLINE uint32_t +z32_unorm_to_z24_unorm(uint32_t z) +{ + /* z * 0xffffff / 0xffffffff */ + return z >> 8; +} + +static INLINE uint32_t +z24_unorm_to_z32_unorm(uint32_t z) +{ + /* z * 0xffffffff / 0xffffff */ + return (z << 8) | (z >> 16); +} + + +/* + * z32_float conversion functions + */ + +static INLINE uint16_t +z32_float_to_z16_unorm(float z) +{ + const float scale = 0xffff; + return (uint16_t)(z * scale); +} + +static INLINE float +z16_unorm_to_z32_float(uint16_t z) +{ + const float scale = 1.0 / 0xffff; + return (float)(z * scale); +} + +static INLINE uint32_t +z32_float_to_z24_unorm(float z) +{ + const double scale = 0xffffff; + return (uint32_t)(z * scale) & 0xffffff; +} + +static INLINE float +z24_unorm_to_z32_float(uint32_t z) +{ + const double scale = 1.0 / 0xffffff; + return (float)(z * scale); +} + +static INLINE uint32_t +z32_float_to_z32_unorm(float z) +{ + const double scale = 0xffffffff; + return (uint32_t)(z * scale); +} + +static INLINE float +z32_unorm_to_z32_float(uint32_t z) +{ + const double scale = 1.0 / 0xffffffff; + return (float)(z * scale); +} + + +void +util_format_s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned y; + for(y = 0; y < height; ++y) { + memcpy(dst_row, src_row, width); + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned y; + for(y = 0; y < height; ++y) { + memcpy(dst_row, src_row, width); + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z16_unorm_unpack_z_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + float *dst = dst_row; + const uint16_t *src = (const uint16_t *)src_row; + for(x = 0; x < width; ++x) { + uint16_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap16(value); +#endif + *dst++ = z16_unorm_to_z32_float(value); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z16_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const float *src = src_row; + uint16_t *dst = (uint16_t *)dst_row; + for(x = 0; x < width; ++x) { + uint16_t value; + value = z32_float_to_z16_unorm(*src++); +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap16(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z16_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + uint32_t *dst = dst_row; + const uint16_t *src = (const uint16_t *)src_row; + for(x = 0; x < width; ++x) { + uint16_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap16(value); +#endif + *dst++ = z16_unorm_to_z32_unorm(value); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z16_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, + const uint32_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const uint32_t *src = src_row; + uint16_t *dst = (uint16_t *)dst_row; + for(x = 0; x < width; ++x) { + uint16_t value; + value = z32_unorm_to_z16_unorm(*src++); +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap16(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z32_unorm_unpack_z_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + float *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = z32_unorm_to_z32_float(value); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z32_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const float *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value; + value = z32_float_to_z32_unorm(*src++); +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z32_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned y; + for(y = 0; y < height; ++y) { + memcpy(dst_row, src_row, width * 4); + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z32_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, + const uint32_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned y; + for(y = 0; y < height; ++y) { + memcpy(dst_row, src_row, width * 4); + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z32_float_unpack_z_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned y; + for(y = 0; y < height; ++y) { + memcpy(dst_row, src_row, width * 4); + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z32_float_pack_z_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned y; + for(y = 0; y < height; ++y) { + memcpy(dst_row, src_row, width * 4); + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z32_float_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + uint32_t *dst = dst_row; + const float *src = (const float *)src_row; + for(x = 0; x < width; ++x) { + *dst++ = z32_float_to_z32_unorm(*src++); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z32_float_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, + const uint32_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const uint32_t *src = src_row; + float *dst = (float *)dst_row; + for(x = 0; x < width; ++x) { + *dst++ = z32_unorm_to_z32_float(*src++); + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z24_unorm_s8_uscaled_unpack_z_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + float *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = z24_unorm_to_z32_float(value & 0xffffff); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z24_unorm_s8_uscaled_pack_z_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const float *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value = *dst; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + value &= 0xff000000; + value |= z32_float_to_z24_unorm(*src++); +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z24_unorm_s8_uscaled_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + uint32_t *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = z24_unorm_to_z32_unorm(value & 0xffffff); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z24_unorm_s8_uscaled_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, + const uint32_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const uint32_t *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value= *dst; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + value &= 0xff000000; + value |= z32_unorm_to_z24_unorm(*src++); +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z24_unorm_s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + uint8_t *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value >> 24; + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z24_unorm_s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const uint8_t *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value = *dst; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + value &= 0x00ffffff; + value |= *src++ << 24; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_s8_uscaled_z24_unorm_unpack_z_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + float *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = z24_unorm_to_z32_float(value >> 8); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_s8_uscaled_z24_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const float *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value = *dst; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + value &= 0x000000ff; + value |= z32_float_to_z24_unorm(*src++) << 8; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_s8_uscaled_z24_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + uint32_t *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = z24_unorm_to_z32_unorm(value >> 8); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_s8_uscaled_z24_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, + const uint32_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const uint32_t *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value = *dst; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + value &= 0x000000ff; + value |= *src++ & 0xffffff00; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_s8_uscaled_z24_unorm_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + uint8_t *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value & 0xff; + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_s8_uscaled_z24_unorm_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const uint8_t *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value = *dst; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + value &= 0xffffff00; + value |= *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z24x8_unorm_unpack_z_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + float *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = z24_unorm_to_z32_float(value & 0xffffff); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z24x8_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const float *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value; + value = z32_float_to_z24_unorm(*src++); +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z24x8_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + uint32_t *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = z24_unorm_to_z32_unorm(value & 0xffffff); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z24x8_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, + const uint32_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const uint32_t *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value; + value = z32_unorm_to_z24_unorm(*src++); +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_x8z24_unorm_unpack_z_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + float *dst = dst_row; + const uint32_t *src = (uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = z24_unorm_to_z32_float(value >> 8); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_x8z24_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const float *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value; + value = z32_float_to_z24_unorm(*src++) << 8; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_x8z24_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + uint32_t *dst = dst_row; + const uint32_t *src = (const uint32_t *)src_row; + for(x = 0; x < width; ++x) { + uint32_t value = *src++; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = z24_unorm_to_z32_unorm(value >> 8); + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_x8z24_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, + const uint32_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const uint32_t *src = src_row; + uint32_t *dst = (uint32_t *)dst_row; + for(x = 0; x < width; ++x) { + uint32_t value; + value = z32_unorm_to_z24_unorm(*src++) << 8; +#ifdef PIPE_ARCH_BIG_ENDIAN + value = util_bswap32(value); +#endif + *dst++ = value; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z32_float_s8x24_uscaled_unpack_z_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + float *dst = dst_row; + const float *src = (const float *)src_row; + for(x = 0; x < width; ++x) { + *dst = *src; + src += 2; + dst += 1; + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z32_float_s8x24_uscaled_pack_z_float(uint8_t *dst_row, unsigned dst_stride, + const float *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const float *src = src_row; + float *dst = (float *)dst_row; + for(x = 0; x < width; ++x) { + *dst = *src; + src += 1; + dst += 2; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z32_float_s8x24_uscaled_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + uint32_t *dst = dst_row; + const float *src = (const float *)src_row; + for(x = 0; x < width; ++x) { + *dst = z32_float_to_z32_unorm(*src); + src += 2; + dst += 1; + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z32_float_s8x24_uscaled_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, + const uint32_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const uint32_t *src = src_row; + float *dst = (float *)dst_row; + for(x = 0; x < width; ++x) { + *dst++ = z32_unorm_to_z32_float(*src++); + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + +void +util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + uint8_t *dst = dst_row; + const uint8_t *src = src_row + 4; + for(x = 0; x < width; ++x) { + *dst = *src; + src += 8; + dst += 1; + } + src_row += src_stride/sizeof(*src_row); + dst_row += dst_stride/sizeof(*dst_row); + } +} + +void +util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + unsigned x, y; + for(y = 0; y < height; ++y) { + const uint8_t *src = src_row; + uint8_t *dst = dst_row + 4; + for(x = 0; x < width; ++x) { + *dst = *src; + src += 1; + dst += 8; + } + dst_row += dst_stride/sizeof(*dst_row); + src_row += src_stride/sizeof(*src_row); + } +} + diff --git a/src/gallium/auxiliary/util/u_format_zs.h b/src/gallium/auxiliary/util/u_format_zs.h new file mode 100644 index 0000000000..650db4b95f --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_zs.h @@ -0,0 +1,196 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#ifndef U_FORMAT_ZS_H_ +#define U_FORMAT_ZS_H_ + + +#include "pipe/p_compiler.h" + + +void +util_format_s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z16_unorm_unpack_z_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z16_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z16_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z16_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, const uint32_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z32_unorm_unpack_z_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z32_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z32_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z32_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, const uint32_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z32_float_unpack_z_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z32_float_pack_z_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z32_float_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z32_float_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, const uint32_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z24_unorm_s8_uscaled_unpack_z_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z24_unorm_s8_uscaled_pack_z_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z24_unorm_s8_uscaled_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z24_unorm_s8_uscaled_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, const uint32_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z24_unorm_s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z24_unorm_s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_s8_uscaled_z24_unorm_unpack_z_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_s8_uscaled_z24_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_s8_uscaled_z24_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_s8_uscaled_z24_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, const uint32_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_s8_uscaled_z24_unorm_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_s8_uscaled_z24_unorm_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z24x8_unorm_unpack_z_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z24x8_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z24x8_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z24x8_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, const uint32_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_x8z24_unorm_unpack_z_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_x8z24_unorm_pack_z_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_x8z24_unorm_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_x8z24_unorm_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, const uint32_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z32_float_s8x24_uscaled_unpack_z_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z32_float_s8x24_uscaled_pack_z_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z32_float_s8x24_uscaled_unpack_z_32unorm(uint32_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z32_float_s8x24_uscaled_pack_z_32unorm(uint8_t *dst_row, unsigned dst_stride, const uint32_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +void +util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + +#endif /* U_FORMAT_ZS_H_ */ diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index fc027e48e4..eee6030ddc 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -62,11 +62,12 @@ struct gen_mipmap_state struct pipe_rasterizer_state rasterizer; struct pipe_sampler_state sampler; struct pipe_clip_state clip; + struct pipe_vertex_element velem[2]; void *vs; void *fs2d, *fsCube; - struct pipe_buffer *vbuf; /**< quad vertices */ + struct pipe_resource *vbuf; /**< quad vertices */ unsigned vbuf_slot; float vertices[4][2][4]; /**< vertex/texcoords for quad */ @@ -937,6 +938,7 @@ format_to_type_comps(enum pipe_format pformat, *datatype = DTYPE_UBYTE; *comps = 4; return; + case PIPE_FORMAT_B5G5R5X1_UNORM: case PIPE_FORMAT_B5G5R5A1_UNORM: *datatype = DTYPE_USHORT_1_5_5_5_REV; *comps = 4; @@ -1114,11 +1116,10 @@ reduce_3d(enum pipe_format pformat, static void make_1d_mipmap(struct gen_mipmap_state *ctx, - struct pipe_texture *pt, + struct pipe_resource *pt, uint face, uint baseLevel, uint lastLevel) { struct pipe_context *pipe = ctx->pipe; - struct pipe_screen *screen = pipe->screen; const uint zslice = 0; uint dstLevel; @@ -1127,38 +1128,37 @@ make_1d_mipmap(struct gen_mipmap_state *ctx, struct pipe_transfer *srcTrans, *dstTrans; void *srcMap, *dstMap; - srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, + srcTrans = pipe_get_transfer(pipe, pt, face, srcLevel, zslice, PIPE_TRANSFER_READ, 0, 0, u_minify(pt->width0, srcLevel), u_minify(pt->height0, srcLevel)); - dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, + dstTrans = pipe_get_transfer(pipe, pt, face, dstLevel, zslice, PIPE_TRANSFER_WRITE, 0, 0, u_minify(pt->width0, dstLevel), u_minify(pt->height0, dstLevel)); - srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); - dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); + srcMap = (ubyte *) pipe->transfer_map(pipe, srcTrans); + dstMap = (ubyte *) pipe->transfer_map(pipe, dstTrans); reduce_1d(pt->format, - srcTrans->width, srcMap, - dstTrans->width, dstMap); + srcTrans->box.width, srcMap, + dstTrans->box.width, dstMap); - screen->transfer_unmap(screen, srcTrans); - screen->transfer_unmap(screen, dstTrans); + pipe->transfer_unmap(pipe, srcTrans); + pipe->transfer_unmap(pipe, dstTrans); - screen->tex_transfer_destroy(srcTrans); - screen->tex_transfer_destroy(dstTrans); + pipe->transfer_destroy(pipe, srcTrans); + pipe->transfer_destroy(pipe, dstTrans); } } static void make_2d_mipmap(struct gen_mipmap_state *ctx, - struct pipe_texture *pt, + struct pipe_resource *pt, uint face, uint baseLevel, uint lastLevel) { struct pipe_context *pipe = ctx->pipe; - struct pipe_screen *screen = pipe->screen; const uint zslice = 0; uint dstLevel; @@ -1170,36 +1170,36 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, struct pipe_transfer *srcTrans, *dstTrans; ubyte *srcMap, *dstMap; - srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, - PIPE_TRANSFER_READ, 0, 0, - u_minify(pt->width0, srcLevel), - u_minify(pt->height0, srcLevel)); - dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, - PIPE_TRANSFER_WRITE, 0, 0, - u_minify(pt->width0, dstLevel), - u_minify(pt->height0, dstLevel)); - - srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); - dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); + srcTrans = pipe_get_transfer(pipe, pt, face, srcLevel, zslice, + PIPE_TRANSFER_READ, 0, 0, + u_minify(pt->width0, srcLevel), + u_minify(pt->height0, srcLevel)); + dstTrans = pipe_get_transfer(pipe, pt, face, dstLevel, zslice, + PIPE_TRANSFER_WRITE, 0, 0, + u_minify(pt->width0, dstLevel), + u_minify(pt->height0, dstLevel)); + + srcMap = (ubyte *) pipe->transfer_map(pipe, srcTrans); + dstMap = (ubyte *) pipe->transfer_map(pipe, dstTrans); reduce_2d(pt->format, - srcTrans->width, srcTrans->height, + srcTrans->box.width, srcTrans->box.height, srcTrans->stride, srcMap, - dstTrans->width, dstTrans->height, + dstTrans->box.width, dstTrans->box.height, dstTrans->stride, dstMap); - screen->transfer_unmap(screen, srcTrans); - screen->transfer_unmap(screen, dstTrans); + pipe->transfer_unmap(pipe, srcTrans); + pipe->transfer_unmap(pipe, dstTrans); - screen->tex_transfer_destroy(srcTrans); - screen->tex_transfer_destroy(dstTrans); + pipe->transfer_destroy(pipe, srcTrans); + pipe->transfer_destroy(pipe, dstTrans); } } static void make_3d_mipmap(struct gen_mipmap_state *ctx, - struct pipe_texture *pt, + struct pipe_resource *pt, uint face, uint baseLevel, uint lastLevel) { #if 0 @@ -1215,17 +1215,17 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, struct pipe_transfer *srcTrans, *dstTrans; ubyte *srcMap, *dstMap; - srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, + srcTrans = pipe->get_transfer(pipe, pt, face, srcLevel, zslice, PIPE_TRANSFER_READ, 0, 0, u_minify(pt->width0, srcLevel), u_minify(pt->height0, srcLevel)); - dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, + dstTrans = pipe->get_transfer(pipe, pt, face, dstLevel, zslice, PIPE_TRANSFER_WRITE, 0, 0, u_minify(pt->width0, dstLevel), u_minify(pt->height0, dstLevel)); - srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); - dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); + srcMap = (ubyte *) pipe->transfer_map(pipe, srcTrans); + dstMap = (ubyte *) pipe->transfer_map(pipe, dstTrans); reduce_3d(pt->format, srcTrans->width, srcTrans->height, @@ -1233,11 +1233,11 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, dstTrans->width, dstTrans->height, dstTrans->stride, dstMap); - screen->transfer_unmap(screen, srcTrans); - screen->transfer_unmap(screen, dstTrans); + pipe->transfer_unmap(pipe, srcTrans); + pipe->transfer_unmap(pipe, dstTrans); - screen->tex_transfer_destroy(srcTrans); - screen->tex_transfer_destroy(dstTrans); + pipe->transfer_destroy(pipe, srcTrans); + pipe->transfer_destroy(pipe, dstTrans); } #else (void) reduce_3d; @@ -1247,7 +1247,7 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, static void fallback_gen_mipmap(struct gen_mipmap_state *ctx, - struct pipe_texture *pt, + struct pipe_resource *pt, uint face, uint baseLevel, uint lastLevel) { switch (pt->target) { @@ -1307,6 +1307,15 @@ util_create_gen_mipmap(struct pipe_context *pipe, ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; ctx->sampler.normalized_coords = 1; + /* vertex elements state */ + memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2); + for (i = 0; i < 2; i++) { + ctx->velem[i].src_offset = i * 4 * sizeof(float); + ctx->velem[i].instance_divisor = 0; + ctx->velem[i].vertex_buffer_index = 0; + ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + } + /* vertex shader - still needed to specify mapping from fragment * shader input semantics to vertex elements */ @@ -1349,8 +1358,7 @@ get_next_slot(struct gen_mipmap_state *ctx) if (!ctx->vbuf) { ctx->vbuf = pipe_buffer_create(ctx->pipe->screen, - 32, - PIPE_BUFFER_USAGE_VERTEX, + PIPE_BIND_VERTEX_BUFFER, max_slots * sizeof ctx->vertices); } @@ -1411,7 +1419,7 @@ set_vertex_data(struct gen_mipmap_state *ctx, offset = get_next_slot( ctx ); - pipe_buffer_write_nooverlap(ctx->pipe->screen, ctx->vbuf, + pipe_buffer_write_nooverlap(ctx->pipe, ctx->vbuf, offset, sizeof(ctx->vertices), ctx->vertices); return offset; @@ -1431,7 +1439,7 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx) pipe->delete_fs_state(pipe, ctx->fs2d); pipe->delete_fs_state(pipe, ctx->fsCube); - pipe_buffer_reference(&ctx->vbuf, NULL); + pipe_resource_reference(&ctx->vbuf, NULL); FREE(ctx); } @@ -1443,7 +1451,7 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx) */ void util_gen_mipmap_flush( struct gen_mipmap_state *ctx ) { - pipe_buffer_reference(&ctx->vbuf, NULL); + pipe_resource_reference(&ctx->vbuf, NULL); ctx->vbuf_slot = 0; } @@ -1452,7 +1460,7 @@ void util_gen_mipmap_flush( struct gen_mipmap_state *ctx ) * Generate mipmap images. It's assumed all needed texture memory is * already allocated. * - * \param pt the texture to generate mipmap levels for + * \param psv the sampler view to the texture to generate mipmap levels for * \param face which cube face to generate mipmaps for (0 for non-cube maps) * \param baseLevel the first mipmap level to use as a src * \param lastLevel the last mipmap level to generate @@ -1461,12 +1469,13 @@ void util_gen_mipmap_flush( struct gen_mipmap_state *ctx ) */ void util_gen_mipmap(struct gen_mipmap_state *ctx, - struct pipe_texture *pt, + struct pipe_sampler_view *psv, uint face, uint baseLevel, uint lastLevel, uint filter) { struct pipe_context *pipe = ctx->pipe; struct pipe_screen *screen = pipe->screen; struct pipe_framebuffer_state fb; + struct pipe_resource *pt = psv->texture; void *fs = (pt->target == PIPE_TEXTURE_CUBE) ? ctx->fsCube : ctx->fs2d; uint dstLevel; uint zslice = 0; @@ -1484,8 +1493,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, filter == PIPE_TEX_FILTER_NEAREST); /* check if we can render in the texture's format */ - if (!screen->is_format_supported(screen, pt->format, PIPE_TEXTURE_2D, - PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) { + if (!screen->is_format_supported(screen, psv->format, PIPE_TEXTURE_2D, + PIPE_BIND_RENDER_TARGET, 0)) { fallback_gen_mipmap(ctx, pt, face, baseLevel, lastLevel); return; } @@ -1495,18 +1504,20 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, cso_save_depth_stencil_alpha(ctx->cso); cso_save_rasterizer(ctx->cso); cso_save_samplers(ctx->cso); - cso_save_sampler_textures(ctx->cso); + cso_save_fragment_sampler_views(ctx->cso); cso_save_framebuffer(ctx->cso); cso_save_fragment_shader(ctx->cso); cso_save_vertex_shader(ctx->cso); cso_save_viewport(ctx->cso); cso_save_clip(ctx->cso); + cso_save_vertex_elements(ctx->cso); /* bind our state */ cso_set_blend(ctx->cso, &ctx->blend); cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil); cso_set_rasterizer(ctx->cso, &ctx->rasterizer); cso_set_clip(ctx->cso, &ctx->clip); + cso_set_vertex_elements(ctx->cso, 2, ctx->velem); cso_set_fragment_shader_handle(ctx->cso, fs); cso_set_vertex_shader_handle(ctx->cso, ctx->vs); @@ -1529,7 +1540,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, struct pipe_surface *surf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, - PIPE_BUFFER_USAGE_GPU_WRITE); + PIPE_BIND_RENDER_TARGET); /* * Setup framebuffer / dest surface @@ -1562,7 +1573,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, cso_single_sampler(ctx->cso, 0, &ctx->sampler); cso_single_sampler_done(ctx->cso); - cso_set_sampler_textures(ctx->cso, 1, &pt); + cso_set_fragment_sampler_views(ctx->cso, 1, &psv); /* quad coords in clip coords */ offset = set_vertex_data(ctx, @@ -1587,10 +1598,11 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, cso_restore_depth_stencil_alpha(ctx->cso); cso_restore_rasterizer(ctx->cso); cso_restore_samplers(ctx->cso); - cso_restore_sampler_textures(ctx->cso); + cso_restore_fragment_sampler_views(ctx->cso); cso_restore_framebuffer(ctx->cso); cso_restore_fragment_shader(ctx->cso); cso_restore_vertex_shader(ctx->cso); cso_restore_viewport(ctx->cso); cso_restore_clip(ctx->cso); + cso_restore_vertex_elements(ctx->cso); } diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.h b/src/gallium/auxiliary/util/u_gen_mipmap.h index 54608f9466..a7502b9982 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.h +++ b/src/gallium/auxiliary/util/u_gen_mipmap.h @@ -37,7 +37,7 @@ extern "C" { struct pipe_context; -struct pipe_texture; +struct pipe_resource; struct cso_context; struct gen_mipmap_state; @@ -59,7 +59,7 @@ util_gen_mipmap_flush( struct gen_mipmap_state *ctx ); extern void util_gen_mipmap(struct gen_mipmap_state *ctx, - struct pipe_texture *pt, + struct pipe_sampler_view *psv, uint face, uint baseLevel, uint lastLevel, uint filter); diff --git a/src/gallium/auxiliary/util/u_half.h b/src/gallium/auxiliary/util/u_half.h new file mode 100644 index 0000000000..ad030e90c6 --- /dev/null +++ b/src/gallium/auxiliary/util/u_half.h @@ -0,0 +1,90 @@ +/************************************************************************** + * + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef U_HALF_H +#define U_HALF_H + +#include "pipe/p_compiler.h" +#include "util/u_math.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern const uint32_t util_half_to_float_mantissa_table[2048]; +extern const uint32_t util_half_to_float_exponent_table[64]; +extern const uint32_t util_half_to_float_offset_table[64]; +extern const uint16_t util_float_to_half_base_table[512]; +extern const uint8_t util_float_to_half_shift_table[512]; + +/* + * Note that if the half float is a signaling NaN, the x87 FPU will turn + * it into a quiet NaN immediately upon loading into a float. + * + * Additionally, denormals may be flushed to zero. + * + * To avoid this, use the floatui functions instead of the float ones + * when just doing conversion rather than computation on the resulting + * floats. + */ + +static INLINE uint32_t +util_half_to_floatui(uint16_t h) +{ + unsigned exp = h >> 10; + return util_half_to_float_mantissa_table[util_half_to_float_offset_table[exp] + (h & 0x3ff)] + util_half_to_float_exponent_table[exp]; +} + +static INLINE float +util_half_to_float(uint16_t h) +{ + union fi r; + r.ui = util_half_to_floatui(h); + return r.f; +} + +static INLINE uint16_t +util_floatui_to_half(uint32_t v) +{ + unsigned signexp = v >> 23; + return util_float_to_half_base_table[signexp] + ((v & 0x007fffff) >> util_float_to_half_shift_table[signexp]); +} + +static INLINE uint16_t +util_float_to_half(float f) +{ + union fi i; + i.f = f; + return util_floatui_to_half(i.ui); +} + +#ifdef __cplusplus +} +#endif + +#endif /* U_HALF_H */ + diff --git a/src/gallium/auxiliary/util/u_half.py b/src/gallium/auxiliary/util/u_half.py new file mode 100644 index 0000000000..8007482e97 --- /dev/null +++ b/src/gallium/auxiliary/util/u_half.py @@ -0,0 +1,179 @@ +# Copyright 2010 Luca Barbieri +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice (including the +# next paragraph) shall be included in all copies or substantial +# portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# ************************************************************************* + +# The code is a reimplementation of the algorithm in +# www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf +# "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008 +# +# The table contents have been slightly changed so that the exponent +# bias is now in the exponent table instead of the mantissa table (mostly +# for cosmetic reasons, and because it theoretically allows a variant +# that flushes denormal to zero but uses a mantissa table with 24-bit +# entries). +# +# The tables are also constructed slightly differently. +# + +# Note that using a 64K * 4 table is a terrible idea since it will not fit +# in the L1 cache and will massively pollute the L2 cache as well +# +# These should instead fit in the L1 cache. +# +# TODO: we could use a denormal bias table instead of the mantissa/offset +# tables: this would reduce the L1 cache usage from 8704 to 2304 bytes +# but would involve more computation +# +# Note however that if denormals are never encountered, the L1 cache usage +# is only about 4608 bytes anyway. + +table_index = None +table_length = None + +def begin(t, n, l): + global table_length + global table_index + table_index = 0 + table_length = l + print + print "const " + t + " " + n + "[" + str(l) + "] = {" + +def value(v): + global table_index + table_index += 1 + print "\t" + hex(v) + "," + +def end(): + global table_length + global table_index + print "};" + assert table_index == table_length + +print "/* This file is autogenerated by u_half.py. Do not edit directly. */" +print "#include \"util/u_half.h\"" + +begin("uint32_t", "util_half_to_float_mantissa_table", 2048) +# zero +value(0) + +# denormals +for i in xrange(1, 1024): + m = i << 13 + e = 0 + + # normalize number + while (m & 0x00800000) == 0: + e -= 0x00800000; + m <<= 1; + + m &= ~0x00800000; + e += 0x38800000; + value(m | e) + +# normals +for i in xrange(1024, 2048): + value((i - 1024) << 13) +end() + +begin("uint32_t", "util_half_to_float_exponent_table", 64) +# positive zero or denormals +value(0) + +# positive numbers +for i in xrange(1, 31): + value(0x38000000 + (i << 23)) + +# positive infinity/NaN +value(0x7f800000) + +# negative zero or denormals +value(0x80000000) + +# negative numbers +for i in range(33, 63): + value(0xb8000000 + ((i - 32) << 23)) + +# negative infinity/NaN +value(0xff800000) +end() + +begin("uint32_t", "util_half_to_float_offset_table", 64) +# positive zero or denormals +value(0) + +# positive normals +for i in range(1, 32): + value(1024) + +# negative zero or denormals +value(0) + +# negative normals +for i in xrange(33, 64): + value(1024) +end() + +begin("uint16_t", "util_float_to_half_base_table", 512) +for sign in (0, 0x8000): + # very small numbers mapping to zero + for i in xrange(-127, -24): + value(sign | 0) + + # small numbers mapping to denormals + for i in xrange(-24, -14): + value(sign | (0x400 >> (-14 -i))) + + # normal numbers + for i in xrange(-14, 16): + value(sign | ((i + 15) << 10)) + + # large numbers mapping to infinity + for i in xrange(16, 128): + value(sign | 0x7c00) + + # infinity and NaNs + value(sign | 0x7c00) +end() + +begin("uint8_t", "util_float_to_half_shift_table", 512) +for sign in (0, 0x8000): + # very small numbers mapping to zero + for i in xrange(-127, -24): + value(24) + + # small numbers mapping to denormals + for i in xrange(-24, -14): + value(-1 - i) + + # normal numbers + for i in xrange(-14, 16): + value(13) + + # large numbers mapping to infinity + for i in xrange(16, 128): + value(24) + + # infinity and NaNs + value(13) +end() + diff --git a/src/gallium/auxiliary/util/u_init.h b/src/gallium/auxiliary/util/u_init.h new file mode 100644 index 0000000000..7bc356a791 --- /dev/null +++ b/src/gallium/auxiliary/util/u_init.h @@ -0,0 +1,52 @@ +/* + * Copyright 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_INIT_H +#define U_INIT_H + +/* Use UTIL_INIT(f) to have f called at program initialization. + Note that it is only guaranteed to be called if any symbol in the + .c file it is in sis referenced by the program. + + UTIL_INIT functions are called in arbitrary order. +*/ + +#ifdef __cplusplus +/* use a C++ global constructor */ +#define UTIL_INIT(f) struct f##__gctor_t {f##__gctor_t() {x();}} f##__gctor; +#elif defined(_MSC_VER) +/* add a pointer to the section where MSVC stores global constructor pointers */ +/* see http://blogs.msdn.com/vcblog/archive/2006/10/20/crt-initialization.aspx and + http://stackoverflow.com/questions/1113409/attribute-constructor-equivalent-in-vc */ +#pragma section(".CRT$XCU",read) +#define UTIL_INIT(f) static void __cdecl f##__init(void) {f();}; __declspec(allocate(".CRT$XCU")) void (__cdecl* f##__xcu)(void) = f##__init; +#elif defined(__GNUC__) +#define UTIL_INIT(f) static void f##__init(void) __attribute__((constructor)); static void f##__init(void) {f();} +#else +#error Unsupported compiler: please find out how to implement global initializers in C on it +#endif + +#endif + diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h index 0cb3432c6e..c2f4f05990 100644 --- a/src/gallium/auxiliary/util/u_inlines.h +++ b/src/gallium/auxiliary/util/u_inlines.h @@ -34,6 +34,7 @@ #include "pipe/p_screen.h" #include "util/u_debug.h" #include "util/u_atomic.h" +#include "util/u_box.h" #ifdef __cplusplus @@ -87,18 +88,6 @@ pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference) return destroy; } -static INLINE void -pipe_buffer_reference(struct pipe_buffer **ptr, struct pipe_buffer *buf) -{ - struct pipe_buffer *old_buf; - - assert(ptr); - old_buf = *ptr; - - if (pipe_reference(&(*ptr)->reference, &buf->reference)) - old_buf->screen->buffer_destroy(old_buf); - *ptr = buf; -} static INLINE void pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf) @@ -110,106 +99,162 @@ pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf) *ptr = surf; } + static INLINE void -pipe_texture_reference(struct pipe_texture **ptr, struct pipe_texture *tex) +pipe_resource_reference(struct pipe_resource **ptr, struct pipe_resource *tex) { - struct pipe_texture *old_tex = *ptr; + struct pipe_resource *old_tex = *ptr; if (pipe_reference(&(*ptr)->reference, &tex->reference)) - old_tex->screen->texture_destroy(old_tex); + old_tex->screen->resource_destroy(old_tex->screen, old_tex); *ptr = tex; } +static INLINE void +pipe_sampler_view_reference(struct pipe_sampler_view **ptr, struct pipe_sampler_view *view) +{ + struct pipe_sampler_view *old_view = *ptr; + + if (pipe_reference(&(*ptr)->reference, &view->reference)) + old_view->context->sampler_view_destroy(old_view->context, old_view); + *ptr = view; +} + + /* * Convenience wrappers for screen buffer functions. */ -static INLINE struct pipe_buffer * +static INLINE struct pipe_resource * pipe_buffer_create( struct pipe_screen *screen, - unsigned alignment, unsigned usage, unsigned size ) + unsigned bind, + unsigned size ) { - return screen->buffer_create(screen, alignment, usage, size); + struct pipe_resource buffer; + memset(&buffer, 0, sizeof buffer); + buffer.target = PIPE_BUFFER; + buffer.format = PIPE_FORMAT_R8_UNORM; /* want TYPELESS or similar */ + buffer.bind = bind; + buffer._usage = PIPE_USAGE_DEFAULT; + buffer.flags = 0; + buffer.width0 = size; + buffer.height0 = 1; + buffer.depth0 = 1; + return screen->resource_create(screen, &buffer); } -static INLINE struct pipe_buffer * -pipe_user_buffer_create( struct pipe_screen *screen, void *ptr, unsigned size ) + +static INLINE struct pipe_resource * +pipe_user_buffer_create( struct pipe_screen *screen, void *ptr, unsigned size, + unsigned usage ) { - return screen->user_buffer_create(screen, ptr, size); + return screen->user_buffer_create(screen, ptr, size, usage); } static INLINE void * -pipe_buffer_map(struct pipe_screen *screen, - struct pipe_buffer *buf, - unsigned usage) +pipe_buffer_map_range(struct pipe_context *pipe, + struct pipe_resource *buffer, + unsigned offset, + unsigned length, + unsigned usage, + struct pipe_transfer **transfer) { - if(screen->buffer_map_range) { - unsigned offset = 0; - unsigned length = buf->size; - return screen->buffer_map_range(screen, buf, offset, length, usage); + struct pipe_box box; + void *map; + + assert(offset < buffer->width0); + assert(offset + length <= buffer->width0); + assert(length); + + u_box_1d(offset, length, &box); + + *transfer = pipe->get_transfer( pipe, + buffer, + u_subresource(0, 0), + usage, + &box); + + if (*transfer == NULL) + return NULL; + + map = pipe->transfer_map( pipe, *transfer ); + if (map == NULL) { + pipe->transfer_destroy( pipe, *transfer ); + return NULL; } - else - return screen->buffer_map(screen, buf, usage); + + /* Match old screen->buffer_map_range() behaviour, return pointer + * to where the beginning of the buffer would be: + */ + return (void *)((char *)map - offset); } -static INLINE void -pipe_buffer_unmap(struct pipe_screen *screen, - struct pipe_buffer *buf) + +static INLINE void * +pipe_buffer_map(struct pipe_context *pipe, + struct pipe_resource *buffer, + unsigned usage, + struct pipe_transfer **transfer) { - screen->buffer_unmap(screen, buf); + return pipe_buffer_map_range(pipe, buffer, 0, buffer->width0, usage, transfer); } -static INLINE void * -pipe_buffer_map_range(struct pipe_screen *screen, - struct pipe_buffer *buf, - unsigned offset, - unsigned length, - unsigned usage) + +static INLINE void +pipe_buffer_unmap(struct pipe_context *pipe, + struct pipe_resource *buf, + struct pipe_transfer *transfer) { - assert(offset < buf->size); - assert(offset + length <= buf->size); - assert(length); - if(screen->buffer_map_range) - return screen->buffer_map_range(screen, buf, offset, length, usage); - else - return screen->buffer_map(screen, buf, usage); + if (transfer) { + pipe->transfer_unmap(pipe, transfer); + pipe->transfer_destroy(pipe, transfer); + } } static INLINE void -pipe_buffer_flush_mapped_range(struct pipe_screen *screen, - struct pipe_buffer *buf, +pipe_buffer_flush_mapped_range(struct pipe_context *pipe, + struct pipe_transfer *transfer, unsigned offset, unsigned length) { - assert(offset < buf->size); - assert(offset + length <= buf->size); + struct pipe_box box; + int transfer_offset; + assert(length); - if(screen->buffer_flush_mapped_range) - screen->buffer_flush_mapped_range(screen, buf, offset, length); + assert(transfer->box.x <= offset); + assert(offset + length <= transfer->box.x + transfer->box.width); + + /* Match old screen->buffer_flush_mapped_range() behaviour, where + * offset parameter is relative to the start of the buffer, not the + * mapped range. + */ + transfer_offset = offset - transfer->box.x; + + u_box_1d(transfer_offset, length, &box); + + pipe->transfer_flush_region(pipe, transfer, &box); } static INLINE void -pipe_buffer_write(struct pipe_screen *screen, - struct pipe_buffer *buf, - unsigned offset, unsigned size, +pipe_buffer_write(struct pipe_context *pipe, + struct pipe_resource *buf, + unsigned offset, + unsigned size, const void *data) { - void *map; - - assert(offset < buf->size); - assert(offset + size <= buf->size); - assert(size); - - map = pipe_buffer_map_range(screen, buf, offset, size, - PIPE_BUFFER_USAGE_CPU_WRITE | - PIPE_BUFFER_USAGE_FLUSH_EXPLICIT | - PIPE_BUFFER_USAGE_DISCARD); - assert(map); - if(map) { - memcpy((uint8_t *)map + offset, data, size); - pipe_buffer_flush_mapped_range(screen, buf, offset, size); - pipe_buffer_unmap(screen, buf); - } + struct pipe_box box; + + u_box_1d(offset, size, &box); + + pipe->transfer_inline_write( pipe, + buf, + u_subresource(0,0), + PIPE_TRANSFER_WRITE, + &box, + data, + size, + 0); } /** @@ -219,87 +264,88 @@ pipe_buffer_write(struct pipe_screen *screen, * been written before. */ static INLINE void -pipe_buffer_write_nooverlap(struct pipe_screen *screen, - struct pipe_buffer *buf, +pipe_buffer_write_nooverlap(struct pipe_context *pipe, + struct pipe_resource *buf, unsigned offset, unsigned size, const void *data) { - void *map; - - assert(offset < buf->size); - assert(offset + size <= buf->size); - assert(size); - - map = pipe_buffer_map_range(screen, buf, offset, size, - PIPE_BUFFER_USAGE_CPU_WRITE | - PIPE_BUFFER_USAGE_FLUSH_EXPLICIT | - PIPE_BUFFER_USAGE_DISCARD | - PIPE_BUFFER_USAGE_UNSYNCHRONIZED); - assert(map); - if(map) { - memcpy((uint8_t *)map + offset, data, size); - pipe_buffer_flush_mapped_range(screen, buf, offset, size); - pipe_buffer_unmap(screen, buf); - } + struct pipe_box box; + + u_box_1d(offset, size, &box); + + pipe->transfer_inline_write(pipe, + buf, + u_subresource(0,0), + (PIPE_TRANSFER_WRITE | + PIPE_TRANSFER_NOOVERWRITE), + &box, + data, + 0, 0); } static INLINE void -pipe_buffer_read(struct pipe_screen *screen, - struct pipe_buffer *buf, - unsigned offset, unsigned size, +pipe_buffer_read(struct pipe_context *pipe, + struct pipe_resource *buf, + unsigned offset, + unsigned size, void *data) { - void *map; - - assert(offset < buf->size); - assert(offset + size <= buf->size); - assert(size); - - map = pipe_buffer_map_range(screen, buf, offset, size, PIPE_BUFFER_USAGE_CPU_READ); - assert(map); - if(map) { - memcpy(data, (const uint8_t *)map + offset, size); - pipe_buffer_unmap(screen, buf); - } + struct pipe_transfer *src_transfer; + ubyte *map; + + map = (ubyte *) pipe_buffer_map_range(pipe, + buf, + offset, size, + PIPE_TRANSFER_READ, + &src_transfer); + + if (map) + memcpy(data, map + offset, size); + + pipe_buffer_unmap(pipe, buf, src_transfer); } -static INLINE void * -pipe_transfer_map( struct pipe_transfer *transf ) +static INLINE struct pipe_transfer * +pipe_get_transfer( struct pipe_context *context, + struct pipe_resource *resource, + unsigned face, unsigned level, + unsigned zslice, + enum pipe_transfer_usage usage, + unsigned x, unsigned y, + unsigned w, unsigned h) { - struct pipe_screen *screen = transf->texture->screen; - return screen->transfer_map(screen, transf); + struct pipe_box box; + u_box_2d_zslice( x, y, zslice, w, h, &box ); + return context->get_transfer( context, + resource, + u_subresource(face, level), + usage, + &box ); } -static INLINE void -pipe_transfer_unmap( struct pipe_transfer *transf ) +static INLINE void * +pipe_transfer_map( struct pipe_context *context, + struct pipe_transfer *transfer ) { - struct pipe_screen *screen = transf->texture->screen; - screen->transfer_unmap(screen, transf); + return context->transfer_map( context, transfer ); } static INLINE void -pipe_transfer_destroy( struct pipe_transfer *transf ) +pipe_transfer_unmap( struct pipe_context *context, + struct pipe_transfer *transfer ) { - struct pipe_screen *screen = transf->texture->screen; - screen->tex_transfer_destroy(transf); + context->transfer_unmap( context, transfer ); } -static INLINE unsigned -pipe_transfer_buffer_flags( struct pipe_transfer *transf ) + +static INLINE void +pipe_transfer_destroy( struct pipe_context *context, + struct pipe_transfer *transfer ) { - switch (transf->usage & PIPE_TRANSFER_READ_WRITE) { - case PIPE_TRANSFER_READ_WRITE: - return PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE; - case PIPE_TRANSFER_READ: - return PIPE_BUFFER_USAGE_CPU_READ; - case PIPE_TRANSFER_WRITE: - return PIPE_BUFFER_USAGE_CPU_WRITE; - default: - debug_assert(0); - return 0; - } + context->transfer_destroy(context, transfer); } + #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h index a2fc597356..53d56599fe 100644 --- a/src/gallium/auxiliary/util/u_memory.h +++ b/src/gallium/auxiliary/util/u_memory.h @@ -88,7 +88,7 @@ mem_dup(const void *src, uint size) /** * Offset of a field in a struct, in bytes. */ -#define Offset(TYPE, MEMBER) ((unsigned)&(((TYPE *)NULL)->MEMBER)) +#define Offset(TYPE, MEMBER) ((uintptr_t)&(((TYPE *)NULL)->MEMBER)) diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 50f1b1670b..3ebef9fb74 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -37,6 +37,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_format.h" +#include "util/u_debug.h" #include "util/u_format.h" #include "util/u_math.h" @@ -92,6 +93,11 @@ util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a, uc->us = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); } return; + case PIPE_FORMAT_B5G5R5X1_UNORM: + { + uc->us = ((0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); + } + return; case PIPE_FORMAT_B5G5R5A1_UNORM: { uc->us = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); @@ -216,6 +222,15 @@ util_unpack_color_ub(enum pipe_format format, union util_color *uc, *a = (ubyte) 0xff; } return; + case PIPE_FORMAT_B5G5R5X1_UNORM: + { + ushort p = uc->us; + *r = (ubyte) (((p >> 7) & 0xf8) | ((p >> 12) & 0x7)); + *g = (ubyte) (((p >> 2) & 0xf8) | ((p >> 7) & 0x7)); + *b = (ubyte) (((p << 3) & 0xf8) | ((p >> 2) & 0x7)); + *a = (ubyte) 0xff; + } + return; case PIPE_FORMAT_B5G5R5A1_UNORM: { ushort p = uc->us; @@ -361,6 +376,11 @@ util_pack_color(const float rgba[4], enum pipe_format format, union util_color * uc->us = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); } return; + case PIPE_FORMAT_B5G5R5X1_UNORM: + { + uc->us = ((0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); + } + return; case PIPE_FORMAT_B5G5R5A1_UNORM: { uc->us = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); @@ -427,17 +447,17 @@ util_pack_z(enum pipe_format format, double z) return (uint) (z * 0xffffffff); case PIPE_FORMAT_Z32_FLOAT: return (uint)z; - case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: case PIPE_FORMAT_Z24X8_UNORM: if (z == 1.0) return 0xffffff; return (uint) (z * 0xffffff); - case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: if (z == 1.0) return 0xffffff00; return ((uint) (z * 0xffffff)) << 8; - case PIPE_FORMAT_S8_UNORM: + case PIPE_FORMAT_S8_USCALED: /* this case can get it via util_pack_z_stencil() */ return 0; default: @@ -458,13 +478,13 @@ util_pack_z_stencil(enum pipe_format format, double z, uint s) unsigned packed = util_pack_z(format, z); switch (format) { - case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: packed |= s << 24; break; - case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: packed |= s; break; - case PIPE_FORMAT_S8_UNORM: + case PIPE_FORMAT_S8_USCALED: packed |= s; break; default: diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c index 8479161c74..098cdfd58b 100644 --- a/src/gallium/auxiliary/util/u_rect.c +++ b/src/gallium/auxiliary/util/u_rect.c @@ -35,6 +35,7 @@ #include "pipe/p_context.h" #include "pipe/p_screen.h" #include "util/u_format.h" +#include "util/u_inlines.h" #include "util/u_rect.h" @@ -169,7 +170,6 @@ util_surface_copy(struct pipe_context *pipe, unsigned src_x, unsigned src_y, unsigned w, unsigned h) { - struct pipe_screen *screen = pipe->screen; struct pipe_transfer *src_trans, *dst_trans; void *dst_map; const void *src_map; @@ -182,28 +182,28 @@ util_surface_copy(struct pipe_context *pipe, src_format = src->texture->format; dst_format = dst->texture->format; - src_trans = screen->get_tex_transfer(screen, - src->texture, - src->face, - src->level, - src->zslice, - PIPE_TRANSFER_READ, - src_x, src_y, w, h); - - dst_trans = screen->get_tex_transfer(screen, - dst->texture, - dst->face, - dst->level, - dst->zslice, - PIPE_TRANSFER_WRITE, - dst_x, dst_y, w, h); + src_trans = pipe_get_transfer(pipe, + src->texture, + src->face, + src->level, + src->zslice, + PIPE_TRANSFER_READ, + src_x, src_y, w, h); + + dst_trans = pipe_get_transfer(pipe, + dst->texture, + dst->face, + dst->level, + dst->zslice, + PIPE_TRANSFER_WRITE, + dst_x, dst_y, w, h); assert(util_format_get_blocksize(dst_format) == util_format_get_blocksize(src_format)); assert(util_format_get_blockwidth(dst_format) == util_format_get_blockwidth(src_format)); assert(util_format_get_blockheight(dst_format) == util_format_get_blockheight(src_format)); - src_map = pipe->screen->transfer_map(screen, src_trans); - dst_map = pipe->screen->transfer_map(screen, dst_trans); + src_map = pipe->transfer_map(pipe, src_trans); + dst_map = pipe->transfer_map(pipe, dst_trans); assert(src_map); assert(dst_map); @@ -221,11 +221,11 @@ util_surface_copy(struct pipe_context *pipe, do_flip ? h - 1 : 0); } - pipe->screen->transfer_unmap(pipe->screen, src_trans); - pipe->screen->transfer_unmap(pipe->screen, dst_trans); + pipe->transfer_unmap(pipe, src_trans); + pipe->transfer_unmap(pipe, dst_trans); - screen->tex_transfer_destroy(src_trans); - screen->tex_transfer_destroy(dst_trans); + pipe->transfer_destroy(pipe, src_trans); + pipe->transfer_destroy(pipe, dst_trans); } @@ -243,65 +243,65 @@ util_surface_fill(struct pipe_context *pipe, unsigned dstx, unsigned dsty, unsigned width, unsigned height, unsigned value) { - struct pipe_screen *screen = pipe->screen; struct pipe_transfer *dst_trans; void *dst_map; assert(dst->texture); if (!dst->texture) return; - dst_trans = screen->get_tex_transfer(screen, - dst->texture, - dst->face, - dst->level, - dst->zslice, - PIPE_TRANSFER_WRITE, - dstx, dsty, width, height); + dst_trans = pipe_get_transfer(pipe, + dst->texture, + dst->face, + dst->level, + dst->zslice, + PIPE_TRANSFER_WRITE, + dstx, dsty, width, height); - dst_map = pipe->screen->transfer_map(screen, dst_trans); + dst_map = pipe->transfer_map(pipe, dst_trans); assert(dst_map); if (dst_map) { assert(dst_trans->stride > 0); - switch (util_format_get_blocksize(dst_trans->texture->format)) { + switch (util_format_get_blocksize(dst->texture->format)) { case 1: case 2: case 4: - util_fill_rect(dst_map, dst_trans->texture->format, dst_trans->stride, + util_fill_rect(dst_map, dst->texture->format, + dst_trans->stride, 0, 0, width, height, value); break; case 8: - { - /* expand the 4-byte clear value to an 8-byte value */ - ushort *row = (ushort *) dst_map; - ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff); - ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff); - ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff); - ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff); - unsigned i, j; - val0 = (val0 << 8) | val0; - val1 = (val1 << 8) | val1; - val2 = (val2 << 8) | val2; - val3 = (val3 << 8) | val3; - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - row[j*4+0] = val0; - row[j*4+1] = val1; - row[j*4+2] = val2; - row[j*4+3] = val3; - } - row += dst_trans->stride/2; - } - } - break; + { + /* expand the 4-byte clear value to an 8-byte value */ + ushort *row = (ushort *) dst_map; + ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff); + ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff); + ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff); + ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff); + unsigned i, j; + val0 = (val0 << 8) | val0; + val1 = (val1 << 8) | val1; + val2 = (val2 << 8) | val2; + val3 = (val3 << 8) | val3; + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + row[j*4+0] = val0; + row[j*4+1] = val1; + row[j*4+2] = val2; + row[j*4+3] = val3; + } + row += dst_trans->stride/2; + } + } + break; default: assert(0); break; } } - pipe->screen->transfer_unmap(pipe->screen, dst_trans); - screen->tex_transfer_destroy(dst_trans); + pipe->transfer_unmap(pipe, dst_trans); + pipe->transfer_destroy(pipe, dst_trans); } diff --git a/src/gallium/auxiliary/util/u_resource.c b/src/gallium/auxiliary/util/u_resource.c new file mode 100644 index 0000000000..9e6474b83d --- /dev/null +++ b/src/gallium/auxiliary/util/u_resource.c @@ -0,0 +1,96 @@ + + +#include "util/u_inlines.h" +#include "util/u_transfer.h" + +static INLINE struct u_resource * +u_resource( struct pipe_resource *res ) +{ + return (struct u_resource *)res; +} + +boolean u_resource_get_handle_vtbl(struct pipe_screen *screen, + struct pipe_resource *resource, + struct winsys_handle *handle) +{ + struct u_resource *ur = u_resource(resource); + return ur->vtbl->resource_get_handle(screen, resource, handle); +} + +void u_resource_destroy_vtbl(struct pipe_screen *screen, + struct pipe_resource *resource) +{ + struct u_resource *ur = u_resource(resource); + ur->vtbl->resource_destroy(screen, resource); +} + +unsigned u_is_resource_referenced_vtbl( struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned face, unsigned level) +{ + struct u_resource *ur = u_resource(resource); + return ur->vtbl->is_resource_referenced(pipe, resource, face, level); +} + +struct pipe_transfer *u_get_transfer_vtbl(struct pipe_context *context, + struct pipe_resource *resource, + struct pipe_subresource sr, + enum pipe_transfer_usage usage, + const struct pipe_box *box) +{ + struct u_resource *ur = u_resource(resource); + return ur->vtbl->get_transfer(context, resource, sr, usage, box); +} + +void u_transfer_destroy_vtbl(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + struct u_resource *ur = u_resource(transfer->resource); + ur->vtbl->transfer_destroy(pipe, transfer); +} + +void *u_transfer_map_vtbl( struct pipe_context *pipe, + struct pipe_transfer *transfer ) +{ + struct u_resource *ur = u_resource(transfer->resource); + return ur->vtbl->transfer_map(pipe, transfer); +} + +void u_transfer_flush_region_vtbl( struct pipe_context *pipe, + struct pipe_transfer *transfer, + const struct pipe_box *box) +{ + struct u_resource *ur = u_resource(transfer->resource); + ur->vtbl->transfer_flush_region(pipe, transfer, box); +} + +void u_transfer_unmap_vtbl( struct pipe_context *pipe, + struct pipe_transfer *transfer ) +{ + struct u_resource *ur = u_resource(transfer->resource); + ur->vtbl->transfer_unmap(pipe, transfer); +} + +void u_transfer_inline_write_vtbl( struct pipe_context *pipe, + struct pipe_resource *resource, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned slice_stride) +{ + struct u_resource *ur = u_resource(resource); + ur->vtbl->transfer_inline_write(pipe, + resource, + sr, + usage, + box, + data, + stride, + slice_stride); +} + + + + diff --git a/src/gallium/auxiliary/util/u_sampler.c b/src/gallium/auxiliary/util/u_sampler.c new file mode 100644 index 0000000000..e77f562ea2 --- /dev/null +++ b/src/gallium/auxiliary/util/u_sampler.c @@ -0,0 +1,100 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "u_format.h" +#include "u_sampler.h" + + +static void +default_template(struct pipe_sampler_view *view, + const struct pipe_resource *texture, + enum pipe_format format, + unsigned expand_green_blue) +{ + /* XXX: Check if format is compatible with texture->format. + */ + + view->format = format; + view->first_level = 0; + view->last_level = texture->last_level; + view->swizzle_r = PIPE_SWIZZLE_RED; + view->swizzle_g = PIPE_SWIZZLE_GREEN; + view->swizzle_b = PIPE_SWIZZLE_BLUE; + view->swizzle_a = PIPE_SWIZZLE_ALPHA; + + /* Override default green and blue component expansion to the requested + * one. + * + * Gallium expands nonexistent components to (0,0,0,1), DX9 expands + * to (1,1,1,1). Since alpha is always expanded to 1, and red is + * always present, we only really care about green and blue + * components. + * + * To make it look less hackish, one would have to add + * UTIL_FORMAT_SWIZZLE_EXPAND to indicate components for expansion + * and then override without exceptions or favoring one component + * over another. + */ + if (format != PIPE_FORMAT_A8_UNORM) { + const struct util_format_description *desc = util_format_description(format); + + assert(desc); + if (desc) { + if (desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_0) { + view->swizzle_g = expand_green_blue; + } + if (desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_0) { + view->swizzle_b = expand_green_blue; + } + } + } +} + +void +u_sampler_view_default_template(struct pipe_sampler_view *view, + const struct pipe_resource *texture, + enum pipe_format format) +{ + /* Expand to (0, 0, 0, 1) */ + default_template(view, + texture, + format, + PIPE_SWIZZLE_ZERO); +} + +void +u_sampler_view_default_dx9_template(struct pipe_sampler_view *view, + const struct pipe_resource *texture, + enum pipe_format format) +{ + /* Expand to (1, 1, 1, 1) */ + default_template(view, + texture, + format, + PIPE_SWIZZLE_ONE); +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_query.c b/src/gallium/auxiliary/util/u_sampler.h index f3832d07ff..f3dad7417e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_query.c +++ b/src/gallium/auxiliary/util/u_sampler.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMware, Inc. + * Copyright 2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,48 +25,33 @@ * **************************************************************************/ -/** - * @file - * Utility functions to make assertions about formats. - * - * This module centralizes most of logic used when determining what algorithm - * is most suitable (i.e., most efficient yet correct) for a given format. - * - * It might be possible to move some of these functions to u_format module, - * but since tiny differences in the format my render it more/less - * appropriate to a given algorithm it is impossible to make any long term - * guarantee about the semantics of these functions. - * - * @author Jose Fonseca <jfonseca@vmware.com> - */ +#ifndef U_SAMPLER_H +#define U_SAMPLER_H + + +#include "pipe/p_defines.h" +#include "pipe/p_format.h" +#include "pipe/p_state.h" -#include "util/u_format.h" +#ifdef __cplusplus +extern "C" { +#endif -#include "lp_bld_format.h" +void +u_sampler_view_default_template(struct pipe_sampler_view *view, + const struct pipe_resource *texture, + enum pipe_format format); -/** - * Whether this format is a 4 rgba8 variant - */ -boolean -lp_format_is_rgba8(const struct util_format_description *desc) -{ - unsigned chan; +void +u_sampler_view_default_dx9_template(struct pipe_sampler_view *view, + const struct pipe_resource *texture, + enum pipe_format format); - if(desc->block.width != 1 || - desc->block.height != 1 || - desc->block.bits != 32) - return FALSE; - for(chan = 0; chan < 4; ++chan) { - if(desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED && - desc->channel[chan].type != UTIL_FORMAT_TYPE_SIGNED && - desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) - return FALSE; - if(desc->channel[chan].size != 8) - return FALSE; - } +#ifdef __cplusplus +} /* extern "C" { */ +#endif - return TRUE; -} +#endif /* U_SAMPLER_H */ diff --git a/src/gallium/auxiliary/util/u_simple_screen.c b/src/gallium/auxiliary/util/u_simple_screen.c deleted file mode 100644 index 53f3c16dbc..0000000000 --- a/src/gallium/auxiliary/util/u_simple_screen.c +++ /dev/null @@ -1,153 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "u_simple_screen.h" - -#include "pipe/p_screen.h" -#include "pipe/p_state.h" -#include "util/u_simple_screen.h" - - -static struct pipe_buffer * -pass_buffer_create(struct pipe_screen *screen, - unsigned alignment, - unsigned usage, - unsigned size) -{ - struct pipe_buffer *buffer = - screen->winsys->buffer_create(screen->winsys, alignment, usage, size); - - buffer->screen = screen; - - return buffer; -} - -static struct pipe_buffer * -pass_user_buffer_create(struct pipe_screen *screen, - void *ptr, - unsigned bytes) -{ - struct pipe_buffer *buffer = - screen->winsys->user_buffer_create(screen->winsys, ptr, bytes); - - buffer->screen = screen; - - return buffer; -} - -static struct pipe_buffer * -pass_surface_buffer_create(struct pipe_screen *screen, - unsigned width, unsigned height, - enum pipe_format format, - unsigned usage, - unsigned tex_usage, - unsigned *stride) -{ - struct pipe_buffer *buffer = - screen->winsys->surface_buffer_create(screen->winsys, width, height, - format, usage, tex_usage, stride); - - buffer->screen = screen; - - return buffer; -} - -static void * -pass_buffer_map(struct pipe_screen *screen, - struct pipe_buffer *buf, - unsigned usage) -{ - return screen->winsys->buffer_map(screen->winsys, buf, usage); -} - -static void -pass_buffer_unmap(struct pipe_screen *screen, - struct pipe_buffer *buf) -{ - screen->winsys->buffer_unmap(screen->winsys, buf); -} - -static void -pass_buffer_destroy(struct pipe_buffer *buf) -{ - buf->screen->winsys->buffer_destroy(buf); -} - - -static void -pass_flush_frontbuffer(struct pipe_screen *screen, - struct pipe_surface *surf, - void *context_private) -{ - screen->winsys->flush_frontbuffer(screen->winsys, surf, context_private); -} - -static void -pass_fence_reference(struct pipe_screen *screen, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *fence) -{ - screen->winsys->fence_reference(screen->winsys, ptr, fence); -} - -static int -pass_fence_signalled(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flag) -{ - return screen->winsys->fence_signalled(screen->winsys, fence, flag); -} - -static int -pass_fence_finish(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flag) -{ - return screen->winsys->fence_finish(screen->winsys, fence, flag); -} - -void -u_simple_screen_init(struct pipe_screen *screen) -{ - screen->buffer_create = pass_buffer_create; - screen->user_buffer_create = pass_user_buffer_create; - screen->surface_buffer_create = pass_surface_buffer_create; - - screen->buffer_map = pass_buffer_map; - screen->buffer_unmap = pass_buffer_unmap; - screen->buffer_destroy = pass_buffer_destroy; - screen->flush_frontbuffer = pass_flush_frontbuffer; - screen->fence_reference = pass_fence_reference; - screen->fence_signalled = pass_fence_signalled; - screen->fence_finish = pass_fence_finish; -} - -const char * -u_simple_screen_winsys_name(struct pipe_screen *screen) -{ - return screen->winsys->get_name(screen->winsys); -} diff --git a/src/gallium/auxiliary/util/u_simple_screen.h b/src/gallium/auxiliary/util/u_simple_screen.h index bb3f5ba102..b52232f025 100644 --- a/src/gallium/auxiliary/util/u_simple_screen.h +++ b/src/gallium/auxiliary/util/u_simple_screen.h @@ -33,7 +33,7 @@ struct pipe_screen; struct pipe_fence_handle; struct pipe_surface; -struct pipe_buffer; +struct pipe_resource; /** * Gallium3D drivers are (meant to be!) independent of both GL and the @@ -53,11 +53,6 @@ struct pipe_winsys const char *(*get_name)( struct pipe_winsys *ws ); /** - * Do any special operations to ensure buffer size is correct - */ - void (*update_buffer)( struct pipe_winsys *ws, - void *context_private ); - /** * Do any special operations to ensure frontbuffer contents are * displayed, eg copy fake frontbuffer. */ @@ -73,14 +68,13 @@ struct pipe_winsys * window systems must then implement that interface (rather than the * other way around...). * - * usage is a bitmask of PIPE_BUFFER_USAGE_PIXEL/VERTEX/INDEX/CONSTANT. This - * usage argument is only an optimization hint, not a guarantee, therefore - * proper behavior must be observed in all circumstances. + * usage is a bitmask of PIPE_BIND_*. + * All possible usages must be present. * * alignment indicates the client's alignment requirements, eg for * SSE instructions. */ - struct pipe_buffer *(*buffer_create)( struct pipe_winsys *ws, + struct pipe_resource *(*buffer_create)( struct pipe_winsys *ws, unsigned alignment, unsigned usage, unsigned size ); @@ -106,7 +100,7 @@ struct pipe_winsys * Note that ptr may be accessed at any time upto the time when the * buffer is destroyed, so the data must not be freed before then. */ - struct pipe_buffer *(*user_buffer_create)(struct pipe_winsys *ws, + struct pipe_resource *(*user_buffer_create)(struct pipe_winsys *ws, void *ptr, unsigned bytes); @@ -117,11 +111,11 @@ struct pipe_winsys * display targets) must be allocated with special characteristics, memory * pools, or obtained directly from the windowing system. * - * This callback is invoked by the pipe_screenwhen creating a texture marked - * with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET flag to get the underlying + * This callback is invoked by the pipe_screen when creating a texture marked + * with the PIPE_BIND_DISPLAY_TARGET flag to get the underlying * buffer storage. */ - struct pipe_buffer *(*surface_buffer_create)(struct pipe_winsys *ws, + struct pipe_resource *(*surface_buffer_create)(struct pipe_winsys *ws, unsigned width, unsigned height, enum pipe_format format, unsigned usage, @@ -134,13 +128,13 @@ struct pipe_winsys * flags is bitmask of PIPE_BUFFER_USAGE_CPU_READ/WRITE flags. */ void *(*buffer_map)( struct pipe_winsys *ws, - struct pipe_buffer *buf, + struct pipe_resource *buf, unsigned usage ); void (*buffer_unmap)( struct pipe_winsys *ws, - struct pipe_buffer *buf ); + struct pipe_resource *buf ); - void (*buffer_destroy)( struct pipe_buffer *buf ); + void (*buffer_destroy)( struct pipe_resource *buf ); /** Set ptr = fence, with reference counting */ diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index 33306bbc2a..42440d0d67 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -50,7 +50,8 @@ boolean util_create_rgba_surface(struct pipe_screen *screen, uint width, uint height, - struct pipe_texture **textureOut, + uint bind, + struct pipe_resource **textureOut, struct pipe_surface **surfaceOut) { static const enum pipe_format rgbaFormats[] = { @@ -60,15 +61,14 @@ util_create_rgba_surface(struct pipe_screen *screen, PIPE_FORMAT_NONE }; const uint target = PIPE_TEXTURE_2D; - const uint usage = PIPE_TEXTURE_USAGE_RENDER_TARGET; enum pipe_format format = PIPE_FORMAT_NONE; - struct pipe_texture templ; + struct pipe_resource templ; uint i; /* Choose surface format */ for (i = 0; rgbaFormats[i]; i++) { if (screen->is_format_supported(screen, rgbaFormats[i], - target, usage, 0)) { + target, bind, 0)) { format = rgbaFormats[i]; break; } @@ -84,16 +84,19 @@ util_create_rgba_surface(struct pipe_screen *screen, templ.width0 = width; templ.height0 = height; templ.depth0 = 1; - templ.tex_usage = usage; + templ.bind = bind; - *textureOut = screen->texture_create(screen, &templ); + *textureOut = screen->resource_create(screen, &templ); if (!*textureOut) return FALSE; /* create surface / view into texture */ - *surfaceOut = screen->get_tex_surface(screen, *textureOut, 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE); + *surfaceOut = screen->get_tex_surface(screen, + *textureOut, + 0, 0, 0, + bind); if (!*surfaceOut) { - pipe_texture_reference(textureOut, NULL); + pipe_resource_reference(textureOut, NULL); return FALSE; } @@ -105,11 +108,11 @@ util_create_rgba_surface(struct pipe_screen *screen, * Release the surface and texture from util_create_rgba_surface(). */ void -util_destroy_rgba_surface(struct pipe_texture *texture, +util_destroy_rgba_surface(struct pipe_resource *texture, struct pipe_surface *surface) { pipe_surface_reference(&surface, NULL); - pipe_texture_reference(&texture, NULL); + pipe_resource_reference(&texture, NULL); } diff --git a/src/gallium/auxiliary/util/u_surface.h b/src/gallium/auxiliary/util/u_surface.h index 3c60df2c3e..119fcd4ce8 100644 --- a/src/gallium/auxiliary/util/u_surface.h +++ b/src/gallium/auxiliary/util/u_surface.h @@ -52,13 +52,13 @@ util_same_surface(const struct pipe_surface *s1, const struct pipe_surface *s2) extern boolean util_create_rgba_surface(struct pipe_screen *screen, - uint width, uint height, - struct pipe_texture **textureOut, + uint width, uint height, uint bind, + struct pipe_resource **textureOut, struct pipe_surface **surfaceOut); extern void -util_destroy_rgba_surface(struct pipe_texture *texture, +util_destroy_rgba_surface(struct pipe_resource *texture, struct pipe_surface *surface); diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 79481b710b..fe327c302b 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -45,27 +45,27 @@ * Move raw block of pixels from transfer object to user memory. */ void -pipe_get_tile_raw(struct pipe_transfer *pt, +pipe_get_tile_raw(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, void *dst, int dst_stride) { - struct pipe_screen *screen = pt->texture->screen; const void *src; if (dst_stride == 0) - dst_stride = util_format_get_stride(pt->texture->format, w); + dst_stride = util_format_get_stride(pt->resource->format, w); - if (pipe_clip_tile(x, y, &w, &h, pt)) + if (u_clip_tile(x, y, &w, &h, &pt->box)) return; - src = screen->transfer_map(screen, pt); + src = pipe->transfer_map(pipe, pt); assert(src); if(!src) return; - util_copy_rect(dst, pt->texture->format, dst_stride, 0, 0, w, h, src, pt->stride, x, y); + util_copy_rect(dst, pt->resource->format, dst_stride, 0, 0, w, h, src, pt->stride, x, y); - screen->transfer_unmap(screen, pt); + pipe->transfer_unmap(pipe, pt); } @@ -73,28 +73,28 @@ pipe_get_tile_raw(struct pipe_transfer *pt, * Move raw block of pixels from user memory to transfer object. */ void -pipe_put_tile_raw(struct pipe_transfer *pt, +pipe_put_tile_raw(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const void *src, int src_stride) { - struct pipe_screen *screen = pt->texture->screen; void *dst; - enum pipe_format format = pt->texture->format; + enum pipe_format format = pt->resource->format; if (src_stride == 0) src_stride = util_format_get_stride(format, w); - if (pipe_clip_tile(x, y, &w, &h, pt)) + if (u_clip_tile(x, y, &w, &h, &pt->box)) return; - dst = screen->transfer_map(screen, pt); + dst = pipe->transfer_map(pipe, pt); assert(dst); if(!dst) return; util_copy_rect(dst, format, pt->stride, x, y, w, h, src, src_stride, 0, 0); - screen->transfer_unmap(screen, pt); + pipe->transfer_unmap(pipe, pt); } @@ -108,387 +108,6 @@ pipe_put_tile_raw(struct pipe_transfer *pt, -/*** PIPE_FORMAT_B8G8R8A8_UNORM ***/ - -static void -a8r8g8b8_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - const unsigned pixel = *src++; - pRow[0] = ubyte_to_float((pixel >> 16) & 0xff); - pRow[1] = ubyte_to_float((pixel >> 8) & 0xff); - pRow[2] = ubyte_to_float((pixel >> 0) & 0xff); - pRow[3] = ubyte_to_float((pixel >> 24) & 0xff); - } - p += dst_stride; - } -} - - -static void -a8r8g8b8_put_tile_rgba(unsigned *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned r, g, b, a; - r = float_to_ubyte(pRow[0]); - g = float_to_ubyte(pRow[1]); - b = float_to_ubyte(pRow[2]); - a = float_to_ubyte(pRow[3]); - *dst++ = (a << 24) | (r << 16) | (g << 8) | b; - } - p += src_stride; - } -} - - -/*** PIPE_FORMAT_B8G8R8X8_UNORM ***/ - -static void -x8r8g8b8_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - const unsigned pixel = *src++; - pRow[0] = ubyte_to_float((pixel >> 16) & 0xff); - pRow[1] = ubyte_to_float((pixel >> 8) & 0xff); - pRow[2] = ubyte_to_float((pixel >> 0) & 0xff); - pRow[3] = 1.0F; - } - p += dst_stride; - } -} - - -static void -x8r8g8b8_put_tile_rgba(unsigned *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned r, g, b; - r = float_to_ubyte(pRow[0]); - g = float_to_ubyte(pRow[1]); - b = float_to_ubyte(pRow[2]); - *dst++ = (0xff << 24) | (r << 16) | (g << 8) | b; - } - p += src_stride; - } -} - - -/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/ - -static void -b8g8r8a8_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - const unsigned pixel = *src++; - pRow[0] = ubyte_to_float((pixel >> 8) & 0xff); - pRow[1] = ubyte_to_float((pixel >> 16) & 0xff); - pRow[2] = ubyte_to_float((pixel >> 24) & 0xff); - pRow[3] = ubyte_to_float((pixel >> 0) & 0xff); - } - p += dst_stride; - } -} - - -static void -b8g8r8a8_put_tile_rgba(unsigned *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned r, g, b, a; - r = float_to_ubyte(pRow[0]); - g = float_to_ubyte(pRow[1]); - b = float_to_ubyte(pRow[2]); - a = float_to_ubyte(pRow[3]); - *dst++ = (b << 24) | (g << 16) | (r << 8) | a; - } - p += src_stride; - } -} - - -/*** PIPE_FORMAT_A8B8G8R8_UNORM ***/ - -static void -r8g8b8a8_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - const unsigned pixel = *src++; - pRow[0] = ubyte_to_float((pixel >> 24) & 0xff); - pRow[1] = ubyte_to_float((pixel >> 16) & 0xff); - pRow[2] = ubyte_to_float((pixel >> 8) & 0xff); - pRow[3] = ubyte_to_float((pixel >> 0) & 0xff); - } - p += dst_stride; - } -} - - -static void -r8g8b8a8_put_tile_rgba(unsigned *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned r, g, b, a; - r = float_to_ubyte(pRow[0]); - g = float_to_ubyte(pRow[1]); - b = float_to_ubyte(pRow[2]); - a = float_to_ubyte(pRow[3]); - *dst++ = (r << 24) | (g << 16) | (b << 8) | a; - } - p += src_stride; - } -} - - -/*** PIPE_FORMAT_B5G5R5A1_UNORM ***/ - -static void -a1r5g5b5_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - const ushort pixel = *src++; - pRow[0] = ((pixel >> 10) & 0x1f) * (1.0f / 31.0f); - pRow[1] = ((pixel >> 5) & 0x1f) * (1.0f / 31.0f); - pRow[2] = ((pixel ) & 0x1f) * (1.0f / 31.0f); - pRow[3] = ((pixel >> 15) ) * 1.0f; - } - p += dst_stride; - } -} - - -static void -a1r5g5b5_put_tile_rgba(ushort *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned r, g, b, a; - r = float_to_ubyte(pRow[0]); - g = float_to_ubyte(pRow[1]); - b = float_to_ubyte(pRow[2]); - a = float_to_ubyte(pRow[3]); - r = r >> 3; /* 5 bits */ - g = g >> 3; /* 5 bits */ - b = b >> 3; /* 5 bits */ - a = a >> 7; /* 1 bit */ - *dst++ = (a << 15) | (r << 10) | (g << 5) | b; - } - p += src_stride; - } -} - - -/*** PIPE_FORMAT_B4G4R4A4_UNORM ***/ - -static void -a4r4g4b4_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - const ushort pixel = *src++; - pRow[0] = ((pixel >> 8) & 0xf) * (1.0f / 15.0f); - pRow[1] = ((pixel >> 4) & 0xf) * (1.0f / 15.0f); - pRow[2] = ((pixel ) & 0xf) * (1.0f / 15.0f); - pRow[3] = ((pixel >> 12) ) * (1.0f / 15.0f); - } - p += dst_stride; - } -} - - -static void -a4r4g4b4_put_tile_rgba(ushort *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned r, g, b, a; - r = float_to_ubyte(pRow[0]); - g = float_to_ubyte(pRow[1]); - b = float_to_ubyte(pRow[2]); - a = float_to_ubyte(pRow[3]); - r >>= 4; - g >>= 4; - b >>= 4; - a >>= 4; - *dst++ = (a << 12) | (r << 8) | (g << 4) | b; - } - p += src_stride; - } -} - - -/*** PIPE_FORMAT_B5G6R5_UNORM ***/ - -static void -r5g6b5_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - const ushort pixel = *src++; - pRow[0] = ((pixel >> 11) & 0x1f) * (1.0f / 31.0f); - pRow[1] = ((pixel >> 5) & 0x3f) * (1.0f / 63.0f); - pRow[2] = ((pixel ) & 0x1f) * (1.0f / 31.0f); - pRow[3] = 1.0f; - } - p += dst_stride; - } -} - - -static void -r5g6b5_put_tile_rgba(ushort *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - uint r = (uint) (CLAMP(pRow[0], 0.0, 1.0) * 31.0); - uint g = (uint) (CLAMP(pRow[1], 0.0, 1.0) * 63.0); - uint b = (uint) (CLAMP(pRow[2], 0.0, 1.0) * 31.0); - *dst++ = (r << 11) | (g << 5) | (b); - } - p += src_stride; - } -} - - - -/*** PIPE_FORMAT_R8G8B8_UNORM ***/ - -static void -r8g8b8_get_tile_rgba(const ubyte *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - pRow[0] = ubyte_to_float(src[0]); - pRow[1] = ubyte_to_float(src[1]); - pRow[2] = ubyte_to_float(src[2]); - pRow[3] = 1.0f; - src += 3; - } - p += dst_stride; - } -} - - -static void -r8g8b8_put_tile_rgba(ubyte *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - dst[0] = float_to_ubyte(pRow[0]); - dst[1] = float_to_ubyte(pRow[1]); - dst[2] = float_to_ubyte(pRow[2]); - dst += 3; - } - p += src_stride; - } -} - - - /*** PIPE_FORMAT_Z16_UNORM ***/ /** @@ -518,448 +137,6 @@ z16_get_tile_rgba(const ushort *src, -/*** PIPE_FORMAT_L8_UNORM ***/ - -static void -l8_get_tile_rgba(const ubyte *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, src++, pRow += 4) { - pRow[0] = - pRow[1] = - pRow[2] = ubyte_to_float(*src); - pRow[3] = 1.0; - } - p += dst_stride; - } -} - - -static void -l8_put_tile_rgba(ubyte *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned r; - r = float_to_ubyte(pRow[0]); - *dst++ = (ubyte) r; - } - p += src_stride; - } -} - - - -/*** PIPE_FORMAT_A8_UNORM ***/ - -static void -a8_get_tile_rgba(const ubyte *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, src++, pRow += 4) { - pRow[0] = - pRow[1] = - pRow[2] = 0.0; - pRow[3] = ubyte_to_float(*src); - } - p += dst_stride; - } -} - - -static void -a8_put_tile_rgba(ubyte *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned a; - a = float_to_ubyte(pRow[3]); - *dst++ = (ubyte) a; - } - p += src_stride; - } -} - - - -/*** PIPE_FORMAT_R16_SNORM ***/ - -static void -r16_get_tile_rgba(const short *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, src++, pRow += 4) { - pRow[0] = SHORT_TO_FLOAT(src[0]); - pRow[1] = - pRow[2] = 0.0; - pRow[3] = 1.0; - } - p += dst_stride; - } -} - - -static void -r16_put_tile_rgba(short *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, dst++, pRow += 4) { - UNCLAMPED_FLOAT_TO_SHORT(dst[0], pRow[0]); - } - p += src_stride; - } -} - - -/*** PIPE_FORMAT_R16G16B16A16_SNORM ***/ - -static void -r16g16b16a16_get_tile_rgba(const short *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, src += 4, pRow += 4) { - pRow[0] = SHORT_TO_FLOAT(src[0]); - pRow[1] = SHORT_TO_FLOAT(src[1]); - pRow[2] = SHORT_TO_FLOAT(src[2]); - pRow[3] = SHORT_TO_FLOAT(src[3]); - } - p += dst_stride; - } -} - - -static void -r16g16b16a16_put_tile_rgba(short *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, dst += 4, pRow += 4) { - UNCLAMPED_FLOAT_TO_SHORT(dst[0], pRow[0]); - UNCLAMPED_FLOAT_TO_SHORT(dst[1], pRow[1]); - UNCLAMPED_FLOAT_TO_SHORT(dst[2], pRow[2]); - UNCLAMPED_FLOAT_TO_SHORT(dst[3], pRow[3]); - } - p += src_stride; - } -} - - -/*** PIPE_FORMAT_A8B8G8R8_SRGB ***/ - -/** - * Convert an 8-bit sRGB value from non-linear space to a - * linear RGB value in [0, 1]. - * Implemented with a 256-entry lookup table. - */ -static INLINE float -srgb_to_linear(ubyte cs8) -{ - static float table[256]; - static boolean tableReady = FALSE; - if (!tableReady) { - /* compute lookup table now */ - uint i; - for (i = 0; i < 256; i++) { - const float cs = ubyte_to_float(i); - if (cs <= 0.04045) { - table[i] = cs / 12.92f; - } - else { - table[i] = (float) powf((cs + 0.055) / 1.055, 2.4); - } - } - tableReady = TRUE; - } - return table[cs8]; -} - - -/** - * Convert linear float in [0,1] to an srgb ubyte value in [0,255]. - * XXX this hasn't been tested (render to srgb surface). - * XXX this needs optimization. - */ -static INLINE ubyte -linear_to_srgb(float cl) -{ - if (cl >= 1.0F) - return 255; - else if (cl >= 0.0031308F) - return float_to_ubyte(1.055F * powf(cl, 0.41666F) - 0.055F); - else if (cl > 0.0F) - return float_to_ubyte(12.92F * cl); - else - return 0.0; -} - - -static void -a8r8g8b8_srgb_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - const unsigned pixel = *src++; - pRow[0] = srgb_to_linear((pixel >> 16) & 0xff); - pRow[1] = srgb_to_linear((pixel >> 8) & 0xff); - pRow[2] = srgb_to_linear((pixel >> 0) & 0xff); - pRow[3] = ubyte_to_float((pixel >> 24) & 0xff); - } - p += dst_stride; - } -} - -static void -a8r8g8b8_srgb_put_tile_rgba(unsigned *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned r, g, b, a; - r = linear_to_srgb(pRow[0]); - g = linear_to_srgb(pRow[1]); - b = linear_to_srgb(pRow[2]); - a = float_to_ubyte(pRow[3]); - *dst++ = (a << 24) | (r << 16) | (g << 8) | b; - } - p += src_stride; - } -} - - -/*** PIPE_FORMAT_L8A8_SRGB ***/ - -static void -a8l8_srgb_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - ushort p = *src++; - pRow[0] = - pRow[1] = - pRow[2] = srgb_to_linear(p & 0xff); - pRow[3] = ubyte_to_float(p >> 8); - } - p += dst_stride; - } -} - -static void -a8l8_srgb_put_tile_rgba(ushort *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned r, a; - r = linear_to_srgb(pRow[0]); - a = float_to_ubyte(pRow[3]); - *dst++ = (a << 8) | r; - } - p += src_stride; - } -} - - -/*** PIPE_FORMAT_L8_SRGB ***/ - -static void -l8_srgb_get_tile_rgba(const ubyte *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, src++, pRow += 4) { - pRow[0] = - pRow[1] = - pRow[2] = srgb_to_linear(*src); - pRow[3] = 1.0; - } - p += dst_stride; - } -} - -static void -l8_srgb_put_tile_rgba(ubyte *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned r; - r = linear_to_srgb(pRow[0]); - *dst++ = (ubyte) r; - } - p += src_stride; - } -} - - -/*** PIPE_FORMAT_I8_UNORM ***/ - -static void -i8_get_tile_rgba(const ubyte *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, src++, pRow += 4) { - pRow[0] = - pRow[1] = - pRow[2] = - pRow[3] = ubyte_to_float(*src); - } - p += dst_stride; - } -} - - -static void -i8_put_tile_rgba(ubyte *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned r; - r = float_to_ubyte(pRow[0]); - *dst++ = (ubyte) r; - } - p += src_stride; - } -} - - -/*** PIPE_FORMAT_L8A8_UNORM ***/ - -static void -a8l8_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - ushort p = *src++; - pRow[0] = - pRow[1] = - pRow[2] = ubyte_to_float(p & 0xff); - pRow[3] = ubyte_to_float(p >> 8); - } - p += dst_stride; - } -} - - -static void -a8l8_put_tile_rgba(ushort *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned r, a; - r = float_to_ubyte(pRow[0]); - a = float_to_ubyte(pRow[3]); - *dst++ = (a << 8) | r; - } - p += src_stride; - } -} - - - - /*** PIPE_FORMAT_Z32_UNORM ***/ /** @@ -987,7 +164,7 @@ z32_get_tile_rgba(const unsigned *src, } -/*** PIPE_FORMAT_Z24S8_UNORM ***/ +/*** PIPE_FORMAT_Z24_UNORM_S8_USCALED ***/ /** * Return Z component as four float in [0,1]. Stencil part ignored. @@ -1014,7 +191,7 @@ s8z24_get_tile_rgba(const unsigned *src, } -/*** PIPE_FORMAT_S8Z24_UNORM ***/ +/*** PIPE_FORMAT_S8_USCALED_Z24_UNORM ***/ /** * Return Z component as four float in [0,1]. Stencil part ignored. @@ -1067,94 +244,6 @@ z32f_get_tile_rgba(const float *src, } -/*** PIPE_FORMAT_UYVY / PIPE_FORMAT_YUYV ***/ - -/** - * Convert YCbCr (or YCrCb) to RGBA. - */ -static void -ycbcr_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride, - boolean rev) -{ - const float scale = 1.0f / 255.0f; - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - /* do two texels at a time */ - for (j = 0; j < (w & ~1); j += 2, src += 2) { - const ushort t0 = src[0]; - const ushort t1 = src[1]; - const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */ - const ubyte y1 = (t1 >> 8) & 0xff; /* luminance */ - ubyte cb, cr; - float r, g, b; - - if (rev) { - cb = t1 & 0xff; /* chroma U */ - cr = t0 & 0xff; /* chroma V */ - } - else { - cb = t0 & 0xff; /* chroma U */ - cr = t1 & 0xff; /* chroma V */ - } - - /* even pixel: y0,cr,cb */ - r = 1.164f * (y0-16) + 1.596f * (cr-128); - g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128); - b = 1.164f * (y0-16) + 2.018f * (cb-128); - pRow[0] = r * scale; - pRow[1] = g * scale; - pRow[2] = b * scale; - pRow[3] = 1.0f; - pRow += 4; - - /* odd pixel: use y1,cr,cb */ - r = 1.164f * (y1-16) + 1.596f * (cr-128); - g = 1.164f * (y1-16) - 0.813f * (cr-128) - 0.391f * (cb-128); - b = 1.164f * (y1-16) + 2.018f * (cb-128); - pRow[0] = r * scale; - pRow[1] = g * scale; - pRow[2] = b * scale; - pRow[3] = 1.0f; - pRow += 4; - - } - /* do the last texel */ - if (w & 1) { - const ushort t0 = src[0]; - const ushort t1 = src[1]; - const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */ - ubyte cb, cr; - float r, g, b; - - if (rev) { - cb = t1 & 0xff; /* chroma U */ - cr = t0 & 0xff; /* chroma V */ - } - else { - cb = t0 & 0xff; /* chroma U */ - cr = t1 & 0xff; /* chroma V */ - } - - /* even pixel: y0,cr,cb */ - r = 1.164f * (y0-16) + 1.596f * (cr-128); - g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128); - b = 1.164f * (y0-16) + 2.018f * (cb-128); - pRow[0] = r * scale; - pRow[1] = g * scale; - pRow[2] = b * scale; - pRow[3] = 1.0f; - pRow += 4; - } - p += dst_stride; - } -} - - void pipe_tile_raw_to_rgba(enum pipe_format format, void *src, @@ -1162,80 +251,23 @@ pipe_tile_raw_to_rgba(enum pipe_format format, float *dst, unsigned dst_stride) { switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - a8r8g8b8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_B8G8R8X8_UNORM: - x8r8g8b8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_A8R8G8B8_UNORM: - b8g8r8a8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_A8B8G8R8_UNORM: - r8g8b8a8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_B5G5R5A1_UNORM: - a1r5g5b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_B4G4R4A4_UNORM: - a4r4g4b4_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_B5G6R5_UNORM: - r5g6b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_R8G8B8_UNORM: - r8g8b8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_L8_UNORM: - l8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_A8_UNORM: - a8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_I8_UNORM: - i8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_L8A8_UNORM: - a8l8_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_R16_SNORM: - r16_get_tile_rgba((short *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_R16G16B16A16_SNORM: - r16g16b16a16_get_tile_rgba((short *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_B8G8R8A8_SRGB: - a8r8g8b8_srgb_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_L8A8_SRGB: - a8l8_srgb_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_L8_SRGB: - l8_srgb_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); - break; case PIPE_FORMAT_Z16_UNORM: z16_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); break; case PIPE_FORMAT_Z32_UNORM: z32_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; - case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: case PIPE_FORMAT_Z24X8_UNORM: s8z24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; - case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: z24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; case PIPE_FORMAT_Z32_FLOAT: z32f_get_tile_rgba((float *) src, w, h, dst, dst_stride); break; - case PIPE_FORMAT_UYVY: - ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, FALSE); - break; - case PIPE_FORMAT_YUYV: - ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, TRUE); - break; default: util_format_read_4f(format, dst, dst_stride * sizeof(float), @@ -1246,15 +278,16 @@ pipe_tile_raw_to_rgba(enum pipe_format format, void -pipe_get_tile_rgba(struct pipe_transfer *pt, +pipe_get_tile_rgba(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, float *p) { unsigned dst_stride = w * 4; void *packed; - enum pipe_format format = pt->texture->format; + enum pipe_format format = pt->resource->format; - if (pipe_clip_tile(x, y, &w, &h, pt)) + if (u_clip_tile(x, y, &w, &h, &pt->box)) return; packed = MALLOC(util_format_get_nblocks(format, w, h) * util_format_get_blocksize(format)); @@ -1265,24 +298,94 @@ pipe_get_tile_rgba(struct pipe_transfer *pt, if(format == PIPE_FORMAT_UYVY || format == PIPE_FORMAT_YUYV) assert((x & 1) == 0); - pipe_get_tile_raw(pt, x, y, w, h, packed, 0); + pipe_get_tile_raw(pipe, pt, x, y, w, h, packed, 0); + + pipe_tile_raw_to_rgba(format, packed, w, h, p, dst_stride); + + FREE(packed); +} + + +void +pipe_get_tile_swizzle(struct pipe_context *pipe, + struct pipe_transfer *pt, + uint x, + uint y, + uint w, + uint h, + uint swizzle_r, + uint swizzle_g, + uint swizzle_b, + uint swizzle_a, + enum pipe_format format, + float *p) +{ + unsigned dst_stride = w * 4; + void *packed; + uint iy; + float rgba01[6]; + + if (u_clip_tile(x, y, &w, &h, &pt->box)) { + return; + } + + packed = MALLOC(util_format_get_nblocks(format, w, h) * util_format_get_blocksize(format)); + if (!packed) { + return; + } + + if (format == PIPE_FORMAT_UYVY || format == PIPE_FORMAT_YUYV) { + assert((x & 1) == 0); + } + + pipe_get_tile_raw(pipe, pt, x, y, w, h, packed, 0); pipe_tile_raw_to_rgba(format, packed, w, h, p, dst_stride); FREE(packed); + + if (swizzle_r == PIPE_SWIZZLE_RED && + swizzle_g == PIPE_SWIZZLE_GREEN && + swizzle_b == PIPE_SWIZZLE_BLUE && + swizzle_a == PIPE_SWIZZLE_ALPHA) { + /* no-op, skip */ + return; + } + + rgba01[PIPE_SWIZZLE_ZERO] = 0.0f; + rgba01[PIPE_SWIZZLE_ONE] = 1.0f; + + for (iy = 0; iy < h; iy++) { + float *row = p; + uint ix; + + for (ix = 0; ix < w; ix++) { + rgba01[PIPE_SWIZZLE_RED] = row[0]; + rgba01[PIPE_SWIZZLE_GREEN] = row[1]; + rgba01[PIPE_SWIZZLE_BLUE] = row[2]; + rgba01[PIPE_SWIZZLE_ALPHA] = row[3]; + + *row++ = rgba01[swizzle_r]; + *row++ = rgba01[swizzle_g]; + *row++ = rgba01[swizzle_b]; + *row++ = rgba01[swizzle_a]; + } + p += dst_stride; + } } void -pipe_put_tile_rgba(struct pipe_transfer *pt, +pipe_put_tile_rgba(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const float *p) { unsigned src_stride = w * 4; void *packed; - enum pipe_format format = pt->texture->format; + enum pipe_format format = pt->resource->format; - if (pipe_clip_tile(x, y, &w, &h, pt)) + if (u_clip_tile(x, y, &w, &h, &pt->box)) return; packed = MALLOC(util_format_get_nblocks(format, w, h) * util_format_get_blocksize(format)); @@ -1291,68 +394,17 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, return; switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_B8G8R8X8_UNORM: - x8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_A8R8G8B8_UNORM: - b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_A8B8G8R8_UNORM: - r8g8b8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_B5G5R5A1_UNORM: - a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_B5G6R5_UNORM: - r5g6b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_R8G8B8_UNORM: - r8g8b8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_B4G4R4A4_UNORM: - a4r4g4b4_put_tile_rgba((ushort *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_L8_UNORM: - l8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_A8_UNORM: - a8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_I8_UNORM: - i8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_L8A8_UNORM: - a8l8_put_tile_rgba((ushort *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_R16_SNORM: - r16_put_tile_rgba((short *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_R16G16B16A16_SNORM: - r16g16b16a16_put_tile_rgba((short *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_B8G8R8A8_SRGB: - a8r8g8b8_srgb_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_L8A8_SRGB: - a8l8_srgb_put_tile_rgba((ushort *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_L8_SRGB: - l8_srgb_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); - break; case PIPE_FORMAT_Z16_UNORM: /*z16_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/ break; case PIPE_FORMAT_Z32_UNORM: /*z32_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ break; - case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: case PIPE_FORMAT_Z24X8_UNORM: /*s8z24_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ break; - case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ break; @@ -1363,7 +415,7 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, 0, 0, w, h); } - pipe_put_tile_raw(pt, x, y, w, h, packed, 0); + pipe_put_tile_raw(pipe, pt, x, y, w, h, packed, 0); FREE(packed); } @@ -1373,21 +425,21 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, * Get a block of Z values, converted to 32-bit range. */ void -pipe_get_tile_z(struct pipe_transfer *pt, +pipe_get_tile_z(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, uint *z) { - struct pipe_screen *screen = pt->texture->screen; const uint dstStride = w; ubyte *map; uint *pDest = z; uint i, j; - enum pipe_format format = pt->texture->format; + enum pipe_format format = pt->resource->format; - if (pipe_clip_tile(x, y, &w, &h, pt)) + if (u_clip_tile(x, y, &w, &h, &pt->box)) return; - map = (ubyte *)screen->transfer_map(screen, pt); + map = (ubyte *)pipe->transfer_map(pipe, pt); if (!map) { assert(0); return; @@ -1405,7 +457,7 @@ pipe_get_tile_z(struct pipe_transfer *pt, } } break; - case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: case PIPE_FORMAT_Z24X8_UNORM: { const uint *ptrc @@ -1420,7 +472,7 @@ pipe_get_tile_z(struct pipe_transfer *pt, } } break; - case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: { const uint *ptrc @@ -1453,26 +505,26 @@ pipe_get_tile_z(struct pipe_transfer *pt, assert(0); } - screen->transfer_unmap(screen, pt); + pipe->transfer_unmap(pipe, pt); } void -pipe_put_tile_z(struct pipe_transfer *pt, +pipe_put_tile_z(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const uint *zSrc) { - struct pipe_screen *screen = pt->texture->screen; const uint srcStride = w; const uint *ptrc = zSrc; ubyte *map; uint i, j; - enum pipe_format format = pt->texture->format; + enum pipe_format format = pt->resource->format; - if (pipe_clip_tile(x, y, &w, &h, pt)) + if (u_clip_tile(x, y, &w, &h, &pt->box)) return; - map = (ubyte *)screen->transfer_map(screen, pt); + map = (ubyte *)pipe->transfer_map(pipe, pt); if (!map) { assert(0); return; @@ -1489,10 +541,10 @@ pipe_put_tile_z(struct pipe_transfer *pt, } } break; - case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: { uint *pDest = (uint *) (map + y * pt->stride + x*4); - assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE); + //assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 24-bit Z, preserve stencil */ @@ -1516,10 +568,10 @@ pipe_put_tile_z(struct pipe_transfer *pt, } } break; - case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: { uint *pDest = (uint *) (map + y * pt->stride + x*4); - assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE); + //assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 24-bit Z, preserve stencil */ @@ -1560,7 +612,7 @@ pipe_put_tile_z(struct pipe_transfer *pt, assert(0); } - screen->transfer_unmap(screen, pt); + pipe->transfer_unmap(pipe, pt); } diff --git a/src/gallium/auxiliary/util/u_tile.h b/src/gallium/auxiliary/util/u_tile.h index 1453af38b8..986eee0743 100644 --- a/src/gallium/auxiliary/util/u_tile.h +++ b/src/gallium/auxiliary/util/u_tile.h @@ -32,22 +32,24 @@ struct pipe_transfer; - /** * Clip tile against transfer dims. + * + * XXX: this only clips width and height! + * * \return TRUE if tile is totally clipped, FALSE otherwise */ static INLINE boolean -pipe_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_transfer *pt) +u_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_box *box) { - if (x >= pt->width) + if (x >= box->width) return TRUE; - if (y >= pt->height) + if (y >= box->height) return TRUE; - if (x + *w > pt->width) - *w = pt->width - x; - if (y + *h > pt->height) - *h = pt->height - y; + if (x + *w > box->width) + *w = box->width - x; + if (y + *h > box->height) + *h = box->height - y; return FALSE; } @@ -56,34 +58,54 @@ extern "C" { #endif void -pipe_get_tile_raw(struct pipe_transfer *pt, +pipe_get_tile_raw(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, void *p, int dst_stride); void -pipe_put_tile_raw(struct pipe_transfer *pt, +pipe_put_tile_raw(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const void *p, int src_stride); void -pipe_get_tile_rgba(struct pipe_transfer *pt, +pipe_get_tile_rgba(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, float *p); void -pipe_put_tile_rgba(struct pipe_transfer *pt, +pipe_get_tile_swizzle(struct pipe_context *pipe, + struct pipe_transfer *pt, + uint x, + uint y, + uint w, + uint h, + uint swizzle_r, + uint swizzle_g, + uint swizzle_b, + uint swizzle_a, + enum pipe_format format, + float *p); + +void +pipe_put_tile_rgba(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const float *p); void -pipe_get_tile_z(struct pipe_transfer *pt, +pipe_get_tile_z(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, uint *z); void -pipe_put_tile_z(struct pipe_transfer *pt, +pipe_put_tile_z(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const uint *z); diff --git a/src/gallium/auxiliary/util/u_timed_winsys.c b/src/gallium/auxiliary/util/u_timed_winsys.c deleted file mode 100644 index d88298bc14..0000000000 --- a/src/gallium/auxiliary/util/u_timed_winsys.c +++ /dev/null @@ -1,312 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * - **************************************************************************/ -/* - * Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com> - */ - -#include "pipe/p_state.h" -#include "util/u_simple_screen.h" -#include "u_timed_winsys.h" -#include "util/u_memory.h" -#include "os/os_time.h" - - -struct timed_winsys { - struct pipe_winsys base; - struct pipe_winsys *backend; - uint64_t last_dump; - struct { - const char *name_key; - double total; - unsigned calls; - } funcs[13]; -}; - - -static struct timed_winsys *timed_winsys( struct pipe_winsys *winsys ) -{ - return (struct timed_winsys *)winsys; -} - - -static void time_display( struct pipe_winsys *winsys ) -{ - struct timed_winsys *tws = timed_winsys(winsys); - unsigned i; - double overall = 0; - - for (i = 0; i < Elements(tws->funcs); i++) { - if (tws->funcs[i].name_key) { - debug_printf("*** %-25s %5.3fms (%d calls, avg %.3fms)\n", - tws->funcs[i].name_key, - tws->funcs[i].total, - tws->funcs[i].calls, - tws->funcs[i].total / tws->funcs[i].calls); - overall += tws->funcs[i].total; - tws->funcs[i].calls = 0; - tws->funcs[i].total = 0; - } - } - - debug_printf("*** %-25s %5.3fms\n", - "OVERALL WINSYS", - overall); -} - -static void time_finish( struct pipe_winsys *winsys, - long long startval, - unsigned idx, - const char *name ) -{ - struct timed_winsys *tws = timed_winsys(winsys); - int64_t endval = os_time_get(); - double elapsed = (endval - startval)/1000.0; - - if (endval - startval > 1000LL) - debug_printf("*** %s %.3f\n", name, elapsed ); - - assert( tws->funcs[idx].name_key == name || - tws->funcs[idx].name_key == NULL); - - tws->funcs[idx].name_key = name; - tws->funcs[idx].total += elapsed; - tws->funcs[idx].calls++; - - if (endval - tws->last_dump > 10LL * 1000LL * 1000LL) { - time_display( winsys ); - tws->last_dump = endval; - } -} - - -/* Pipe has no concept of pools, but the psb driver passes a flag that - * can be mapped onto pools in the backend. - */ -static struct pipe_buffer * -timed_buffer_create(struct pipe_winsys *winsys, - unsigned alignment, - unsigned usage, - unsigned size ) -{ - struct pipe_winsys *backend = timed_winsys(winsys)->backend; - int64_t start = os_time_get(); - - struct pipe_buffer *buf = - backend->buffer_create( backend, alignment, usage, size ); - - time_finish(winsys, start, 0, __FUNCTION__); - - return buf; -} - - - - -static struct pipe_buffer * -timed_user_buffer_create(struct pipe_winsys *winsys, - void *data, - unsigned bytes) -{ - struct pipe_winsys *backend = timed_winsys(winsys)->backend; - int64_t start = os_time_get(); - - struct pipe_buffer *buf = backend->user_buffer_create( backend, data, bytes ); - - time_finish(winsys, start, 1, __FUNCTION__); - - return buf; -} - - -static void * -timed_buffer_map(struct pipe_winsys *winsys, - struct pipe_buffer *buf, - unsigned flags) -{ - struct pipe_winsys *backend = timed_winsys(winsys)->backend; - int64_t start = os_time_get(); - - void *map = backend->buffer_map( backend, buf, flags ); - - time_finish(winsys, start, 2, __FUNCTION__); - - return map; -} - - -static void -timed_buffer_unmap(struct pipe_winsys *winsys, - struct pipe_buffer *buf) -{ - struct pipe_winsys *backend = timed_winsys(winsys)->backend; - int64_t start = os_time_get(); - - backend->buffer_unmap( backend, buf ); - - time_finish(winsys, start, 3, __FUNCTION__); -} - - -static void -timed_buffer_destroy(struct pipe_buffer *buf) -{ - struct pipe_winsys *winsys = buf->screen->winsys; - struct pipe_winsys *backend = timed_winsys(winsys)->backend; - int64_t start = os_time_get(); - - backend->buffer_destroy( buf ); - - time_finish(winsys, start, 4, __FUNCTION__); -} - - -static void -timed_flush_frontbuffer( struct pipe_winsys *winsys, - struct pipe_surface *surf, - void *context_private) -{ - struct pipe_winsys *backend = timed_winsys(winsys)->backend; - int64_t start = os_time_get(); - - backend->flush_frontbuffer( backend, surf, context_private ); - - time_finish(winsys, start, 5, __FUNCTION__); -} - - - - -static struct pipe_buffer * -timed_surface_buffer_create(struct pipe_winsys *winsys, - unsigned width, unsigned height, - enum pipe_format format, - unsigned usage, - unsigned tex_usage, - unsigned *stride) -{ - struct pipe_winsys *backend = timed_winsys(winsys)->backend; - int64_t start = os_time_get(); - - struct pipe_buffer *ret = backend->surface_buffer_create( backend, width, height, - format, usage, tex_usage, stride ); - - time_finish(winsys, start, 7, __FUNCTION__); - - return ret; -} - - -static const char * -timed_get_name( struct pipe_winsys *winsys ) -{ - struct pipe_winsys *backend = timed_winsys(winsys)->backend; - int64_t start = os_time_get(); - - const char *ret = backend->get_name( backend ); - - time_finish(winsys, start, 9, __FUNCTION__); - - return ret; -} - -static void -timed_fence_reference(struct pipe_winsys *winsys, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *fence) -{ - struct pipe_winsys *backend = timed_winsys(winsys)->backend; - int64_t start = os_time_get(); - - backend->fence_reference( backend, ptr, fence ); - - time_finish(winsys, start, 10, __FUNCTION__); -} - - -static int -timed_fence_signalled( struct pipe_winsys *winsys, - struct pipe_fence_handle *fence, - unsigned flag ) -{ - struct pipe_winsys *backend = timed_winsys(winsys)->backend; - int64_t start = os_time_get(); - - int ret = backend->fence_signalled( backend, fence, flag ); - - time_finish(winsys, start, 11, __FUNCTION__); - - return ret; -} - -static int -timed_fence_finish( struct pipe_winsys *winsys, - struct pipe_fence_handle *fence, - unsigned flag ) -{ - struct pipe_winsys *backend = timed_winsys(winsys)->backend; - int64_t start = os_time_get(); - - int ret = backend->fence_finish( backend, fence, flag ); - - time_finish(winsys, start, 12, __FUNCTION__); - - return ret; -} - -static void -timed_winsys_destroy( struct pipe_winsys *winsys ) -{ - struct pipe_winsys *backend = timed_winsys(winsys)->backend; - backend->destroy( backend ); - FREE(winsys); -} - - - -struct pipe_winsys *u_timed_winsys_create( struct pipe_winsys *backend ) -{ - struct timed_winsys *ws = CALLOC_STRUCT(timed_winsys); - - ws->base.user_buffer_create = timed_user_buffer_create; - ws->base.buffer_map = timed_buffer_map; - ws->base.buffer_unmap = timed_buffer_unmap; - ws->base.buffer_destroy = timed_buffer_destroy; - ws->base.buffer_create = timed_buffer_create; - ws->base.surface_buffer_create = timed_surface_buffer_create; - ws->base.flush_frontbuffer = timed_flush_frontbuffer; - ws->base.get_name = timed_get_name; - ws->base.fence_reference = timed_fence_reference; - ws->base.fence_signalled = timed_fence_signalled; - ws->base.fence_finish = timed_fence_finish; - ws->base.destroy = timed_winsys_destroy; - - ws->backend = backend; - - return &ws->base; -} - diff --git a/src/gallium/auxiliary/util/u_transfer.c b/src/gallium/auxiliary/util/u_transfer.c new file mode 100644 index 0000000000..bedace3b1d --- /dev/null +++ b/src/gallium/auxiliary/util/u_transfer.c @@ -0,0 +1,110 @@ +#include "pipe/p_context.h" +#include "util/u_rect.h" +#include "util/u_inlines.h" +#include "util/u_transfer.h" +#include "util/u_memory.h" + +/* One-shot transfer operation with data supplied in a user + * pointer. XXX: strides?? + */ +void u_default_transfer_inline_write( struct pipe_context *pipe, + struct pipe_resource *resource, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned slice_stride) +{ + struct pipe_transfer *transfer = NULL; + uint8_t *map = NULL; + + transfer = pipe->get_transfer(pipe, + resource, + sr, + usage, + box ); + if (transfer == NULL) + goto out; + + map = pipe_transfer_map(pipe, transfer); + if (map == NULL) + goto out; + + assert(box->depth == 1); /* XXX: fix me */ + + util_copy_rect(map, + resource->format, + transfer->stride, /* bytes? */ + 0, 0, + box->width, + box->height, + data, + box->width, /* bytes? texels? */ + 0, 0); + +out: + if (map) + pipe_transfer_unmap(pipe, transfer); + + if (transfer) + pipe_transfer_destroy(pipe, transfer); +} + + +boolean u_default_resource_get_handle(struct pipe_screen *screen, + struct pipe_resource *resource, + struct winsys_handle *handle) +{ + return FALSE; +} + + + +void u_default_transfer_flush_region( struct pipe_context *pipe, + struct pipe_transfer *transfer, + const struct pipe_box *box) +{ + /* This is a no-op implementation, nothing to do. + */ +} + +unsigned u_default_is_resource_referenced( struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned face, unsigned level) +{ + return 0; +} + +struct pipe_transfer * u_default_get_transfer(struct pipe_context *context, + struct pipe_resource *resource, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box) +{ + struct pipe_transfer *transfer = CALLOC_STRUCT(pipe_transfer); + if (transfer == NULL) + return NULL; + + transfer->resource = resource; + transfer->sr = sr; + transfer->usage = usage; + transfer->box = *box; + + /* Note strides are zero, this is ok for buffers, but not for + * textures 2d & higher at least. + */ + return transfer; +} + +void u_default_transfer_unmap( struct pipe_context *pipe, + struct pipe_transfer *transfer ) +{ +} + +void u_default_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + FREE(transfer); +} + diff --git a/src/gallium/auxiliary/util/u_transfer.h b/src/gallium/auxiliary/util/u_transfer.h new file mode 100644 index 0000000000..eb07945d15 --- /dev/null +++ b/src/gallium/auxiliary/util/u_transfer.h @@ -0,0 +1,145 @@ + +#ifndef U_TRANSFER_H +#define U_TRANSFER_H + +/* Fallback implementations for inline read/writes which just go back + * to the regular transfer behaviour. + */ +#include "pipe/p_state.h" + +struct pipe_context; + +boolean u_default_resource_get_handle(struct pipe_screen *screen, + struct pipe_resource *resource, + struct winsys_handle *handle); + +void u_default_transfer_inline_write( struct pipe_context *pipe, + struct pipe_resource *resource, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned slice_stride); + +void u_default_transfer_flush_region( struct pipe_context *pipe, + struct pipe_transfer *transfer, + const struct pipe_box *box); + +unsigned u_default_is_resource_referenced( struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned face, unsigned level); + +struct pipe_transfer * u_default_get_transfer(struct pipe_context *context, + struct pipe_resource *resource, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box); + +void u_default_transfer_unmap( struct pipe_context *pipe, + struct pipe_transfer *transfer ); + +void u_default_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *transfer); + + + +/* Useful helper to allow >1 implementation of resource functionality + * to exist in a single driver. This is intended to be transitionary! + */ +struct u_resource_vtbl { + + boolean (*resource_get_handle)(struct pipe_screen *, + struct pipe_resource *tex, + struct winsys_handle *handle); + + void (*resource_destroy)(struct pipe_screen *, + struct pipe_resource *pt); + + unsigned (*is_resource_referenced)(struct pipe_context *pipe, + struct pipe_resource *texture, + unsigned face, unsigned level); + + struct pipe_transfer *(*get_transfer)(struct pipe_context *, + struct pipe_resource *resource, + struct pipe_subresource, + unsigned usage, + const struct pipe_box *); + + void (*transfer_destroy)(struct pipe_context *, + struct pipe_transfer *); + + void *(*transfer_map)( struct pipe_context *, + struct pipe_transfer *transfer ); + + void (*transfer_flush_region)( struct pipe_context *, + struct pipe_transfer *transfer, + const struct pipe_box *); + + void (*transfer_unmap)( struct pipe_context *, + struct pipe_transfer *transfer ); + + void (*transfer_inline_write)( struct pipe_context *pipe, + struct pipe_resource *resource, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned slice_stride); +}; + + +struct u_resource { + struct pipe_resource b; + struct u_resource_vtbl *vtbl; +}; + + +boolean u_resource_get_handle_vtbl(struct pipe_screen *screen, + struct pipe_resource *resource, + struct winsys_handle *handle); + +void u_resource_destroy_vtbl(struct pipe_screen *screen, + struct pipe_resource *resource); + +unsigned u_is_resource_referenced_vtbl( struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned face, unsigned level); + +struct pipe_transfer *u_get_transfer_vtbl(struct pipe_context *context, + struct pipe_resource *resource, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box); + +void u_transfer_destroy_vtbl(struct pipe_context *pipe, + struct pipe_transfer *transfer); + +void *u_transfer_map_vtbl( struct pipe_context *pipe, + struct pipe_transfer *transfer ); + +void u_transfer_flush_region_vtbl( struct pipe_context *pipe, + struct pipe_transfer *transfer, + const struct pipe_box *box); + +void u_transfer_unmap_vtbl( struct pipe_context *rm_ctx, + struct pipe_transfer *transfer ); + +void u_transfer_inline_write_vtbl( struct pipe_context *rm_ctx, + struct pipe_resource *resource, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned slice_stride); + + + + + + + + +#endif diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c index 012b2ae233..75d44432d9 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.c +++ b/src/gallium/auxiliary/util/u_upload_mgr.c @@ -31,7 +31,7 @@ #include "pipe/p_defines.h" #include "util/u_inlines.h" -#include "pipe/p_screen.h" +#include "pipe/p_context.h" #include "util/u_memory.h" #include "util/u_math.h" @@ -39,7 +39,7 @@ struct u_upload_mgr { - struct pipe_screen *screen; + struct pipe_context *pipe; unsigned default_size; unsigned alignment; @@ -47,21 +47,21 @@ struct u_upload_mgr { /* The active buffer: */ - struct pipe_buffer *buffer; + struct pipe_resource *buffer; unsigned size; unsigned offset; }; -struct u_upload_mgr *u_upload_create( struct pipe_screen *screen, +struct u_upload_mgr *u_upload_create( struct pipe_context *pipe, unsigned default_size, unsigned alignment, unsigned usage ) { struct u_upload_mgr *upload = CALLOC_STRUCT( u_upload_mgr ); + upload->pipe = pipe; upload->default_size = default_size; - upload->screen = screen; upload->alignment = alignment; upload->usage = usage; upload->buffer = NULL; @@ -69,31 +69,44 @@ struct u_upload_mgr *u_upload_create( struct pipe_screen *screen, return upload; } - +/* Slightly specialized version of buffer_write designed to maximize + * chances of the driver consolidating successive writes into a single + * upload. + * + * dirty_size may be slightly greater than size to cope with + * alignment. We don't want to leave holes between succesively mapped + * regions as that may prevent the driver from consolidating uploads. + * + * Note that the 'data' pointer has probably come from the application + * and we cannot read even a byte past its end without risking + * segfaults, or at least complaints from valgrind.. + */ static INLINE enum pipe_error -my_buffer_write(struct pipe_screen *screen, - struct pipe_buffer *buf, +my_buffer_write(struct pipe_context *pipe, + struct pipe_resource *buf, unsigned offset, unsigned size, unsigned dirty_size, const void *data) { + struct pipe_transfer *transfer = NULL; uint8_t *map; - assert(offset < buf->size); - assert(offset + size <= buf->size); + assert(offset < buf->width0); + assert(offset + size <= buf->width0); assert(dirty_size >= size); assert(size); - map = pipe_buffer_map_range(screen, buf, offset, size, - PIPE_BUFFER_USAGE_CPU_WRITE | - PIPE_BUFFER_USAGE_FLUSH_EXPLICIT | - PIPE_BUFFER_USAGE_DISCARD | - PIPE_BUFFER_USAGE_UNSYNCHRONIZED); + map = pipe_buffer_map_range(pipe, buf, offset, dirty_size, + PIPE_TRANSFER_WRITE | + PIPE_TRANSFER_FLUSH_EXPLICIT | + PIPE_TRANSFER_DISCARD | + PIPE_TRANSFER_UNSYNCHRONIZED, + &transfer); if (map == NULL) return PIPE_ERROR_OUT_OF_MEMORY; memcpy(map + offset, data, size); - pipe_buffer_flush_mapped_range(screen, buf, offset, dirty_size); - pipe_buffer_unmap(screen, buf); + pipe_buffer_flush_mapped_range(pipe, transfer, offset, dirty_size); + pipe_buffer_unmap(pipe, buf, transfer); return PIPE_OK; } @@ -109,7 +122,7 @@ my_buffer_write(struct pipe_screen *screen, */ void u_upload_flush( struct u_upload_mgr *upload ) { - pipe_buffer_reference( &upload->buffer, NULL ); + pipe_resource_reference( &upload->buffer, NULL ); upload->size = 0; } @@ -135,9 +148,8 @@ u_upload_alloc_buffer( struct u_upload_mgr *upload, */ size = align(MAX2(upload->default_size, min_size), 4096); - upload->buffer = pipe_buffer_create( upload->screen, - upload->alignment, - upload->usage | PIPE_BUFFER_USAGE_CPU_WRITE, + upload->buffer = pipe_buffer_create( upload->pipe->screen, + upload->usage, size ); if (upload->buffer == NULL) goto fail; @@ -149,7 +161,7 @@ u_upload_alloc_buffer( struct u_upload_mgr *upload, fail: if (upload->buffer) - pipe_buffer_reference( &upload->buffer, NULL ); + pipe_resource_reference( &upload->buffer, NULL ); return PIPE_ERROR_OUT_OF_MEMORY; } @@ -159,7 +171,7 @@ enum pipe_error u_upload_data( struct u_upload_mgr *upload, unsigned size, const void *data, unsigned *out_offset, - struct pipe_buffer **outbuf ) + struct pipe_resource **outbuf ) { unsigned alloc_size = align( size, upload->alignment ); enum pipe_error ret = PIPE_OK; @@ -172,7 +184,7 @@ enum pipe_error u_upload_data( struct u_upload_mgr *upload, /* Copy the data, using map_range if available: */ - ret = my_buffer_write( upload->screen, + ret = my_buffer_write( upload->pipe, upload->buffer, upload->offset, size, @@ -183,7 +195,7 @@ enum pipe_error u_upload_data( struct u_upload_mgr *upload, /* Emit the return values: */ - pipe_buffer_reference( outbuf, upload->buffer ); + pipe_resource_reference( outbuf, upload->buffer ); *out_offset = upload->offset; upload->offset += alloc_size; return PIPE_OK; @@ -198,15 +210,18 @@ enum pipe_error u_upload_data( struct u_upload_mgr *upload, enum pipe_error u_upload_buffer( struct u_upload_mgr *upload, unsigned offset, unsigned size, - struct pipe_buffer *inbuf, + struct pipe_resource *inbuf, unsigned *out_offset, - struct pipe_buffer **outbuf ) + struct pipe_resource **outbuf ) { enum pipe_error ret = PIPE_OK; + struct pipe_transfer *transfer = NULL; const char *map = NULL; - map = (const char *)pipe_buffer_map( - upload->screen, inbuf, PIPE_BUFFER_USAGE_CPU_READ ); + map = (const char *)pipe_buffer_map(upload->pipe, + inbuf, + PIPE_TRANSFER_READ, + &transfer); if (map == NULL) { ret = PIPE_ERROR_OUT_OF_MEMORY; @@ -226,7 +241,7 @@ enum pipe_error u_upload_buffer( struct u_upload_mgr *upload, done: if (map) - pipe_buffer_unmap( upload->screen, inbuf ); + pipe_buffer_unmap( upload->pipe, inbuf, transfer ); return ret; } diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h index e158bed9d0..a124924fc8 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.h +++ b/src/gallium/auxiliary/util/u_upload_mgr.h @@ -35,11 +35,11 @@ #include "pipe/p_defines.h" struct pipe_screen; -struct pipe_buffer; +struct pipe_resource; struct u_upload_mgr; -struct u_upload_mgr *u_upload_create( struct pipe_screen *screen, +struct u_upload_mgr *u_upload_create( struct pipe_context *pipe, unsigned default_size, unsigned alignment, unsigned usage ); @@ -61,15 +61,15 @@ enum pipe_error u_upload_data( struct u_upload_mgr *upload, unsigned size, const void *data, unsigned *out_offset, - struct pipe_buffer **outbuf ); + struct pipe_resource **outbuf ); enum pipe_error u_upload_buffer( struct u_upload_mgr *upload, unsigned offset, unsigned size, - struct pipe_buffer *inbuf, + struct pipe_resource *inbuf, unsigned *out_offset, - struct pipe_buffer **outbuf ); + struct pipe_resource **outbuf ); diff --git a/src/gallium/auxiliary/vl/vl_bitstream_parser.c b/src/gallium/auxiliary/vl/vl_bitstream_parser.c deleted file mode 100644 index 3193ea5f41..0000000000 --- a/src/gallium/auxiliary/vl/vl_bitstream_parser.c +++ /dev/null @@ -1,167 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 Younes Manton. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "vl_bitstream_parser.h" -#include <assert.h> -#include <limits.h> -#include <util/u_memory.h> - -static unsigned -grab_bits(unsigned cursor, unsigned how_many_bits, unsigned bitstream_elt) -{ - unsigned excess_bits = sizeof(unsigned) * CHAR_BIT - how_many_bits - cursor; - - assert(cursor < sizeof(unsigned) * CHAR_BIT); - assert(how_many_bits > 0 && how_many_bits <= sizeof(unsigned) * CHAR_BIT); - assert(cursor + how_many_bits <= sizeof(unsigned) * CHAR_BIT); - - return (bitstream_elt << excess_bits) >> (excess_bits + cursor); -} - -static unsigned -show_bits(unsigned cursor, unsigned how_many_bits, const unsigned *bitstream) -{ - unsigned cur_int = cursor / (sizeof(unsigned) * CHAR_BIT); - unsigned cur_bit = cursor % (sizeof(unsigned) * CHAR_BIT); - - assert(bitstream); - - if (cur_bit + how_many_bits > sizeof(unsigned) * CHAR_BIT) { - unsigned lower = grab_bits(cur_bit, sizeof(unsigned) * CHAR_BIT - cur_bit, - bitstream[cur_int]); - unsigned upper = grab_bits(0, cur_bit + how_many_bits - sizeof(unsigned) * CHAR_BIT, - bitstream[cur_int + 1]); - return lower | upper << (sizeof(unsigned) * CHAR_BIT - cur_bit); - } - else - return grab_bits(cur_bit, how_many_bits, bitstream[cur_int]); -} - -bool vl_bitstream_parser_init(struct vl_bitstream_parser *parser, - unsigned num_bitstreams, - const void **bitstreams, - const unsigned *sizes) -{ - assert(parser); - assert(num_bitstreams); - assert(bitstreams); - assert(sizes); - - parser->num_bitstreams = num_bitstreams; - parser->bitstreams = (const unsigned**)bitstreams; - parser->sizes = sizes; - parser->cur_bitstream = 0; - parser->cursor = 0; - - return true; -} - -void vl_bitstream_parser_cleanup(struct vl_bitstream_parser *parser) -{ - assert(parser); -} - -unsigned -vl_bitstream_parser_get_bits(struct vl_bitstream_parser *parser, - unsigned how_many_bits) -{ - unsigned bits; - - assert(parser); - - bits = vl_bitstream_parser_show_bits(parser, how_many_bits); - - vl_bitstream_parser_forward(parser, how_many_bits); - - return bits; -} - -unsigned -vl_bitstream_parser_show_bits(struct vl_bitstream_parser *parser, - unsigned how_many_bits) -{ - unsigned bits = 0; - unsigned shift = 0; - unsigned cursor; - unsigned cur_bitstream; - - assert(parser); - - cursor = parser->cursor; - cur_bitstream = parser->cur_bitstream; - - while (1) { - unsigned bits_left = parser->sizes[cur_bitstream] * CHAR_BIT - cursor; - unsigned bits_to_show = how_many_bits > bits_left ? bits_left : how_many_bits; - - bits |= show_bits(cursor, bits_to_show, - parser->bitstreams[cur_bitstream]) << shift; - - if (how_many_bits > bits_to_show) { - how_many_bits -= bits_to_show; - cursor = 0; - ++cur_bitstream; - shift += bits_to_show; - } - else - break; - } - - return bits; -} - -void vl_bitstream_parser_forward(struct vl_bitstream_parser *parser, - unsigned how_many_bits) -{ - assert(parser); - assert(how_many_bits); - - parser->cursor += how_many_bits; - - while (parser->cursor > parser->sizes[parser->cur_bitstream] * CHAR_BIT) { - parser->cursor -= parser->sizes[parser->cur_bitstream++] * CHAR_BIT; - assert(parser->cur_bitstream < parser->num_bitstreams); - } -} - -void vl_bitstream_parser_rewind(struct vl_bitstream_parser *parser, - unsigned how_many_bits) -{ - signed c; - - assert(parser); - assert(how_many_bits); - - c = parser->cursor - how_many_bits; - - while (c < 0) { - c += parser->sizes[parser->cur_bitstream--] * CHAR_BIT; - assert(parser->cur_bitstream < parser->num_bitstreams); - } - - parser->cursor = (unsigned)c; -} diff --git a/src/gallium/auxiliary/vl/vl_bitstream_parser.h b/src/gallium/auxiliary/vl/vl_bitstream_parser.h deleted file mode 100644 index 30ec743fa7..0000000000 --- a/src/gallium/auxiliary/vl/vl_bitstream_parser.h +++ /dev/null @@ -1,63 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 Younes Manton. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef vl_bitstream_parser_h -#define vl_bitstream_parser_h - -#include "pipe/p_compiler.h" - -struct vl_bitstream_parser -{ - unsigned num_bitstreams; - const unsigned **bitstreams; - const unsigned *sizes; - unsigned cur_bitstream; - unsigned cursor; -}; - -bool vl_bitstream_parser_init(struct vl_bitstream_parser *parser, - unsigned num_bitstreams, - const void **bitstreams, - const unsigned *sizes); - -void vl_bitstream_parser_cleanup(struct vl_bitstream_parser *parser); - -unsigned -vl_bitstream_parser_get_bits(struct vl_bitstream_parser *parser, - unsigned how_many_bits); - -unsigned -vl_bitstream_parser_show_bits(struct vl_bitstream_parser *parser, - unsigned how_many_bits); - -void vl_bitstream_parser_forward(struct vl_bitstream_parser *parser, - unsigned how_many_bits); - -void vl_bitstream_parser_rewind(struct vl_bitstream_parser *parser, - unsigned how_many_bits); - -#endif /* vl_bitstream_parser_h */ diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c deleted file mode 100644 index ba23435f69..0000000000 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ /dev/null @@ -1,535 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 Younes Manton. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "vl_compositor.h" -#include <assert.h> -#include <pipe/p_context.h> -#include <util/u_inlines.h> -#include <tgsi/tgsi_parse.h> -#include <tgsi/tgsi_build.h> -#include <util/u_memory.h> -#include "vl_csc.h" -#include "vl_shader_build.h" - -struct vertex2f -{ - float x, y; -}; - -struct vertex4f -{ - float x, y, z, w; -}; - -struct vertex_shader_consts -{ - struct vertex4f dst_scale; - struct vertex4f dst_trans; - struct vertex4f src_scale; - struct vertex4f src_trans; -}; - -struct fragment_shader_consts -{ - float matrix[16]; -}; - -/* - * Represents 2 triangles in a strip in normalized coords. - * Used to render the surface onto the frame buffer. - */ -static const struct vertex2f surface_verts[4] = -{ - {0.0f, 0.0f}, - {0.0f, 1.0f}, - {1.0f, 0.0f}, - {1.0f, 1.0f} -}; - -/* - * Represents texcoords for the above. We can use the position values directly. - * TODO: Duplicate these in the shader, no need to create a buffer. - */ -static const struct vertex2f *surface_texcoords = surface_verts; - -static void -create_vert_shader(struct vl_compositor *c) -{ - const unsigned max_tokens = 50; - - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned ti; - - unsigned i; - - assert(c); - - tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token)); - header = (struct tgsi_header*)&tokens[0]; - *header = tgsi_build_header(); - *(struct tgsi_processor*)&tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 2; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Vertex texcoords - */ - for (i = 0; i < 2; i++) { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale vertex pos rect to destination size - * decl c1 ; Translation vector to move vertex pos rect into position - * decl c2 ; Scaling vector to scale texcoord rect to source size - * decl c3 ; Translation vector to move texcoord rect into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Vertex texcoords - */ - for (i = 0; i < 2; i++) { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * mad o0, i0, c0, c1 ; Scale and translate unit output rect to destination size and pos - * mad o1, i1, c2, c3 ; Scale and translate unit texcoord rect to source size and pos - */ - for (i = 0; i < 2; ++i) { - inst = vl_inst4(TGSI_OPCODE_MAD, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i, TGSI_FILE_CONSTANT, i * 2, TGSI_FILE_CONSTANT, i * 2 + 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - assert(ti <= max_tokens); - - vs.tokens = tokens; - c->vertex_shader = c->pipe->create_vs_state(c->pipe, &vs); - FREE(tokens); -} - -static void -create_frag_shader(struct vl_compositor *c) -{ - const unsigned max_tokens = 50; - - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned ti; - - unsigned i; - - assert(c); - - tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token)); - header = (struct tgsi_header*)&tokens[0]; - *header = tgsi_build_header(); - *(struct tgsi_processor*)&tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 2; - - /* decl i0 ; Texcoords for s0 */ - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl c0-c3 ; CSC matrix c0-c3 - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0 */ - decl = vl_decl_temps(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl s0 ; Sampler for tex containing picture to display */ - decl = vl_decl_samplers(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* tex2d t0, i0, s0 ; Read src pixel */ - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * dp4 o0.x, t0, c0 ; Multiply pixel by the color conversion matrix - * dp4 o0.y, t0, c1 - * dp4 o0.z, t0, c2 - * dp4 o0.w, t0, c3 - */ - for (i = 0; i < 4; ++i) { - inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i); - inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - assert(ti <= max_tokens); - - fs.tokens = tokens; - c->fragment_shader = c->pipe->create_fs_state(c->pipe, &fs); - FREE(tokens); -} - -static bool -init_pipe_state(struct vl_compositor *c) -{ - struct pipe_sampler_state sampler; - - assert(c); - - c->fb_state.nr_cbufs = 1; - c->fb_state.zsbuf = NULL; - - sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; - sampler.compare_mode = PIPE_TEX_COMPARE_NONE; - sampler.compare_func = PIPE_FUNC_ALWAYS; - sampler.normalized_coords = 1; - /*sampler.lod_bias = ;*/ - /*sampler.min_lod = ;*/ - /*sampler.max_lod = ;*/ - /*sampler.border_color[i] = ;*/ - /*sampler.max_anisotropy = ;*/ - c->sampler = c->pipe->create_sampler_state(c->pipe, &sampler); - - return true; -} - -static void cleanup_pipe_state(struct vl_compositor *c) -{ - assert(c); - - c->pipe->delete_sampler_state(c->pipe, c->sampler); -} - -static bool -init_shaders(struct vl_compositor *c) -{ - assert(c); - - create_vert_shader(c); - create_frag_shader(c); - - return true; -} - -static void cleanup_shaders(struct vl_compositor *c) -{ - assert(c); - - c->pipe->delete_vs_state(c->pipe, c->vertex_shader); - c->pipe->delete_fs_state(c->pipe, c->fragment_shader); -} - -static bool -init_buffers(struct vl_compositor *c) -{ - struct fragment_shader_consts fsc; - - assert(c); - - /* - * Create our vertex buffer and vertex buffer element - * VB contains 4 vertices that render a quad covering the entire window - * to display a rendered surface - * Quad is rendered as a tri strip - */ - c->vertex_bufs[0].stride = sizeof(struct vertex2f); - c->vertex_bufs[0].max_index = 3; - c->vertex_bufs[0].buffer_offset = 0; - c->vertex_bufs[0].buffer = pipe_buffer_create - ( - c->pipe->screen, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct vertex2f) * 4 - ); - - memcpy - ( - pipe_buffer_map(c->pipe->screen, c->vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - surface_verts, - sizeof(struct vertex2f) * 4 - ); - - pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[0].buffer); - - c->vertex_elems[0].src_offset = 0; - c->vertex_elems[0].instance_divisor = 0; - c->vertex_elems[0].vertex_buffer_index = 0; - c->vertex_elems[0].nr_components = 2; - c->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* - * Create our texcoord buffer and texcoord buffer element - * Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices - */ - c->vertex_bufs[1].stride = sizeof(struct vertex2f); - c->vertex_bufs[1].max_index = 3; - c->vertex_bufs[1].buffer_offset = 0; - c->vertex_bufs[1].buffer = pipe_buffer_create - ( - c->pipe->screen, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct vertex2f) * 4 - ); - - memcpy - ( - pipe_buffer_map(c->pipe->screen, c->vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - surface_texcoords, - sizeof(struct vertex2f) * 4 - ); - - pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[1].buffer); - - c->vertex_elems[1].src_offset = 0; - c->vertex_elems[1].instance_divisor = 0; - c->vertex_elems[1].vertex_buffer_index = 1; - c->vertex_elems[1].nr_components = 2; - c->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* - * Create our vertex shader's constant buffer - * Const buffer contains scaling and translation vectors - */ - c->vs_const_buf = pipe_buffer_create - ( - c->pipe->screen, - 1, - PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD, - sizeof(struct vertex_shader_consts) - ); - - /* - * Create our fragment shader's constant buffer - * Const buffer contains the color conversion matrix and bias vectors - */ - c->fs_const_buf = pipe_buffer_create - ( - c->pipe->screen, - 1, - PIPE_BUFFER_USAGE_CONSTANT, - sizeof(struct fragment_shader_consts) - ); - - vl_csc_get_matrix(VL_CSC_COLOR_STANDARD_IDENTITY, NULL, true, fsc.matrix); - - vl_compositor_set_csc_matrix(c, fsc.matrix); - - return true; -} - -static void -cleanup_buffers(struct vl_compositor *c) -{ - unsigned i; - - assert(c); - - for (i = 0; i < 2; ++i) - pipe_buffer_reference(&c->vertex_bufs[i].buffer, NULL); - - pipe_buffer_reference(&c->vs_const_buf, NULL); - pipe_buffer_reference(&c->fs_const_buf, NULL); -} - -bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe) -{ - assert(compositor); - - memset(compositor, 0, sizeof(struct vl_compositor)); - - compositor->pipe = pipe; - - if (!init_pipe_state(compositor)) - return false; - if (!init_shaders(compositor)) { - cleanup_pipe_state(compositor); - return false; - } - if (!init_buffers(compositor)) { - cleanup_shaders(compositor); - cleanup_pipe_state(compositor); - return false; - } - - return true; -} - -void vl_compositor_cleanup(struct vl_compositor *compositor) -{ - assert(compositor); - - cleanup_buffers(compositor); - cleanup_shaders(compositor); - cleanup_pipe_state(compositor); -} - -void vl_compositor_render(struct vl_compositor *compositor, - /*struct pipe_texture *backround, - struct pipe_video_rect *backround_area,*/ - struct pipe_texture *src_surface, - enum pipe_mpeg12_picture_type picture_type, - /*unsigned num_past_surfaces, - struct pipe_texture *past_surfaces, - unsigned num_future_surfaces, - struct pipe_texture *future_surfaces,*/ - struct pipe_video_rect *src_area, - struct pipe_texture *dst_surface, - struct pipe_video_rect *dst_area, - /*unsigned num_layers, - struct pipe_texture *layers, - struct pipe_video_rect *layer_src_areas, - struct pipe_video_rect *layer_dst_areas*/ - struct pipe_fence_handle **fence) -{ - struct vertex_shader_consts *vs_consts; - - assert(compositor); - assert(src_surface); - assert(src_area); - assert(dst_surface); - assert(dst_area); - assert(picture_type == PIPE_MPEG12_PICTURE_TYPE_FRAME); - - compositor->fb_state.width = dst_surface->width0; - compositor->fb_state.height = dst_surface->height0; - compositor->fb_state.cbufs[0] = compositor->pipe->screen->get_tex_surface - ( - compositor->pipe->screen, - dst_surface, - 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE - ); - - compositor->viewport.scale[0] = compositor->fb_state.width; - compositor->viewport.scale[1] = compositor->fb_state.height; - compositor->viewport.scale[2] = 1; - compositor->viewport.scale[3] = 1; - compositor->viewport.translate[0] = 0; - compositor->viewport.translate[1] = 0; - compositor->viewport.translate[2] = 0; - compositor->viewport.translate[3] = 0; - - compositor->scissor.maxx = compositor->fb_state.width; - compositor->scissor.maxy = compositor->fb_state.height; - - compositor->pipe->set_framebuffer_state(compositor->pipe, &compositor->fb_state); - compositor->pipe->set_viewport_state(compositor->pipe, &compositor->viewport); - compositor->pipe->set_scissor_state(compositor->pipe, &compositor->scissor); - compositor->pipe->bind_fragment_sampler_states(compositor->pipe, 1, &compositor->sampler); - compositor->pipe->set_fragment_sampler_textures(compositor->pipe, 1, &src_surface); - compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader); - compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader); - compositor->pipe->set_vertex_buffers(compositor->pipe, 2, compositor->vertex_bufs); - compositor->pipe->set_vertex_elements(compositor->pipe, 2, compositor->vertex_elems); - compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, compositor->vs_const_buf); - compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, compositor->fs_const_buf); - - vs_consts = pipe_buffer_map - ( - compositor->pipe->screen, - compositor->vs_const_buf, - PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD - ); - - vs_consts->dst_scale.x = dst_area->w / (float)compositor->fb_state.cbufs[0]->width; - vs_consts->dst_scale.y = dst_area->h / (float)compositor->fb_state.cbufs[0]->height; - vs_consts->dst_scale.z = 1; - vs_consts->dst_scale.w = 1; - vs_consts->dst_trans.x = dst_area->x / (float)compositor->fb_state.cbufs[0]->width; - vs_consts->dst_trans.y = dst_area->y / (float)compositor->fb_state.cbufs[0]->height; - vs_consts->dst_trans.z = 0; - vs_consts->dst_trans.w = 0; - - vs_consts->src_scale.x = src_area->w / (float)src_surface->width0; - vs_consts->src_scale.y = src_area->h / (float)src_surface->height0; - vs_consts->src_scale.z = 1; - vs_consts->src_scale.w = 1; - vs_consts->src_trans.x = src_area->x / (float)src_surface->width0; - vs_consts->src_trans.y = src_area->y / (float)src_surface->height0; - vs_consts->src_trans.z = 0; - vs_consts->src_trans.w = 0; - - pipe_buffer_unmap(compositor->pipe->screen, compositor->vs_const_buf); - - compositor->pipe->draw_arrays(compositor->pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); - compositor->pipe->flush(compositor->pipe, PIPE_FLUSH_RENDER_CACHE, fence); - - pipe_surface_reference(&compositor->fb_state.cbufs[0], NULL); -} - -void vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float *mat) -{ - assert(compositor); - - memcpy - ( - pipe_buffer_map(compositor->pipe->screen, compositor->fs_const_buf, PIPE_BUFFER_USAGE_CPU_WRITE), - mat, - sizeof(struct fragment_shader_consts) - ); - - pipe_buffer_unmap(compositor->pipe->screen, compositor->fs_const_buf); -} diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h deleted file mode 100644 index 6a9a3fd7af..0000000000 --- a/src/gallium/auxiliary/vl/vl_compositor.h +++ /dev/null @@ -1,77 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 Younes Manton. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef vl_compositor_h -#define vl_compositor_h - -#include <pipe/p_compiler.h> -#include <pipe/p_state.h> -#include <pipe/p_video_state.h> - -struct pipe_context; -struct pipe_texture; - -struct vl_compositor -{ - struct pipe_context *pipe; - - struct pipe_framebuffer_state fb_state; - void *sampler; - void *vertex_shader; - void *fragment_shader; - struct pipe_viewport_state viewport; - struct pipe_scissor_state scissor; - struct pipe_vertex_buffer vertex_bufs[2]; - struct pipe_vertex_element vertex_elems[2]; - struct pipe_buffer *vs_const_buf, *fs_const_buf; -}; - -bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe); - -void vl_compositor_cleanup(struct vl_compositor *compositor); - -void vl_compositor_render(struct vl_compositor *compositor, - /*struct pipe_texture *backround, - struct pipe_video_rect *backround_area,*/ - struct pipe_texture *src_surface, - enum pipe_mpeg12_picture_type picture_type, - /*unsigned num_past_surfaces, - struct pipe_texture *past_surfaces, - unsigned num_future_surfaces, - struct pipe_texture *future_surfaces,*/ - struct pipe_video_rect *src_area, - struct pipe_texture *dst_surface, - struct pipe_video_rect *dst_area, - /*unsigned num_layers, - struct pipe_texture *layers, - struct pipe_video_rect *layer_src_areas, - struct pipe_video_rect *layer_dst_areas,*/ - struct pipe_fence_handle **fence); - -void vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float *mat); - -#endif /* vl_compositor_h */ diff --git a/src/gallium/auxiliary/vl/vl_csc.c b/src/gallium/auxiliary/vl/vl_csc.c deleted file mode 100644 index 5ecc43a5fa..0000000000 --- a/src/gallium/auxiliary/vl/vl_csc.c +++ /dev/null @@ -1,206 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 Younes Manton. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "vl_csc.h" -#include <util/u_math.h> -#include <util/u_debug.h> - -/* - * Color space conversion formulas - * - * To convert YCbCr to RGB, - * vec4 ycbcr, rgb - * mat44 csc - * rgb = csc * ycbcr - * - * To calculate the color space conversion matrix csc with ProcAmp adjustments, - * mat44 csc, cstd, procamp, bias - * csc = cstd * (procamp * bias) - * - * Where cstd is a matrix corresponding to one of the color standards (BT.601, BT.709, etc) - * adjusted for the kind of YCbCr -> RGB mapping wanted (1:1, full), - * bias is a matrix corresponding to the kind of YCbCr -> RGB mapping wanted (1:1, full) - * - * To calculate procamp, - * mat44 procamp, hue, saturation, brightness, contrast - * procamp = brightness * (saturation * (contrast * hue)) - * Alternatively, - * procamp = saturation * (brightness * (contrast * hue)) - * - * contrast - * [ c, 0, 0, 0] - * [ 0, c, 0, 0] - * [ 0, 0, c, 0] - * [ 0, 0, 0, 1] - * - * brightness - * [ 1, 0, 0, b] - * [ 0, 1, 0, 0] - * [ 0, 0, 1, 0] - * [ 0, 0, 0, 1] - * - * saturation - * [ 1, 0, 0, 0] - * [ 0, s, 0, 0] - * [ 0, 0, s, 0] - * [ 0, 0, 0, 1] - * - * hue - * [ 1, 0, 0, 0] - * [ 0, cos(h), sin(h), 0] - * [ 0, -sin(h), cos(h), 0] - * [ 0, 0, 0, 1] - * - * procamp - * [ c, 0, 0, b] - * [ 0, c*s*cos(h), c*s*sin(h), 0] - * [ 0, -c*s*sin(h), c*s*cos(h), 0] - * [ 0, 0, 0, 1] - * - * bias - * [ 1, 0, 0, ybias] - * [ 0, 1, 0, cbbias] - * [ 0, 0, 1, crbias] - * [ 0, 0, 0, 1] - * - * csc - * [ c*cstd[ 0], c*cstd[ 1]*s*cos(h) - c*cstd[ 2]*s*sin(h), c*cstd[ 2]*s*cos(h) + c*cstd[ 1]*s*sin(h), cstd[ 3] + cstd[ 0]*(b + c*ybias) + cstd[ 1]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[ 2]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h))] - * [ c*cstd[ 4], c*cstd[ 5]*s*cos(h) - c*cstd[ 6]*s*sin(h), c*cstd[ 6]*s*cos(h) + c*cstd[ 5]*s*sin(h), cstd[ 7] + cstd[ 4]*(b + c*ybias) + cstd[ 5]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[ 6]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h))] - * [ c*cstd[ 8], c*cstd[ 9]*s*cos(h) - c*cstd[10]*s*sin(h), c*cstd[10]*s*cos(h) + c*cstd[ 9]*s*sin(h), cstd[11] + cstd[ 8]*(b + c*ybias) + cstd[ 9]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[10]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h))] - * [ c*cstd[12], c*cstd[13]*s*cos(h) - c*cstd[14]*s*sin(h), c*cstd[14]*s*cos(h) + c*cstd[13]*s*sin(h), cstd[15] + cstd[12]*(b + c*ybias) + cstd[13]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[14]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h))] - */ - -/* - * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: - * Y is in [16,235], Cb and Cr are in [16,240] - * R, G, and B are in [16,235] - */ -static const float bt_601[16] = -{ - 1.0f, 0.0f, 1.371f, 0.0f, - 1.0f, -0.336f, -0.698f, 0.0f, - 1.0f, 1.732f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f -}; - -/* - * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: - * Y is in [16,235], Cb and Cr are in [16,240] - * R, G, and B are in [0,255] - */ -static const float bt_601_full[16] = -{ - 1.164f, 0.0f, 1.596f, 0.0f, - 1.164f, -0.391f, -0.813f, 0.0f, - 1.164f, 2.018f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f -}; - -/* - * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: - * Y is in [16,235], Cb and Cr are in [16,240] - * R, G, and B are in [16,235] - */ -static const float bt_709[16] = -{ - 1.0f, 0.0f, 1.540f, 0.0f, - 1.0f, -0.183f, -0.459f, 0.0f, - 1.0f, 1.816f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f -}; - -/* - * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: - * Y is in [16,235], Cb and Cr are in [16,240] - * R, G, and B are in [0,255] - */ -static const float bt_709_full[16] = -{ - 1.164f, 0.0f, 1.793f, 0.0f, - 1.164f, -0.213f, -0.534f, 0.0f, - 1.164f, 2.115f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f -}; - -static const float identity[16] = -{ - 1.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 1.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f -}; - -void vl_csc_get_matrix(enum VL_CSC_COLOR_STANDARD cs, - struct vl_procamp *procamp, - bool full_range, - float *matrix) -{ - float ybias = full_range ? -16.0f/255.0f : 0.0f; - float cbbias = -128.0f/255.0f; - float crbias = -128.0f/255.0f; - float c = procamp ? procamp->contrast : 1.0f; - float s = procamp ? procamp->saturation : 1.0f; - float b = procamp ? procamp->brightness : 0.0f; - float h = procamp ? procamp->hue : 0.0f; - const float *cstd; - - assert(matrix); - - switch (cs) { - case VL_CSC_COLOR_STANDARD_BT_601: - cstd = full_range ? &bt_601_full[0] : &bt_601[0]; - break; - case VL_CSC_COLOR_STANDARD_BT_709: - cstd = full_range ? &bt_709_full[0] : &bt_709[0]; - break; - case VL_CSC_COLOR_STANDARD_IDENTITY: - default: - assert(cs == VL_CSC_COLOR_STANDARD_IDENTITY); - memcpy(matrix, &identity[0], sizeof(float) * 16); - return; - } - - matrix[ 0] = c*cstd[ 0]; - matrix[ 1] = c*cstd[ 1]*s*cosf(h) - c*cstd[ 2]*s*sinf(h); - matrix[ 2] = c*cstd[ 2]*s*cosf(h) + c*cstd[ 1]*s*sinf(h); - matrix[ 3] = cstd[ 3] + cstd[ 0]*(b + c*ybias) + cstd[ 1]*(c*cbbias*s*cosf(h) + c*crbias*s*sinf(h)) + cstd[ 2]*(c*crbias*s*cosf(h) - c*cbbias*s*sinf(h)); - - matrix[ 4] = c*cstd[ 4]; - matrix[ 5] = c*cstd[ 5]*s*cosf(h) - c*cstd[ 6]*s*sinf(h); - matrix[ 6] = c*cstd[ 6]*s*cosf(h) + c*cstd[ 5]*s*sinf(h); - matrix[ 7] = cstd[ 7] + cstd[ 4]*(b + c*ybias) + cstd[ 5]*(c*cbbias*s*cosf(h) + c*crbias*s*sinf(h)) + cstd[ 6]*(c*crbias*s*cosf(h) - c*cbbias*s*sinf(h)); - - matrix[ 8] = c*cstd[ 8]; - matrix[ 9] = c*cstd[ 9]*s*cosf(h) - c*cstd[10]*s*sinf(h); - matrix[10] = c*cstd[10]*s*cosf(h) + c*cstd[ 9]*s*sinf(h); - matrix[11] = cstd[11] + cstd[ 8]*(b + c*ybias) + cstd[ 9]*(c*cbbias*s*cosf(h) + c*crbias*s*sinf(h)) + cstd[10]*(c*crbias*s*cosf(h) - c*cbbias*s*sinf(h)); - - matrix[12] = c*cstd[12]; - matrix[13] = c*cstd[13]*s*cos(h) - c*cstd[14]*s*sin(h); - matrix[14] = c*cstd[14]*s*cos(h) + c*cstd[13]*s*sin(h); - matrix[15] = cstd[15] + cstd[12]*(b + c*ybias) + cstd[13]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[14]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h)); -} diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c deleted file mode 100644 index f323de0ea5..0000000000 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ /dev/null @@ -1,1672 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 Younes Manton. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "vl_mpeg12_mc_renderer.h" -#include <assert.h> -#include <pipe/p_context.h> -#include <util/u_inlines.h> -#include <util/u_format.h> -#include <util/u_math.h> -#include <util/u_memory.h> -#include <tgsi/tgsi_parse.h> -#include <tgsi/tgsi_build.h> -#include "vl_shader_build.h" - -#define DEFAULT_BUF_ALIGNMENT 1 -#define MACROBLOCK_WIDTH 16 -#define MACROBLOCK_HEIGHT 16 -#define BLOCK_WIDTH 8 -#define BLOCK_HEIGHT 8 -#define ZERO_BLOCK_NIL -1.0f -#define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f) - -struct vertex2f -{ - float x, y; -}; - -struct vertex4f -{ - float x, y, z, w; -}; - -struct vertex_shader_consts -{ - struct vertex4f denorm; -}; - -struct fragment_shader_consts -{ - struct vertex4f multiplier; - struct vertex4f div; -}; - -/* - * Muliplier renormalizes block samples from 16 bits to 12 bits. - * Divider is used when calculating Y % 2 for choosing top or bottom - * field for P or B macroblocks. - * TODO: Use immediates. - */ -static const struct fragment_shader_consts fs_consts = { - {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f}, - {0.5f, 2.0f, 0.0f, 0.0f} -}; - -struct vert_stream_0 -{ - struct vertex2f pos; - struct vertex2f luma_tc; - struct vertex2f cb_tc; - struct vertex2f cr_tc; -}; - -enum MACROBLOCK_TYPE -{ - MACROBLOCK_TYPE_INTRA, - MACROBLOCK_TYPE_FWD_FRAME_PRED, - MACROBLOCK_TYPE_FWD_FIELD_PRED, - MACROBLOCK_TYPE_BKWD_FRAME_PRED, - MACROBLOCK_TYPE_BKWD_FIELD_PRED, - MACROBLOCK_TYPE_BI_FRAME_PRED, - MACROBLOCK_TYPE_BI_FIELD_PRED, - - NUM_MACROBLOCK_TYPES -}; - -static void -create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r) -{ - const unsigned max_tokens = 50; - - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned ti; - - unsigned i; - - assert(r); - - tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); - header = (struct tgsi_header *) &tokens[0]; - *header = tgsi_build_header(); - *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 2; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma Cb texcoords - * decl i3 ; Chroma Cr texcoords - */ - for (i = 0; i < 4; i++) { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma Cb texcoords - * decl o3 ; Chroma Cr texcoords - */ - for (i = 0; i < 4; i++) { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * mov o0, i0 ; Move input vertex pos to output - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma Cb texcoords to output - * mov o3, i3 ; Move input chroma Cr texcoords to output - */ - for (i = 0; i < 4; ++i) { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - assert(ti <= max_tokens); - - vs.tokens = tokens; - r->i_vs = r->pipe->create_vs_state(r->pipe, &vs); - free(tokens); -} - -static void -create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r) -{ - const unsigned max_tokens = 100; - - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned ti; - - unsigned i; - - assert(r); - - tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); - header = (struct tgsi_header *) &tokens[0]; - *header = tgsi_build_header(); - *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 2; - - /* - * decl i0 ; Luma texcoords - * decl i1 ; Chroma Cb texcoords - * decl i2 ; Chroma Cr texcoords - */ - for (i = 0; i < 3; ++i) { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - */ - for (i = 0; i < 3; ++i) { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i2, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; - inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; - inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; - inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* mul o0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - assert(ti <= max_tokens); - - fs.tokens = tokens; - r->i_fs = r->pipe->create_fs_state(r->pipe, &fs); - free(tokens); -} - -static void -create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) -{ - const unsigned max_tokens = 100; - - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned ti; - - unsigned i; - - assert(r); - - tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); - header = (struct tgsi_header *) &tokens[0]; - *header = tgsi_build_header(); - *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 2; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma Cb texcoords - * decl i3 ; Chroma Cr texcoords - * decl i4 ; Ref surface top field texcoords - * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream) - */ - for (i = 0; i < 6; i++) { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma Cb texcoords - * decl o3 ; Chroma Cr texcoords - * decl o4 ; Ref macroblock texcoords - */ - for (i = 0; i < 5; i++) { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * mov o0, i0 ; Move input vertex pos to output - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma Cb texcoords to output - * mov o3, i3 ; Move input chroma Cr texcoords to output - */ - for (i = 0; i < 4; ++i) { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - assert(ti <= max_tokens); - - vs.tokens = tokens; - r->p_vs[0] = r->pipe->create_vs_state(r->pipe, &vs); - free(tokens); -} - -#if 0 -static void -create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) -{ - assert(false); -} -#endif - -static void -create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) -{ - const unsigned max_tokens = 100; - - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned ti; - - unsigned i; - - assert(r); - - tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); - header = (struct tgsi_header *) &tokens[0]; - *header = tgsi_build_header(); - *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 2; - - /* - * decl i0 ; Luma texcoords - * decl i1 ; Chroma Cb texcoords - * decl i2 ; Chroma Cr texcoords - * decl i3 ; Ref macroblock texcoords - */ - for (i = 0; i < 4; ++i) { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for ref surface texture - */ - for (i = 0; i < 4; ++i) { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i2, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; - inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; - inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; - inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* tex2d t1, i3, s3 ; Read texel from ref macroblock */ - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - assert(ti <= max_tokens); - - fs.tokens = tokens; - r->p_fs[0] = r->pipe->create_fs_state(r->pipe, &fs); - free(tokens); -} - -#if 0 -static void -create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) -{ - assert(false); -} -#endif - -static void -create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) -{ - const unsigned max_tokens = 100; - - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned ti; - - unsigned i; - - assert(r); - - tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); - header = (struct tgsi_header *) &tokens[0]; - *header = tgsi_build_header(); - *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 2; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma Cb texcoords - * decl i3 ; Chroma Cr texcoords - * decl i4 ; First ref macroblock top field texcoords - * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream) - * decl i6 ; Second ref macroblock top field texcoords - * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream) - */ - for (i = 0; i < 8; i++) { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma Cb texcoords - * decl o3 ; Chroma Cr texcoords - * decl o4 ; First ref macroblock texcoords - * decl o5 ; Second ref macroblock texcoords - */ - for (i = 0; i < 6; i++) { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * mov o0, i0 ; Move input vertex pos to output - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma Cb texcoords to output - * mov o3, i3 ; Move input chroma Cr texcoords to output - */ - for (i = 0; i < 4; ++i) { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* - * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords - * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords - */ - for (i = 0; i < 2; ++i) { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - assert(ti <= max_tokens); - - vs.tokens = tokens; - r->b_vs[0] = r->pipe->create_vs_state(r->pipe, &vs); - free(tokens); -} - -#if 0 -static void -create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r) -{ - assert(false); -} -#endif - -static void -create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) -{ - const unsigned max_tokens = 100; - - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned ti; - - unsigned i; - - assert(r); - - tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token)); - header = (struct tgsi_header *) &tokens[0]; - *header = tgsi_build_header(); - *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 2; - - /* - * decl i0 ; Luma texcoords - * decl i1 ; Chroma Cb texcoords - * decl i2 ; Chroma Cr texcoords - * decl i3 ; First ref macroblock texcoords - * decl i4 ; Second ref macroblock texcoords - */ - for (i = 0; i < 5; ++i) { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm - * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0-t2 */ - decl = vl_decl_temps(0, 2); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for first ref surface texture - * decl s4 ; Sampler for second ref surface texture - */ - for (i = 0; i < 5; ++i) { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i2, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; - inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; - inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; - inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t1, i3, s3 ; Read texel from first ref macroblock - * tex2d t2, i4, s4 ; Read texel from second ref macroblock - */ - for (i = 0; i < 2; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ - inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; - inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; - inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; - inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - assert(ti <= max_tokens); - - fs.tokens = tokens; - r->b_fs[0] = r->pipe->create_fs_state(r->pipe, &fs); - free(tokens); -} - -#if 0 -static void -create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) -{ - assert(false); -} -#endif - -static void -xfer_buffers_map(struct vl_mpeg12_mc_renderer *r) -{ - unsigned i; - - assert(r); - - for (i = 0; i < 3; ++i) { - r->tex_transfer[i] = r->pipe->screen->get_tex_transfer - ( - r->pipe->screen, r->textures.all[i], - 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0, - r->textures.all[i]->width0, r->textures.all[i]->height0 - ); - - r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]); - } -} - -static void -xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r) -{ - unsigned i; - - assert(r); - - for (i = 0; i < 3; ++i) { - r->pipe->screen->transfer_unmap(r->pipe->screen, r->tex_transfer[i]); - r->pipe->screen->tex_transfer_destroy(r->tex_transfer[i]); - } -} - -static bool -init_pipe_state(struct vl_mpeg12_mc_renderer *r) -{ - struct pipe_sampler_state sampler; - unsigned filters[5]; - unsigned i; - - assert(r); - - r->viewport.scale[0] = r->pot_buffers ? - util_next_power_of_two(r->picture_width) : r->picture_width; - r->viewport.scale[1] = r->pot_buffers ? - util_next_power_of_two(r->picture_height) : r->picture_height; - r->viewport.scale[2] = 1; - r->viewport.scale[3] = 1; - r->viewport.translate[0] = 0; - r->viewport.translate[1] = 0; - r->viewport.translate[2] = 0; - r->viewport.translate[3] = 0; - - r->scissor.maxx = r->pot_buffers ? - util_next_power_of_two(r->picture_width) : r->picture_width; - r->scissor.maxy = r->pot_buffers ? - util_next_power_of_two(r->picture_height) : r->picture_height; - - r->fb_state.width = r->pot_buffers ? - util_next_power_of_two(r->picture_width) : r->picture_width; - r->fb_state.height = r->pot_buffers ? - util_next_power_of_two(r->picture_height) : r->picture_height; - r->fb_state.nr_cbufs = 1; - r->fb_state.zsbuf = NULL; - - /* Luma filter */ - filters[0] = PIPE_TEX_FILTER_NEAREST; - /* Chroma filters */ - if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 || - r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) { - filters[1] = PIPE_TEX_FILTER_NEAREST; - filters[2] = PIPE_TEX_FILTER_NEAREST; - } - else { - filters[1] = PIPE_TEX_FILTER_LINEAR; - filters[2] = PIPE_TEX_FILTER_LINEAR; - } - /* Fwd, bkwd ref filters */ - filters[3] = PIPE_TEX_FILTER_LINEAR; - filters[4] = PIPE_TEX_FILTER_LINEAR; - - for (i = 0; i < 5; ++i) { - sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.min_img_filter = filters[i]; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - sampler.mag_img_filter = filters[i]; - sampler.compare_mode = PIPE_TEX_COMPARE_NONE; - sampler.compare_func = PIPE_FUNC_ALWAYS; - sampler.normalized_coords = 1; - /*sampler.shadow_ambient = ; */ - /*sampler.lod_bias = ; */ - sampler.min_lod = 0; - /*sampler.max_lod = ; */ - /*sampler.border_color[i] = ; */ - /*sampler.max_anisotropy = ; */ - r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler); - } - - return true; -} - -static void -cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r) -{ - unsigned i; - - assert(r); - - for (i = 0; i < 5; ++i) - r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]); -} - -static bool -init_shaders(struct vl_mpeg12_mc_renderer *r) -{ - assert(r); - - create_intra_vert_shader(r); - create_intra_frag_shader(r); - create_frame_pred_vert_shader(r); - create_frame_pred_frag_shader(r); - create_frame_bi_pred_vert_shader(r); - create_frame_bi_pred_frag_shader(r); - - return true; -} - -static void -cleanup_shaders(struct vl_mpeg12_mc_renderer *r) -{ - assert(r); - - r->pipe->delete_vs_state(r->pipe, r->i_vs); - r->pipe->delete_fs_state(r->pipe, r->i_fs); - r->pipe->delete_vs_state(r->pipe, r->p_vs[0]); - r->pipe->delete_fs_state(r->pipe, r->p_fs[0]); - r->pipe->delete_vs_state(r->pipe, r->b_vs[0]); - r->pipe->delete_fs_state(r->pipe, r->b_fs[0]); -} - -static bool -init_buffers(struct vl_mpeg12_mc_renderer *r) -{ - struct pipe_texture template; - - const unsigned mbw = - align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH; - const unsigned mbh = - align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT; - - unsigned i; - - assert(r); - - r->macroblocks_per_batch = - mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1); - r->num_macroblocks = 0; - r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock)); - - memset(&template, 0, sizeof(struct pipe_texture)); - template.target = PIPE_TEXTURE_2D; - /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */ - template.format = PIPE_FORMAT_R16_SNORM; - template.last_level = 0; - template.width0 = r->pot_buffers ? - util_next_power_of_two(r->picture_width) : r->picture_width; - template.height0 = r->pot_buffers ? - util_next_power_of_two(r->picture_height) : r->picture_height; - template.depth0 = 1; - template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC; - - r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template); - - if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) { - template.width0 = r->pot_buffers ? - util_next_power_of_two(r->picture_width / 2) : - r->picture_width / 2; - template.height0 = r->pot_buffers ? - util_next_power_of_two(r->picture_height / 2) : - r->picture_height / 2; - } - else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) - template.height0 = r->pot_buffers ? - util_next_power_of_two(r->picture_height / 2) : - r->picture_height / 2; - - r->textures.individual.cb = - r->pipe->screen->texture_create(r->pipe->screen, &template); - r->textures.individual.cr = - r->pipe->screen->texture_create(r->pipe->screen, &template); - - r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4; - r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1; - r->vertex_bufs.individual.ycbcr.buffer_offset = 0; - r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create - ( - r->pipe->screen, - DEFAULT_BUF_ALIGNMENT, - PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD, - sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch - ); - - for (i = 1; i < 3; ++i) { - r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2; - r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1; - r->vertex_bufs.all[i].buffer_offset = 0; - r->vertex_bufs.all[i].buffer = pipe_buffer_create - ( - r->pipe->screen, - DEFAULT_BUF_ALIGNMENT, - PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD, - sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch - ); - } - - /* Position element */ - r->vertex_elems[0].src_offset = 0; - r->vertex_elems[0].instance_divisor = 0; - r->vertex_elems[0].vertex_buffer_index = 0; - r->vertex_elems[0].nr_components = 2; - r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* Luma, texcoord element */ - r->vertex_elems[1].src_offset = sizeof(struct vertex2f); - r->vertex_elems[1].instance_divisor = 0; - r->vertex_elems[1].vertex_buffer_index = 0; - r->vertex_elems[1].nr_components = 2; - r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* Chroma Cr texcoord element */ - r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2; - r->vertex_elems[2].instance_divisor = 0; - r->vertex_elems[2].vertex_buffer_index = 0; - r->vertex_elems[2].nr_components = 2; - r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* Chroma Cb texcoord element */ - r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3; - r->vertex_elems[3].instance_divisor = 0; - r->vertex_elems[3].vertex_buffer_index = 0; - r->vertex_elems[3].nr_components = 2; - r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* First ref surface top field texcoord element */ - r->vertex_elems[4].src_offset = 0; - r->vertex_elems[4].instance_divisor = 0; - r->vertex_elems[4].vertex_buffer_index = 1; - r->vertex_elems[4].nr_components = 2; - r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* First ref surface bottom field texcoord element */ - r->vertex_elems[5].src_offset = sizeof(struct vertex2f); - r->vertex_elems[5].instance_divisor = 0; - r->vertex_elems[5].vertex_buffer_index = 1; - r->vertex_elems[5].nr_components = 2; - r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* Second ref surface top field texcoord element */ - r->vertex_elems[6].src_offset = 0; - r->vertex_elems[6].instance_divisor = 0; - r->vertex_elems[6].vertex_buffer_index = 2; - r->vertex_elems[6].nr_components = 2; - r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* Second ref surface bottom field texcoord element */ - r->vertex_elems[7].src_offset = sizeof(struct vertex2f); - r->vertex_elems[7].instance_divisor = 0; - r->vertex_elems[7].vertex_buffer_index = 2; - r->vertex_elems[7].nr_components = 2; - r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT; - - r->vs_const_buf = pipe_buffer_create - ( - r->pipe->screen, - DEFAULT_BUF_ALIGNMENT, - PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD, - sizeof(struct vertex_shader_consts) - ); - - r->fs_const_buf = pipe_buffer_create - ( - r->pipe->screen, - DEFAULT_BUF_ALIGNMENT, - PIPE_BUFFER_USAGE_CONSTANT, sizeof(struct fragment_shader_consts) - ); - - memcpy - ( - pipe_buffer_map(r->pipe->screen, r->fs_const_buf, PIPE_BUFFER_USAGE_CPU_WRITE), - &fs_consts, sizeof(struct fragment_shader_consts) - ); - - pipe_buffer_unmap(r->pipe->screen, r->fs_const_buf); - - return true; -} - -static void -cleanup_buffers(struct vl_mpeg12_mc_renderer *r) -{ - unsigned i; - - assert(r); - - pipe_buffer_reference(&r->vs_const_buf, NULL); - pipe_buffer_reference(&r->fs_const_buf, NULL); - - for (i = 0; i < 3; ++i) - pipe_buffer_reference(&r->vertex_bufs.all[i].buffer, NULL); - - for (i = 0; i < 3; ++i) - pipe_texture_reference(&r->textures.all[i], NULL); - - FREE(r->macroblock_buf); -} - -static enum MACROBLOCK_TYPE -get_macroblock_type(struct pipe_mpeg12_macroblock *mb) -{ - assert(mb); - - switch (mb->mb_type) { - case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA: - return MACROBLOCK_TYPE_INTRA; - case PIPE_MPEG12_MACROBLOCK_TYPE_FWD: - return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ? - MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED; - case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD: - return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ? - MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED; - case PIPE_MPEG12_MACROBLOCK_TYPE_BI: - return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ? - MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED; - default: - assert(0); - } - - /* Unreachable */ - return -1; -} - -/* XXX: One of these days this will have to be killed with fire */ -#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, use_zb, zb) \ - do { \ - (vb)[0].pos.x = (mbx) * (unitx) + (ofsx); (vb)[0].pos.y = (mby) * (unity) + (ofsy); \ - (vb)[1].pos.x = (mbx) * (unitx) + (ofsx); (vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy); \ - (vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].pos.y = (mby) * (unity) + (ofsy); \ - (vb)[3].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].pos.y = (mby) * (unity) + (ofsy); \ - (vb)[4].pos.x = (mbx) * (unitx) + (ofsx); (vb)[4].pos.y = (mby) * (unity) + (ofsy) + (hy); \ - (vb)[5].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].pos.y = (mby) * (unity) + (ofsy) + (hy); \ - \ - if (!use_zb || (cbp) & (lm)) \ - { \ - (vb)[0].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].luma_tc.y = (mby) * (unity) + (ofsy); \ - (vb)[1].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \ - (vb)[2].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].luma_tc.y = (mby) * (unity) + (ofsy); \ - (vb)[3].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].luma_tc.y = (mby) * (unity) + (ofsy); \ - (vb)[4].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \ - (vb)[5].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \ - } \ - else \ - { \ - (vb)[0].luma_tc.x = (zb)[0].x; (vb)[0].luma_tc.y = (zb)[0].y; \ - (vb)[1].luma_tc.x = (zb)[0].x; (vb)[1].luma_tc.y = (zb)[0].y + (hy); \ - (vb)[2].luma_tc.x = (zb)[0].x + (hx); (vb)[2].luma_tc.y = (zb)[0].y; \ - (vb)[3].luma_tc.x = (zb)[0].x + (hx); (vb)[3].luma_tc.y = (zb)[0].y; \ - (vb)[4].luma_tc.x = (zb)[0].x; (vb)[4].luma_tc.y = (zb)[0].y + (hy); \ - (vb)[5].luma_tc.x = (zb)[0].x + (hx); (vb)[5].luma_tc.y = (zb)[0].y + (hy); \ - } \ - \ - if (!use_zb || (cbp) & (cbm)) \ - { \ - (vb)[0].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cb_tc.y = (mby) * (unity) + (ofsy); \ - (vb)[1].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \ - (vb)[2].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cb_tc.y = (mby) * (unity) + (ofsy); \ - (vb)[3].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cb_tc.y = (mby) * (unity) + (ofsy); \ - (vb)[4].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \ - (vb)[5].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \ - } \ - else \ - { \ - (vb)[0].cb_tc.x = (zb)[1].x; (vb)[0].cb_tc.y = (zb)[1].y; \ - (vb)[1].cb_tc.x = (zb)[1].x; (vb)[1].cb_tc.y = (zb)[1].y + (hy); \ - (vb)[2].cb_tc.x = (zb)[1].x + (hx); (vb)[2].cb_tc.y = (zb)[1].y; \ - (vb)[3].cb_tc.x = (zb)[1].x + (hx); (vb)[3].cb_tc.y = (zb)[1].y; \ - (vb)[4].cb_tc.x = (zb)[1].x; (vb)[4].cb_tc.y = (zb)[1].y + (hy); \ - (vb)[5].cb_tc.x = (zb)[1].x + (hx); (vb)[5].cb_tc.y = (zb)[1].y + (hy); \ - } \ - \ - if (!use_zb || (cbp) & (crm)) \ - { \ - (vb)[0].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cr_tc.y = (mby) * (unity) + (ofsy); \ - (vb)[1].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \ - (vb)[2].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cr_tc.y = (mby) * (unity) + (ofsy); \ - (vb)[3].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cr_tc.y = (mby) * (unity) + (ofsy); \ - (vb)[4].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \ - (vb)[5].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \ - } \ - else \ - { \ - (vb)[0].cr_tc.x = (zb)[2].x; (vb)[0].cr_tc.y = (zb)[2].y; \ - (vb)[1].cr_tc.x = (zb)[2].x; (vb)[1].cr_tc.y = (zb)[2].y + (hy); \ - (vb)[2].cr_tc.x = (zb)[2].x + (hx); (vb)[2].cr_tc.y = (zb)[2].y; \ - (vb)[3].cr_tc.x = (zb)[2].x + (hx); (vb)[3].cr_tc.y = (zb)[2].y; \ - (vb)[4].cr_tc.x = (zb)[2].x; (vb)[4].cr_tc.y = (zb)[2].y + (hy); \ - (vb)[5].cr_tc.x = (zb)[2].x + (hx); (vb)[5].cr_tc.y = (zb)[2].y + (hy); \ - } \ - } while (0) - -static void -gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r, - struct pipe_mpeg12_macroblock *mb, unsigned pos, - struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb) -{ - struct vertex2f mo_vec[2]; - - unsigned i; - - assert(r); - assert(mb); - assert(ycbcr_vb); - assert(pos < r->macroblocks_per_batch); - - mo_vec[1].x = 0; - mo_vec[1].y = 0; - - switch (mb->mb_type) { - case PIPE_MPEG12_MACROBLOCK_TYPE_BI: - { - struct vertex2f *vb; - - assert(ref_vb && ref_vb[1]); - - vb = ref_vb[1] + pos * 2 * 24; - - mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x; - mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y; - - if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) { - for (i = 0; i < 24 * 2; i += 2) { - vb[i].x = mo_vec[0].x; - vb[i].y = mo_vec[0].y; - } - } - else { - mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x; - mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y; - - for (i = 0; i < 24 * 2; i += 2) { - vb[i].x = mo_vec[0].x; - vb[i].y = mo_vec[0].y; - vb[i + 1].x = mo_vec[1].x; - vb[i + 1].y = mo_vec[1].y; - } - } - - /* fall-through */ - } - case PIPE_MPEG12_MACROBLOCK_TYPE_FWD: - case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD: - { - struct vertex2f *vb; - - assert(ref_vb && ref_vb[0]); - - vb = ref_vb[0] + pos * 2 * 24; - - if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) { - mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x; - mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y; - - if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) { - mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x; - mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y; - } - } - else { - mo_vec[0].x = mb->pmv[0][0][0] * 0.5f * r->surface_tex_inv_size.x; - mo_vec[0].y = mb->pmv[0][0][1] * 0.5f * r->surface_tex_inv_size.y; - - if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) { - mo_vec[1].x = mb->pmv[1][0][0] * 0.5f * r->surface_tex_inv_size.x; - mo_vec[1].y = mb->pmv[1][0][1] * 0.5f * r->surface_tex_inv_size.y; - } - } - - if (mb->mb_type == PIPE_MPEG12_MOTION_TYPE_FRAME) { - for (i = 0; i < 24 * 2; i += 2) { - vb[i].x = mo_vec[0].x; - vb[i].y = mo_vec[0].y; - } - } - else { - for (i = 0; i < 24 * 2; i += 2) { - vb[i].x = mo_vec[0].x; - vb[i].y = mo_vec[0].y; - vb[i + 1].x = mo_vec[1].x; - vb[i + 1].y = mo_vec[1].y; - } - } - - /* fall-through */ - } - case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA: - { - const struct vertex2f unit = - { - r->surface_tex_inv_size.x * MACROBLOCK_WIDTH, - r->surface_tex_inv_size.y * MACROBLOCK_HEIGHT - }; - const struct vertex2f half = - { - r->surface_tex_inv_size.x * (MACROBLOCK_WIDTH / 2), - r->surface_tex_inv_size.y * (MACROBLOCK_HEIGHT / 2) - }; - const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE; - - struct vert_stream_0 *vb = ycbcr_vb + pos * 24; - - SET_BLOCK(vb, mb->cbp, mb->mbx, mb->mby, - unit.x, unit.y, 0, 0, half.x, half.y, - 32, 2, 1, use_zb, r->zero_block); - - SET_BLOCK(vb + 6, mb->cbp, mb->mbx, mb->mby, - unit.x, unit.y, half.x, 0, half.x, half.y, - 16, 2, 1, use_zb, r->zero_block); - - SET_BLOCK(vb + 12, mb->cbp, mb->mbx, mb->mby, - unit.x, unit.y, 0, half.y, half.x, half.y, - 8, 2, 1, use_zb, r->zero_block); - - SET_BLOCK(vb + 18, mb->cbp, mb->mbx, mb->mby, - unit.x, unit.y, half.x, half.y, half.x, half.y, - 4, 2, 1, use_zb, r->zero_block); - - break; - } - default: - assert(0); - } -} - -static void -gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r, - unsigned *num_macroblocks) -{ - unsigned offset[NUM_MACROBLOCK_TYPES]; - struct vert_stream_0 *ycbcr_vb; - struct vertex2f *ref_vb[2]; - unsigned i; - - assert(r); - assert(num_macroblocks); - - for (i = 0; i < r->num_macroblocks; ++i) { - enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]); - ++num_macroblocks[mb_type]; - } - - offset[0] = 0; - - for (i = 1; i < NUM_MACROBLOCK_TYPES; ++i) - offset[i] = offset[i - 1] + num_macroblocks[i - 1]; - - ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map - ( - r->pipe->screen, - r->vertex_bufs.individual.ycbcr.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD - ); - - for (i = 0; i < 2; ++i) - ref_vb[i] = (struct vertex2f *)pipe_buffer_map - ( - r->pipe->screen, - r->vertex_bufs.individual.ref[i].buffer, - PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD - ); - - for (i = 0; i < r->num_macroblocks; ++i) { - enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]); - - gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type], - ycbcr_vb, ref_vb); - - ++offset[mb_type]; - } - - pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ycbcr.buffer); - for (i = 0; i < 2; ++i) - pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ref[i].buffer); -} - -static void -flush(struct vl_mpeg12_mc_renderer *r) -{ - unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 }; - unsigned vb_start = 0; - struct vertex_shader_consts *vs_consts; - unsigned i; - - assert(r); - assert(r->num_macroblocks == r->macroblocks_per_batch); - - gen_macroblock_stream(r, num_macroblocks); - - r->fb_state.cbufs[0] = r->pipe->screen->get_tex_surface - ( - r->pipe->screen, r->surface, - 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE - ); - - r->pipe->set_framebuffer_state(r->pipe, &r->fb_state); - r->pipe->set_viewport_state(r->pipe, &r->viewport); - r->pipe->set_scissor_state(r->pipe, &r->scissor); - - vs_consts = pipe_buffer_map - ( - r->pipe->screen, r->vs_const_buf, - PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD - ); - - vs_consts->denorm.x = r->surface->width0; - vs_consts->denorm.y = r->surface->height0; - - pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf); - - r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0, - r->vs_const_buf); - r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_FRAGMENT, 0, - r->fs_const_buf); - - if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) { - r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all); - r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems); - r->pipe->set_fragment_sampler_textures(r->pipe, 3, r->textures.all); - r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all); - r->pipe->bind_vs_state(r->pipe, r->i_vs); - r->pipe->bind_fs_state(r->pipe, r->i_fs); - - r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, - num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24); - vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24; - } - - if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) { - r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); - r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); - r->textures.individual.ref[0] = r->past; - r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all); - r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all); - r->pipe->bind_vs_state(r->pipe, r->p_vs[0]); - r->pipe->bind_fs_state(r->pipe, r->p_fs[0]); - - r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, - num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24); - vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24; - } - - if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) { - r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); - r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); - r->textures.individual.ref[0] = r->past; - r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all); - r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all); - r->pipe->bind_vs_state(r->pipe, r->p_vs[1]); - r->pipe->bind_fs_state(r->pipe, r->p_fs[1]); - - r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, - num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24); - vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24; - } - - if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) { - r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); - r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); - r->textures.individual.ref[0] = r->future; - r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all); - r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all); - r->pipe->bind_vs_state(r->pipe, r->p_vs[0]); - r->pipe->bind_fs_state(r->pipe, r->p_fs[0]); - - r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, - num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24); - vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24; - } - - if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) { - r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all); - r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems); - r->textures.individual.ref[0] = r->future; - r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all); - r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all); - r->pipe->bind_vs_state(r->pipe, r->p_vs[1]); - r->pipe->bind_fs_state(r->pipe, r->p_fs[1]); - - r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, - num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24); - vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24; - } - - if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) { - r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all); - r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems); - r->textures.individual.ref[0] = r->past; - r->textures.individual.ref[1] = r->future; - r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all); - r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all); - r->pipe->bind_vs_state(r->pipe, r->b_vs[0]); - r->pipe->bind_fs_state(r->pipe, r->b_fs[0]); - - r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, - num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24); - vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24; - } - - if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) { - r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all); - r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems); - r->textures.individual.ref[0] = r->past; - r->textures.individual.ref[1] = r->future; - r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all); - r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all); - r->pipe->bind_vs_state(r->pipe, r->b_vs[1]); - r->pipe->bind_fs_state(r->pipe, r->b_fs[1]); - - r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start, - num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24); - vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24; - } - - r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence); - pipe_surface_reference(&r->fb_state.cbufs[0], NULL); - - if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) - for (i = 0; i < 3; ++i) - r->zero_block[i].x = ZERO_BLOCK_NIL; - - r->num_macroblocks = 0; -} - -static void -grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch) -{ - unsigned y; - - assert(src); - assert(dst); - - for (y = 0; y < BLOCK_HEIGHT; ++y) - memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2); -} - -static void -grab_field_coded_block(short *src, short *dst, unsigned dst_pitch) -{ - unsigned y; - - assert(src); - assert(dst); - - for (y = 0; y < BLOCK_HEIGHT; ++y) - memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2); -} - -static void -fill_zero_block(short *dst, unsigned dst_pitch) -{ - unsigned y; - - assert(dst); - - for (y = 0; y < BLOCK_HEIGHT; ++y) - memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2); -} - -static void -grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby, - enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks) -{ - unsigned tex_pitch; - short *texels; - unsigned tb = 0, sb = 0; - unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT; - unsigned x, y; - - assert(r); - assert(blocks); - - tex_pitch = r->tex_transfer[0]->stride / util_format_get_blocksize(r->tex_transfer[0]->texture->format); - texels = r->texels[0] + mbpy * tex_pitch + mbpx; - - for (y = 0; y < 2; ++y) { - for (x = 0; x < 2; ++x, ++tb) { - if ((cbp >> (5 - tb)) & 1) { - if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) { - grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, - texels + y * tex_pitch * BLOCK_WIDTH + - x * BLOCK_WIDTH, tex_pitch); - } - else { - grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, - texels + y * tex_pitch + x * BLOCK_WIDTH, - tex_pitch); - } - - ++sb; - } - else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) { - if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL || - ZERO_BLOCK_IS_NIL(r->zero_block[0])) { - fill_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch); - if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) { - r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x; - r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y; - } - } - } - } - } - - /* TODO: Implement 422, 444 */ - assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420); - - mbpx /= 2; - mbpy /= 2; - - for (tb = 0; tb < 2; ++tb) { - tex_pitch = r->tex_transfer[tb + 1]->stride / util_format_get_blocksize(r->tex_transfer[tb + 1]->texture->format); - texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx; - - if ((cbp >> (1 - tb)) & 1) { - grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch); - ++sb; - } - else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) { - if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL || - ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) { - fill_zero_block(texels, tex_pitch); - if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) { - r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x; - r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y; - } - } - } - } -} - -static void -grab_macroblock(struct vl_mpeg12_mc_renderer *r, - struct pipe_mpeg12_macroblock *mb) -{ - assert(r); - assert(mb); - assert(r->num_macroblocks < r->macroblocks_per_batch); - - memcpy(&r->macroblock_buf[r->num_macroblocks], mb, - sizeof(struct pipe_mpeg12_macroblock)); - - grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks); - - ++r->num_macroblocks; -} - -bool -vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer, - struct pipe_context *pipe, - unsigned picture_width, - unsigned picture_height, - enum pipe_video_chroma_format chroma_format, - enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode, - enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling, - bool pot_buffers) -{ - unsigned i; - - assert(renderer); - assert(pipe); - /* TODO: Implement other policies */ - assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE); - /* TODO: Implement this */ - /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */ - assert(eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE); - /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */ - assert(pot_buffers); - - memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer)); - - renderer->pipe = pipe; - renderer->picture_width = picture_width; - renderer->picture_height = picture_height; - renderer->chroma_format = chroma_format; - renderer->bufmode = bufmode; - renderer->eb_handling = eb_handling; - renderer->pot_buffers = pot_buffers; - - if (!init_pipe_state(renderer)) - return false; - if (!init_shaders(renderer)) { - cleanup_pipe_state(renderer); - return false; - } - if (!init_buffers(renderer)) { - cleanup_shaders(renderer); - cleanup_pipe_state(renderer); - return false; - } - - renderer->surface = NULL; - renderer->past = NULL; - renderer->future = NULL; - for (i = 0; i < 3; ++i) - renderer->zero_block[i].x = ZERO_BLOCK_NIL; - renderer->num_macroblocks = 0; - - xfer_buffers_map(renderer); - - return true; -} - -void -vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer) -{ - assert(renderer); - - xfer_buffers_unmap(renderer); - - cleanup_pipe_state(renderer); - cleanup_shaders(renderer); - cleanup_buffers(renderer); -} - -void -vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer - *renderer, - struct pipe_texture *surface, - struct pipe_texture *past, - struct pipe_texture *future, - unsigned num_macroblocks, - struct pipe_mpeg12_macroblock - *mpeg12_macroblocks, - struct pipe_fence_handle **fence) -{ - bool new_surface = false; - - assert(renderer); - assert(surface); - assert(num_macroblocks); - assert(mpeg12_macroblocks); - - if (renderer->surface) { - if (surface != renderer->surface) { - if (renderer->num_macroblocks > 0) { - xfer_buffers_unmap(renderer); - flush(renderer); - } - - new_surface = true; - } - - /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */ - assert(surface != renderer->surface || renderer->past == past); - assert(surface != renderer->surface || renderer->future == future); - } - else - new_surface = true; - - if (new_surface) { - renderer->surface = surface; - renderer->past = past; - renderer->future = future; - renderer->fence = fence; - renderer->surface_tex_inv_size.x = 1.0f / surface->width0; - renderer->surface_tex_inv_size.y = 1.0f / surface->height0; - } - - while (num_macroblocks) { - unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks; - unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch); - unsigned i; - - for (i = 0; i < num_to_submit; ++i) { - assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12); - grab_macroblock(renderer, &mpeg12_macroblocks[i]); - } - - num_macroblocks -= num_to_submit; - - if (renderer->num_macroblocks == renderer->macroblocks_per_batch) { - xfer_buffers_unmap(renderer); - flush(renderer); - xfer_buffers_map(renderer); - /* Next time we get this surface it may have new ref frames */ - renderer->surface = NULL; - } - } -} diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h deleted file mode 100644 index f00b8c7b8b..0000000000 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h +++ /dev/null @@ -1,121 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 Younes Manton. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef vl_mpeg12_mc_renderer_h -#define vl_mpeg12_mc_renderer_h - -#include <pipe/p_compiler.h> -#include <pipe/p_state.h> -#include <pipe/p_video_state.h> - -struct pipe_context; -struct pipe_video_surface; -struct pipe_macroblock; - -/* A slice is video-width (rounded up to a multiple of macroblock width) x macroblock height */ -enum VL_MPEG12_MC_RENDERER_BUFFER_MODE -{ - VL_MPEG12_MC_RENDERER_BUFFER_SLICE, /* Saves memory at the cost of smaller batches */ - VL_MPEG12_MC_RENDERER_BUFFER_PICTURE /* Larger batches, more memory */ -}; - -enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK -{ - VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL, /* Waste of memory bandwidth */ - VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE, /* Can only do point-filtering when interpolating subsampled chroma channels */ - VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE /* Needs conditional texel fetch! */ -}; - -struct vl_mpeg12_mc_renderer -{ - struct pipe_context *pipe; - unsigned picture_width; - unsigned picture_height; - enum pipe_video_chroma_format chroma_format; - enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode; - enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling; - bool pot_buffers; - unsigned macroblocks_per_batch; - - struct pipe_viewport_state viewport; - struct pipe_scissor_state scissor; - struct pipe_buffer *vs_const_buf; - struct pipe_buffer *fs_const_buf; - struct pipe_framebuffer_state fb_state; - struct pipe_vertex_element vertex_elems[8]; - - union - { - void *all[5]; - struct { void *y, *cb, *cr, *ref[2]; } individual; - } samplers; - - void *i_vs, *p_vs[2], *b_vs[2]; - void *i_fs, *p_fs[2], *b_fs[2]; - - union - { - struct pipe_texture *all[5]; - struct { struct pipe_texture *y, *cb, *cr, *ref[2]; } individual; - } textures; - - union - { - struct pipe_vertex_buffer all[3]; - struct { struct pipe_vertex_buffer ycbcr, ref[2]; } individual; - } vertex_bufs; - - struct pipe_texture *surface, *past, *future; - struct pipe_fence_handle **fence; - unsigned num_macroblocks; - struct pipe_mpeg12_macroblock *macroblock_buf; - struct pipe_transfer *tex_transfer[3]; - short *texels[3]; - struct { float x, y; } surface_tex_inv_size; - struct { float x, y; } zero_block[3]; -}; - -bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer, - struct pipe_context *pipe, - unsigned picture_width, - unsigned picture_height, - enum pipe_video_chroma_format chroma_format, - enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode, - enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling, - bool pot_buffers); - -void vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer); - -void vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer, - struct pipe_texture *surface, - struct pipe_texture *past, - struct pipe_texture *future, - unsigned num_macroblocks, - struct pipe_mpeg12_macroblock *mpeg12_macroblocks, - struct pipe_fence_handle **fence); - -#endif /* vl_mpeg12_mc_renderer_h */ diff --git a/src/gallium/auxiliary/vl/vl_shader_build.c b/src/gallium/auxiliary/vl/vl_shader_build.c deleted file mode 100644 index d011ef97bd..0000000000 --- a/src/gallium/auxiliary/vl/vl_shader_build.c +++ /dev/null @@ -1,243 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 Younes Manton. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "vl_shader_build.h" -#include <assert.h> -#include <tgsi/tgsi_parse.h> -#include <tgsi/tgsi_build.h> - -struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last) -{ - struct tgsi_full_declaration decl = tgsi_default_full_declaration(); - - decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.Name = name; - decl.Semantic.Index = index; - decl.Range.First = first; - decl.Range.Last = last; - - return decl; -} - -struct tgsi_full_declaration vl_decl_interpolated_input -( - unsigned int name, - unsigned int index, - unsigned int first, - unsigned int last, - int interpolation -) -{ - struct tgsi_full_declaration decl = tgsi_default_full_declaration(); - - assert - ( - interpolation == TGSI_INTERPOLATE_CONSTANT || - interpolation == TGSI_INTERPOLATE_LINEAR || - interpolation == TGSI_INTERPOLATE_PERSPECTIVE - ); - - decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.Name = name; - decl.Semantic.Index = index; - decl.Declaration.Interpolate = interpolation;; - decl.Range.First = first; - decl.Range.Last = last; - - return decl; -} - -struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last) -{ - struct tgsi_full_declaration decl = tgsi_default_full_declaration(); - - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Declaration.Semantic = 1; - decl.Semantic.Name = name; - decl.Semantic.Index = index; - decl.Range.First = first; - decl.Range.Last = last; - - return decl; -} - -struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last) -{ - struct tgsi_full_declaration decl = tgsi_default_full_declaration(); - - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.Name = name; - decl.Semantic.Index = index; - decl.Range.First = first; - decl.Range.Last = last; - - return decl; -} - -struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last) -{ - struct tgsi_full_declaration decl = tgsi_default_full_declaration(); - - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.Range.First = first; - decl.Range.Last = last; - - return decl; -} - -struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last) -{ - struct tgsi_full_declaration decl = tgsi_default_full_declaration(); - - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.Range.First = first; - decl.Range.Last = last; - - return decl; -} - -struct tgsi_full_instruction vl_inst2 -( - int opcode, - enum tgsi_file_type dst_file, - unsigned int dst_index, - enum tgsi_file_type src_file, - unsigned int src_index -) -{ - struct tgsi_full_instruction inst = tgsi_default_full_instruction(); - - inst.Instruction.Opcode = opcode; - inst.Instruction.NumDstRegs = 1; - inst.Dst[0].Register.File = dst_file; - inst.Dst[0].Register.Index = dst_index; - inst.Instruction.NumSrcRegs = 1; - inst.Src[0].Register.File = src_file; - inst.Src[0].Register.Index = src_index; - - return inst; -} - -struct tgsi_full_instruction vl_inst3 -( - int opcode, - enum tgsi_file_type dst_file, - unsigned int dst_index, - enum tgsi_file_type src1_file, - unsigned int src1_index, - enum tgsi_file_type src2_file, - unsigned int src2_index -) -{ - struct tgsi_full_instruction inst = tgsi_default_full_instruction(); - - inst.Instruction.Opcode = opcode; - inst.Instruction.NumDstRegs = 1; - inst.Dst[0].Register.File = dst_file; - inst.Dst[0].Register.Index = dst_index; - inst.Instruction.NumSrcRegs = 2; - inst.Src[0].Register.File = src1_file; - inst.Src[0].Register.Index = src1_index; - inst.Src[1].Register.File = src2_file; - inst.Src[1].Register.Index = src2_index; - - return inst; -} - -struct tgsi_full_instruction vl_tex -( - int tex, - enum tgsi_file_type dst_file, - unsigned int dst_index, - enum tgsi_file_type src1_file, - unsigned int src1_index, - enum tgsi_file_type src2_file, - unsigned int src2_index -) -{ - struct tgsi_full_instruction inst = tgsi_default_full_instruction(); - - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.Dst[0].Register.File = dst_file; - inst.Dst[0].Register.Index = dst_index; - inst.Instruction.NumSrcRegs = 2; - inst.Instruction.Texture = 1; - inst.Texture.Texture = tex; - inst.Src[0].Register.File = src1_file; - inst.Src[0].Register.Index = src1_index; - inst.Src[1].Register.File = src2_file; - inst.Src[1].Register.Index = src2_index; - - return inst; -} - -struct tgsi_full_instruction vl_inst4 -( - int opcode, - enum tgsi_file_type dst_file, - unsigned int dst_index, - enum tgsi_file_type src1_file, - unsigned int src1_index, - enum tgsi_file_type src2_file, - unsigned int src2_index, - enum tgsi_file_type src3_file, - unsigned int src3_index -) -{ - struct tgsi_full_instruction inst = tgsi_default_full_instruction(); - - inst.Instruction.Opcode = opcode; - inst.Instruction.NumDstRegs = 1; - inst.Dst[0].Register.File = dst_file; - inst.Dst[0].Register.Index = dst_index; - inst.Instruction.NumSrcRegs = 3; - inst.Src[0].Register.File = src1_file; - inst.Src[0].Register.Index = src1_index; - inst.Src[1].Register.File = src2_file; - inst.Src[1].Register.Index = src2_index; - inst.Src[2].Register.File = src3_file; - inst.Src[2].Register.Index = src3_index; - - return inst; -} - -struct tgsi_full_instruction vl_end(void) -{ - struct tgsi_full_instruction inst = tgsi_default_full_instruction(); - - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - - return inst; -} diff --git a/src/gallium/auxiliary/vl/vl_shader_build.h b/src/gallium/auxiliary/vl/vl_shader_build.h deleted file mode 100644 index 5da71f8e13..0000000000 --- a/src/gallium/auxiliary/vl/vl_shader_build.h +++ /dev/null @@ -1,88 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 Younes Manton. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef vl_shader_build_h -#define vl_shader_build_h - -#include <pipe/p_shader_tokens.h> - -struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last); -struct tgsi_full_declaration vl_decl_interpolated_input -( - unsigned int name, - unsigned int index, - unsigned int first, - unsigned int last, - int interpolation -); -struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last); -struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last); -struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last); -struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last); -struct tgsi_full_instruction vl_inst2 -( - int opcode, - enum tgsi_file_type dst_file, - unsigned int dst_index, - enum tgsi_file_type src_file, - unsigned int src_index -); -struct tgsi_full_instruction vl_inst3 -( - int opcode, - enum tgsi_file_type dst_file, - unsigned int dst_index, - enum tgsi_file_type src1_file, - unsigned int src1_index, - enum tgsi_file_type src2_file, - unsigned int src2_index -); -struct tgsi_full_instruction vl_tex -( - int tex, - enum tgsi_file_type dst_file, - unsigned int dst_index, - enum tgsi_file_type src1_file, - unsigned int src1_index, - enum tgsi_file_type src2_file, - unsigned int src2_index -); -struct tgsi_full_instruction vl_inst4 -( - int opcode, - enum tgsi_file_type dst_file, - unsigned int dst_index, - enum tgsi_file_type src1_file, - unsigned int src1_index, - enum tgsi_file_type src2_file, - unsigned int src2_index, - enum tgsi_file_type src3_file, - unsigned int src3_index -); -struct tgsi_full_instruction vl_end(void); - -#endif |