diff options
Diffstat (limited to 'src/gallium/auxiliary')
111 files changed, 6735 insertions, 1468 deletions
diff --git a/src/gallium/auxiliary/Android.mk b/src/gallium/auxiliary/Android.mk index 7c333051b4..f5513794ee 100644 --- a/src/gallium/auxiliary/Android.mk +++ b/src/gallium/auxiliary/Android.mk @@ -108,7 +108,9 @@ C_SOURCES = \ util/u_draw_quad.c \ util/u_format.c \ util/u_format_other.c \ + util/u_format_latc.c \ util/u_format_s3tc.c \ + util/u_format_rgtc.c \ util/u_format_srgb.c \ util/u_format_table.c \ util/u_format_tests.c \ @@ -127,6 +129,7 @@ C_SOURCES = \ util/u_network.c \ util/u_math.c \ util/u_mm.c \ + util/u_pstipple.c \ util/u_rect.c \ util/u_ringbuffer.c \ util/u_sampler.c \ @@ -140,7 +143,8 @@ C_SOURCES = \ util/u_tile.c \ util/u_transfer.c \ util/u_resource.c \ - util/u_upload_mgr.c + util/u_upload_mgr.c \ + util/u_vbuf_mgr.c GENERATED_SOURCES = \ indices/u_indices_gen.c \ diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index ff355c4783..c7654046a5 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -110,7 +110,9 @@ C_SOURCES = \ util/u_draw_quad.c \ util/u_format.c \ util/u_format_other.c \ + util/u_format_latc.c \ util/u_format_s3tc.c \ + util/u_format_rgtc.c \ util/u_format_srgb.c \ util/u_format_table.c \ util/u_format_tests.c \ @@ -129,6 +131,7 @@ C_SOURCES = \ util/u_network.c \ util/u_math.c \ util/u_mm.c \ + util/u_pstipple.c \ util/u_rect.c \ util/u_ringbuffer.c \ util/u_sampler.c \ @@ -142,7 +145,8 @@ C_SOURCES = \ util/u_tile.c \ util/u_transfer.c \ util/u_resource.c \ - util/u_upload_mgr.c + util/u_upload_mgr.c \ + util/u_vbuf_mgr.c # Disabling until pipe-video branch gets merged in #vl/vl_bitstream_parser.c \ @@ -157,7 +161,6 @@ GALLIVM_SOURCES = \ gallivm/lp_bld_bitarit.c \ gallivm/lp_bld_const.c \ gallivm/lp_bld_conv.c \ - gallivm/lp_bld_debug.c \ gallivm/lp_bld_flow.c \ gallivm/lp_bld_format_aos.c \ gallivm/lp_bld_format_soa.c \ @@ -185,6 +188,7 @@ GALLIVM_SOURCES = \ draw/draw_pt_fetch_shade_pipeline_llvm.c GALLIVM_CPP_SOURCES = \ + gallivm/lp_bld_debug.cpp \ gallivm/lp_bld_misc.cpp GENERATED_SOURCES = \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index fca7e5fd11..8e422b2c11 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -157,7 +157,9 @@ source = [ 'util/u_draw_quad.c', 'util/u_format.c', 'util/u_format_other.c', + 'util/u_format_latc.c', 'util/u_format_s3tc.c', + 'util/u_format_rgtc.c', 'util/u_format_srgb.c', 'util/u_format_table.c', 'util/u_format_tests.c', @@ -176,6 +178,7 @@ source = [ 'util/u_network.c', 'util/u_math.c', 'util/u_mm.c', + 'util/u_pstipple.c', 'util/u_rect.c', 'util/u_resource.c', 'util/u_ringbuffer.c', @@ -190,6 +193,7 @@ source = [ 'util/u_tile.c', 'util/u_transfer.c', 'util/u_upload_mgr.c', + 'util/u_vbuf_mgr.c', # Disabling until pipe-video branch gets merged in #'vl/vl_bitstream_parser.c', #'vl/vl_mpeg12_mc_renderer.c', @@ -199,16 +203,13 @@ source = [ ] if env['llvm']: - if env['UDIS86']: - env.Append(CPPDEFINES = [('HAVE_UDIS86', '1')]) - source += [ 'gallivm/lp_bld_arit.c', 'gallivm/lp_bld_assert.c', 'gallivm/lp_bld_bitarit.c', 'gallivm/lp_bld_const.c', 'gallivm/lp_bld_conv.c', - 'gallivm/lp_bld_debug.c', + 'gallivm/lp_bld_debug.cpp', 'gallivm/lp_bld_flow.c', 'gallivm/lp_bld_format_aos.c', 'gallivm/lp_bld_format_soa.c', diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 58b022d531..fdd40fca12 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -47,41 +47,45 @@ #include "cso_cache/cso_hash.h" #include "cso_context.h" -struct cso_context { - struct pipe_context *pipe; - struct cso_cache *cache; +/** + * Info related to samplers and sampler views. + * We have one of these for fragment samplers and another for vertex samplers. + */ +struct sampler_info +{ struct { void *samplers[PIPE_MAX_SAMPLERS]; unsigned nr_samplers; - - void *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; - unsigned nr_vertex_samplers; } hw; void *samplers[PIPE_MAX_SAMPLERS]; unsigned nr_samplers; - void *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; - unsigned nr_vertex_samplers; - - unsigned nr_samplers_saved; void *samplers_saved[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers_saved; + + struct pipe_sampler_view *views[PIPE_MAX_SAMPLERS]; + unsigned nr_views; + + struct pipe_sampler_view *views_saved[PIPE_MAX_SAMPLERS]; + unsigned nr_views_saved; +}; + - unsigned nr_vertex_samplers_saved; - void *vertex_samplers_saved[PIPE_MAX_VERTEX_SAMPLERS]; - uint nr_fragment_sampler_views; - struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS]; +struct cso_context { + struct pipe_context *pipe; + struct cso_cache *cache; - uint nr_vertex_sampler_views; - struct pipe_sampler_view *vertex_sampler_views[PIPE_MAX_VERTEX_SAMPLERS]; + struct sampler_info fragment_samplers; + struct sampler_info vertex_samplers; - uint nr_fragment_sampler_views_saved; - struct pipe_sampler_view *fragment_sampler_views_saved[PIPE_MAX_SAMPLERS]; + uint nr_vertex_buffers; + struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; - uint nr_vertex_sampler_views_saved; - struct pipe_sampler_view *vertex_sampler_views_saved[PIPE_MAX_VERTEX_SAMPLERS]; + uint nr_vertex_buffers_saved; + struct pipe_vertex_buffer vertex_buffers_saved[PIPE_MAX_ATTRIBS]; /** Current and saved state. * The saved state is used as a 1-deep stack. @@ -252,6 +256,8 @@ struct cso_context *cso_create_context( struct pipe_context *pipe ) if (ctx == NULL) goto out; + assert(PIPE_MAX_SAMPLERS == PIPE_MAX_VERTEX_SAMPLERS); + ctx->cache = cso_cache_create(); if (ctx->cache == NULL) goto out; @@ -278,7 +284,8 @@ out: void cso_release_all( struct cso_context *ctx ) { unsigned i; - + struct sampler_info *info; + if (ctx->pipe) { ctx->pipe->bind_blend_state( ctx->pipe, NULL ); ctx->pipe->bind_rasterizer_state( ctx->pipe, NULL ); @@ -294,19 +301,30 @@ void cso_release_all( struct cso_context *ctx ) ctx->pipe->set_vertex_sampler_views(ctx->pipe, 0, NULL); } + /* free fragment samplers, views */ + info = &ctx->fragment_samplers; for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - pipe_sampler_view_reference(&ctx->fragment_sampler_views[i], NULL); - pipe_sampler_view_reference(&ctx->fragment_sampler_views_saved[i], NULL); + pipe_sampler_view_reference(&info->views[i], NULL); + pipe_sampler_view_reference(&info->views_saved[i], NULL); } - for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - pipe_sampler_view_reference(&ctx->vertex_sampler_views[i], NULL); - pipe_sampler_view_reference(&ctx->vertex_sampler_views_saved[i], NULL); + /* free vertex samplers, views */ + info = &ctx->vertex_samplers; + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + pipe_sampler_view_reference(&info->views[i], NULL); + pipe_sampler_view_reference(&info->views_saved[i], NULL); } util_unreference_framebuffer_state(&ctx->fb); util_unreference_framebuffer_state(&ctx->fb_saved); + util_copy_vertex_buffers(ctx->vertex_buffers, + &ctx->nr_vertex_buffers, + NULL, 0); + util_copy_vertex_buffers(ctx->vertex_buffers_saved, + &ctx->nr_vertex_buffers_saved, + NULL, 0); + if (ctx->cache) { cso_cache_delete( ctx->cache ); ctx->cache = NULL; @@ -395,233 +413,6 @@ void cso_restore_blend(struct cso_context *ctx) -enum pipe_error cso_single_sampler(struct cso_context *ctx, - unsigned idx, - const struct pipe_sampler_state *templ) -{ - void *handle = NULL; - - if (templ != NULL) { - unsigned key_size = sizeof(struct pipe_sampler_state); - unsigned hash_key = cso_construct_key((void*)templ, key_size); - struct cso_hash_iter iter = cso_find_state_template(ctx->cache, - hash_key, CSO_SAMPLER, - (void*)templ, key_size); - - if (cso_hash_iter_is_null(iter)) { - struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); - if (!cso) - return PIPE_ERROR_OUT_OF_MEMORY; - - memcpy(&cso->state, templ, sizeof(*templ)); - cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); - cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state; - cso->context = ctx->pipe; - - iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso); - if (cso_hash_iter_is_null(iter)) { - FREE(cso); - return PIPE_ERROR_OUT_OF_MEMORY; - } - - handle = cso->data; - } - else { - handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data; - } - } - - ctx->samplers[idx] = handle; - return PIPE_OK; -} - -enum pipe_error -cso_single_vertex_sampler(struct cso_context *ctx, - unsigned idx, - const struct pipe_sampler_state *templ) -{ - void *handle = NULL; - - if (templ != NULL) { - unsigned key_size = sizeof(struct pipe_sampler_state); - unsigned hash_key = cso_construct_key((void*)templ, key_size); - struct cso_hash_iter iter = cso_find_state_template(ctx->cache, - hash_key, CSO_SAMPLER, - (void*)templ, key_size); - - if (cso_hash_iter_is_null(iter)) { - struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); - if (!cso) - return PIPE_ERROR_OUT_OF_MEMORY; - - memcpy(&cso->state, templ, sizeof(*templ)); - cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); - cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state; - cso->context = ctx->pipe; - - iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso); - if (cso_hash_iter_is_null(iter)) { - FREE(cso); - return PIPE_ERROR_OUT_OF_MEMORY; - } - - handle = cso->data; - } - else { - handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data; - } - } - - ctx->vertex_samplers[idx] = handle; - return PIPE_OK; -} - -void cso_single_sampler_done( struct cso_context *ctx ) -{ - unsigned i; - - /* find highest non-null sampler */ - for (i = PIPE_MAX_SAMPLERS; i > 0; i--) { - if (ctx->samplers[i - 1] != NULL) - break; - } - - ctx->nr_samplers = i; - - if (ctx->hw.nr_samplers != ctx->nr_samplers || - memcmp(ctx->hw.samplers, - ctx->samplers, - ctx->nr_samplers * sizeof(void *)) != 0) - { - memcpy(ctx->hw.samplers, ctx->samplers, ctx->nr_samplers * sizeof(void *)); - ctx->hw.nr_samplers = ctx->nr_samplers; - - ctx->pipe->bind_fragment_sampler_states(ctx->pipe, ctx->nr_samplers, ctx->samplers); - } -} - -void -cso_single_vertex_sampler_done(struct cso_context *ctx) -{ - unsigned i; - - /* find highest non-null sampler */ - for (i = PIPE_MAX_VERTEX_SAMPLERS; i > 0; i--) { - if (ctx->vertex_samplers[i - 1] != NULL) - break; - } - - ctx->nr_vertex_samplers = i; - - if (ctx->hw.nr_vertex_samplers != ctx->nr_vertex_samplers || - memcmp(ctx->hw.vertex_samplers, - ctx->vertex_samplers, - ctx->nr_vertex_samplers * sizeof(void *)) != 0) - { - memcpy(ctx->hw.vertex_samplers, - ctx->vertex_samplers, - ctx->nr_vertex_samplers * sizeof(void *)); - ctx->hw.nr_vertex_samplers = ctx->nr_vertex_samplers; - - ctx->pipe->bind_vertex_sampler_states(ctx->pipe, - ctx->nr_vertex_samplers, - ctx->vertex_samplers); - } -} - -/* - * If the function encouters any errors it will return the - * last one. Done to always try to set as many samplers - * as possible. - */ -enum pipe_error cso_set_samplers( struct cso_context *ctx, - unsigned nr, - const struct pipe_sampler_state **templates ) -{ - unsigned i; - enum pipe_error temp, error = PIPE_OK; - - /* TODO: fastpath - */ - - for (i = 0; i < nr; i++) { - temp = cso_single_sampler( ctx, i, templates[i] ); - if (temp != PIPE_OK) - error = temp; - } - - for ( ; i < ctx->nr_samplers; i++) { - temp = cso_single_sampler( ctx, i, NULL ); - if (temp != PIPE_OK) - error = temp; - } - - cso_single_sampler_done( ctx ); - - return error; -} - -void cso_save_samplers(struct cso_context *ctx) -{ - ctx->nr_samplers_saved = ctx->nr_samplers; - memcpy(ctx->samplers_saved, ctx->samplers, sizeof(ctx->samplers)); -} - -void cso_restore_samplers(struct cso_context *ctx) -{ - ctx->nr_samplers = ctx->nr_samplers_saved; - memcpy(ctx->samplers, ctx->samplers_saved, sizeof(ctx->samplers)); - cso_single_sampler_done( ctx ); -} - -/* - * If the function encouters any errors it will return the - * last one. Done to always try to set as many samplers - * as possible. - */ -enum pipe_error cso_set_vertex_samplers(struct cso_context *ctx, - unsigned nr, - const struct pipe_sampler_state **templates) -{ - unsigned i; - enum pipe_error temp, error = PIPE_OK; - - /* TODO: fastpath - */ - - for (i = 0; i < nr; i++) { - temp = cso_single_vertex_sampler( ctx, i, templates[i] ); - if (temp != PIPE_OK) - error = temp; - } - - for ( ; i < ctx->nr_samplers; i++) { - temp = cso_single_vertex_sampler( ctx, i, NULL ); - if (temp != PIPE_OK) - error = temp; - } - - cso_single_vertex_sampler_done( ctx ); - - return error; -} - -void -cso_save_vertex_samplers(struct cso_context *ctx) -{ - ctx->nr_vertex_samplers_saved = ctx->nr_vertex_samplers; - memcpy(ctx->vertex_samplers_saved, ctx->vertex_samplers, sizeof(ctx->vertex_samplers)); -} - -void -cso_restore_vertex_samplers(struct cso_context *ctx) -{ - ctx->nr_vertex_samplers = ctx->nr_vertex_samplers_saved; - memcpy(ctx->vertex_samplers, ctx->vertex_samplers_saved, sizeof(ctx->vertex_samplers)); - cso_single_vertex_sampler_done(ctx); -} - - enum pipe_error cso_set_depth_stencil_alpha(struct cso_context *ctx, const struct pipe_depth_stencil_alpha_state *templ) { @@ -1143,121 +934,361 @@ void cso_restore_vertex_elements(struct cso_context *ctx) ctx->velements_saved = NULL; } -/* fragment sampler view state */ +/* vertex buffers */ -void -cso_set_fragment_sampler_views(struct cso_context *cso, - uint count, - struct pipe_sampler_view **views) +void cso_set_vertex_buffers(struct cso_context *ctx, + unsigned count, + const struct pipe_vertex_buffer *buffers) { - uint i; - - for (i = 0; i < count; i++) { - pipe_sampler_view_reference(&cso->fragment_sampler_views[i], views[i]); + if (count != ctx->nr_vertex_buffers || + memcmp(buffers, ctx->vertex_buffers, + sizeof(struct pipe_vertex_buffer) * count) != 0) { + util_copy_vertex_buffers(ctx->vertex_buffers, &ctx->nr_vertex_buffers, + buffers, count); + ctx->pipe->set_vertex_buffers(ctx->pipe, count, buffers); } - for (; i < cso->nr_fragment_sampler_views; i++) { - pipe_sampler_view_reference(&cso->fragment_sampler_views[i], NULL); +} + +void cso_save_vertex_buffers(struct cso_context *ctx) +{ + util_copy_vertex_buffers(ctx->vertex_buffers_saved, + &ctx->nr_vertex_buffers_saved, + ctx->vertex_buffers, + ctx->nr_vertex_buffers); +} + +void cso_restore_vertex_buffers(struct cso_context *ctx) +{ + util_copy_vertex_buffers(ctx->vertex_buffers, + &ctx->nr_vertex_buffers, + ctx->vertex_buffers_saved, + ctx->nr_vertex_buffers_saved); + ctx->pipe->set_vertex_buffers(ctx->pipe, ctx->nr_vertex_buffers, + ctx->vertex_buffers); +} + + +/**************** fragment/vertex sampler view state *************************/ + +static enum pipe_error +single_sampler(struct cso_context *ctx, + struct sampler_info *info, + unsigned idx, + const struct pipe_sampler_state *templ) +{ + void *handle = NULL; + + if (templ != NULL) { + unsigned key_size = sizeof(struct pipe_sampler_state); + unsigned hash_key = cso_construct_key((void*)templ, key_size); + struct cso_hash_iter iter = + cso_find_state_template(ctx->cache, + hash_key, CSO_SAMPLER, + (void *) templ, key_size); + + if (cso_hash_iter_is_null(iter)) { + struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; + + memcpy(&cso->state, templ, sizeof(*templ)); + cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + handle = cso->data; + } + else { + handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data; + } } - cso->pipe->set_fragment_sampler_views(cso->pipe, - MAX2(count, cso->nr_fragment_sampler_views), - cso->fragment_sampler_views); + info->samplers[idx] = handle; - cso->nr_fragment_sampler_views = count; + return PIPE_OK; } -void -cso_save_fragment_sampler_views(struct cso_context *cso) +enum pipe_error +cso_single_sampler(struct cso_context *ctx, + unsigned idx, + const struct pipe_sampler_state *templ) { - uint i; + return single_sampler(ctx, &ctx->fragment_samplers, idx, templ); +} + +enum pipe_error +cso_single_vertex_sampler(struct cso_context *ctx, + unsigned idx, + const struct pipe_sampler_state *templ) +{ + return single_sampler(ctx, &ctx->vertex_samplers, idx, templ); +} + - cso->nr_fragment_sampler_views_saved = cso->nr_fragment_sampler_views; - for (i = 0; i < cso->nr_fragment_sampler_views; i++) { - assert(!cso->fragment_sampler_views_saved[i]); +static void +single_sampler_done(struct cso_context *ctx, + struct sampler_info *info) +{ + unsigned i; - pipe_sampler_view_reference(&cso->fragment_sampler_views_saved[i], - cso->fragment_sampler_views[i]); + /* find highest non-null sampler */ + for (i = PIPE_MAX_SAMPLERS; i > 0; i--) { + if (info->samplers[i - 1] != NULL) + break; + } + + info->nr_samplers = i; + + if (info->hw.nr_samplers != info->nr_samplers || + memcmp(info->hw.samplers, + info->samplers, + info->nr_samplers * sizeof(void *)) != 0) + { + memcpy(info->hw.samplers, + info->samplers, + info->nr_samplers * sizeof(void *)); + info->hw.nr_samplers = info->nr_samplers; + + if (info == &ctx->fragment_samplers) { + ctx->pipe->bind_fragment_sampler_states(ctx->pipe, + info->nr_samplers, + info->samplers); + } + else if (info == &ctx->vertex_samplers) { + ctx->pipe->bind_vertex_sampler_states(ctx->pipe, + info->nr_samplers, + info->samplers); + } + else { + assert(0); + } } } void -cso_restore_fragment_sampler_views(struct cso_context *cso) +cso_single_sampler_done( struct cso_context *ctx ) { - uint i; + single_sampler_done(ctx, &ctx->fragment_samplers); +} + +void +cso_single_vertex_sampler_done(struct cso_context *ctx) +{ + single_sampler_done(ctx, &ctx->vertex_samplers); +} - for (i = 0; i < cso->nr_fragment_sampler_views_saved; i++) { - pipe_sampler_view_reference(&cso->fragment_sampler_views[i], cso->fragment_sampler_views_saved[i]); - pipe_sampler_view_reference(&cso->fragment_sampler_views_saved[i], NULL); + +/* + * If the function encouters any errors it will return the + * last one. Done to always try to set as many samplers + * as possible. + */ +static enum pipe_error +set_samplers(struct cso_context *ctx, + struct sampler_info *info, + unsigned nr, + const struct pipe_sampler_state **templates) +{ + unsigned i; + enum pipe_error temp, error = PIPE_OK; + + /* TODO: fastpath + */ + + for (i = 0; i < nr; i++) { + temp = single_sampler(ctx, info, i, templates[i]); + if (temp != PIPE_OK) + error = temp; } - for (; i < cso->nr_fragment_sampler_views; i++) { - pipe_sampler_view_reference(&cso->fragment_sampler_views[i], NULL); + + for ( ; i < info->nr_samplers; i++) { + temp = single_sampler(ctx, info, i, NULL); + if (temp != PIPE_OK) + error = temp; } - cso->pipe->set_fragment_sampler_views(cso->pipe, - MAX2(cso->nr_fragment_sampler_views, cso->nr_fragment_sampler_views_saved), - cso->fragment_sampler_views); + single_sampler_done(ctx, info); + + return error; +} + +enum pipe_error +cso_set_samplers(struct cso_context *ctx, + unsigned nr, + const struct pipe_sampler_state **templates) +{ + return set_samplers(ctx, &ctx->fragment_samplers, nr, templates); +} - cso->nr_fragment_sampler_views = cso->nr_fragment_sampler_views_saved; - cso->nr_fragment_sampler_views_saved = 0; +enum pipe_error +cso_set_vertex_samplers(struct cso_context *ctx, + unsigned nr, + const struct pipe_sampler_state **templates) +{ + return set_samplers(ctx, &ctx->vertex_samplers, nr, templates); } -/* vertex sampler view state */ + +static void +save_samplers(struct cso_context *ctx, struct sampler_info *info) +{ + info->nr_samplers_saved = info->nr_samplers; + memcpy(info->samplers_saved, info->samplers, sizeof(info->samplers)); +} void -cso_set_vertex_sampler_views(struct cso_context *cso, - uint count, - struct pipe_sampler_view **views) +cso_save_samplers(struct cso_context *ctx) +{ + save_samplers(ctx, &ctx->fragment_samplers); +} + +void +cso_save_vertex_samplers(struct cso_context *ctx) +{ + save_samplers(ctx, &ctx->vertex_samplers); +} + + + +static void +restore_samplers(struct cso_context *ctx, struct sampler_info *info) +{ + info->nr_samplers = info->nr_samplers_saved; + memcpy(info->samplers, info->samplers_saved, sizeof(info->samplers)); + single_sampler_done(ctx, info); +} + +void +cso_restore_samplers(struct cso_context *ctx) +{ + restore_samplers(ctx, &ctx->fragment_samplers); +} + +void +cso_restore_vertex_samplers(struct cso_context *ctx) +{ + restore_samplers(ctx, &ctx->vertex_samplers); +} + + + +static void +set_sampler_views(struct cso_context *ctx, + struct sampler_info *info, + void (*set_views)(struct pipe_context *, + unsigned num_views, + struct pipe_sampler_view **), + uint count, + struct pipe_sampler_view **views) { uint i; + /* reference new views */ for (i = 0; i < count; i++) { - pipe_sampler_view_reference(&cso->vertex_sampler_views[i], views[i]); + pipe_sampler_view_reference(&info->views[i], views[i]); } - for (; i < cso->nr_vertex_sampler_views; i++) { - pipe_sampler_view_reference(&cso->vertex_sampler_views[i], NULL); + /* unref extra old views, if any */ + for (; i < info->nr_views; i++) { + pipe_sampler_view_reference(&info->views[i], NULL); } - cso->pipe->set_vertex_sampler_views(cso->pipe, - MAX2(count, cso->nr_vertex_sampler_views), - cso->vertex_sampler_views); + info->nr_views = count; - cso->nr_vertex_sampler_views = count; + /* bind the new sampler views */ + set_views(ctx->pipe, count, info->views); } void -cso_save_vertex_sampler_views(struct cso_context *cso) +cso_set_fragment_sampler_views(struct cso_context *ctx, + uint count, + struct pipe_sampler_view **views) { - uint i; + set_sampler_views(ctx, &ctx->fragment_samplers, + ctx->pipe->set_fragment_sampler_views, + count, views); +} - cso->nr_vertex_sampler_views_saved = cso->nr_vertex_sampler_views; +void +cso_set_vertex_sampler_views(struct cso_context *ctx, + uint count, + struct pipe_sampler_view **views) +{ + set_sampler_views(ctx, &ctx->vertex_samplers, + ctx->pipe->set_vertex_sampler_views, + count, views); +} - for (i = 0; i < cso->nr_vertex_sampler_views; i++) { - assert(!cso->vertex_sampler_views_saved[i]); - pipe_sampler_view_reference(&cso->vertex_sampler_views_saved[i], - cso->vertex_sampler_views[i]); + +static void +save_sampler_views(struct cso_context *ctx, + struct sampler_info *info) +{ + uint i; + + info->nr_views_saved = info->nr_views; + + for (i = 0; i < info->nr_views; i++) { + assert(!info->views_saved[i]); + pipe_sampler_view_reference(&info->views_saved[i], info->views[i]); } } void -cso_restore_vertex_sampler_views(struct cso_context *cso) +cso_save_fragment_sampler_views(struct cso_context *ctx) +{ + save_sampler_views(ctx, &ctx->fragment_samplers); +} + +void +cso_save_vertex_sampler_views(struct cso_context *ctx) +{ + save_sampler_views(ctx, &ctx->vertex_samplers); +} + + +static void +restore_sampler_views(struct cso_context *ctx, + struct sampler_info *info, + void (*set_views)(struct pipe_context *, + unsigned num_views, + struct pipe_sampler_view **)) { uint i; - for (i = 0; i < cso->nr_vertex_sampler_views_saved; i++) { - pipe_sampler_view_reference(&cso->vertex_sampler_views[i], cso->vertex_sampler_views_saved[i]); - pipe_sampler_view_reference(&cso->vertex_sampler_views_saved[i], NULL); + for (i = 0; i < info->nr_views_saved; i++) { + pipe_sampler_view_reference(&info->views[i], info->views_saved[i]); + pipe_sampler_view_reference(&info->views_saved[i], NULL); } - for (; i < cso->nr_vertex_sampler_views; i++) { - pipe_sampler_view_reference(&cso->vertex_sampler_views[i], NULL); + for (; i < info->nr_views; i++) { + pipe_sampler_view_reference(&info->views[i], NULL); } - cso->pipe->set_vertex_sampler_views(cso->pipe, - MAX2(cso->nr_vertex_sampler_views, cso->nr_vertex_sampler_views_saved), - cso->vertex_sampler_views); + /* bind the old/saved sampler views */ + set_views(ctx->pipe, info->nr_views_saved, info->views); + + info->nr_views = info->nr_views_saved; + info->nr_views_saved = 0; +} + +void +cso_restore_fragment_sampler_views(struct cso_context *ctx) +{ + restore_sampler_views(ctx, &ctx->fragment_samplers, + ctx->pipe->set_fragment_sampler_views); +} - cso->nr_vertex_sampler_views = cso->nr_vertex_sampler_views_saved; - cso->nr_vertex_sampler_views_saved = 0; +void +cso_restore_vertex_sampler_views(struct cso_context *ctx) +{ + restore_sampler_views(ctx, &ctx->vertex_samplers, + ctx->pipe->set_vertex_sampler_views); } diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index f0b07f7376..00edc9f8dd 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -110,6 +110,13 @@ void cso_save_vertex_elements(struct cso_context *ctx); void cso_restore_vertex_elements(struct cso_context *ctx); +void cso_set_vertex_buffers(struct cso_context *ctx, + unsigned count, + const struct pipe_vertex_buffer *buffers); +void cso_save_vertex_buffers(struct cso_context *ctx); +void cso_restore_vertex_buffers(struct cso_context *ctx); + + /* These aren't really sensible -- most of the time the api provides * object semantics for shaders anyway, and the cases where it doesn't * (eg mesa's internall-generated texenv programs), it will be up to diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 73d5b6e403..95d9671987 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -35,6 +35,7 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_cpu_detect.h" +#include "util/u_inlines.h" #include "draw_context.h" #include "draw_vs.h" #include "draw_gs.h" @@ -87,8 +88,14 @@ draw_create_gallivm(struct pipe_context *pipe, struct gallivm_state *gallivm) goto fail; #if HAVE_LLVM - if (draw_get_option_use_llvm() && gallivm) { - draw->llvm = draw_llvm_create(draw, gallivm); + if (draw_get_option_use_llvm()) { + if (!gallivm) { + gallivm = gallivm_create(); + draw->own_gallivm = gallivm; + } + + if (gallivm) + draw->llvm = draw_llvm_create(draw, gallivm); } #endif @@ -164,6 +171,10 @@ void draw_destroy( struct draw_context *draw ) } } + for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { + pipe_resource_reference(&draw->pt.vertex_buffer[i].buffer, NULL); + } + /* Not so fast -- we're just borrowing this at the moment. * if (draw->render) @@ -175,8 +186,11 @@ void draw_destroy( struct draw_context *draw ) draw_vs_destroy( draw ); draw_gs_destroy( draw ); #ifdef HAVE_LLVM - if(draw->llvm) + if (draw->llvm) draw_llvm_destroy( draw->llvm ); + + if (draw->own_gallivm) + gallivm_destroy(draw->own_gallivm); #endif FREE( draw ); @@ -307,8 +321,9 @@ draw_set_vertex_buffers(struct draw_context *draw, { assert(count <= PIPE_MAX_ATTRIBS); - memcpy(draw->pt.vertex_buffer, buffers, count * sizeof(buffers[0])); - draw->pt.nr_vertex_buffers = count; + util_copy_vertex_buffers(draw->pt.vertex_buffer, + &draw->pt.nr_vertex_buffers, + buffers, count); } @@ -395,7 +410,7 @@ void draw_wide_line_threshold(struct draw_context *draw, float threshold) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->pipeline.wide_line_threshold = threshold; + draw->pipeline.wide_line_threshold = roundf(threshold); } diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 3b8286f05d..a5217c1d4e 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -214,13 +214,12 @@ static LLVMTypeRef create_jit_vertex_buffer_type(struct gallivm_state *gallivm) { LLVMTargetDataRef target = gallivm->target; - LLVMTypeRef elem_types[4]; + LLVMTypeRef elem_types[3]; LLVMTypeRef vb_type; elem_types[0] = - elem_types[1] = - elem_types[2] = LLVMInt32TypeInContext(gallivm->context); - elem_types[3] = LLVMPointerType(LLVMOpaqueTypeInContext(gallivm->context), 0); /* vs_constants */ + elem_types[1] = LLVMInt32TypeInContext(gallivm->context); + elem_types[2] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* vs_constants */ vb_type = LLVMStructTypeInContext(gallivm->context, elem_types, Elements(elem_types), 0); @@ -230,7 +229,7 @@ create_jit_vertex_buffer_type(struct gallivm_state *gallivm) LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride, target, vb_type, 0); LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset, - target, vb_type, 2); + target, vb_type, 1); LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type); @@ -437,6 +436,7 @@ generate_vs(struct draw_llvm *llvm, LLVMBuilderRef builder, LLVMValueRef (*outputs)[NUM_CHANNELS], const LLVMValueRef (*inputs)[NUM_CHANNELS], + LLVMValueRef system_values_array, LLVMValueRef context_ptr, struct lp_build_sampler_soa *draw_sampler) { @@ -468,6 +468,7 @@ generate_vs(struct draw_llvm *llvm, vs_type, NULL /*struct lp_build_mask_context *mask*/, consts_ptr, + system_values_array, NULL /*pos*/, inputs, outputs, @@ -509,9 +510,7 @@ generate_fetch(struct gallivm_state *gallivm, LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, &indices, 1, ""); LLVMValueRef vb_stride = draw_jit_vbuffer_stride(gallivm, vbuf); - LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(gallivm, vbuf); LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vbuf); - LLVMValueRef cond; LLVMValueRef stride; if (velem->instance_divisor) { @@ -521,10 +520,6 @@ generate_fetch(struct gallivm_state *gallivm, "instance_divisor"); } - /* limit index to min(index, vb_max_index) */ - cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, ""); - index = LLVMBuildSelect(builder, cond, index, vb_max_index, ""); - stride = LLVMBuildMul(builder, vb_stride, index, ""); vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer"); @@ -1118,7 +1113,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) LLVMValueRef start, end, count, stride, step, io_itr; LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; LLVMValueRef instance_id; + LLVMValueRef system_values_array; struct draw_context *draw = llvm->draw; + const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info; unsigned i, j; struct lp_build_context bld; struct lp_build_loop_state lp_loop; @@ -1179,6 +1176,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) lp_build_context_init(&bld, llvm->gallivm, lp_type_int(32)); + system_values_array = lp_build_system_values_array(gallivm, vs_info, + instance_id, NULL); + end = lp_build_add(&bld, start, count); step = lp_build_const_int32(gallivm, max_vertices); @@ -1233,6 +1233,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) builder, outputs, ptr_aos, + system_values_array, context_ptr, sampler); @@ -1263,8 +1264,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) /* store clipmask in vertex header and positions in data */ convert_to_aos(gallivm, io, outputs, clipmask, - draw->vs.vertex_shader->info.num_outputs, - max_vertices); + vs_info->num_outputs, max_vertices); } lp_build_loop_end_cond(&lp_loop, end, step, LLVMIntUGE); @@ -1315,7 +1315,9 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr; LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; LLVMValueRef instance_id; + LLVMValueRef system_values_array; struct draw_context *draw = llvm->draw; + const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info; unsigned i, j; struct lp_build_context bld; struct lp_build_loop_state lp_loop; @@ -1376,6 +1378,10 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian lp_build_context_init(&bld, gallivm, lp_type_int(32)); + system_values_array = lp_build_system_values_array(gallivm, vs_info, + instance_id, NULL); + + step = lp_build_const_int32(gallivm, max_vertices); /* code generated texture sampling */ @@ -1438,6 +1444,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian builder, outputs, ptr_aos, + system_values_array, context_ptr, sampler); @@ -1471,8 +1478,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian * and transformed positions in data */ convert_to_aos(gallivm, io, outputs, clipmask, - draw->vs.vertex_shader->info.num_outputs, - max_vertices); + vs_info->num_outputs, max_vertices); } lp_build_loop_end_cond(&lp_loop, fetch_count, step, LLVMIntUGE); @@ -1610,16 +1616,14 @@ draw_llvm_destroy_variant(struct draw_llvm_variant *variant) struct draw_llvm *llvm = variant->llvm; if (variant->function_elts) { - if (variant->function_elts) - LLVMFreeMachineCodeForFunction(llvm->gallivm->engine, - variant->function_elts); + LLVMFreeMachineCodeForFunction(llvm->gallivm->engine, + variant->function_elts); LLVMDeleteFunction(variant->function_elts); } if (variant->function) { - if (variant->function) - LLVMFreeMachineCodeForFunction(llvm->gallivm->engine, - variant->function); + LLVMFreeMachineCodeForFunction(llvm->gallivm->engine, + variant->function); LLVMDeleteFunction(variant->function); } diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index 73c1d9251e..e8623e7bcd 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -133,11 +133,8 @@ struct draw_jit_context #define draw_jit_vbuffer_stride(_gallivm, _ptr) \ lp_build_struct_get(_gallivm, _ptr, 0, "stride") -#define draw_jit_vbuffer_max_index(_gallivm, _ptr) \ - lp_build_struct_get(_gallivm, _ptr, 1, "max_index") - #define draw_jit_vbuffer_offset(_gallivm, _ptr) \ - lp_build_struct_get(_gallivm, _ptr, 2, "buffer_offset") + lp_build_struct_get(_gallivm, _ptr, 1, "buffer_offset") typedef int @@ -229,7 +226,6 @@ struct draw_llvm_variant /* key is variable-sized, must be last */ struct draw_llvm_variant_key key; - /* key is variable-sized, must be last */ }; struct llvm_vertex_shader { diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 6206197dae..f1b0171f52 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -64,8 +64,8 @@ boolean draw_pipeline_init( struct draw_context *draw ) return FALSE; /* these defaults are oriented toward the needs of softpipe */ - draw->pipeline.wide_point_threshold = 1000000.0; /* infinity */ - draw->pipeline.wide_line_threshold = 1.0; + draw->pipeline.wide_point_threshold = 1000000.0f; /* infinity */ + draw->pipeline.wide_line_threshold = 1.0f; draw->pipeline.wide_point_sprites = FALSE; draw->pipeline.line_stipple = TRUE; draw->pipeline.point_sprite = TRUE; diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 0851b9acc0..32af29ae14 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -55,9 +55,16 @@ /** + * Size for the alpha texture used for antialiasing + */ +#define TEXTURE_SIZE_LOG2 5 /* 32 x 32 */ + +/** * Max texture level for the alpha texture used for antialiasing + * + * Don't use the 1x1 and 2x2 mipmap levels. */ -#define MAX_TEXTURE_LEVEL 5 /* 32 x 32 */ +#define MAX_TEXTURE_LEVEL (TEXTURE_SIZE_LOG2 - 2) /** @@ -403,8 +410,8 @@ aaline_create_texture(struct aaline_stage *aaline) texTemp.target = PIPE_TEXTURE_2D; texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */ texTemp.last_level = MAX_TEXTURE_LEVEL; - texTemp.width0 = 1 << MAX_TEXTURE_LEVEL; - texTemp.height0 = 1 << MAX_TEXTURE_LEVEL; + texTemp.width0 = 1 << TEXTURE_SIZE_LOG2; + texTemp.height0 = 1 << TEXTURE_SIZE_LOG2; texTemp.depth0 = 1; texTemp.array_size = 1; texTemp.bind = PIPE_BIND_SAMPLER_VIEW; @@ -461,7 +468,7 @@ aaline_create_texture(struct aaline_stage *aaline) d = 200; /* tuneable */ } else if (i == 0 || j == 0 || i == size - 1 || j == size - 1) { - d = 0; + d = 35; /* edge texel */ } else { d = 255; @@ -498,8 +505,7 @@ aaline_create_sampler(struct aaline_stage *aaline) sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; sampler.normalized_coords = 1; sampler.min_lod = 0.0f; - /* avoid using the 1x1 and 2x2 mipmap levels */ - sampler.max_lod = MAX_TEXTURE_LEVEL - 2; + sampler.max_lod = MAX_TEXTURE_LEVEL; aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler); if (aaline->sampler_cso == NULL) diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 5ea552f51c..60f6380c50 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -874,6 +874,8 @@ draw_install_aapoint_stage(struct draw_context *draw, { struct aapoint_stage *aapoint; + pipe->draw = (void *) draw; + /* * Create / install AA point drawing / prim stage */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index f5515c1df7..fe3627be86 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -27,8 +27,9 @@ /** * Polygon stipple stage: implement polygon stipple with texture map and - * fragment program. The fragment program samples the texture and does - * a fragment kill for the stipple-failing fragments. + * fragment program. The fragment program samples the texture using the + * fragment window coordinate register and does a fragment kill for the + * stipple-failing fragments. * * Authors: Brian Paul */ @@ -114,7 +115,8 @@ struct pstip_stage /** * Subclass of tgsi_transform_context, used for transforming the - * user's fragment shader to add the special AA instructions. + * user's fragment shader to add the extra texture sample and fragment kill + * instructions. */ struct pstip_transform_context { struct tgsi_transform_context base; @@ -386,13 +388,6 @@ pstip_update_texture(struct pstip_stage *pstip) uint i, j; ubyte *data; - /* XXX: want to avoid flushing just because we use stipple: - * - * Flush should no longer be necessary if driver is properly - * interleaving drawing and transfers on a given context: - */ - pipe->flush( pipe, PIPE_FLUSH_TEXTURE_CACHE, NULL ); - transfer = pipe_get_transfer(pipe, pstip->texture, 0, 0, PIPE_TRANSFER_WRITE, 0, 0, 32, 32); data = pipe->transfer_map(pipe, transfer); @@ -658,16 +653,16 @@ pstip_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *fs) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); - struct pstip_fragment_shader *aafs = CALLOC_STRUCT(pstip_fragment_shader); + struct pstip_fragment_shader *pstipfs = CALLOC_STRUCT(pstip_fragment_shader); - if (aafs) { - aafs->state = *fs; + if (pstipfs) { + pstipfs->state = *fs; /* pass-through */ - aafs->driver_fs = pstip->driver_create_fs_state(pstip->pipe, fs); + pstipfs->driver_fs = pstip->driver_create_fs_state(pstip->pipe, fs); } - return aafs; + return pstipfs; } @@ -675,12 +670,12 @@ static void pstip_bind_fs_state(struct pipe_context *pipe, void *fs) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); - struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs; + struct pstip_fragment_shader *pstipfs = (struct pstip_fragment_shader *) fs; /* save current */ - pstip->fs = aafs; + pstip->fs = pstipfs; /* pass-through */ pstip->driver_bind_fs_state(pstip->pipe, - (aafs ? aafs->driver_fs : NULL)); + (pstipfs ? pstipfs->driver_fs : NULL)); } @@ -688,14 +683,14 @@ static void pstip_delete_fs_state(struct pipe_context *pipe, void *fs) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); - struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs; + struct pstip_fragment_shader *pstipfs = (struct pstip_fragment_shader *) fs; /* pass-through */ - pstip->driver_delete_fs_state(pstip->pipe, aafs->driver_fs); + pstip->driver_delete_fs_state(pstip->pipe, pstipfs->driver_fs); - if (aafs->pstip_fs) - pstip->driver_delete_fs_state(pstip->pipe, aafs->pstip_fs); + if (pstipfs->pstip_fs) + pstip->driver_delete_fs_state(pstip->pipe, pstipfs->pstip_fs); - FREE(aafs); + FREE(pstipfs); } diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c index c575a8ac7c..27afba5af3 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_validate.c +++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c @@ -29,6 +29,7 @@ */ #include "util/u_memory.h" +#include "util/u_math.h" #include "pipe/p_defines.h" #include "draw_private.h" #include "draw_pipe.h" @@ -86,7 +87,7 @@ draw_need_pipeline(const struct draw_context *draw, return TRUE; /* wide lines */ - if (rasterizer->line_width > draw->pipeline.wide_line_threshold) + if (roundf(rasterizer->line_width) > draw->pipeline.wide_line_threshold) return TRUE; /* AA lines */ @@ -169,7 +170,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) stage->next = next; /* drawing wide lines? */ - wide_lines = (rast->line_width > draw->pipeline.wide_line_threshold + wide_lines = (roundf(rast->line_width) > draw->pipeline.wide_line_threshold && !rast->line_smooth); /* drawing large/sprite points (but not AA points)? */ diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 06ed4d60ef..db2e3c5410 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -286,6 +286,7 @@ struct draw_context #ifdef HAVE_LLVM struct draw_llvm *llvm; + struct gallivm_state *own_gallivm; #endif struct pipe_sampler_view *sampler_views[PIPE_MAX_VERTEX_SAMPLERS]; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 4078b2a07d..c3d7e871f7 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -459,10 +459,9 @@ draw_vbo(struct draw_context *draw, } debug_printf("Buffers:\n"); for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { - debug_printf(" %u: stride=%u maxindex=%u offset=%u ptr=%p\n", + debug_printf(" %u: stride=%u offset=%u ptr=%p\n", i, draw->pt.vertex_buffer[i].stride, - draw->pt.vertex_buffer[i].max_index, draw->pt.vertex_buffer[i].buffer_offset, draw->pt.user.vbuffer[i]); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index ae12ee24bd..4fa3b265e1 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -139,7 +139,7 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), draw->pt.vertex_buffer[i].stride, - draw->pt.vertex_buffer[i].max_index); + draw->pt.user.max_index); } translate->run_elts( translate, @@ -166,7 +166,7 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), draw->pt.vertex_buffer[i].stride, - draw->pt.vertex_buffer[i].max_index); + draw->pt.user.max_index); } translate->run( translate, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index e706b7796f..51043102a6 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -186,7 +186,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), draw->pt.vertex_buffer[i].stride, - draw->pt.vertex_buffer[i].max_index); + draw->pt.user.max_index); } *max_vertices = (draw->render->max_vertex_buffer_bytes / diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 7c198c6026..1e926fb028 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -58,8 +58,8 @@ struct fetch_shade_emit { const ubyte *src[PIPE_MAX_ATTRIBS]; unsigned prim; - struct draw_vs_varient_key key; - struct draw_vs_varient *active; + struct draw_vs_variant_key key; + struct draw_vs_variant *active; const struct vertex_info *vinfo; @@ -150,7 +150,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, } - fse->active = draw_vs_lookup_varient( draw->vs.vertex_shader, + fse->active = draw_vs_lookup_variant( draw->vs.vertex_shader, &fse->key ); if (!fse->active) { @@ -169,7 +169,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, ((const ubyte *) draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), draw->pt.vertex_buffer[i].stride, - draw->pt.vertex_buffer[i].max_index ); + draw->pt.user.max_index ); } *max_vertices = (draw->render->max_vertex_buffer_bytes / diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h index 3f66f962e1..75dba8c39a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h @@ -258,9 +258,10 @@ vsplit_segment_fan_linear(struct vsplit_frontend *vsplit, unsigned flags, boolean use_spoken = ((flags & DRAW_SPLIT_BEFORE) != 0); unsigned nr = 0, i; - assert(icount + !!use_spoken <= vsplit->segment_size); + assert(icount <= vsplit->segment_size); if (use_spoken) { + /* replace istart by i0 */ vsplit->fetch_elts[nr++] = i0; for (i = 1 ; i < icount; i++) vsplit->fetch_elts[nr++] = istart + i; diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index fb665b08ff..7caad6f005 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -165,10 +165,10 @@ draw_delete_vertex_shader(struct draw_context *draw, { unsigned i; - for (i = 0; i < dvs->nr_varients; i++) - dvs->varient[i]->destroy( dvs->varient[i] ); + for (i = 0; i < dvs->nr_variants; i++) + dvs->variant[i]->destroy( dvs->variant[i] ); - dvs->nr_varients = 0; + dvs->nr_variants = 0; dvs->delete( dvs ); } @@ -225,40 +225,40 @@ draw_vs_destroy( struct draw_context *draw ) } -struct draw_vs_varient * -draw_vs_lookup_varient( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ) +struct draw_vs_variant * +draw_vs_lookup_variant( struct draw_vertex_shader *vs, + const struct draw_vs_variant_key *key ) { - struct draw_vs_varient *varient; + struct draw_vs_variant *variant; unsigned i; - /* Lookup existing varient: + /* Lookup existing variant: */ - for (i = 0; i < vs->nr_varients; i++) - if (draw_vs_varient_key_compare(key, &vs->varient[i]->key) == 0) - return vs->varient[i]; + for (i = 0; i < vs->nr_variants; i++) + if (draw_vs_variant_key_compare(key, &vs->variant[i]->key) == 0) + return vs->variant[i]; /* Else have to create a new one: */ - varient = vs->create_varient( vs, key ); - if (varient == NULL) + variant = vs->create_variant( vs, key ); + if (variant == NULL) return NULL; /* Add it to our list, could be smarter: */ - if (vs->nr_varients < Elements(vs->varient)) { - vs->varient[vs->nr_varients++] = varient; + if (vs->nr_variants < Elements(vs->variant)) { + vs->variant[vs->nr_variants++] = variant; } else { - vs->last_varient++; - vs->last_varient %= Elements(vs->varient); - vs->varient[vs->last_varient]->destroy(vs->varient[vs->last_varient]); - vs->varient[vs->last_varient] = varient; + vs->last_variant++; + vs->last_variant %= Elements(vs->variant); + vs->variant[vs->last_variant]->destroy(vs->variant[vs->last_variant]); + vs->variant[vs->last_variant] = variant; } /* Done */ - return varient; + return variant; } diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index f9a038788f..e6d187e977 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -33,12 +33,13 @@ #include "draw_context.h" #include "draw_private.h" +#include "draw_vertex.h" struct draw_context; struct pipe_shader_state; -struct draw_varient_input +struct draw_variant_input { enum pipe_format format; unsigned buffer; @@ -46,19 +47,19 @@ struct draw_varient_input unsigned instance_divisor; }; -struct draw_varient_output +struct draw_variant_output { - enum pipe_format format; /* output format */ + enum attrib_emit format; /* output format */ unsigned vs_output:8; /* which vertex shader output is this? */ unsigned offset:24; /* offset into output vertex */ }; -struct draw_varient_element { - struct draw_varient_input in; - struct draw_varient_output out; +struct draw_variant_element { + struct draw_variant_input in; + struct draw_variant_output out; }; -struct draw_vs_varient_key { +struct draw_vs_variant_key { unsigned output_stride; unsigned nr_elements:8; /* max2(nr_inputs, nr_outputs) */ unsigned nr_inputs:8; @@ -66,34 +67,34 @@ struct draw_vs_varient_key { unsigned viewport:1; unsigned clip:1; unsigned const_vbuffers:5; - struct draw_varient_element element[PIPE_MAX_ATTRIBS]; + struct draw_variant_element element[PIPE_MAX_ATTRIBS]; }; -struct draw_vs_varient; +struct draw_vs_variant; -struct draw_vs_varient { - struct draw_vs_varient_key key; +struct draw_vs_variant { + struct draw_vs_variant_key key; struct draw_vertex_shader *vs; - void (*set_buffer)( struct draw_vs_varient *, + void (*set_buffer)( struct draw_vs_variant *, unsigned i, const void *ptr, unsigned stride, unsigned max_stride ); - void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader, + void (PIPE_CDECL *run_linear)( struct draw_vs_variant *shader, unsigned start, unsigned count, void *output_buffer ); - void (PIPE_CDECL *run_elts)( struct draw_vs_varient *shader, + void (PIPE_CDECL *run_elts)( struct draw_vs_variant *shader, const unsigned *elts, unsigned count, void *output_buffer ); - void (*destroy)( struct draw_vs_varient * ); + void (*destroy)( struct draw_vs_variant * ); }; @@ -117,11 +118,11 @@ struct draw_vertex_shader { /* */ - struct draw_vs_varient *varient[16]; - unsigned nr_varients; - unsigned last_varient; - struct draw_vs_varient *(*create_varient)( struct draw_vertex_shader *shader, - const struct draw_vs_varient_key *key ); + struct draw_vs_variant *variant[16]; + unsigned nr_variants; + unsigned last_variant; + struct draw_vs_variant *(*create_variant)( struct draw_vertex_shader *shader, + const struct draw_vs_variant_key *key ); void (*prepare)( struct draw_vertex_shader *shader, @@ -144,9 +145,9 @@ struct draw_vertex_shader { }; -struct draw_vs_varient * -draw_vs_lookup_varient( struct draw_vertex_shader *base, - const struct draw_vs_varient_key *key ); +struct draw_vs_variant * +draw_vs_lookup_variant( struct draw_vertex_shader *base, + const struct draw_vs_variant_key *key ); /******************************************************************************** @@ -166,12 +167,12 @@ draw_create_vs_ppc(struct draw_context *draw, const struct pipe_shader_state *templ); -struct draw_vs_varient_key; +struct draw_vs_variant_key; struct draw_vertex_shader; -struct draw_vs_varient * -draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ); +struct draw_vs_variant * +draw_vs_create_variant_aos_sse( struct draw_vertex_shader *vs, + const struct draw_vs_variant_key *key ); #if HAVE_LLVM struct draw_vertex_shader * @@ -181,7 +182,7 @@ draw_create_vs_llvm(struct draw_context *draw, /******************************************************************************** - * Helpers for vs implementations that don't do their own fetch/emit varients. + * Helpers for vs implementations that don't do their own fetch/emit variants. * Means these can be shared between shaders. */ struct translate; @@ -194,21 +195,21 @@ struct translate *draw_vs_get_fetch( struct draw_context *draw, struct translate *draw_vs_get_emit( struct draw_context *draw, struct translate_key *key ); -struct draw_vs_varient * -draw_vs_create_varient_generic( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ); +struct draw_vs_variant * +draw_vs_create_variant_generic( struct draw_vertex_shader *vs, + const struct draw_vs_variant_key *key ); -static INLINE int draw_vs_varient_keysize( const struct draw_vs_varient_key *key ) +static INLINE int draw_vs_variant_keysize( const struct draw_vs_variant_key *key ) { - return 2 * sizeof(int) + key->nr_elements * sizeof(struct draw_varient_element); + return 2 * sizeof(int) + key->nr_elements * sizeof(struct draw_variant_element); } -static INLINE int draw_vs_varient_key_compare( const struct draw_vs_varient_key *a, - const struct draw_vs_varient_key *b ) +static INLINE int draw_vs_variant_key_compare( const struct draw_vs_variant_key *a, + const struct draw_vs_variant_key *b ) { - int keysize = draw_vs_varient_keysize(a); + int keysize = draw_vs_variant_keysize(a); return memcmp(a, b, keysize); } diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 19f49e34c8..7b90dba0cd 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1918,7 +1918,7 @@ static void find_last_write_outputs( struct aos_compilation *cp ) #define ARG_OUTBUF 4 -static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, +static boolean build_vertex_program( struct draw_vs_variant_aos_sse *variant, boolean linear ) { struct tgsi_parse_context parse; @@ -1927,14 +1927,14 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, util_init_math(); - tgsi_parse_init( &parse, varient->base.vs->state.tokens ); + tgsi_parse_init( &parse, variant->base.vs->state.tokens ); memset(&cp, 0, sizeof(cp)); cp.insn_counter = 1; - cp.vaos = varient; + cp.vaos = variant; cp.have_sse2 = 1; - cp.func = &varient->func[ linear ? 0 : 1 ]; + cp.func = &variant->func[ linear ? 0 : 1 ]; cp.tmp_EAX = x86_make_reg(file_REG32, reg_AX); cp.idx_EBX = x86_make_reg(file_REG32, reg_BX); @@ -2090,20 +2090,20 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, /** cast wrapper */ -static INLINE struct draw_vs_varient_aos_sse * -draw_vs_varient_aos_sse(struct draw_vs_varient *varient) +static INLINE struct draw_vs_variant_aos_sse * +draw_vs_variant_aos_sse(struct draw_vs_variant *variant) { - return (struct draw_vs_varient_aos_sse *) varient; + return (struct draw_vs_variant_aos_sse *) variant; } -static void vaos_set_buffer( struct draw_vs_varient *varient, +static void vaos_set_buffer( struct draw_vs_variant *variant, unsigned buf, const void *ptr, unsigned stride, unsigned max_stride) { - struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); + struct draw_vs_variant_aos_sse *vaos = draw_vs_variant_aos_sse(variant); if (buf < vaos->nr_vb) { vaos->buffer[buf].base_ptr = (char *)ptr; @@ -2115,12 +2115,12 @@ static void vaos_set_buffer( struct draw_vs_varient *varient, -static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, +static void PIPE_CDECL vaos_run_elts( struct draw_vs_variant *variant, const unsigned *elts, unsigned count, void *output_buffer ) { - struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); + struct draw_vs_variant_aos_sse *vaos = draw_vs_variant_aos_sse(variant); struct aos_machine *machine = vaos->draw->vs.aos_machine; unsigned i; @@ -2139,12 +2139,12 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, output_buffer ); } -static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, +static void PIPE_CDECL vaos_run_linear( struct draw_vs_variant *variant, unsigned start, unsigned count, void *output_buffer ) { - struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); + struct draw_vs_variant_aos_sse *vaos = draw_vs_variant_aos_sse(variant); struct aos_machine *machine = vaos->draw->vs.aos_machine; unsigned i; @@ -2171,9 +2171,9 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, -static void vaos_destroy( struct draw_vs_varient *varient ) +static void vaos_destroy( struct draw_vs_variant *variant ) { - struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); + struct draw_vs_variant_aos_sse *vaos = draw_vs_variant_aos_sse(variant); FREE( vaos->buffer ); @@ -2185,11 +2185,11 @@ static void vaos_destroy( struct draw_vs_varient *varient ) -static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ) +static struct draw_vs_variant *variant_aos_sse( struct draw_vertex_shader *vs, + const struct draw_vs_variant_key *key ) { unsigned i; - struct draw_vs_varient_aos_sse *vaos = CALLOC_STRUCT(draw_vs_varient_aos_sse); + struct draw_vs_variant_aos_sse *vaos = CALLOC_STRUCT(draw_vs_variant_aos_sse); if (!vaos) goto fail; @@ -2249,17 +2249,17 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, } -struct draw_vs_varient * -draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ) +struct draw_vs_variant * +draw_vs_create_variant_aos_sse( struct draw_vertex_shader *vs, + const struct draw_vs_variant_key *key ) { - struct draw_vs_varient *varient = varient_aos_sse( vs, key ); + struct draw_vs_variant *variant = variant_aos_sse( vs, key ); - if (varient == NULL) { - varient = draw_vs_create_varient_generic( vs, key ); + if (variant == NULL) { + variant = draw_vs_create_variant_generic( vs, key ); } - return varient; + return variant; } diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 68e8295b5e..55e63d8b9f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -98,9 +98,9 @@ struct aos_buffer { -/* This is the temporary storage used by all the aos_sse vs varients. +/* This is the temporary storage used by all the aos_sse vs variants. * Create one per context and reuse by passing a pointer in at - * vs_varient creation?? + * vs_variant creation?? */ struct aos_machine { float input [MAX_INPUTS ][4]; @@ -134,7 +134,7 @@ struct aos_machine { struct aos_compilation { struct x86_function *func; - struct draw_vs_varient_aos_sse *vaos; + struct draw_vs_variant_aos_sse *vaos; unsigned insn_counter; unsigned num_immediates; @@ -234,8 +234,8 @@ typedef void (PIPE_CDECL *vaos_run_linear_func)( struct aos_machine *, void *output_buffer); -struct draw_vs_varient_aos_sse { - struct draw_vs_varient base; +struct draw_vs_variant_aos_sse { + struct draw_vs_variant base; struct draw_context *draw; struct aos_buffer *buffer; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index 8f8bbe7cb8..f1dd448773 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -384,7 +384,7 @@ static void emit_store_R8G8B8A8_UNORM( struct aos_compilation *cp, static boolean emit_output( struct aos_compilation *cp, struct x86_reg ptr, struct x86_reg dataXMM, - unsigned format ) + enum attrib_emit format ) { switch (format) { case EMIT_1F: @@ -422,7 +422,7 @@ boolean aos_emit_outputs( struct aos_compilation *cp ) unsigned i; for (i = 0; i < cp->vaos->base.key.nr_outputs; i++) { - unsigned format = cp->vaos->base.key.element[i].out.format; + enum attrib_emit format = cp->vaos->base.key.element[i].out.format; unsigned offset = cp->vaos->base.key.element[i].out.offset; unsigned vs_output = cp->vaos->base.key.element[i].out.vs_output; diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index dab3eb1ca8..c41d7c42a0 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -99,6 +99,12 @@ vs_exec_run_linear( struct draw_vertex_shader *shader, tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS, constants, const_size); + if (shader->info.uses_instanceid) { + unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_INSTANCEID]; + assert(i < Elements(machine->SystemValue)); + machine->SystemValue[i][0] = shader->draw->instance_id; + } + for (i = 0; i < count; i += MAX_TGSI_VERTICES) { unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); @@ -203,7 +209,7 @@ draw_create_vs_exec(struct draw_context *draw, vs->base.prepare = vs_exec_prepare; vs->base.run_linear = vs_exec_run_linear; vs->base.delete = vs_exec_delete; - vs->base.create_varient = draw_vs_create_varient_generic; + vs->base.create_variant = draw_vs_create_variant_generic; vs->machine = draw->vs.machine; return &vs->base; diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index fa9992db78..54a5574f73 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -65,19 +65,7 @@ static void vs_llvm_delete( struct draw_vertex_shader *dvs ) { struct llvm_vertex_shader *shader = llvm_vertex_shader(dvs); - struct pipe_fence_handle *fence = NULL; struct draw_llvm_variant_list_item *li; - struct pipe_context *pipe = dvs->draw->pipe; - - /* - * XXX: This might be not neccessary at all. - */ - pipe->flush(pipe, 0, &fence); - if (fence) { - pipe->screen->fence_finish(pipe->screen, fence, 0); - pipe->screen->fence_reference(pipe->screen, &fence, NULL); - } - li = first_elem(&shader->variants); while(!at_end(&shader->variants, li)) { @@ -119,7 +107,7 @@ draw_create_vs_llvm(struct draw_context *draw, vs->base.prepare = vs_llvm_prepare; vs->base.run_linear = vs_llvm_run_linear; vs->base.delete = vs_llvm_delete; - vs->base.create_varient = draw_vs_create_varient_generic; + vs->base.create_variant = draw_vs_create_variant_generic; make_empty_list(&vs->variants); diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index 5df84916c5..cf894bbe8a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -187,10 +187,10 @@ draw_create_vs_ppc(struct draw_context *draw, vs->base.draw = draw; #if 0 if (1) - vs->base.create_varient = draw_vs_varient_aos_ppc; + vs->base.create_variant = draw_vs_variant_aos_ppc; else #endif - vs->base.create_varient = draw_vs_create_varient_generic; + vs->base.create_variant = draw_vs_create_variant_generic; vs->base.prepare = vs_ppc_prepare; vs->base.run_linear = vs_ppc_run_linear; vs->base.delete = vs_ppc_delete; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 0b0c6077c6..d918579bda 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -71,6 +71,12 @@ vs_sse_prepare( struct draw_vertex_shader *base, struct tgsi_exec_machine *machine = shader->machine; machine->Samplers = draw->vs.samplers; + + if (base->info.uses_instanceid) { + unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_INSTANCEID]; + assert(i < Elements(machine->SystemValue)); + machine->SystemValue[i][0] = base->draw->instance_id; + } } @@ -166,9 +172,9 @@ draw_create_vs_sse(struct draw_context *draw, vs->base.draw = draw; if (1) - vs->base.create_varient = draw_vs_create_varient_aos_sse; + vs->base.create_variant = draw_vs_create_variant_aos_sse; else - vs->base.create_varient = draw_vs_create_varient_generic; + vs->base.create_variant = draw_vs_create_variant_generic; vs->base.prepare = vs_sse_prepare; vs->base.run_linear = vs_sse_run_linear; vs->base.delete = vs_sse_delete; @@ -194,7 +200,8 @@ draw_create_vs_sse(struct draw_context *draw, return &vs->base; fail: - debug_error("tgsi_emit_sse2() failed, falling back to interpreter\n"); + if (0) + debug_warning("tgsi_emit_sse2() failed, falling back to interpreter\n"); x86_release_func( &vs->sse2_program ); diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index eacd160187..d8f030f61e 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -41,8 +41,8 @@ /* A first pass at incorporating vertex fetch/emit functionality into */ -struct draw_vs_varient_generic { - struct draw_vs_varient base; +struct draw_vs_variant_generic { + struct draw_vs_variant base; struct draw_vertex_shader *shader; struct draw_context *draw; @@ -63,13 +63,13 @@ struct draw_vs_varient_generic { -static void vsvg_set_buffer( struct draw_vs_varient *varient, +static void vsvg_set_buffer( struct draw_vs_variant *variant, unsigned buffer, const void *ptr, unsigned stride, unsigned max_index ) { - struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; + struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant; vsvg->fetch->set_buffer(vsvg->fetch, buffer, @@ -81,7 +81,7 @@ static void vsvg_set_buffer( struct draw_vs_varient *varient, /* Mainly for debug at this stage: */ -static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg, +static void do_rhw_viewport( struct draw_vs_variant_generic *vsvg, unsigned count, void *output_buffer ) { @@ -104,7 +104,7 @@ static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg, } } -static void do_viewport( struct draw_vs_varient_generic *vsvg, +static void do_viewport( struct draw_vs_variant_generic *vsvg, unsigned count, void *output_buffer ) { @@ -126,12 +126,12 @@ static void do_viewport( struct draw_vs_varient_generic *vsvg, } -static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, +static void PIPE_CDECL vsvg_run_elts( struct draw_vs_variant *variant, const unsigned *elts, unsigned count, void *output_buffer) { - struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; + struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant; unsigned temp_vertex_stride = vsvg->temp_vertex_stride; void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride ); @@ -193,12 +193,12 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, } -static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, +static void PIPE_CDECL vsvg_run_linear( struct draw_vs_variant *variant, unsigned start, unsigned count, void *output_buffer ) { - struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; + struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant; unsigned temp_vertex_stride = vsvg->temp_vertex_stride; void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride ); @@ -259,20 +259,20 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, -static void vsvg_destroy( struct draw_vs_varient *varient ) +static void vsvg_destroy( struct draw_vs_variant *variant ) { - FREE(varient); + FREE(variant); } -struct draw_vs_varient * -draw_vs_create_varient_generic( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ) +struct draw_vs_variant * +draw_vs_create_variant_generic( struct draw_vertex_shader *vs, + const struct draw_vs_variant_key *key ) { unsigned i; struct translate_key fetch, emit; - struct draw_vs_varient_generic *vsvg = CALLOC_STRUCT( draw_vs_varient_generic ); + struct draw_vs_variant_generic *vsvg = CALLOC_STRUCT( draw_vs_variant_generic ); if (vsvg == NULL) return NULL; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h index c749a7a315..69718eb4b3 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h @@ -125,4 +125,22 @@ lp_build_const_float(struct gallivm_state *gallivm, float x) } +/** Return constant-valued pointer to int */ +static INLINE LLVMValueRef +lp_build_const_int_pointer(struct gallivm_state *gallivm, const void *ptr) +{ + LLVMTypeRef int_type; + LLVMValueRef v; + + /* int type large enough to hold a pointer */ + int_type = LLVMIntTypeInContext(gallivm->context, 8 * sizeof(void *)); + v = LLVMConstInt(int_type, (uintptr_t) ptr, 0); + v = LLVMBuildIntToPtr(gallivm->builder, v, + LLVMPointerType(int_type, 0), + "cast int to ptr"); + return v; +} + + + #endif /* !LP_BLD_CONST_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c index c43ee8ac63..c261d76116 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -231,44 +231,53 @@ lp_build_unsigned_norm_to_float(struct gallivm_state *gallivm, assert(dst_type.floating); - /* Special-case int8->float, though most cases could be handled - * this way: - */ - if (src_width == 8) { - scale = 1.0/255.0; + mantissa = lp_mantissa(dst_type); + + if (src_width <= (mantissa + 1)) { + /* + * The source width matches fits what can be represented in floating + * point (i.e., mantissa + 1 bits). So do a straight multiplication + * followed by casting. No further rounding is necessary. + */ + + scale = 1.0/(double)((1ULL << src_width) - 1); res = LLVMBuildSIToFP(builder, src, vec_type, ""); res = LLVMBuildFMul(builder, res, lp_build_const_vec(gallivm, dst_type, scale), ""); return res; } + else { + /* + * The source width exceeds what can be represented in floating + * point. So truncate the incoming values. + */ - mantissa = lp_mantissa(dst_type); - - n = MIN2(mantissa, src_width); + n = MIN2(mantissa, src_width); - ubound = ((unsigned long long)1 << n); - mask = ubound - 1; - scale = (double)ubound/mask; - bias = (double)((unsigned long long)1 << (mantissa - n)); + ubound = ((unsigned long long)1 << n); + mask = ubound - 1; + scale = (double)ubound/mask; + bias = (double)((unsigned long long)1 << (mantissa - n)); - res = src; + res = src; - if(src_width > mantissa) { - int shift = src_width - mantissa; - res = LLVMBuildLShr(builder, res, - lp_build_const_int_vec(gallivm, dst_type, shift), ""); - } + if (src_width > mantissa) { + int shift = src_width - mantissa; + res = LLVMBuildLShr(builder, res, + lp_build_const_int_vec(gallivm, dst_type, shift), ""); + } - bias_ = lp_build_const_vec(gallivm, dst_type, bias); + bias_ = lp_build_const_vec(gallivm, dst_type, bias); - res = LLVMBuildOr(builder, - res, - LLVMBuildBitCast(builder, bias_, int_vec_type, ""), ""); + res = LLVMBuildOr(builder, + res, + LLVMBuildBitCast(builder, bias_, int_vec_type, ""), ""); - res = LLVMBuildBitCast(builder, res, vec_type, ""); + res = LLVMBuildBitCast(builder, res, vec_type, ""); - res = LLVMBuildFSub(builder, res, bias_, ""); - res = LLVMBuildFMul(builder, res, lp_build_const_vec(gallivm, dst_type, scale), ""); + res = LLVMBuildFSub(builder, res, bias_, ""); + res = LLVMBuildFMul(builder, res, lp_build_const_vec(gallivm, dst_type, scale), ""); + } return res; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.c b/src/gallium/auxiliary/gallivm/lp_bld_debug.c deleted file mode 100644 index 93e56553d7..0000000000 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.c +++ /dev/null @@ -1,141 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifdef HAVE_UDIS86 -#include <udis86.h> -#endif - -#include "util/u_math.h" -#include "util/u_debug.h" -#include "lp_bld_debug.h" - - -/** - * Check alignment. - * - * It is important that this check is not implemented as a macro or inlined - * function, as the compiler assumptions in respect to alignment of global - * and stack variables would often make the check a no op, defeating the - * whole purpose of the exercise. - */ -boolean -lp_check_alignment(const void *ptr, unsigned alignment) -{ - assert(util_is_power_of_two(alignment)); - return ((uintptr_t)ptr & (alignment - 1)) == 0; -} - - -void -lp_disassemble(const void* func) -{ -#ifdef HAVE_UDIS86 - ud_t ud_obj; - uint64_t max_jmp_pc; - uint inst_no; - boolean emit_addrs = TRUE, emit_line_nos = FALSE; - - ud_init(&ud_obj); - - ud_set_input_buffer(&ud_obj, (void*)func, 0xffff); - - max_jmp_pc = (uint64_t) (uintptr_t) func; - ud_set_pc(&ud_obj, max_jmp_pc); - -#ifdef PIPE_ARCH_X86 - ud_set_mode(&ud_obj, 32); -#endif -#ifdef PIPE_ARCH_X86_64 - ud_set_mode(&ud_obj, 64); -#endif - - ud_set_syntax(&ud_obj, UD_SYN_ATT); - - while (ud_disassemble(&ud_obj)) { - - if (emit_addrs) { -#ifdef PIPE_ARCH_X86 - debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj)); -#endif -#ifdef PIPE_ARCH_X86_64 - debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj)); -#endif - } - else if (emit_line_nos) { - debug_printf("%6d:\t", inst_no); - inst_no++; - } -#if 0 - debug_printf("%-16s ", ud_insn_hex(&ud_obj)); -#endif - - debug_printf("%s\n", ud_insn_asm(&ud_obj)); - - if(ud_obj.mnemonic != UD_Icall) { - unsigned i; - for(i = 0; i < 3; ++i) { - const struct ud_operand *op = &ud_obj.operand[i]; - if (op->type == UD_OP_JIMM){ - uint64_t pc = ud_obj.pc; - - switch (op->size) { - case 8: - pc += op->lval.sbyte; - break; - case 16: - pc += op->lval.sword; - break; - case 32: - pc += op->lval.sdword; - break; - default: - break; - } - if(pc > max_jmp_pc) - max_jmp_pc = pc; - } - } - } - - if (ud_obj.mnemonic == UD_Iinvalid || - (ud_insn_off(&ud_obj) >= max_jmp_pc && - (ud_obj.mnemonic == UD_Iret || - ud_obj.mnemonic == UD_Ijmp))) - break; - } - -#if 0 - /* Print GDB command, useful to verify udis86 output */ - debug_printf("disassemble %p %p\n", func, (void*)(uintptr_t)ud_obj.pc); -#endif - - debug_printf("\n"); -#else - (void)func; -#endif -} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp new file mode 100644 index 0000000000..bb2c82fe0e --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp @@ -0,0 +1,357 @@ +/************************************************************************** + * + * Copyright 2009-2011 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <llvm-c/Core.h> +#include <llvm/Target/TargetMachine.h> +#include <llvm/Target/TargetRegistry.h> +#include <llvm/Target/TargetSelect.h> +#include <llvm/Target/TargetInstrInfo.h> +#include <llvm/Support/raw_ostream.h> +#include <llvm/Support/MemoryObject.h> +#include <llvm/System/Host.h> + +#if HAVE_LLVM >= 0x0207 +#include <llvm/MC/MCDisassembler.h> +#include <llvm/MC/MCAsmInfo.h> +#include <llvm/MC/MCInst.h> +#include <llvm/MC/MCInstPrinter.h> +#endif /* HAVE_LLVM >= 0x0207 */ + +#include "util/u_math.h" +#include "util/u_debug.h" + +#include "lp_bld_debug.h" + + + +/** + * Check alignment. + * + * It is important that this check is not implemented as a macro or inlined + * function, as the compiler assumptions in respect to alignment of global + * and stack variables would often make the check a no op, defeating the + * whole purpose of the exercise. + */ +extern "C" boolean +lp_check_alignment(const void *ptr, unsigned alignment) +{ + assert(util_is_power_of_two(alignment)); + return ((uintptr_t)ptr & (alignment - 1)) == 0; +} + + +class raw_debug_ostream : + public llvm::raw_ostream +{ + uint64_t pos; + + void write_impl(const char *Ptr, size_t Size); + uint64_t current_pos() { return pos; } + uint64_t current_pos() const { return pos; } + +#if HAVE_LLVM >= 0x207 + uint64_t preferred_buffer_size() { return 512; } +#else + size_t preferred_buffer_size() { return 512; } +#endif +}; + + +void +raw_debug_ostream::write_impl(const char *Ptr, size_t Size) +{ + if (Size > 0) { + char *lastPtr = (char *)&Ptr[Size]; + char last = *lastPtr; + *lastPtr = 0; + _debug_printf("%*s", Size, Ptr); + *lastPtr = last; + pos += Size; + } +} + + +/** + * Same as LLVMDumpValue, but through our debugging channels. + */ +extern "C" void +lp_debug_dump_value(LLVMValueRef value) +{ +#if (defined(PIPE_OS_WINDOWS) && !defined(PIPE_CC_MSVC)) || defined(PIPE_OS_EMBDDED) + raw_debug_ostream os; + llvm::unwrap(value)->print(os); + os.flush(); +#else + LLVMDumpValue(value); +#endif +} + + +#if HAVE_LLVM >= 0x0207 +/* + * MemoryObject wrapper around a buffer of memory, to be used by MC + * disassembler. + */ +class BufferMemoryObject: + public llvm::MemoryObject +{ +private: + const uint8_t *Bytes; + uint64_t Length; +public: + BufferMemoryObject(const uint8_t *bytes, uint64_t length) : + Bytes(bytes), Length(length) + { + } + + uint64_t getBase() const + { + return 0; + } + + uint64_t getExtent() const + { + return Length; + } + + int readByte(uint64_t addr, uint8_t *byte) const + { + if (addr > getExtent()) + return -1; + *byte = Bytes[addr]; + return 0; + } +}; +#endif /* HAVE_LLVM >= 0x0207 */ + + +/* + * Disassemble a function, using the LLVM MC disassembler. + * + * See also: + * - http://blog.llvm.org/2010/01/x86-disassembler.html + * - http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html + */ +extern "C" void +lp_disassemble(const void* func) +{ +#if HAVE_LLVM >= 0x0207 + using namespace llvm; + + const uint8_t *bytes = (const uint8_t *)func; + + /* + * Limit disassembly to this extent + */ + const uint64_t extent = 0x10000; + + uint64_t max_pc = 0; + + /* + * Initialize all used objects. + */ + + std::string Triple = sys::getHostTriple(); + + std::string Error; + const Target *T = TargetRegistry::lookupTarget(Triple, Error); + +#if HAVE_LLVM >= 0x0208 + InitializeNativeTargetAsmPrinter(); +#else + InitializeAllAsmPrinters(); +#endif + + InitializeAllDisassemblers(); + + OwningPtr<const MCAsmInfo> AsmInfo(T->createAsmInfo(Triple)); + + if (!AsmInfo) { + debug_printf("error: no assembly info for target %s\n", Triple.c_str()); + return; + } + + OwningPtr<const MCDisassembler> DisAsm(T->createMCDisassembler()); + if (!DisAsm) { + debug_printf("error: no disassembler for target %s\n", Triple.c_str()); + return; + } + + raw_debug_ostream Out; + + int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); +#if HAVE_LLVM >= 0x0208 + OwningPtr<MCInstPrinter> Printer( + T->createMCInstPrinter(AsmPrinterVariant, *AsmInfo)); +#else + OwningPtr<MCInstPrinter> Printer( + T->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, Out)); +#endif + if (!Printer) { + debug_printf("error: no instruction printer for target %s\n", Triple.c_str()); + return; + } + + TargetMachine *TM = T->createTargetMachine(Triple, ""); + + const TargetInstrInfo *TII = TM->getInstrInfo(); + + /* + * Wrap the data in a MemoryObject + */ + BufferMemoryObject memoryObject((const uint8_t *)bytes, extent); + + uint64_t pc; + pc = 0; + while (true) { + MCInst Inst; + uint64_t Size; + + /* + * Print address. We use addresses relative to the start of the function, + * so that between runs. + */ + + debug_printf("%6lu:\t", (unsigned long)pc); + + if (!DisAsm->getInstruction(Inst, Size, memoryObject, + pc, + nulls())) { + debug_printf("invalid\n"); + pc += 1; + } + + /* + * Output the bytes in hexidecimal format. + */ + + if (0) { + unsigned i; + for (i = 0; i < Size; ++i) { + debug_printf("%02x ", ((const uint8_t*)bytes)[pc + i]); + } + for (; i < 16; ++i) { + debug_printf(" "); + } + } + + /* + * Print the instruction. + */ + +#if HAVE_LLVM >= 0x208 + Printer->printInst(&Inst, Out); +#else + Printer->printInst(&Inst); +#endif + Out.flush(); + + /* + * Advance. + */ + + pc += Size; + + const TargetInstrDesc &TID = TII->get(Inst.getOpcode()); + + /* + * Keep track of forward jumps to a nearby address. + */ + + if (TID.isBranch()) { + for (unsigned i = 0; i < Inst.getNumOperands(); ++i) { + const MCOperand &operand = Inst.getOperand(i); + if (operand.isImm()) { + uint64_t jump; + + /* + * FIXME: Handle both relative and absolute addresses correctly. + * EDInstInfo actually has this info, but operandTypes and + * operandFlags enums are not exposed in the public interface. + */ + + if (1) { + /* + * PC relative addr. + */ + + jump = pc + operand.getImm(); + } else { + /* + * Absolute addr. + */ + + jump = (uint64_t)operand.getImm(); + } + + /* + * Output the address relative to the function start, given + * that MC will print the addresses relative the current pc. + */ + debug_printf("\t\t; %lu", (unsigned long)jump); + + /* + * Ignore far jumps given it could be actually a tail return to + * a random address. + */ + + if (jump > max_pc && + jump < extent) { + max_pc = jump; + } + } + } + } + + debug_printf("\n"); + + /* + * Stop disassembling on return statements, if there is no record of a + * jump to a successive address. + */ + + if (TID.isReturn()) { + if (pc > max_pc) { + break; + } + } + } + + /* + * Print GDB command, useful to verify output. + */ + + if (0) { + debug_printf("disassemble %p %p\n", bytes, bytes + pc); + } + + debug_printf("\n"); +#else /* HAVE_LLVM < 0x0207 */ + (void)func; +#endif /* HAVE_LLVM < 0x0207 */ +} + diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h index 8a58f95b78..da873f30b2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h @@ -45,6 +45,11 @@ #define GALLIVM_DEBUG_GC (1 << 6) +#ifdef __cplusplus +extern "C" { +#endif + + #ifdef DEBUG extern unsigned gallivm_debug; #else @@ -81,4 +86,9 @@ void lp_disassemble(const void* func); +#ifdef __cplusplus +} +#endif + + #endif /* !LP_BLD_DEBUG_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index 75d2e666f0..82ab19eda1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -36,6 +36,7 @@ #include "util/u_format.h" #include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_pointer.h" #include "util/u_string.h" #include "lp_bld_arit.h" @@ -511,8 +512,6 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, * or incentive to optimize. */ - LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder))); - char name[256]; LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); LLVMTypeRef pi8t = LLVMPointerType(i8t, 0); LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); @@ -522,19 +521,20 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, LLVMValueRef res; unsigned k; - util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm", - format_desc->short_name); - if (gallivm_debug & GALLIVM_DEBUG_PERF) { - debug_printf("%s: falling back to %s\n", __FUNCTION__, name); + debug_printf("%s: falling back to util_format_%s_fetch_rgba_8unorm\n", + __FUNCTION__, format_desc->short_name); } /* * Declare and bind format_desc->fetch_rgba_8unorm(). */ - function = LLVMGetNamedFunction(module, name); - if (!function) { + { + /* + * Function to call looks like: + * fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) + */ LLVMTypeRef ret_type; LLVMTypeRef arg_types[4]; LLVMTypeRef function_type; @@ -542,17 +542,19 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, ret_type = LLVMVoidTypeInContext(gallivm->context); arg_types[0] = pi8t; arg_types[1] = pi8t; - arg_types[3] = arg_types[2] = LLVMIntTypeInContext(gallivm->context, sizeof(unsigned) * 8); - function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); - function = LLVMAddFunction(module, name, function_type); - - LLVMSetFunctionCallConv(function, LLVMCCallConv); - LLVMSetLinkage(function, LLVMExternalLinkage); - - assert(LLVMIsDeclaration(function)); - - LLVMAddGlobalMapping(gallivm->engine, function, - func_to_pointer((func_pointer)format_desc->fetch_rgba_8unorm)); + arg_types[2] = i32t; + arg_types[3] = i32t; + function_type = LLVMFunctionType(ret_type, arg_types, + Elements(arg_types), 0); + + /* make const pointer for the C fetch_rgba_8unorm function */ + function = lp_build_const_int_pointer(gallivm, + func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm)); + + /* cast the callee pointer to the function's type */ + function = LLVMBuildBitCast(builder, function, + LLVMPointerType(function_type, 0), + "cast callee"); } tmp_ptr = lp_build_alloca(gallivm, i32t, ""); @@ -614,48 +616,55 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, * or incentive to optimize. */ - LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); - char name[256]; LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context); LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4); LLVMTypeRef pf32t = LLVMPointerType(f32t, 0); + LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); + LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); LLVMValueRef function; LLVMValueRef tmp_ptr; LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; LLVMValueRef res; unsigned k; - util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", - format_desc->short_name); - if (gallivm_debug & GALLIVM_DEBUG_PERF) { - debug_printf("%s: falling back to %s\n", __FUNCTION__, name); + debug_printf("%s: falling back to util_format_%s_fetch_rgba_float\n", + __FUNCTION__, format_desc->short_name); } /* * Declare and bind format_desc->fetch_rgba_float(). */ - function = LLVMGetNamedFunction(module, name); - if (!function) { + { + /* + * Function to call looks like: + * fetch(float *dst, const uint8_t *src, unsigned i, unsigned j) + */ LLVMTypeRef ret_type; LLVMTypeRef arg_types[4]; LLVMTypeRef function_type; ret_type = LLVMVoidTypeInContext(gallivm->context); arg_types[0] = pf32t; - arg_types[1] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); - arg_types[3] = arg_types[2] = LLVMIntTypeInContext(gallivm->context, sizeof(unsigned) * 8); - function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); - function = LLVMAddFunction(module, name, function_type); + arg_types[1] = pi8t; + arg_types[2] = i32t; + arg_types[3] = i32t; + function_type = LLVMFunctionType(ret_type, arg_types, + Elements(arg_types), 0); - LLVMSetFunctionCallConv(function, LLVMCCallConv); - LLVMSetLinkage(function, LLVMExternalLinkage); + /* Note: we're using this casting here instead of LLVMAddGlobalMapping() + * to work around a bug in LLVM 2.6, and for efficiency/simplicity. + */ - assert(LLVMIsDeclaration(function)); + /* make const pointer for the C fetch_rgba_float function */ + function = lp_build_const_int_pointer(gallivm, + func_to_pointer((func_pointer) format_desc->fetch_rgba_float)); - LLVMAddGlobalMapping(gallivm->engine, function, - func_to_pointer((func_pointer)format_desc->fetch_rgba_float)); + /* cast the callee pointer to the function's type */ + function = LLVMBuildBitCast(builder, function, + LLVMPointerType(function_type, 0), + "cast callee"); } tmp_ptr = lp_build_alloca(gallivm, f32x4t, ""); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index efe8d38b8f..45addee8fa 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -30,6 +30,7 @@ #include "util/u_cpu_detect.h" #include "util/u_debug.h" #include "util/u_memory.h" +#include "util/u_simple_list.h" #include "lp_bld_debug.h" #include "lp_bld_init.h" @@ -221,11 +222,12 @@ free_gallivm_state(struct gallivm_state *gallivm) static boolean init_gallivm_state(struct gallivm_state *gallivm) { - assert(gallivm_initialized); assert(!gallivm->context); assert(!gallivm->module); assert(!gallivm->provider); + lp_build_init(); + gallivm->context = LLVMContextCreate(); if (!gallivm->context) goto fail; @@ -291,12 +293,12 @@ struct callback { garbage_collect_callback_func func; void *cb_data; + struct callback *prev, *next; }; -#define MAX_CALLBACKS 32 -static struct callback Callbacks[MAX_CALLBACKS]; -static unsigned NumCallbacks = 0; +/** list of all garbage collector callbacks */ +static struct callback callback_list = {NULL, NULL, NULL, NULL}; /** @@ -307,20 +309,24 @@ void gallivm_register_garbage_collector_callback(garbage_collect_callback_func func, void *cb_data) { - unsigned i; + struct callback *cb; + + if (!callback_list.prev) { + make_empty_list(&callback_list); + } - for (i = 0; i < NumCallbacks; i++) { - if (Callbacks[i].func == func && Callbacks[i].cb_data == cb_data) { - /* already in list: no-op */ + /* see if already in list */ + foreach(cb, &callback_list) { + if (cb->func == func && cb->cb_data == cb_data) return; - } } - assert(NumCallbacks < MAX_CALLBACKS); - if (NumCallbacks < MAX_CALLBACKS) { - Callbacks[NumCallbacks].func = func; - Callbacks[NumCallbacks].cb_data = cb_data; - NumCallbacks++; + /* add to list */ + cb = CALLOC_STRUCT(callback); + if (cb) { + cb->func = func; + cb->cb_data = cb_data; + insert_at_head(&callback_list, cb); } } @@ -332,15 +338,13 @@ void gallivm_remove_garbage_collector_callback(garbage_collect_callback_func func, void *cb_data) { - unsigned i; - - for (i = 0; i < NumCallbacks; i++) { - if (Callbacks[i].func == func && Callbacks[i].cb_data == cb_data) { - /* found, now remove it */ - NumCallbacks--; - for ( ; i < NumCallbacks; i++) { - Callbacks[i] = Callbacks[i + 1]; - } + struct callback *cb; + + /* search list */ + foreach(cb, &callback_list) { + if (cb->func == func && cb->cb_data == cb_data) { + /* found, remove it */ + remove_from_list(cb); return; } } @@ -354,10 +358,9 @@ gallivm_remove_garbage_collector_callback(garbage_collect_callback_func func, static void call_garbage_collector_callbacks(void) { - unsigned i; - - for (i = 0; i < NumCallbacks; i++) { - Callbacks[i].func(Callbacks[i].cb_data); + struct callback *cb; + foreach(cb, &callback_list) { + cb->func(cb->cb_data); } } @@ -385,6 +388,9 @@ gallivm_garbage_collect(struct gallivm_state *gallivm) void lp_build_init(void) { + if (gallivm_initialized) + return; + #ifdef DEBUG gallivm_debug = debug_get_option_gallivm_debug(); #endif diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index f56ddee7fd..843a14a500 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -46,66 +46,6 @@ #include "util/u_debug.h" -#if (defined(PIPE_OS_WINDOWS) && !defined(PIPE_CC_MSVC)) || defined(PIPE_OS_EMBDDED) - -#include "llvm/Support/raw_ostream.h" - -class raw_debug_ostream : - public llvm::raw_ostream -{ - uint64_t pos; - - void write_impl(const char *Ptr, size_t Size); - uint64_t current_pos() { return pos; } - uint64_t current_pos() const { return pos; } - -#if HAVE_LLVM >= 0x207 - uint64_t preferred_buffer_size() { return 512; } -#else - size_t preferred_buffer_size() { return 512; } -#endif -}; - - -void -raw_debug_ostream::write_impl(const char *Ptr, size_t Size) -{ - if (Size > 0) { - char *lastPtr = (char *)&Ptr[Size]; - char last = *lastPtr; - *lastPtr = 0; - _debug_printf("%*s", Size, Ptr); - *lastPtr = last; - pos += Size; - } -} - - -/** - * Same as LLVMDumpValue, but through our debugging channels. - */ -extern "C" void -lp_debug_dump_value(LLVMValueRef value) -{ - raw_debug_ostream os; - llvm::unwrap(value)->print(os); - os.flush(); -} - - -#else - - -extern "C" void -lp_debug_dump_value(LLVMValueRef value) -{ - LLVMDumpValue(value); -} - - -#endif - - /** * Register the engine with oprofile. * @@ -144,6 +84,7 @@ lp_set_target_options(void) llvm::UnsafeFPMath = true; #endif +#if HAVE_LLVM < 0x0209 /* * LLVM will generate MMX instructions for vectors <= 64 bits, leading to * innefficient code, and in 32bit systems, to the corruption of the FPU @@ -162,6 +103,7 @@ lp_set_target_options(void) llvm::cl::ParseCommandLineOptions(2, const_cast<char**>(options)); first = FALSE; } +#endif /* * By default LLVM adds a signal handler to output a pretty stack trace. diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c index 991f6fa5ef..e61cf9541e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c @@ -1098,6 +1098,4 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, texel_out[2] = unswizzled[2]; texel_out[3] = unswizzled[3]; } - - apply_sampler_swizzle(bld, texel_out); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index cf46e2be83..9961ba08f3 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -187,8 +187,6 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, border_chan_vec, texel_out[chan]); } } - - apply_sampler_swizzle(bld, texel_out); } @@ -834,14 +832,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMValueRef *colors_out) { LLVMBuilderRef builder = bld->gallivm->builder; - LLVMValueRef size0; - LLVMValueRef size1; - LLVMValueRef row_stride0_vec; - LLVMValueRef row_stride1_vec; - LLVMValueRef img_stride0_vec; - LLVMValueRef img_stride1_vec; - LLVMValueRef data_ptr0; - LLVMValueRef data_ptr1; + LLVMValueRef size0 = NULL; + LLVMValueRef size1 = NULL; + LLVMValueRef row_stride0_vec = NULL; + LLVMValueRef row_stride1_vec = NULL; + LLVMValueRef img_stride0_vec = NULL; + LLVMValueRef img_stride1_vec = NULL; + LLVMValueRef data_ptr0 = NULL; + LLVMValueRef data_ptr1 = NULL; LLVMValueRef colors0[4], colors1[4]; unsigned chan; @@ -1110,6 +1108,11 @@ lp_build_sample_compare(struct lp_build_sample_context *bld, coord, tex); } + /* Clamp p coords to [0,1] */ + p = lp_build_clamp(&bld->coord_bld, p, + bld->coord_bld.zero, + bld->coord_bld.one); + /* result = (p FUNC texel) ? 1 : 0 */ res = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]); @@ -1268,4 +1271,6 @@ lp_build_sample_soa(struct gallivm_state *gallivm, } lp_build_sample_compare(&bld, r, texel_out); + + apply_sampler_swizzle(&bld, texel_out); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 40186befb9..9713d10048 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -180,6 +180,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef consts_ptr, + LLVMValueRef system_values_array, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[4], LLVMValueRef (*outputs)[4], @@ -199,4 +200,11 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm, const struct tgsi_shader_info *info); +LLVMValueRef +lp_build_system_values_array(struct gallivm_state *gallivm, + const struct tgsi_shader_info *info, + LLVMValueRef instance_id, + LLVMValueRef facing); + + #endif /* LP_BLD_TGSI_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 1b5a8a5903..d1585c8e2b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -157,6 +157,8 @@ struct lp_build_tgsi_soa_context */ LLVMValueRef inputs_array; + LLVMValueRef system_values_array; + const struct tgsi_shader_info *info; /** bitmask indicating which register files are accessed indirectly */ unsigned indirect_files; @@ -759,6 +761,23 @@ emit_fetch( } break; + case TGSI_FILE_SYSTEM_VALUE: + assert(!reg->Register.Indirect); + { + LLVMValueRef index; /* index into the system value array */ + LLVMValueRef scalar, scalar_ptr; + + index = lp_build_const_int32(gallivm, + reg->Register.Index * 4 + swizzle); + + scalar_ptr = LLVMBuildGEP(builder, bld->system_values_array, + &index, 1, ""); + scalar = LLVMBuildLoad(builder, scalar_ptr, ""); + + res = lp_build_broadcast_scalar(&bld->base, scalar); + } + break; + default: assert(0 && "invalid src register in emit_fetch()"); return bld->base.undef; @@ -2322,6 +2341,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef consts_ptr, + LLVMValueRef system_values_array, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[NUM_CHANNELS], LLVMValueRef (*outputs)[NUM_CHANNELS], @@ -2411,6 +2431,8 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, } } + bld.system_values_array = system_values_array; + tgsi_parse_init( &parse, tokens ); while( !tgsi_parse_end_of_tokens( &parse ) ) { @@ -2512,3 +2534,54 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, FREE( bld.instructions ); } + +/** + * Build up the system values array out of individual values such as + * the instance ID, front-face, primitive ID, etc. The shader info is + * used to determine which system values are needed and where to put + * them in the system values array. + * + * XXX only instance ID is implemented at this time. + * + * The system values register file is similar to the constants buffer. + * Example declaration: + * DCL SV[0], INSTANCEID + * Example instruction: + * MOVE foo, SV[0].xxxx; + * + * \return LLVM float array (interpreted as float [][4]) + */ +LLVMValueRef +lp_build_system_values_array(struct gallivm_state *gallivm, + const struct tgsi_shader_info *info, + LLVMValueRef instance_id, + LLVMValueRef facing) +{ + LLVMValueRef size = lp_build_const_int32(gallivm, 4 * info->num_system_values); + LLVMTypeRef float_t = LLVMFloatTypeInContext(gallivm->context); + LLVMValueRef array = lp_build_array_alloca(gallivm, float_t, + size, "sysvals_array"); + unsigned i; + + for (i = 0; i < info->num_system_values; i++) { + LLVMValueRef index = lp_build_const_int32(gallivm, i * 4); + LLVMValueRef ptr, value; + + switch (info->system_value_semantic_name[i]) { + case TGSI_SEMANTIC_INSTANCEID: + /* convert instance ID from int to float */ + value = LLVMBuildSIToFP(gallivm->builder, instance_id, float_t, + "sysval_instanceid"); + break; + case TGSI_SEMANTIC_FACE: + /* fall-through */ + default: + assert(0 && "unexpected semantic in build_system_values_array()"); + } + + ptr = LLVMBuildGEP(gallivm->builder, array, &index, 1, ""); + LLVMBuildStore(gallivm->builder, value, ptr); + } + + return array; +} diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h index 64e3869a74..3ad7ce645d 100644 --- a/src/gallium/auxiliary/os/os_thread.h +++ b/src/gallium/auxiliary/os/os_thread.h @@ -152,8 +152,9 @@ static INLINE int pipe_thread_destroy( pipe_thread thread ) */ typedef CRITICAL_SECTION pipe_mutex; +/* http://locklessinc.com/articles/pthreads_on_windows/ */ #define pipe_static_mutex(mutex) \ - /*static*/ pipe_mutex mutex = {0,0,0,0,0,0} + static pipe_mutex mutex = {(PCRITICAL_SECTION_DEBUG)-1, -1, 0, 0, 0, 0} #define pipe_mutex_init(mutex) \ InitializeCriticalSection(&mutex) diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c index c2322eed19..5754f47618 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c @@ -174,11 +174,20 @@ pb_malloc_bufmgr_destroy(struct pb_manager *mgr) } +static boolean +pb_malloc_bufmgr_is_buffer_busy( struct pb_manager *mgr, + struct pb_buffer *buf ) +{ + return FALSE; +} + + static struct pb_manager pb_malloc_bufmgr = { pb_malloc_bufmgr_destroy, pb_malloc_bufmgr_create_buffer, - pb_malloc_bufmgr_flush + pb_malloc_bufmgr_flush, + pb_malloc_bufmgr_is_buffer_busy }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 2ef02160f2..960068c494 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -82,6 +82,10 @@ struct pb_manager */ void (*flush)( struct pb_manager *mgr ); + + boolean + (*is_buffer_busy)( struct pb_manager *mgr, + struct pb_buffer *buf ); }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index a6eb403962..25accefa8d 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -227,8 +227,6 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf, pb_size size, const struct pb_desc *desc) { - void *map; - if(buf->base.base.size < size) return 0; @@ -242,13 +240,18 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf, if(!pb_check_usage(desc->usage, buf->base.base.usage)) return 0; - map = pb_map(buf->buffer, PB_USAGE_DONTBLOCK, NULL); - if (!map) { - return -1; + if (buf->mgr->provider->is_buffer_busy) { + if (buf->mgr->provider->is_buffer_busy(buf->mgr->provider, buf->buffer)) + return -1; + } else { + void *ptr = pb_map(buf->buffer, PB_USAGE_DONTBLOCK, NULL); + + if (!ptr) + return -1; + + pb_unmap(buf->buffer); } - pb_unmap(buf->buffer); - return 1; } diff --git a/src/gallium/auxiliary/rbug/rbug_context.c b/src/gallium/auxiliary/rbug/rbug_context.c index a3fd7e8430..36824058c7 100644 --- a/src/gallium/auxiliary/rbug/rbug_context.c +++ b/src/gallium/auxiliary/rbug/rbug_context.c @@ -288,7 +288,6 @@ int rbug_send_context_draw_rule(struct rbug_connection *__con, int rbug_send_context_flush(struct rbug_connection *__con, rbug_context_t context, - int32_t flags, uint32_t *__serial) { uint32_t __len = 0; @@ -298,7 +297,6 @@ int rbug_send_context_flush(struct rbug_connection *__con, LEN(8); /* header */ LEN(8); /* context */ - LEN(4); /* flags */ /* align */ PAD(__len, 8); @@ -310,7 +308,6 @@ int rbug_send_context_flush(struct rbug_connection *__con, WRITE(4, int32_t, ((int32_t)RBUG_OP_CONTEXT_FLUSH)); WRITE(4, uint32_t, ((uint32_t)(__len / 4))); WRITE(8, rbug_context_t, context); /* context */ - WRITE(4, int32_t, flags); /* flags */ /* final pad */ PAD(__pos, 8); @@ -663,7 +660,6 @@ struct rbug_proto_context_flush * rbug_demarshal_context_flush(struct rbug_proto ret->header.opcode = header->opcode; READ(8, rbug_context_t, context); /* context */ - READ(4, int32_t, flags); /* flags */ return ret; } diff --git a/src/gallium/auxiliary/rbug/rbug_context.h b/src/gallium/auxiliary/rbug/rbug_context.h index 03126d6b12..4a865c25fc 100644 --- a/src/gallium/auxiliary/rbug/rbug_context.h +++ b/src/gallium/auxiliary/rbug/rbug_context.h @@ -96,7 +96,6 @@ struct rbug_proto_context_flush { struct rbug_header header; rbug_context_t context; - int32_t flags; }; struct rbug_proto_context_list_reply @@ -162,7 +161,6 @@ int rbug_send_context_draw_rule(struct rbug_connection *__con, int rbug_send_context_flush(struct rbug_connection *__con, rbug_context_t context, - int32_t flags, uint32_t *__serial); int rbug_send_context_list_reply(struct rbug_connection *__con, diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 75b0f6a68e..b03dd3a0cf 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -2132,7 +2132,8 @@ struct x86_reg x86_fn_arg( struct x86_function *p, return x86_make_disp(x86_make_reg(file_REG32, reg_SP), p->stack_offset + arg * 4); /* ??? */ default: - abort(); + assert(0 && "Unexpected x86 target ABI in x86_fn_arg"); + return x86_make_reg(file_REG32, reg_CX); /* not used / silence warning */ } } diff --git a/src/gallium/auxiliary/target-helpers/inline_noop_helper.h b/src/gallium/auxiliary/target-helpers/inline_noop_helper.h new file mode 100644 index 0000000000..77c7cfd0c2 --- /dev/null +++ b/src/gallium/auxiliary/target-helpers/inline_noop_helper.h @@ -0,0 +1,51 @@ + +#ifndef INLINE_DEBUG_HELPER_H +#define INLINE_DEBUG_HELPER_H + +#include "pipe/p_compiler.h" +#include "util/u_debug.h" + + +/* Helper function to wrap a screen with + * one or more debug driver: rbug, trace. + */ + +#ifdef GALLIUM_TRACE +#include "trace/tr_public.h" +#endif + +#ifdef GALLIUM_RBUG +#include "rbug/rbug_public.h" +#endif + +#ifdef GALLIUM_GALAHAD +#include "galahad/glhd_public.h" +#endif + +#ifdef GALLIUM_NOOP +#include "noop/noop_public.h" +#endif + +static INLINE struct pipe_screen * +debug_screen_wrap(struct pipe_screen *screen) +{ +#if defined(GALLIUM_RBUG) + screen = rbug_screen_create(screen); +#endif + +#if defined(GALLIUM_TRACE) + screen = trace_screen_create(screen); +#endif + +#if defined(GALLIUM_GALAHAD) + screen = galahad_screen_create(screen); +#endif + +#if defined(GALLIUM_NOOP) + screen = noop_screen_create(screen); +#endif + + return screen; +} + +#endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 16a205f206..d2cb98e84a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -26,6 +26,7 @@ **************************************************************************/ #include "util/u_debug.h" +#include "pipe/p_format.h" #include "pipe/p_shader_tokens.h" #include "tgsi_build.h" #include "tgsi_parse.h" @@ -226,6 +227,45 @@ tgsi_build_declaration_semantic( return ds; } + +static struct tgsi_declaration_resource +tgsi_default_declaration_resource(void) +{ + struct tgsi_declaration_resource declaration_resource; + + declaration_resource.Resource = TGSI_TEXTURE_UNKNOWN; + declaration_resource.ReturnTypeX = PIPE_TYPE_UNORM; + declaration_resource.ReturnTypeY = PIPE_TYPE_UNORM; + declaration_resource.ReturnTypeZ = PIPE_TYPE_UNORM; + declaration_resource.ReturnTypeW = PIPE_TYPE_UNORM; + + return declaration_resource; +} + +static struct tgsi_declaration_resource +tgsi_build_declaration_resource(unsigned texture, + unsigned return_type_x, + unsigned return_type_y, + unsigned return_type_z, + unsigned return_type_w, + struct tgsi_declaration *declaration, + struct tgsi_header *header) +{ + struct tgsi_declaration_resource declaration_resource; + + declaration_resource = tgsi_default_declaration_resource(); + declaration_resource.Resource = texture; + declaration_resource.ReturnTypeX = return_type_x; + declaration_resource.ReturnTypeY = return_type_y; + declaration_resource.ReturnTypeZ = return_type_z; + declaration_resource.ReturnTypeW = return_type_w; + + declaration_grow(declaration, header); + + return declaration_resource; +} + + struct tgsi_full_declaration tgsi_default_full_declaration( void ) { @@ -235,6 +275,7 @@ tgsi_default_full_declaration( void ) full_declaration.Range = tgsi_default_declaration_range(); full_declaration.Semantic = tgsi_default_declaration_semantic(); full_declaration.ImmediateData.u = NULL; + full_declaration.Resource = tgsi_default_declaration_resource(); return full_declaration; } @@ -324,6 +365,24 @@ tgsi_build_full_declaration( } } + if (full_decl->Declaration.File == TGSI_FILE_RESOURCE) { + struct tgsi_declaration_resource *dr; + + if (maxsize <= size) { + return 0; + } + dr = (struct tgsi_declaration_resource *)&tokens[size]; + size++; + + *dr = tgsi_build_declaration_resource(full_decl->Resource.Resource, + full_decl->Resource.ReturnTypeX, + full_decl->Resource.ReturnTypeY, + full_decl->Resource.ReturnTypeZ, + full_decl->Resource.ReturnTypeW, + declaration, + header); + } + return size; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 77bde86684..c12662076b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -104,7 +104,8 @@ tgsi_file_names[TGSI_FILE_COUNT] = "PRED", "SV", "IMMX", - "TEMPX" + "TEMPX", + "RES" }; static const char *interpolate_names[] = @@ -138,7 +139,7 @@ static const char *immediate_type_names[] = }; const char * -tgsi_swizzle_names[] = +tgsi_swizzle_names[4] = { "x", "y", @@ -147,7 +148,7 @@ tgsi_swizzle_names[] = }; const char * -tgsi_texture_names[] = +tgsi_texture_names[TGSI_TEXTURE_COUNT] = { "UNKNOWN", "1D", @@ -157,19 +158,31 @@ tgsi_texture_names[] = "RECT", "SHADOW1D", "SHADOW2D", - "SHADOWRECT" + "SHADOWRECT", + "1DARRAY", + "2DARRAY" }; -static const char *property_names[] = +const char *tgsi_property_names[TGSI_PROPERTY_COUNT] = { "GS_INPUT_PRIMITIVE", "GS_OUTPUT_PRIMITIVE", "GS_MAX_OUTPUT_VERTICES", "FS_COORD_ORIGIN", - "FS_COORD_PIXEL_CENTER" + "FS_COORD_PIXEL_CENTER", + "FS_COLOR0_WRITES_ALL_CBUFS", }; -static const char *primitive_names[] = +static const char *tgsi_type_names[] = +{ + "UNORM", + "SNORM", + "SINT", + "UINT", + "FLOAT" +}; + +const char *tgsi_primitive_names[PIPE_PRIM_MAX] = { "POINTS", "LINES", @@ -187,13 +200,13 @@ static const char *primitive_names[] = "TRIANGLE_STRIP_ADJACENCY" }; -static const char *fs_coord_origin_names[] = +const char *tgsi_fs_coord_origin_names[2] = { "UPPER_LEFT", "LOWER_LEFT" }; -static const char *fs_coord_pixel_center_names[] = +const char *tgsi_fs_coord_pixel_center_names[2] = { "HALF_INTEGER", "INTEGER" @@ -392,6 +405,26 @@ iter_declaration( } } + if (decl->Declaration.File == TGSI_FILE_RESOURCE) { + TXT(", "); + ENM(decl->Resource.Resource, tgsi_texture_names); + TXT(", "); + if ((decl->Resource.ReturnTypeX == decl->Resource.ReturnTypeY) && + (decl->Resource.ReturnTypeX == decl->Resource.ReturnTypeZ) && + (decl->Resource.ReturnTypeX == decl->Resource.ReturnTypeW)) { + ENM(decl->Resource.ReturnTypeX, tgsi_type_names); + } else { + ENM(decl->Resource.ReturnTypeX, tgsi_type_names); + TXT(", "); + ENM(decl->Resource.ReturnTypeY, tgsi_type_names); + TXT(", "); + ENM(decl->Resource.ReturnTypeZ, tgsi_type_names); + TXT(", "); + ENM(decl->Resource.ReturnTypeW, tgsi_type_names); + } + + } + if (iter->processor.Processor == TGSI_PROCESSOR_FRAGMENT && decl->Declaration.File == TGSI_FILE_INPUT) { @@ -484,10 +517,10 @@ iter_property( int i; struct dump_ctx *ctx = (struct dump_ctx *)iter; - assert(Elements(property_names) == TGSI_PROPERTY_COUNT); + assert(Elements(tgsi_property_names) == TGSI_PROPERTY_COUNT); TXT( "PROPERTY " ); - ENM(prop->Property.PropertyName, property_names); + ENM(prop->Property.PropertyName, tgsi_property_names); if (prop->Property.NrTokens > 1) TXT(" "); @@ -496,13 +529,13 @@ iter_property( switch (prop->Property.PropertyName) { case TGSI_PROPERTY_GS_INPUT_PRIM: case TGSI_PROPERTY_GS_OUTPUT_PRIM: - ENM(prop->u[i].Data, primitive_names); + ENM(prop->u[i].Data, tgsi_primitive_names); break; case TGSI_PROPERTY_FS_COORD_ORIGIN: - ENM(prop->u[i].Data, fs_coord_origin_names); + ENM(prop->u[i].Data, tgsi_fs_coord_origin_names); break; case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: - ENM(prop->u[i].Data, fs_coord_pixel_center_names); + ENM(prop->u[i].Data, tgsi_fs_coord_pixel_center_names); break; default: SID( prop->u[i].Data ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h index fc0429ad8d..2491e9198a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.h +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h @@ -29,6 +29,7 @@ #define TGSI_DUMP_H #include "pipe/p_compiler.h" +#include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #if defined __cplusplus @@ -39,10 +40,22 @@ extern const char * tgsi_file_names[TGSI_FILE_COUNT]; extern const char * -tgsi_swizzle_names[]; +tgsi_swizzle_names[4]; extern const char * -tgsi_texture_names[]; +tgsi_texture_names[TGSI_TEXTURE_COUNT]; + +extern const char * +tgsi_property_names[TGSI_PROPERTY_COUNT]; + +extern const char * +tgsi_primitive_names[PIPE_PRIM_MAX]; + +extern const char * +tgsi_fs_coord_origin_names[2]; + +extern const char * +tgsi_fs_coord_pixel_center_names[2]; void tgsi_dump_str( diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 7892a67f04..9cf74a838f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -672,6 +672,31 @@ tgsi_exec_machine_bind_shader( mach->Processor = parse.FullHeader.Processor.Processor; mach->ImmLimit = 0; + if (mach->Processor == TGSI_PROCESSOR_GEOMETRY && + !mach->UsedGeometryShader) { + struct tgsi_exec_vector *inputs = + align_malloc(sizeof(struct tgsi_exec_vector) * + TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS, + 16); + struct tgsi_exec_vector *outputs = + align_malloc(sizeof(struct tgsi_exec_vector) * + TGSI_MAX_TOTAL_VERTICES, 16); + + if (!inputs) + return; + if (!outputs) { + align_free(inputs); + return; + } + + align_free(mach->Inputs); + align_free(mach->Outputs); + + mach->Inputs = inputs; + mach->Outputs = outputs; + mach->UsedGeometryShader = TRUE; + } + declarations = (struct tgsi_full_declaration *) MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); @@ -801,6 +826,11 @@ tgsi_exec_machine_create( void ) mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; + mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16); + mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16); + if (!mach->Inputs || !mach->Outputs) + goto fail; + /* Setup constants needed by the SSE2 executor. */ for( i = 0; i < 4; i++ ) { mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000; @@ -824,7 +854,11 @@ tgsi_exec_machine_create( void ) return mach; fail: - align_free(mach); + if (mach) { + align_free(mach->Inputs); + align_free(mach->Outputs); + align_free(mach); + } return NULL; } @@ -836,10 +870,13 @@ tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) if (mach->Instructions) FREE(mach->Instructions); if (mach->Declarations) - FREE(mach->Declarations); - } + FREE(mach->Declarations); + + align_free(mach->Inputs); + align_free(mach->Outputs); - align_free(mach); + align_free(mach); + } } static void @@ -1038,7 +1075,6 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, break; case TGSI_FILE_INPUT: - case TGSI_FILE_SYSTEM_VALUE: for (i = 0; i < QUAD_SIZE; i++) { /* if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { @@ -1048,11 +1084,20 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, }*/ int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]; assert(pos >= 0); - assert(pos < Elements(mach->Inputs)); + assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS); chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i]; } break; + case TGSI_FILE_SYSTEM_VALUE: + /* XXX no swizzling at this point. Will be needed if we put + * gl_FragCoord, for example, in a sys value register. + */ + for (i = 0; i < QUAD_SIZE; i++) { + chan->f[i] = mach->SystemValue[index->i[i]][0]; + } + break; + case TGSI_FILE_TEMPORARY: for (i = 0; i < QUAD_SIZE; i++) { assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); @@ -1742,6 +1787,36 @@ exec_tex(struct tgsi_exec_machine *mach, &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; + case TGSI_TEXTURE_1D_ARRAY: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + } + + fetch_texel(mach->Samplers[unit], + &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */ + control, + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_2D_ARRAY: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + micro_div(&r[1], &r[1], &r[3]); + } + + fetch_texel(mach->Samplers[unit], + &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */ + control, + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + case TGSI_TEXTURE_3D: case TGSI_TEXTURE_CUBE: FETCH(&r[0], 0, CHAN_X); @@ -1835,6 +1910,164 @@ exec_txd(struct tgsi_exec_machine *mach, } + +static void +exec_sample(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + uint modifier) +{ + const uint resource_unit = inst->Src[1].Register.Index; + const uint sampler_unit = inst->Src[2].Register.Index; + union tgsi_exec_channel r[4]; + const union tgsi_exec_channel *lod = &ZeroVec; + enum tgsi_sampler_control control; + uint chan; + + if (modifier != TEX_MODIFIER_NONE) { + if (modifier == TEX_MODIFIER_LOD_BIAS) + FETCH(&r[3], 3, CHAN_X); + else /*TEX_MODIFIER_LOD*/ + FETCH(&r[3], 0, CHAN_W); + + if (modifier != TEX_MODIFIER_PROJECTED) { + lod = &r[3]; + } + } + + if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { + control = tgsi_sampler_lod_explicit; + } else { + control = tgsi_sampler_lod_bias; + } + + switch (mach->Resources[resource_unit].Resource) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + FETCH(&r[0], 0, CHAN_X); + + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + } + + fetch_texel(mach->Samplers[sampler_unit], + &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */ + control, + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + micro_div(&r[1], &r[1], &r[3]); + micro_div(&r[2], &r[2], &r[3]); + } + + fetch_texel(mach->Samplers[sampler_unit], + &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */ + control, + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + micro_div(&r[1], &r[1], &r[3]); + micro_div(&r[2], &r[2], &r[3]); + } + + fetch_texel(mach->Samplers[sampler_unit], + &r[0], &r[1], &r[2], lod, + control, + &r[0], &r[1], &r[2], &r[3]); + break; + + default: + assert(0); + } + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_sample_d(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + const uint resource_unit = inst->Src[1].Register.Index; + const uint sampler_unit = inst->Src[2].Register.Index; + union tgsi_exec_channel r[4]; + uint chan; + /* + * XXX: This is fake SAMPLE_D -- the derivatives are not taken into account, yet. + */ + + switch (mach->Resources[resource_unit].Resource) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + + FETCH(&r[0], 0, CHAN_X); + + fetch_texel(mach->Samplers[sampler_unit], + &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */ + tgsi_sampler_lod_bias, + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + fetch_texel(mach->Samplers[sampler_unit], + &r[0], &r[1], &r[2], &ZeroVec, /* inputs */ + tgsi_sampler_lod_bias, + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + fetch_texel(mach->Samplers[sampler_unit], + &r[0], &r[1], &r[2], &ZeroVec, + tgsi_sampler_lod_bias, + &r[0], &r[1], &r[2], &r[3]); + break; + + default: + assert(0); + } + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + + /** * Evaluate a constant-valued coefficient at the position of the * current quad. @@ -1906,9 +2139,13 @@ static void exec_declaration(struct tgsi_exec_machine *mach, const struct tgsi_full_declaration *decl) { + if (decl->Declaration.File == TGSI_FILE_RESOURCE) { + mach->Resources[decl->Range.First] = decl->Resource; + return; + } + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { - if (decl->Declaration.File == TGSI_FILE_INPUT || - decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { + if (decl->Declaration.File == TGSI_FILE_INPUT) { uint first, last, mask; first = decl->Range.First; @@ -1921,6 +2158,7 @@ exec_declaration(struct tgsi_exec_machine *mach, * ureg code to emit the right UsageMask value (WRITEMASK_X). * Then, we could remove the tgsi_exec_machine::Face field. */ + /* XXX make FACE a system value */ if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { uint i; @@ -1962,8 +2200,13 @@ exec_declaration(struct tgsi_exec_machine *mach, } } } + + if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { + mach->SysSemanticToIndex[decl->Declaration.Semantic] = decl->Range.First; + } } + typedef void (* micro_op)(union tgsi_exec_channel *dst); static void @@ -3675,6 +3918,54 @@ exec_instruction( exec_endswitch(mach); break; + case TGSI_OPCODE_LOAD: + assert(0); + break; + + case TGSI_OPCODE_LOAD_MS: + assert(0); + break; + + case TGSI_OPCODE_SAMPLE: + exec_sample(mach, inst, TEX_MODIFIER_NONE); + break; + + case TGSI_OPCODE_SAMPLE_B: + exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS); + break; + + case TGSI_OPCODE_SAMPLE_C: + exec_sample(mach, inst, TEX_MODIFIER_NONE); + break; + + case TGSI_OPCODE_SAMPLE_C_LZ: + exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS); + break; + + case TGSI_OPCODE_SAMPLE_D: + exec_sample_d(mach, inst); + break; + + case TGSI_OPCODE_SAMPLE_L: + exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD); + break; + + case TGSI_OPCODE_GATHER4: + assert(0); + break; + + case TGSI_OPCODE_RESINFO: + assert(0); + break; + + case TGSI_OPCODE_SAMPLE_POS: + assert(0); + break; + + case TGSI_OPCODE_SAMPLE_INFO: + assert(0); + break; + default: assert( 0 ); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index b5ebbfbfaa..33f33aa82c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -31,6 +31,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" #if defined __cplusplus extern "C" { @@ -181,6 +182,8 @@ struct tgsi_sampler /* The maximum total number of vertices */ #define TGSI_MAX_TOTAL_VERTICES (TGSI_MAX_PRIM_VERTICES * TGSI_MAX_PRIMITIVES * PIPE_MAX_ATTRIBS) +#define TGSI_MAX_MISC_INPUTS 8 + /** function call/activation record */ struct tgsi_call_record { @@ -225,8 +228,12 @@ struct tgsi_exec_machine float ImmArray[TGSI_EXEC_NUM_IMMEDIATES][4]; - struct tgsi_exec_vector Inputs[TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS]; - struct tgsi_exec_vector Outputs[TGSI_MAX_TOTAL_VERTICES]; + struct tgsi_exec_vector *Inputs; + struct tgsi_exec_vector *Outputs; + + /* System values */ + unsigned SysSemanticToIndex[TGSI_SEMANTIC_COUNT]; + float SystemValue[TGSI_MAX_MISC_INPUTS][4]; struct tgsi_exec_vector *Addrs; struct tgsi_exec_vector *Predicates; @@ -301,6 +308,9 @@ struct tgsi_exec_machine struct tgsi_full_declaration *Declarations; uint NumDeclarations; + struct tgsi_declaration_resource Resources[PIPE_MAX_SHADER_RESOURCES]; + + boolean UsedGeometryShader; }; struct tgsi_exec_machine * diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index e59e964ffa..14ed56a1f6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -175,7 +175,20 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 0, 1, 0, 0, 0, 0, "SWITCH", TGSI_OPCODE_SWITCH }, { 0, 1, 0, 0, 0, 0, "CASE", TGSI_OPCODE_CASE }, { 0, 0, 0, 0, 0, 0, "DEFAULT", TGSI_OPCODE_DEFAULT }, - { 0, 0, 0, 0, 0, 0, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH } + { 0, 0, 0, 0, 0, 0, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH }, + + { 1, 2, 0, 0, 0, 0, "LOAD", TGSI_OPCODE_LOAD }, + { 1, 2, 0, 0, 0, 0, "LOAD_MS", TGSI_OPCODE_LOAD_MS }, + { 1, 3, 0, 0, 0, 0, "SAMPLE", TGSI_OPCODE_SAMPLE }, + { 1, 4, 0, 0, 0, 0, "SAMPLE_B", TGSI_OPCODE_SAMPLE_B }, + { 1, 4, 0, 0, 0, 0, "SAMPLE_C", TGSI_OPCODE_SAMPLE_C }, + { 1, 4, 0, 0, 0, 0, "SAMPLE_C_LZ", TGSI_OPCODE_SAMPLE_C_LZ }, + { 1, 5, 0, 0, 0, 0, "SAMPLE_D", TGSI_OPCODE_SAMPLE_D }, + { 1, 3, 0, 0, 0, 0, "SAMPLE_L", TGSI_OPCODE_SAMPLE_L }, + { 1, 3, 0, 0, 0, 0, "GATHER4", TGSI_OPCODE_GATHER4 }, + { 1, 2, 0, 0, 0, 0, "RESINFO", TGSI_OPCODE_RESINFO }, + { 1, 2, 0, 0, 0, 0, "SAMPLE_POS", TGSI_OPCODE_SAMPLE_POS }, + { 1, 2, 0, 0, 0, 0, "SAMPLE_INFO", TGSI_OPCODE_SAMPLE_INFO }, }; const struct tgsi_opcode_info * diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index b3123ed016..b5d4504425 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -168,6 +168,19 @@ OP01(CASE) OP00(DEFAULT) OP00(ENDSWITCH) +OP12(LOAD) +OP12(LOAD_MS) +OP13(SAMPLE) +OP14(SAMPLE_B) +OP14(SAMPLE_C) +OP14(SAMPLE_C_LZ) +OP15(SAMPLE_D) +OP13(SAMPLE_L) +OP13(GATHER4) +OP12(RESINFO) +OP13(SAMPLE_POS) +OP12(SAMPLE_INFO) + #undef OP00 #undef OP01 @@ -180,6 +193,10 @@ OP00(ENDSWITCH) #undef OP14 #endif +#ifdef OP15 +#undef OP15 +#endif + #undef OP00_LBL #undef OP01_LBL diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index 1891203abe..fb36f9de32 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -128,6 +128,10 @@ tgsi_parse_token( } } + if (decl->Declaration.File == TGSI_FILE_RESOURCE) { + next_token(ctx, &decl->Resource); + } + break; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h index d4df585176..b7a3c9bc0e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -69,6 +69,7 @@ struct tgsi_full_declaration struct tgsi_declaration_dimension Dim; struct tgsi_declaration_semantic Semantic; struct tgsi_immediate_array_data ImmediateData; + struct tgsi_declaration_resource Resource; }; struct tgsi_full_immediate @@ -84,7 +85,7 @@ struct tgsi_full_property }; #define TGSI_FULL_MAX_DST_REGISTERS 2 -#define TGSI_FULL_MAX_SRC_REGISTERS 4 /* TXD has 4 */ +#define TGSI_FULL_MAX_SRC_REGISTERS 5 /* SAMPLE_D has 5 */ struct tgsi_full_instruction { @@ -136,7 +137,8 @@ tgsi_parse_token( static INLINE unsigned tgsi_num_tokens(const struct tgsi_token *tokens) { - struct tgsi_header header = *(const struct tgsi_header *) tokens; + struct tgsi_header header; + memcpy(&header, tokens, sizeof(header)); return header.HeaderSize + header.BodySize; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 3521847b61..537a0f6c5e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -294,7 +294,6 @@ emit_fetch(struct gen_context *gen, case TGSI_SWIZZLE_W: switch (reg->Register.File) { case TGSI_FILE_INPUT: - case TGSI_FILE_SYSTEM_VALUE: { int offset = (reg->Register.Index * 4 + swizzle) * 16; int offset_reg = emit_li_offset(gen, offset); @@ -302,6 +301,9 @@ emit_fetch(struct gen_context *gen, ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg); } break; + case TGSI_FILE_SYSTEM_VALUE: + assert(!"unhandled system value in tgsi_ppc.c"); + break; case TGSI_FILE_TEMPORARY: if (is_ppc_vec_temporary(reg)) { /* use PPC vec register */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index acbff103ef..509c534683 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -258,7 +258,8 @@ static const char *file_names[TGSI_FILE_COUNT] = "PRED", "SV", "IMMX", - "TEMPX" + "TEMPX", + "RES" }; static boolean diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/tgsi_sanity.h index 73f0f414e3..707d11ec6d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.h +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.h @@ -28,12 +28,14 @@ #ifndef TGSI_SANITY_H #define TGSI_SANITY_H -#include "pipe/p_shader_tokens.h" - #if defined __cplusplus extern "C" { #endif +#include "pipe/p_compiler.h" + +struct tgsi_token; + /* Check the given token stream for errors and common mistakes. * Diagnostic messages are printed out to the debug output, and is * controlled by the debug option TGSI_PRINT_SANITY (default false). diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 6585da3e83..83c6ac75e5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -143,7 +143,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->file_count[file]++; info->file_max[file] = MAX2(info->file_max[file], (int)reg); - if (file == TGSI_FILE_INPUT || file == TGSI_FILE_SYSTEM_VALUE) { + if (file == TGSI_FILE_INPUT) { info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name; info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index; info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate; @@ -151,6 +151,23 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Declaration.CylindricalWrap; info->num_inputs++; } + else if (file == TGSI_FILE_SYSTEM_VALUE) { + unsigned index = fulldecl->Range.First; + unsigned semName = fulldecl->Semantic.Name; + + info->system_value_semantic_name[index] = semName; + info->num_system_values = MAX2(info->num_system_values, + index + 1); + + /* + info->system_value_semantic_name[info->num_system_values++] = + fulldecl->Semantic.Name; + */ + + if (fulldecl->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { + info->uses_instanceid = TRUE; + } + } else if (file == TGSI_FILE_OUTPUT) { info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name; info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index; diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index 104097fbc0..53ab3d509d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -51,6 +51,9 @@ struct tgsi_shader_info ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; + ubyte num_system_values; + ubyte system_value_semantic_name[PIPE_MAX_SHADER_INPUTS]; + uint file_mask[TGSI_FILE_COUNT]; /**< bitmask of declared registers */ uint file_count[TGSI_FILE_COUNT]; /**< number of declared registers */ int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */ @@ -64,6 +67,7 @@ struct tgsi_shader_info boolean writes_stencil; /**< does fragment shader write stencil value? */ boolean writes_edgeflag; /**< vertex shader outputs edgeflag */ boolean uses_kill; /**< KIL or KILP instruction used? */ + boolean uses_instanceid; /** * Bitmask indicating which register files are accessed with diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 086d983a73..664946b00f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -28,7 +28,9 @@ #include "pipe/p_config.h" -#if defined(PIPE_ARCH_X86) +#include "tgsi/tgsi_sse2.h" + +#if defined(PIPE_ARCH_X86) && 0 /* See FIXME notes below */ #include "util/u_debug.h" #include "pipe/p_shader_tokens.h" @@ -42,7 +44,6 @@ #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_exec.h" -#include "tgsi/tgsi_sse2.h" #include "rtasm/rtasm_x86sse.h" @@ -118,6 +119,7 @@ get_machine_base( void ) static struct x86_reg get_input_base( void ) { + /* FIXME: tgsi_exec_machine::Inputs is a pointer now! */ return x86_make_disp( get_machine_base(), Offset(struct tgsi_exec_machine, Inputs) ); @@ -126,6 +128,7 @@ get_input_base( void ) static struct x86_reg get_output_base( void ) { + /* FIXME: tgsi_exec_machine::Ouputs is a pointer now! */ return x86_make_disp( get_machine_base(), Offset(struct tgsi_exec_machine, Outputs) ); @@ -163,6 +166,14 @@ get_immediate_base( void ) reg_DX ); } +static struct x86_reg +get_system_value_base( void ) +{ + return x86_make_disp( + get_machine_base(), + Offset(struct tgsi_exec_machine, SystemValue) ); +} + /** * Data access helpers. @@ -229,6 +240,16 @@ get_temp( } static struct x86_reg +get_system_value( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_system_value_base(), /* base */ + (vec * 4 + chan) * 4 ); /* byte offset from base */ +} + +static struct x86_reg get_coef( unsigned vec, unsigned chan, @@ -423,6 +444,30 @@ emit_tempf( } /** + * Copy a system value to xmm register + * \param xmm the destination xmm register + * \param vec the source system value register + * \param chan src channel to fetch (X, Y, Z or W) + */ +static void +emit_system_value( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movss( + func, + make_xmm( xmm ), + get_system_value( vec, chan ) ); + sse_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); +} + +/** * Load an xmm register with an input attrib coefficient (a0, dadx or dady) * \param xmm the destination xmm register * \param vec the src input/attribute coefficient index @@ -1281,8 +1326,15 @@ emit_fetch( swizzle ); break; - case TGSI_FILE_INPUT: case TGSI_FILE_SYSTEM_VALUE: + emit_system_value( + func, + xmm, + reg->Register.Index, + swizzle ); + break; + + case TGSI_FILE_INPUT: emit_inputf( func, xmm, @@ -1465,6 +1517,7 @@ emit_tex( struct x86_function *func, break; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_1D_ARRAY: count = 2; break; case TGSI_TEXTURE_SHADOW1D: @@ -1472,6 +1525,7 @@ emit_tex( struct x86_function *func, case TGSI_TEXTURE_SHADOWRECT: case TGSI_TEXTURE_3D: case TGSI_TEXTURE_CUBE: + case TGSI_TEXTURE_2D_ARRAY: count = 3; break; default: @@ -2636,8 +2690,7 @@ emit_declaration( struct x86_function *func, struct tgsi_full_declaration *decl ) { - if( decl->Declaration.File == TGSI_FILE_INPUT || - decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE ) { + if( decl->Declaration.File == TGSI_FILE_INPUT ) { unsigned first, last, mask; unsigned i, j; @@ -2710,6 +2763,7 @@ static void aos_to_soa( struct x86_function *func, x86_mov( func, aos_input, x86_fn_arg( func, arg_aos ) ); x86_mov( func, soa_input, x86_fn_arg( func, arg_machine ) ); + /* FIXME: tgsi_exec_machine::Inputs is a pointer now! */ x86_lea( func, soa_input, x86_make_disp( soa_input, Offset(struct tgsi_exec_machine, Inputs) ) ); @@ -2778,6 +2832,7 @@ static void soa_to_aos( struct x86_function *func, x86_mov( func, aos_output, x86_fn_arg( func, arg_aos ) ); x86_mov( func, soa_output, x86_fn_arg( func, arg_machine ) ); + /* FIXME: tgsi_exec_machine::Ouputs is a pointer now! */ x86_lea( func, soa_output, x86_make_disp( soa_output, Offset(struct tgsi_exec_machine, Outputs) ) ); @@ -3032,4 +3087,16 @@ tgsi_emit_sse2( return ok; } -#endif /* PIPE_ARCH_X86 */ +#else /* !PIPE_ARCH_X86 */ + +unsigned +tgsi_emit_sse2( + const struct tgsi_token *tokens, + struct x86_function *func, + float (*immediates)[4], + boolean do_swizzles ) +{ + return 0; +} + +#endif /* !PIPE_ARCH_X86 */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 9a38c37979..57622a0dea 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -36,6 +36,7 @@ #include "tgsi_parse.h" #include "tgsi_sanity.h" #include "tgsi_util.h" +#include "tgsi_dump.h" static boolean is_alpha_underscore( const char *cur ) { @@ -136,7 +137,7 @@ static boolean parse_identifier( const char **pcur, char *ret ) int i = 0; if (is_alpha_underscore( cur )) { ret[i++] = *cur++; - while (is_alpha_underscore( cur )) + while (is_alpha_underscore( cur ) || is_digit( cur )) ret[i++] = *cur++; ret[i++] = '\0'; *pcur = cur; @@ -282,7 +283,8 @@ static const char *file_names[TGSI_FILE_COUNT] = "PRED", "SV", "IMMX", - "TEMPX" + "TEMPX", + "RES" }; static boolean @@ -827,6 +829,15 @@ static const char *texture_names[TGSI_TEXTURE_COUNT] = "SHADOWRECT" }; +static const char *type_names[] = +{ + "UNORM", + "SNORM", + "SINT", + "UINT", + "FLOAT" +}; + static boolean match_inst_mnemonic(const char **pcur, const struct tgsi_opcode_info *info) @@ -1103,42 +1114,113 @@ static boolean parse_declaration( struct translate_ctx *ctx ) cur = ctx->cur; eat_opt_white( &cur ); if (*cur == ',' && !is_vs_input) { - uint i; + uint i, j; cur++; eat_opt_white( &cur ); - for (i = 0; i < TGSI_SEMANTIC_COUNT; i++) { - if (str_match_no_case( &cur, semantic_names[i] )) { - const char *cur2 = cur; - uint index; - - if (is_digit_alpha_underscore( cur )) - continue; - eat_opt_white( &cur2 ); - if (*cur2 == '[') { - cur2++; - eat_opt_white( &cur2 ); - if (!parse_uint( &cur2, &index )) { - report_error( ctx, "Expected literal integer" ); + if (file == TGSI_FILE_RESOURCE) { + for (i = 0; i < TGSI_TEXTURE_COUNT; i++) { + if (str_match_no_case(&cur, texture_names[i])) { + if (!is_digit_alpha_underscore(cur)) { + decl.Resource.Resource = i; + break; + } + } + } + if (i == TGSI_TEXTURE_COUNT) { + report_error(ctx, "Expected texture target"); + return FALSE; + } + eat_opt_white( &cur ); + if (*cur != ',') { + report_error( ctx, "Expected `,'" ); + return FALSE; + } + ++cur; + eat_opt_white( &cur ); + for (j = 0; j < 4; ++j) { + for (i = 0; i < PIPE_TYPE_COUNT; ++i) { + if (str_match_no_case(&cur, type_names[i])) { + if (!is_digit_alpha_underscore(cur)) { + switch (j) { + case 0: + decl.Resource.ReturnTypeX = i; + break; + case 1: + decl.Resource.ReturnTypeY = i; + break; + case 2: + decl.Resource.ReturnTypeZ = i; + break; + case 3: + decl.Resource.ReturnTypeW = i; + break; + default: + assert(0); + } + break; + } + } + } + if (i == PIPE_TYPE_COUNT) { + if (j == 0 || j > 2) { + report_error(ctx, "Expected type name"); return FALSE; } + break; + } else { + const char *cur2 = cur; eat_opt_white( &cur2 ); - if (*cur2 != ']') { - report_error( ctx, "Expected `]'" ); - return FALSE; + if (*cur2 == ',') { + cur2++; + eat_opt_white( &cur2 ); + cur = cur2; + continue; + } else + break; + } + } + if (j < 4) { + decl.Resource.ReturnTypeY = + decl.Resource.ReturnTypeZ = + decl.Resource.ReturnTypeW = + decl.Resource.ReturnTypeX; + } + ctx->cur = cur; + } else { + for (i = 0; i < TGSI_SEMANTIC_COUNT; i++) { + if (str_match_no_case( &cur, semantic_names[i] )) { + const char *cur2 = cur; + uint index; + + if (is_digit_alpha_underscore( cur )) + continue; + eat_opt_white( &cur2 ); + if (*cur2 == '[') { + cur2++; + eat_opt_white( &cur2 ); + if (!parse_uint( &cur2, &index )) { + report_error( ctx, "Expected literal integer" ); + return FALSE; + } + eat_opt_white( &cur2 ); + if (*cur2 != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + cur2++; + + decl.Semantic.Index = index; + + cur = cur2; } - cur2++; - decl.Semantic.Index = index; + decl.Declaration.Semantic = 1; + decl.Semantic.Name = i; - cur = cur2; + ctx->cur = cur; + break; } - - decl.Declaration.Semantic = 1; - decl.Semantic.Name = i; - - ctx->cur = cur; - break; } } } else if (is_imm_array) { @@ -1258,42 +1340,6 @@ static boolean parse_immediate( struct translate_ctx *ctx ) return TRUE; } -static const char *property_names[] = -{ - "GS_INPUT_PRIMITIVE", - "GS_OUTPUT_PRIMITIVE", - "GS_MAX_OUTPUT_VERTICES", - "FS_COORD_ORIGIN", - "FS_COORD_PIXEL_CENTER" -}; - -static const char *primitive_names[] = -{ - "POINTS", - "LINES", - "LINE_LOOP", - "LINE_STRIP", - "TRIANGLES", - "TRIANGLE_STRIP", - "TRIANGLE_FAN", - "QUADS", - "QUAD_STRIP", - "POLYGON" -}; - -static const char *fs_coord_origin_names[] = -{ - "UPPER_LEFT", - "LOWER_LEFT" -}; - -static const char *fs_coord_pixel_center_names[] = -{ - "HALF_INTEGER", - "INTEGER" -}; - - static boolean parse_primitive( const char **pcur, uint *primitive ) { @@ -1302,7 +1348,7 @@ parse_primitive( const char **pcur, uint *primitive ) for (i = 0; i < PIPE_PRIM_MAX; i++) { const char *cur = *pcur; - if (str_match_no_case( &cur, primitive_names[i])) { + if (str_match_no_case( &cur, tgsi_primitive_names[i])) { *primitive = i; *pcur = cur; return TRUE; @@ -1316,10 +1362,10 @@ parse_fs_coord_origin( const char **pcur, uint *fs_coord_origin ) { uint i; - for (i = 0; i < sizeof(fs_coord_origin_names) / sizeof(fs_coord_origin_names[0]); i++) { + for (i = 0; i < Elements(tgsi_fs_coord_origin_names); i++) { const char *cur = *pcur; - if (str_match_no_case( &cur, fs_coord_origin_names[i])) { + if (str_match_no_case( &cur, tgsi_fs_coord_origin_names[i])) { *fs_coord_origin = i; *pcur = cur; return TRUE; @@ -1333,10 +1379,10 @@ parse_fs_coord_pixel_center( const char **pcur, uint *fs_coord_pixel_center ) { uint i; - for (i = 0; i < sizeof(fs_coord_pixel_center_names) / sizeof(fs_coord_pixel_center_names[0]); i++) { + for (i = 0; i < Elements(tgsi_fs_coord_pixel_center_names); i++) { const char *cur = *pcur; - if (str_match_no_case( &cur, fs_coord_pixel_center_names[i])) { + if (str_match_no_case( &cur, tgsi_fs_coord_pixel_center_names[i])) { *fs_coord_pixel_center = i; *pcur = cur; return TRUE; @@ -1364,7 +1410,7 @@ static boolean parse_property( struct translate_ctx *ctx ) } for (property_name = 0; property_name < TGSI_PROPERTY_COUNT; ++property_name) { - if (streq_nocase_uprcase(property_names[property_name], id)) { + if (streq_nocase_uprcase(tgsi_property_names[property_name], id)) { break; } } @@ -1398,6 +1444,7 @@ static boolean parse_property( struct translate_ctx *ctx ) return FALSE; } break; + case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: default: if (!parse_uint(&ctx->cur, &values[0] )) { report_error( ctx, "Expected unsigned integer as property!" ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.h b/src/gallium/auxiliary/tgsi/tgsi_text.h index 8eeeeef140..5fe53cf007 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.h +++ b/src/gallium/auxiliary/tgsi/tgsi_text.h @@ -28,12 +28,14 @@ #ifndef TGSI_TEXT_H #define TGSI_TEXT_H -#include "pipe/p_shader_tokens.h" - #if defined __cplusplus extern "C" { #endif +#include "pipe/p_compiler.h" + +struct tgsi_token; + boolean tgsi_text_translate( const char *text, diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 7d13a17bdb..4564ab81f9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMware, Inc. + * Copyright 2009-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -47,6 +47,7 @@ union tgsi_any_token { struct tgsi_declaration_range decl_range; struct tgsi_declaration_dimension decl_dim; struct tgsi_declaration_semantic decl_semantic; + struct tgsi_declaration_resource decl_resource; struct tgsi_immediate imm; union tgsi_immediate_data imm_data; struct tgsi_instruction insn; @@ -137,6 +138,16 @@ struct ureg_program struct ureg_src sampler[PIPE_MAX_SAMPLERS]; unsigned nr_samplers; + struct { + unsigned index; + unsigned target; + unsigned return_type_x; + unsigned return_type_y; + unsigned return_type_z; + unsigned return_type_w; + } resource[PIPE_MAX_SHADER_RESOURCES]; + unsigned nr_resources; + unsigned temps_active[UREG_MAX_TEMP / 32]; unsigned nr_temps; @@ -148,6 +159,7 @@ struct ureg_program unsigned property_gs_max_vertices; unsigned char property_fs_coord_origin; /* = TGSI_FS_COORD_ORIGIN_* */ unsigned char property_fs_coord_pixel_center; /* = TGSI_FS_COORD_PIXEL_CENTER_* */ + unsigned char property_fs_color0_writes_all_cbufs; /* = TGSI_FS_COLOR0_WRITES_ALL_CBUFS * */ unsigned nr_addrs; unsigned nr_preds; @@ -284,7 +296,12 @@ ureg_property_fs_coord_pixel_center(struct ureg_program *ureg, ureg->property_fs_coord_pixel_center = fs_coord_pixel_center; } - +void +ureg_property_fs_color0_writes_all_cbufs(struct ureg_program *ureg, + unsigned fs_color0_writes_all_cbufs) +{ + ureg->property_fs_color0_writes_all_cbufs = fs_color0_writes_all_cbufs; +} struct ureg_src ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg, @@ -572,6 +589,41 @@ struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg, return ureg->sampler[0]; } +/* + * Allocate a new shader resource. + */ +struct ureg_src +ureg_DECL_resource(struct ureg_program *ureg, + unsigned index, + unsigned target, + unsigned return_type_x, + unsigned return_type_y, + unsigned return_type_z, + unsigned return_type_w) +{ + struct ureg_src reg = ureg_src_register(TGSI_FILE_RESOURCE, index); + uint i; + + for (i = 0; i < ureg->nr_resources; i++) { + if (ureg->resource[i].index == index) { + return reg; + } + } + + if (i < PIPE_MAX_SHADER_RESOURCES) { + ureg->resource[i].index = index; + ureg->resource[i].target = target; + ureg->resource[i].return_type_x = return_type_x; + ureg->resource[i].return_type_y = return_type_y; + ureg->resource[i].return_type_z = return_type_z; + ureg->resource[i].return_type_w = return_type_w; + ureg->nr_resources++; + return reg; + } + + assert(0); + return reg; +} static int match_or_expand_immediate( const unsigned *v, @@ -815,9 +867,10 @@ ureg_emit_dst( struct ureg_program *ureg, assert(dst.File != TGSI_FILE_CONSTANT); assert(dst.File != TGSI_FILE_INPUT); assert(dst.File != TGSI_FILE_SAMPLER); + assert(dst.File != TGSI_FILE_RESOURCE); assert(dst.File != TGSI_FILE_IMMEDIATE); assert(dst.File < TGSI_FILE_COUNT); - + out[n].value = 0; out[n].dst.File = dst.File; out[n].dst.WriteMask = dst.WriteMask; @@ -1200,6 +1253,36 @@ emit_decl_range2D(struct ureg_program *ureg, } static void +emit_decl_resource(struct ureg_program *ureg, + unsigned index, + unsigned target, + unsigned return_type_x, + unsigned return_type_y, + unsigned return_type_z, + unsigned return_type_w ) +{ + union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3); + + out[0].value = 0; + out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION; + out[0].decl.NrTokens = 3; + out[0].decl.File = TGSI_FILE_RESOURCE; + out[0].decl.UsageMask = 0xf; + out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT; + + out[1].value = 0; + out[1].decl_range.First = index; + out[1].decl_range.Last = index; + + out[2].value = 0; + out[2].decl_resource.Resource = target; + out[2].decl_resource.ReturnTypeX = return_type_x; + out[2].decl_resource.ReturnTypeY = return_type_y; + out[2].decl_resource.ReturnTypeZ = return_type_z; + out[2].decl_resource.ReturnTypeW = return_type_w; +} + +static void emit_immediate( struct ureg_program *ureg, const unsigned *v, unsigned type ) @@ -1278,6 +1361,14 @@ static void emit_decls( struct ureg_program *ureg ) ureg->property_fs_coord_pixel_center); } + if (ureg->property_fs_color0_writes_all_cbufs) { + assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT); + + emit_property(ureg, + TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, + ureg->property_fs_color0_writes_all_cbufs); + } + if (ureg->processor == TGSI_PROCESSOR_VERTEX) { for (i = 0; i < UREG_MAX_INPUT; i++) { if (ureg->vs_inputs[i/32] & (1 << (i%32))) { @@ -1327,6 +1418,16 @@ static void emit_decls( struct ureg_program *ureg ) ureg->sampler[i].Index, 1 ); } + for (i = 0; i < ureg->nr_resources; i++) { + emit_decl_resource(ureg, + ureg->resource[i].index, + ureg->resource[i].target, + ureg->resource[i].return_type_x, + ureg->resource[i].return_type_y, + ureg->resource[i].return_type_z, + ureg->resource[i].return_type_w); + } + if (ureg->const_decls.nr_constant_ranges) { for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) { emit_decl_range(ureg, diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index acc463200a..9d5553f0ea 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -43,24 +43,24 @@ struct ureg_program; */ struct ureg_src { - unsigned File : 4; /* TGSI_FILE_ */ - unsigned SwizzleX : 2; /* TGSI_SWIZZLE_ */ - unsigned SwizzleY : 2; /* TGSI_SWIZZLE_ */ - unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_ */ - unsigned SwizzleW : 2; /* TGSI_SWIZZLE_ */ - unsigned Indirect : 1; /* BOOL */ - unsigned DimIndirect : 1; /* BOOL */ - unsigned Dimension : 1; /* BOOL */ - unsigned Absolute : 1; /* BOOL */ - unsigned Negate : 1; /* BOOL */ - int Index : 16; /* SINT */ + unsigned File : 4; /* TGSI_FILE_ */ + unsigned SwizzleX : 2; /* TGSI_SWIZZLE_ */ + unsigned SwizzleY : 2; /* TGSI_SWIZZLE_ */ + unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_ */ + unsigned SwizzleW : 2; /* TGSI_SWIZZLE_ */ + unsigned Indirect : 1; /* BOOL */ + unsigned DimIndirect : 1; /* BOOL */ + unsigned Dimension : 1; /* BOOL */ + unsigned Absolute : 1; /* BOOL */ + unsigned Negate : 1; /* BOOL */ unsigned IndirectFile : 4; /* TGSI_FILE_ */ - int IndirectIndex : 16; /* SINT */ unsigned IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ - int DimensionIndex : 16; /* SINT */ unsigned DimIndFile : 4; /* TGSI_FILE_ */ - int DimIndIndex : 16; /* SINT */ unsigned DimIndSwizzle : 2; /* TGSI_SWIZZLE_ */ + int Index : 16; /* SINT */ + int IndirectIndex : 16; /* SINT */ + int DimensionIndex : 16; /* SINT */ + int DimIndIndex : 16; /* SINT */ }; /* Very similar to a tgsi_dst_register, removing unsupported fields @@ -153,6 +153,10 @@ void ureg_property_fs_coord_pixel_center(struct ureg_program *ureg, unsigned fs_coord_pixel_center); +void +ureg_property_fs_color0_writes_all_cbufs(struct ureg_program *ureg, + unsigned fs_color0_writes_all_cbufs); + /*********************************************************************** * Build shader declarations: */ @@ -266,6 +270,15 @@ struct ureg_src ureg_DECL_sampler( struct ureg_program *, unsigned index ); +struct ureg_src +ureg_DECL_resource(struct ureg_program *, + unsigned index, + unsigned target, + unsigned return_type_x, + unsigned return_type_y, + unsigned return_type_z, + unsigned return_type_w ); + static INLINE struct ureg_src ureg_imm4f( struct ureg_program *ureg, @@ -729,6 +742,66 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \ } +#define OP14( op ) \ +static INLINE void ureg_##op( struct ureg_program *ureg, \ + struct ureg_dst dst, \ + struct ureg_src src0, \ + struct ureg_src src1, \ + struct ureg_src src2, \ + struct ureg_src src3 ) \ +{ \ + unsigned opcode = TGSI_OPCODE_##op; \ + unsigned insn = ureg_emit_insn(ureg, \ + opcode, \ + dst.Saturate, \ + dst.Predicate, \ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ + 1, \ + 4).insn_token; \ + ureg_emit_dst( ureg, dst ); \ + ureg_emit_src( ureg, src0 ); \ + ureg_emit_src( ureg, src1 ); \ + ureg_emit_src( ureg, src2 ); \ + ureg_emit_src( ureg, src3 ); \ + ureg_fixup_insn_size( ureg, insn ); \ +} + + +#define OP15( op ) \ +static INLINE void ureg_##op( struct ureg_program *ureg, \ + struct ureg_dst dst, \ + struct ureg_src src0, \ + struct ureg_src src1, \ + struct ureg_src src2, \ + struct ureg_src src3, \ + struct ureg_src src4 ) \ +{ \ + unsigned opcode = TGSI_OPCODE_##op; \ + unsigned insn = ureg_emit_insn(ureg, \ + opcode, \ + dst.Saturate, \ + dst.Predicate, \ + dst.PredNegate, \ + dst.PredSwizzleX, \ + dst.PredSwizzleY, \ + dst.PredSwizzleZ, \ + dst.PredSwizzleW, \ + 1, \ + 5).insn_token; \ + ureg_emit_dst( ureg, dst ); \ + ureg_emit_src( ureg, src0 ); \ + ureg_emit_src( ureg, src1 ); \ + ureg_emit_src( ureg, src2 ); \ + ureg_emit_src( ureg, src3 ); \ + ureg_emit_src( ureg, src4 ); \ + ureg_fixup_insn_size( ureg, insn ); \ +} + + /* Use a template include to generate a correctly-typed ureg_OP() * function for each TGSI opcode: */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index 08e7e89bd6..aa9a886d4a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -280,7 +280,12 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, case TGSI_TEXTURE_CUBE: read_mask = TGSI_WRITEMASK_XYZ; break; - + case TGSI_TEXTURE_1D_ARRAY: + read_mask = TGSI_WRITEMASK_XY; + break; + case TGSI_TEXTURE_2D_ARRAY: + read_mask = TGSI_WRITEMASK_XYZ; + break; default: assert(0); read_mask = 0; diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c index 73287b667d..05e455388b 100644 --- a/src/gallium/auxiliary/translate/translate.c +++ b/src/gallium/auxiliary/translate/translate.c @@ -39,7 +39,7 @@ struct translate *translate_create( const struct translate_key *key ) struct translate *translate = NULL; #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - translate = translate_sse2_create( key ); + /*translate = translate_sse2_create( key );*/ if (translate) return translate; #else diff --git a/src/gallium/auxiliary/translate/translate_cache.c b/src/gallium/auxiliary/translate/translate_cache.c index d8069a149c..3f1ecb630f 100644 --- a/src/gallium/auxiliary/translate/translate_cache.c +++ b/src/gallium/auxiliary/translate/translate_cache.c @@ -40,6 +40,10 @@ struct translate_cache { struct translate_cache * translate_cache_create( void ) { struct translate_cache *cache = MALLOC_STRUCT(translate_cache); + if (cache == NULL) { + return NULL; + } + cache->hash = cso_hash_create(); return cache; } diff --git a/src/gallium/auxiliary/util/dbghelp.h b/src/gallium/auxiliary/util/dbghelp.h new file mode 100644 index 0000000000..bc7c53cf9f --- /dev/null +++ b/src/gallium/auxiliary/util/dbghelp.h @@ -0,0 +1,1265 @@ +/** + * This file has no copyright assigned and is placed in the Public Domain. + * This file is part of the w64 mingw-runtime package. + * No warranty is given; refer to the file DISCLAIMER.PD within this package. + */ +#ifndef _DBGHELP_ +#define _DBGHELP_ + +#ifdef _WIN64 +#ifndef _IMAGEHLP64 +#define _IMAGEHLP64 +#endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define IMAGEAPI DECLSPEC_IMPORT WINAPI +#define DBHLP_DEPRECIATED __declspec(deprecated) + +#define DBHLPAPI IMAGEAPI + +#define IMAGE_SEPARATION (64*1024) + + typedef struct _LOADED_IMAGE { + PSTR ModuleName; + HANDLE hFile; + PUCHAR MappedAddress; +#ifdef _IMAGEHLP64 + PIMAGE_NT_HEADERS64 FileHeader; +#else + PIMAGE_NT_HEADERS32 FileHeader; +#endif + PIMAGE_SECTION_HEADER LastRvaSection; + ULONG NumberOfSections; + PIMAGE_SECTION_HEADER Sections; + ULONG Characteristics; + BOOLEAN fSystemImage; + BOOLEAN fDOSImage; + LIST_ENTRY Links; + ULONG SizeOfImage; + } LOADED_IMAGE,*PLOADED_IMAGE; + +#define MAX_SYM_NAME 2000 + + typedef BOOL (CALLBACK *PFIND_DEBUG_FILE_CALLBACK)(HANDLE FileHandle,PSTR FileName,PVOID CallerData); + typedef BOOL (CALLBACK *PFINDFILEINPATHCALLBACK)(PSTR filename,PVOID context); + typedef BOOL (CALLBACK *PFIND_EXE_FILE_CALLBACK)(HANDLE FileHandle,PSTR FileName,PVOID CallerData); + + typedef BOOL (WINAPI *PSYMBOLSERVERPROC)(LPCSTR,LPCSTR,PVOID,DWORD,DWORD,LPSTR); + typedef BOOL (WINAPI *PSYMBOLSERVEROPENPROC)(VOID); + typedef BOOL (WINAPI *PSYMBOLSERVERCLOSEPROC)(VOID); + typedef BOOL (WINAPI *PSYMBOLSERVERSETOPTIONSPROC)(UINT_PTR,ULONG64); + typedef BOOL (CALLBACK WINAPI *PSYMBOLSERVERCALLBACKPROC)(UINT_PTR action,ULONG64 data,ULONG64 context); + typedef UINT_PTR (WINAPI *PSYMBOLSERVERGETOPTIONSPROC)(); + typedef BOOL (WINAPI *PSYMBOLSERVERPINGPROC)(LPCSTR); + + HANDLE IMAGEAPI FindDebugInfoFile(PSTR FileName,PSTR SymbolPath,PSTR DebugFilePath); + HANDLE IMAGEAPI FindDebugInfoFileEx(PSTR FileName,PSTR SymbolPath,PSTR DebugFilePath,PFIND_DEBUG_FILE_CALLBACK Callback,PVOID CallerData); + BOOL IMAGEAPI SymFindFileInPath(HANDLE hprocess,LPSTR SearchPath,LPSTR FileName,PVOID id,DWORD two,DWORD three,DWORD flags,LPSTR FoundFile,PFINDFILEINPATHCALLBACK callback,PVOID context); + HANDLE IMAGEAPI FindExecutableImage(PSTR FileName,PSTR SymbolPath,PSTR ImageFilePath); + HANDLE IMAGEAPI FindExecutableImageEx(PSTR FileName,PSTR SymbolPath,PSTR ImageFilePath,PFIND_EXE_FILE_CALLBACK Callback,PVOID CallerData); + PIMAGE_NT_HEADERS IMAGEAPI ImageNtHeader(PVOID Base); + PVOID IMAGEAPI ImageDirectoryEntryToDataEx(PVOID Base,BOOLEAN MappedAsImage,USHORT DirectoryEntry,PULONG Size,PIMAGE_SECTION_HEADER *FoundHeader); + PVOID IMAGEAPI ImageDirectoryEntryToData(PVOID Base,BOOLEAN MappedAsImage,USHORT DirectoryEntry,PULONG Size); + PIMAGE_SECTION_HEADER IMAGEAPI ImageRvaToSection(PIMAGE_NT_HEADERS NtHeaders,PVOID Base,ULONG Rva); + PVOID IMAGEAPI ImageRvaToVa(PIMAGE_NT_HEADERS NtHeaders,PVOID Base,ULONG Rva,PIMAGE_SECTION_HEADER *LastRvaSection); + +#define SSRVOPT_CALLBACK 0x0001 +#define SSRVOPT_DWORD 0x0002 +#define SSRVOPT_DWORDPTR 0x0004 +#define SSRVOPT_GUIDPTR 0x0008 +#define SSRVOPT_OLDGUIDPTR 0x0010 +#define SSRVOPT_UNATTENDED 0x0020 +#define SSRVOPT_NOCOPY 0x0040 +#define SSRVOPT_PARENTWIN 0x0080 +#define SSRVOPT_PARAMTYPE 0x0100 +#define SSRVOPT_SECURE 0x0200 +#define SSRVOPT_TRACE 0x0400 +#define SSRVOPT_SETCONTEXT 0x0800 +#define SSRVOPT_PROXY 0x1000 +#define SSRVOPT_DOWNSTREAM_STORE 0x2000 +#define SSRVOPT_RESET ((ULONG_PTR)-1) + +#define SSRVACTION_TRACE 1 +#define SSRVACTION_QUERYCANCEL 2 +#define SSRVACTION_EVENT 3 + +#ifndef _WIN64 + + typedef struct _IMAGE_DEBUG_INFORMATION { + LIST_ENTRY List; + DWORD ReservedSize; + PVOID ReservedMappedBase; + USHORT ReservedMachine; + USHORT ReservedCharacteristics; + DWORD ReservedCheckSum; + DWORD ImageBase; + DWORD SizeOfImage; + DWORD ReservedNumberOfSections; + PIMAGE_SECTION_HEADER ReservedSections; + DWORD ReservedExportedNamesSize; + PSTR ReservedExportedNames; + DWORD ReservedNumberOfFunctionTableEntries; + PIMAGE_FUNCTION_ENTRY ReservedFunctionTableEntries; + DWORD ReservedLowestFunctionStartingAddress; + DWORD ReservedHighestFunctionEndingAddress; + DWORD ReservedNumberOfFpoTableEntries; + PFPO_DATA ReservedFpoTableEntries; + DWORD SizeOfCoffSymbols; + PIMAGE_COFF_SYMBOLS_HEADER CoffSymbols; + DWORD ReservedSizeOfCodeViewSymbols; + PVOID ReservedCodeViewSymbols; + PSTR ImageFilePath; + PSTR ImageFileName; + PSTR ReservedDebugFilePath; + DWORD ReservedTimeDateStamp; + BOOL ReservedRomImage; + PIMAGE_DEBUG_DIRECTORY ReservedDebugDirectory; + DWORD ReservedNumberOfDebugDirectories; + DWORD ReservedOriginalFunctionTableBaseAddress; + DWORD Reserved[2 ]; + } IMAGE_DEBUG_INFORMATION,*PIMAGE_DEBUG_INFORMATION; + + PIMAGE_DEBUG_INFORMATION IMAGEAPI MapDebugInformation(HANDLE FileHandle,PSTR FileName,PSTR SymbolPath,DWORD ImageBase); + BOOL IMAGEAPI UnmapDebugInformation(PIMAGE_DEBUG_INFORMATION DebugInfo); +#endif + + typedef BOOL (CALLBACK *PENUMDIRTREE_CALLBACK)(LPCSTR FilePath,PVOID CallerData); + + BOOL IMAGEAPI SearchTreeForFile(PSTR RootPath,PSTR InputPathName,PSTR OutputPathBuffer); + BOOL IMAGEAPI EnumDirTree(HANDLE hProcess,PSTR RootPath,PSTR InputPathName,PSTR OutputPathBuffer,PENUMDIRTREE_CALLBACK Callback,PVOID CallbackData); + BOOL IMAGEAPI MakeSureDirectoryPathExists(PCSTR DirPath); + +#define UNDNAME_COMPLETE (0x0000) +#define UNDNAME_NO_LEADING_UNDERSCORES (0x0001) +#define UNDNAME_NO_MS_KEYWORDS (0x0002) +#define UNDNAME_NO_FUNCTION_RETURNS (0x0004) +#define UNDNAME_NO_ALLOCATION_MODEL (0x0008) +#define UNDNAME_NO_ALLOCATION_LANGUAGE (0x0010) +#define UNDNAME_NO_MS_THISTYPE (0x0020) +#define UNDNAME_NO_CV_THISTYPE (0x0040) +#define UNDNAME_NO_THISTYPE (0x0060) +#define UNDNAME_NO_ACCESS_SPECIFIERS (0x0080) +#define UNDNAME_NO_THROW_SIGNATURES (0x0100) +#define UNDNAME_NO_MEMBER_TYPE (0x0200) +#define UNDNAME_NO_RETURN_UDT_MODEL (0x0400) +#define UNDNAME_32_BIT_DECODE (0x0800) +#define UNDNAME_NAME_ONLY (0x1000) +#define UNDNAME_NO_ARGUMENTS (0x2000) +#define UNDNAME_NO_SPECIAL_SYMS (0x4000) + + DWORD IMAGEAPI WINAPI UnDecorateSymbolName(PCSTR DecoratedName,PSTR UnDecoratedName,DWORD UndecoratedLength,DWORD Flags); + +#define DBHHEADER_DEBUGDIRS 0x1 + + typedef struct _MODLOAD_DATA { + DWORD ssize; + DWORD ssig; + PVOID data; + DWORD size; + DWORD flags; + } MODLOAD_DATA,*PMODLOAD_DATA; + + typedef enum { + AddrMode1616,AddrMode1632,AddrModeReal,AddrModeFlat + } ADDRESS_MODE; + + typedef struct _tagADDRESS64 { + DWORD64 Offset; + WORD Segment; + ADDRESS_MODE Mode; + } ADDRESS64,*LPADDRESS64; + +#ifdef _IMAGEHLP64 +#define ADDRESS ADDRESS64 +#define LPADDRESS LPADDRESS64 +#else + typedef struct _tagADDRESS { + DWORD Offset; + WORD Segment; + ADDRESS_MODE Mode; + } ADDRESS,*LPADDRESS; + + static __inline void Address32To64(LPADDRESS a32,LPADDRESS64 a64) { + a64->Offset = (ULONG64)(LONG64)(LONG)a32->Offset; + a64->Segment = a32->Segment; + a64->Mode = a32->Mode; + } + + static __inline void Address64To32(LPADDRESS64 a64,LPADDRESS a32) { + a32->Offset = (ULONG)a64->Offset; + a32->Segment = a64->Segment; + a32->Mode = a64->Mode; + } +#endif + + typedef struct _KDHELP64 { + DWORD64 Thread; + DWORD ThCallbackStack; + DWORD ThCallbackBStore; + DWORD NextCallback; + DWORD FramePointer; + DWORD64 KiCallUserMode; + DWORD64 KeUserCallbackDispatcher; + DWORD64 SystemRangeStart; + DWORD64 Reserved[8]; + } KDHELP64,*PKDHELP64; + +#ifdef _IMAGEHLP64 +#define KDHELP KDHELP64 +#define PKDHELP PKDHELP64 +#else + typedef struct _KDHELP { + DWORD Thread; + DWORD ThCallbackStack; + DWORD NextCallback; + DWORD FramePointer; + DWORD KiCallUserMode; + DWORD KeUserCallbackDispatcher; + DWORD SystemRangeStart; + DWORD ThCallbackBStore; + DWORD Reserved[8]; + } KDHELP,*PKDHELP; + + static __inline void KdHelp32To64(PKDHELP p32,PKDHELP64 p64) { + p64->Thread = p32->Thread; + p64->ThCallbackStack = p32->ThCallbackStack; + p64->NextCallback = p32->NextCallback; + p64->FramePointer = p32->FramePointer; + p64->KiCallUserMode = p32->KiCallUserMode; + p64->KeUserCallbackDispatcher = p32->KeUserCallbackDispatcher; + p64->SystemRangeStart = p32->SystemRangeStart; + } +#endif + + typedef struct _tagSTACKFRAME64 { + ADDRESS64 AddrPC; + ADDRESS64 AddrReturn; + ADDRESS64 AddrFrame; + ADDRESS64 AddrStack; + ADDRESS64 AddrBStore; + PVOID FuncTableEntry; + DWORD64 Params[4]; + BOOL Far; + BOOL Virtual; + DWORD64 Reserved[3]; + KDHELP64 KdHelp; + } STACKFRAME64,*LPSTACKFRAME64; + +#ifdef _IMAGEHLP64 +#define STACKFRAME STACKFRAME64 +#define LPSTACKFRAME LPSTACKFRAME64 +#else + typedef struct _tagSTACKFRAME { + ADDRESS AddrPC; + ADDRESS AddrReturn; + ADDRESS AddrFrame; + ADDRESS AddrStack; + PVOID FuncTableEntry; + DWORD Params[4]; + BOOL Far; + BOOL Virtual; + DWORD Reserved[3]; + KDHELP KdHelp; + ADDRESS AddrBStore; + } STACKFRAME,*LPSTACKFRAME; +#endif + + typedef BOOL (WINAPI *PREAD_PROCESS_MEMORY_ROUTINE64)(HANDLE hProcess,DWORD64 qwBaseAddress,PVOID lpBuffer,DWORD nSize,LPDWORD lpNumberOfBytesRead); + typedef PVOID (WINAPI *PFUNCTION_TABLE_ACCESS_ROUTINE64)(HANDLE hProcess,DWORD64 AddrBase); + typedef DWORD64 (WINAPI *PGET_MODULE_BASE_ROUTINE64)(HANDLE hProcess,DWORD64 Address); + typedef DWORD64 (WINAPI *PTRANSLATE_ADDRESS_ROUTINE64)(HANDLE hProcess,HANDLE hThread,LPADDRESS64 lpaddr); + + BOOL IMAGEAPI StackWalk64(DWORD MachineType,HANDLE hProcess,HANDLE hThread,LPSTACKFRAME64 StackFrame,PVOID ContextRecord,PREAD_PROCESS_MEMORY_ROUTINE64 ReadMemoryRoutine,PFUNCTION_TABLE_ACCESS_ROUTINE64 FunctionTableAccessRoutine,PGET_MODULE_BASE_ROUTINE64 GetModuleBaseRoutine,PTRANSLATE_ADDRESS_ROUTINE64 TranslateAddress); + +#ifdef _IMAGEHLP64 +#define PREAD_PROCESS_MEMORY_ROUTINE PREAD_PROCESS_MEMORY_ROUTINE64 +#define PFUNCTION_TABLE_ACCESS_ROUTINE PFUNCTION_TABLE_ACCESS_ROUTINE64 +#define PGET_MODULE_BASE_ROUTINE PGET_MODULE_BASE_ROUTINE64 +#define PTRANSLATE_ADDRESS_ROUTINE PTRANSLATE_ADDRESS_ROUTINE64 +#define StackWalk StackWalk64 +#else + typedef BOOL (WINAPI *PREAD_PROCESS_MEMORY_ROUTINE)(HANDLE hProcess,DWORD lpBaseAddress,PVOID lpBuffer,DWORD nSize,PDWORD lpNumberOfBytesRead); + typedef PVOID (WINAPI *PFUNCTION_TABLE_ACCESS_ROUTINE)(HANDLE hProcess,DWORD AddrBase); + typedef DWORD (WINAPI *PGET_MODULE_BASE_ROUTINE)(HANDLE hProcess,DWORD Address); + typedef DWORD (WINAPI *PTRANSLATE_ADDRESS_ROUTINE)(HANDLE hProcess,HANDLE hThread,LPADDRESS lpaddr); + + BOOL IMAGEAPI StackWalk(DWORD MachineType,HANDLE hProcess,HANDLE hThread,LPSTACKFRAME StackFrame,PVOID ContextRecord,PREAD_PROCESS_MEMORY_ROUTINE ReadMemoryRoutine,PFUNCTION_TABLE_ACCESS_ROUTINE FunctionTableAccessRoutine,PGET_MODULE_BASE_ROUTINE GetModuleBaseRoutine,PTRANSLATE_ADDRESS_ROUTINE TranslateAddress); +#endif + +#define API_VERSION_NUMBER 9 + + typedef struct API_VERSION { + USHORT MajorVersion; + USHORT MinorVersion; + USHORT Revision; + USHORT Reserved; + } API_VERSION,*LPAPI_VERSION; + + LPAPI_VERSION IMAGEAPI ImagehlpApiVersion(VOID); + LPAPI_VERSION IMAGEAPI ImagehlpApiVersionEx(LPAPI_VERSION AppVersion); + DWORD IMAGEAPI GetTimestampForLoadedLibrary(HMODULE Module); + + typedef BOOL (CALLBACK *PSYM_ENUMMODULES_CALLBACK64)(PSTR ModuleName,DWORD64 BaseOfDll,PVOID UserContext); + typedef BOOL (CALLBACK *PSYM_ENUMSYMBOLS_CALLBACK64)(PSTR SymbolName,DWORD64 SymbolAddress,ULONG SymbolSize,PVOID UserContext); + typedef BOOL (CALLBACK *PSYM_ENUMSYMBOLS_CALLBACK64W)(PWSTR SymbolName,DWORD64 SymbolAddress,ULONG SymbolSize,PVOID UserContext); + typedef BOOL (CALLBACK *PENUMLOADED_MODULES_CALLBACK64)(PSTR ModuleName,DWORD64 ModuleBase,ULONG ModuleSize,PVOID UserContext); + typedef BOOL (CALLBACK *PSYMBOL_REGISTERED_CALLBACK64)(HANDLE hProcess,ULONG ActionCode,ULONG64 CallbackData,ULONG64 UserContext); + typedef PVOID (CALLBACK *PSYMBOL_FUNCENTRY_CALLBACK)(HANDLE hProcess,DWORD AddrBase,PVOID UserContext); + typedef PVOID (CALLBACK *PSYMBOL_FUNCENTRY_CALLBACK64)(HANDLE hProcess,ULONG64 AddrBase,ULONG64 UserContext); + +#ifdef _IMAGEHLP64 +#define PSYM_ENUMMODULES_CALLBACK PSYM_ENUMMODULES_CALLBACK64 +#define PSYM_ENUMSYMBOLS_CALLBACK PSYM_ENUMSYMBOLS_CALLBACK64 +#define PSYM_ENUMSYMBOLS_CALLBACKW PSYM_ENUMSYMBOLS_CALLBACK64W +#define PENUMLOADED_MODULES_CALLBACK PENUMLOADED_MODULES_CALLBACK64 +#define PSYMBOL_REGISTERED_CALLBACK PSYMBOL_REGISTERED_CALLBACK64 +#define PSYMBOL_FUNCENTRY_CALLBACK PSYMBOL_FUNCENTRY_CALLBACK64 +#else + typedef BOOL (CALLBACK *PSYM_ENUMMODULES_CALLBACK)(PSTR ModuleName,ULONG BaseOfDll,PVOID UserContext); + typedef BOOL (CALLBACK *PSYM_ENUMSYMBOLS_CALLBACK)(PSTR SymbolName,ULONG SymbolAddress,ULONG SymbolSize,PVOID UserContext); + typedef BOOL (CALLBACK *PSYM_ENUMSYMBOLS_CALLBACKW)(PWSTR SymbolName,ULONG SymbolAddress,ULONG SymbolSize,PVOID UserContext); + typedef BOOL (CALLBACK *PENUMLOADED_MODULES_CALLBACK)(PSTR ModuleName,ULONG ModuleBase,ULONG ModuleSize,PVOID UserContext); + typedef BOOL (CALLBACK *PSYMBOL_REGISTERED_CALLBACK)(HANDLE hProcess,ULONG ActionCode,PVOID CallbackData,PVOID UserContext); +#endif + +#define SYMFLAG_VALUEPRESENT 0x00000001 +#define SYMFLAG_REGISTER 0x00000008 +#define SYMFLAG_REGREL 0x00000010 +#define SYMFLAG_FRAMEREL 0x00000020 +#define SYMFLAG_PARAMETER 0x00000040 +#define SYMFLAG_LOCAL 0x00000080 +#define SYMFLAG_CONSTANT 0x00000100 +#define SYMFLAG_EXPORT 0x00000200 +#define SYMFLAG_FORWARDER 0x00000400 +#define SYMFLAG_FUNCTION 0x00000800 +#define SYMFLAG_VIRTUAL 0x00001000 +#define SYMFLAG_THUNK 0x00002000 +#define SYMFLAG_TLSREL 0x00004000 + + typedef enum { + SymNone = 0,SymCoff,SymCv,SymPdb,SymExport,SymDeferred,SymSym,SymDia,SymVirtual,NumSymTypes + } SYM_TYPE; + + typedef struct _IMAGEHLP_SYMBOL64 { + DWORD SizeOfStruct; + DWORD64 Address; + DWORD Size; + DWORD Flags; + DWORD MaxNameLength; + CHAR Name[1]; + } IMAGEHLP_SYMBOL64,*PIMAGEHLP_SYMBOL64; + + typedef struct _IMAGEHLP_SYMBOL64_PACKAGE { + IMAGEHLP_SYMBOL64 sym; + CHAR name[MAX_SYM_NAME + 1]; + } IMAGEHLP_SYMBOL64_PACKAGE,*PIMAGEHLP_SYMBOL64_PACKAGE; + +#ifdef _IMAGEHLP64 + +#define IMAGEHLP_SYMBOL IMAGEHLP_SYMBOL64 +#define PIMAGEHLP_SYMBOL PIMAGEHLP_SYMBOL64 +#define IMAGEHLP_SYMBOL_PACKAGE IMAGEHLP_SYMBOL64_PACKAGE +#define PIMAGEHLP_SYMBOL_PACKAGE PIMAGEHLP_SYMBOL64_PACKAGE +#else + + typedef struct _IMAGEHLP_SYMBOL { + DWORD SizeOfStruct; + DWORD Address; + DWORD Size; + DWORD Flags; + DWORD MaxNameLength; + CHAR Name[1]; + } IMAGEHLP_SYMBOL,*PIMAGEHLP_SYMBOL; + + typedef struct _IMAGEHLP_SYMBOL_PACKAGE { + IMAGEHLP_SYMBOL sym; + CHAR name[MAX_SYM_NAME + 1]; + } IMAGEHLP_SYMBOL_PACKAGE,*PIMAGEHLP_SYMBOL_PACKAGE; +#endif + + typedef struct _IMAGEHLP_MODULE64 { + DWORD SizeOfStruct; + DWORD64 BaseOfImage; + DWORD ImageSize; + DWORD TimeDateStamp; + DWORD CheckSum; + DWORD NumSyms; + SYM_TYPE SymType; + CHAR ModuleName[32]; + CHAR ImageName[256]; + CHAR LoadedImageName[256]; + CHAR LoadedPdbName[256]; + DWORD CVSig; + CHAR CVData[MAX_PATH*3]; + DWORD PdbSig; + GUID PdbSig70; + DWORD PdbAge; + BOOL PdbUnmatched; + BOOL DbgUnmatched; + BOOL LineNumbers; + BOOL GlobalSymbols; + BOOL TypeInfo; + } IMAGEHLP_MODULE64,*PIMAGEHLP_MODULE64; + + typedef struct _IMAGEHLP_MODULE64W { + DWORD SizeOfStruct; + DWORD64 BaseOfImage; + DWORD ImageSize; + DWORD TimeDateStamp; + DWORD CheckSum; + DWORD NumSyms; + SYM_TYPE SymType; + WCHAR ModuleName[32]; + WCHAR ImageName[256]; + WCHAR LoadedImageName[256]; + WCHAR LoadedPdbName[256]; + DWORD CVSig; + WCHAR CVData[MAX_PATH*3]; + DWORD PdbSig; + GUID PdbSig70; + DWORD PdbAge; + BOOL PdbUnmatched; + BOOL DbgUnmatched; + BOOL LineNumbers; + BOOL GlobalSymbols; + BOOL TypeInfo; + } IMAGEHLP_MODULEW64,*PIMAGEHLP_MODULEW64; + +#ifdef _IMAGEHLP64 +#define IMAGEHLP_MODULE IMAGEHLP_MODULE64 +#define PIMAGEHLP_MODULE PIMAGEHLP_MODULE64 +#define IMAGEHLP_MODULEW IMAGEHLP_MODULEW64 +#define PIMAGEHLP_MODULEW PIMAGEHLP_MODULEW64 +#else + typedef struct _IMAGEHLP_MODULE { + DWORD SizeOfStruct; + DWORD BaseOfImage; + DWORD ImageSize; + DWORD TimeDateStamp; + DWORD CheckSum; + DWORD NumSyms; + SYM_TYPE SymType; + CHAR ModuleName[32]; + CHAR ImageName[256]; + CHAR LoadedImageName[256]; + } IMAGEHLP_MODULE,*PIMAGEHLP_MODULE; + + typedef struct _IMAGEHLP_MODULEW { + DWORD SizeOfStruct; + DWORD BaseOfImage; + DWORD ImageSize; + DWORD TimeDateStamp; + DWORD CheckSum; + DWORD NumSyms; + SYM_TYPE SymType; + WCHAR ModuleName[32]; + WCHAR ImageName[256]; + WCHAR LoadedImageName[256]; + } IMAGEHLP_MODULEW,*PIMAGEHLP_MODULEW; +#endif + + typedef struct _IMAGEHLP_LINE64 { + DWORD SizeOfStruct; + PVOID Key; + DWORD LineNumber; + PCHAR FileName; + DWORD64 Address; + } IMAGEHLP_LINE64,*PIMAGEHLP_LINE64; + +#ifdef _IMAGEHLP64 +#define IMAGEHLP_LINE IMAGEHLP_LINE64 +#define PIMAGEHLP_LINE PIMAGEHLP_LINE64 +#else + typedef struct _IMAGEHLP_LINE { + DWORD SizeOfStruct; + PVOID Key; + DWORD LineNumber; + PCHAR FileName; + DWORD Address; + } IMAGEHLP_LINE,*PIMAGEHLP_LINE; +#endif + + typedef struct _SOURCEFILE { + DWORD64 ModBase; + PCHAR FileName; + } SOURCEFILE,*PSOURCEFILE; + +#define CBA_DEFERRED_SYMBOL_LOAD_START 0x00000001 +#define CBA_DEFERRED_SYMBOL_LOAD_COMPLETE 0x00000002 +#define CBA_DEFERRED_SYMBOL_LOAD_FAILURE 0x00000003 +#define CBA_SYMBOLS_UNLOADED 0x00000004 +#define CBA_DUPLICATE_SYMBOL 0x00000005 +#define CBA_READ_MEMORY 0x00000006 +#define CBA_DEFERRED_SYMBOL_LOAD_CANCEL 0x00000007 +#define CBA_SET_OPTIONS 0x00000008 +#define CBA_EVENT 0x00000010 +#define CBA_DEFERRED_SYMBOL_LOAD_PARTIAL 0x00000020 +#define CBA_DEBUG_INFO 0x10000000 + + typedef struct _IMAGEHLP_CBA_READ_MEMORY { + DWORD64 addr; + PVOID buf; + DWORD bytes; + DWORD *bytesread; + } IMAGEHLP_CBA_READ_MEMORY,*PIMAGEHLP_CBA_READ_MEMORY; + + enum { + sevInfo = 0,sevProblem,sevAttn,sevFatal,sevMax + }; + + typedef struct _IMAGEHLP_CBA_EVENT { + DWORD severity; + DWORD code; + PCHAR desc; + PVOID object; + } IMAGEHLP_CBA_EVENT,*PIMAGEHLP_CBA_EVENT; + + typedef struct _IMAGEHLP_DEFERRED_SYMBOL_LOAD64 { + DWORD SizeOfStruct; + DWORD64 BaseOfImage; + DWORD CheckSum; + DWORD TimeDateStamp; + CHAR FileName[MAX_PATH]; + BOOLEAN Reparse; + HANDLE hFile; + DWORD Flags; + } IMAGEHLP_DEFERRED_SYMBOL_LOAD64,*PIMAGEHLP_DEFERRED_SYMBOL_LOAD64; + +#define DSLFLAG_MISMATCHED_PDB 0x1 +#define DSLFLAG_MISMATCHED_DBG 0x2 + +#ifdef _IMAGEHLP64 +#define IMAGEHLP_DEFERRED_SYMBOL_LOAD IMAGEHLP_DEFERRED_SYMBOL_LOAD64 +#define PIMAGEHLP_DEFERRED_SYMBOL_LOAD PIMAGEHLP_DEFERRED_SYMBOL_LOAD64 +#else + typedef struct _IMAGEHLP_DEFERRED_SYMBOL_LOAD { + DWORD SizeOfStruct; + DWORD BaseOfImage; + DWORD CheckSum; + DWORD TimeDateStamp; + CHAR FileName[MAX_PATH]; + BOOLEAN Reparse; + HANDLE hFile; + } IMAGEHLP_DEFERRED_SYMBOL_LOAD,*PIMAGEHLP_DEFERRED_SYMBOL_LOAD; +#endif + + typedef struct _IMAGEHLP_DUPLICATE_SYMBOL64 { + DWORD SizeOfStruct; + DWORD NumberOfDups; + PIMAGEHLP_SYMBOL64 Symbol; + DWORD SelectedSymbol; + } IMAGEHLP_DUPLICATE_SYMBOL64,*PIMAGEHLP_DUPLICATE_SYMBOL64; + +#ifdef _IMAGEHLP64 +#define IMAGEHLP_DUPLICATE_SYMBOL IMAGEHLP_DUPLICATE_SYMBOL64 +#define PIMAGEHLP_DUPLICATE_SYMBOL PIMAGEHLP_DUPLICATE_SYMBOL64 +#else + typedef struct _IMAGEHLP_DUPLICATE_SYMBOL { + DWORD SizeOfStruct; + DWORD NumberOfDups; + PIMAGEHLP_SYMBOL Symbol; + DWORD SelectedSymbol; + } IMAGEHLP_DUPLICATE_SYMBOL,*PIMAGEHLP_DUPLICATE_SYMBOL; +#endif + + BOOL IMAGEAPI SymSetParentWindow(HWND hwnd); + PCHAR IMAGEAPI SymSetHomeDirectory(PCSTR dir); + PCHAR IMAGEAPI SymGetHomeDirectory(DWORD type,PSTR dir,size_t size); + + enum { + hdBase = 0,hdSym,hdSrc,hdMax + }; + +#define SYMOPT_CASE_INSENSITIVE 0x00000001 +#define SYMOPT_UNDNAME 0x00000002 +#define SYMOPT_DEFERRED_LOADS 0x00000004 +#define SYMOPT_NO_CPP 0x00000008 +#define SYMOPT_LOAD_LINES 0x00000010 +#define SYMOPT_OMAP_FIND_NEAREST 0x00000020 +#define SYMOPT_LOAD_ANYTHING 0x00000040 +#define SYMOPT_IGNORE_CVREC 0x00000080 +#define SYMOPT_NO_UNQUALIFIED_LOADS 0x00000100 +#define SYMOPT_FAIL_CRITICAL_ERRORS 0x00000200 +#define SYMOPT_EXACT_SYMBOLS 0x00000400 +#define SYMOPT_ALLOW_ABSOLUTE_SYMBOLS 0x00000800 +#define SYMOPT_IGNORE_NT_SYMPATH 0x00001000 +#define SYMOPT_INCLUDE_32BIT_MODULES 0x00002000 +#define SYMOPT_PUBLICS_ONLY 0x00004000 +#define SYMOPT_NO_PUBLICS 0x00008000 +#define SYMOPT_AUTO_PUBLICS 0x00010000 +#define SYMOPT_NO_IMAGE_SEARCH 0x00020000 +#define SYMOPT_SECURE 0x00040000 +#define SYMOPT_NO_PROMPTS 0x00080000 + +#define SYMOPT_DEBUG 0x80000000 + + DWORD IMAGEAPI SymSetOptions(DWORD SymOptions); + DWORD IMAGEAPI SymGetOptions(VOID); + BOOL IMAGEAPI SymCleanup(HANDLE hProcess); + BOOL IMAGEAPI SymMatchString(LPSTR string,LPSTR expression,BOOL fCase); + + typedef BOOL (CALLBACK *PSYM_ENUMSOURCFILES_CALLBACK)(PSOURCEFILE pSourceFile,PVOID UserContext); + + BOOL IMAGEAPI SymEnumSourceFiles(HANDLE hProcess,ULONG64 ModBase,LPSTR Mask,PSYM_ENUMSOURCFILES_CALLBACK cbSrcFiles,PVOID UserContext); + BOOL IMAGEAPI SymEnumerateModules64(HANDLE hProcess,PSYM_ENUMMODULES_CALLBACK64 EnumModulesCallback,PVOID UserContext); + +#ifdef _IMAGEHLP64 +#define SymEnumerateModules SymEnumerateModules64 +#else + BOOL IMAGEAPI SymEnumerateModules(HANDLE hProcess,PSYM_ENUMMODULES_CALLBACK EnumModulesCallback,PVOID UserContext); +#endif + + BOOL IMAGEAPI SymEnumerateSymbols64(HANDLE hProcess,DWORD64 BaseOfDll,PSYM_ENUMSYMBOLS_CALLBACK64 EnumSymbolsCallback,PVOID UserContext); + BOOL IMAGEAPI SymEnumerateSymbolsW64(HANDLE hProcess,DWORD64 BaseOfDll,PSYM_ENUMSYMBOLS_CALLBACK64W EnumSymbolsCallback,PVOID UserContext); + +#ifdef _IMAGEHLP64 +#define SymEnumerateSymbols SymEnumerateSymbols64 +#define SymEnumerateSymbolsW SymEnumerateSymbolsW64 +#else + BOOL IMAGEAPI SymEnumerateSymbols(HANDLE hProcess,DWORD BaseOfDll,PSYM_ENUMSYMBOLS_CALLBACK EnumSymbolsCallback,PVOID UserContext); + BOOL IMAGEAPI SymEnumerateSymbolsW(HANDLE hProcess,DWORD BaseOfDll,PSYM_ENUMSYMBOLS_CALLBACKW EnumSymbolsCallback,PVOID UserContext); +#endif + + BOOL IMAGEAPI EnumerateLoadedModules64(HANDLE hProcess,PENUMLOADED_MODULES_CALLBACK64 EnumLoadedModulesCallback,PVOID UserContext); +#ifdef _IMAGEHLP64 +#define EnumerateLoadedModules EnumerateLoadedModules64 +#else + BOOL IMAGEAPI EnumerateLoadedModules(HANDLE hProcess,PENUMLOADED_MODULES_CALLBACK EnumLoadedModulesCallback,PVOID UserContext); +#endif + + PVOID IMAGEAPI SymFunctionTableAccess64(HANDLE hProcess,DWORD64 AddrBase); + +#ifdef _IMAGEHLP64 +#define SymFunctionTableAccess SymFunctionTableAccess64 +#else + PVOID IMAGEAPI SymFunctionTableAccess(HANDLE hProcess,DWORD AddrBase); +#endif + + BOOL IMAGEAPI SymGetModuleInfo64(HANDLE hProcess,DWORD64 qwAddr,PIMAGEHLP_MODULE64 ModuleInfo); + BOOL IMAGEAPI SymGetModuleInfoW64(HANDLE hProcess,DWORD64 qwAddr,PIMAGEHLP_MODULEW64 ModuleInfo); + +#ifdef _IMAGEHLP64 +#define SymGetModuleInfo SymGetModuleInfo64 +#define SymGetModuleInfoW SymGetModuleInfoW64 +#else + BOOL IMAGEAPI SymGetModuleInfo(HANDLE hProcess,DWORD dwAddr,PIMAGEHLP_MODULE ModuleInfo); + BOOL IMAGEAPI SymGetModuleInfoW(HANDLE hProcess,DWORD dwAddr,PIMAGEHLP_MODULEW ModuleInfo); +#endif + + DWORD64 IMAGEAPI SymGetModuleBase64(HANDLE hProcess,DWORD64 qwAddr); + +#ifdef _IMAGEHLP64 +#define SymGetModuleBase SymGetModuleBase64 +#else + DWORD IMAGEAPI SymGetModuleBase(HANDLE hProcess,DWORD dwAddr); +#endif + + BOOL IMAGEAPI SymGetSymNext64(HANDLE hProcess,PIMAGEHLP_SYMBOL64 Symbol); + +#ifdef _IMAGEHLP64 +#define SymGetSymNext SymGetSymNext64 +#else + BOOL IMAGEAPI SymGetSymNext(HANDLE hProcess,PIMAGEHLP_SYMBOL Symbol); +#endif + + BOOL IMAGEAPI SymGetSymPrev64(HANDLE hProcess,PIMAGEHLP_SYMBOL64 Symbol); + +#ifdef _IMAGEHLP64 +#define SymGetSymPrev SymGetSymPrev64 +#else + BOOL IMAGEAPI SymGetSymPrev(HANDLE hProcess,PIMAGEHLP_SYMBOL Symbol); +#endif + + typedef struct _SRCCODEINFO { + DWORD SizeOfStruct; + PVOID Key; + DWORD64 ModBase; + CHAR Obj[MAX_PATH + 1]; + CHAR FileName[MAX_PATH + 1]; + DWORD LineNumber; + DWORD64 Address; + } SRCCODEINFO,*PSRCCODEINFO; + + typedef BOOL (CALLBACK *PSYM_ENUMLINES_CALLBACK)(PSRCCODEINFO LineInfo,PVOID UserContext); + + BOOL IMAGEAPI SymEnumLines(HANDLE hProcess,ULONG64 Base,PCSTR Obj,PCSTR File,PSYM_ENUMLINES_CALLBACK EnumLinesCallback,PVOID UserContext); + BOOL IMAGEAPI SymGetLineFromAddr64(HANDLE hProcess,DWORD64 qwAddr,PDWORD pdwDisplacement,PIMAGEHLP_LINE64 Line64); + +#ifdef _IMAGEHLP64 +#define SymGetLineFromAddr SymGetLineFromAddr64 +#else + BOOL IMAGEAPI SymGetLineFromAddr(HANDLE hProcess,DWORD dwAddr,PDWORD pdwDisplacement,PIMAGEHLP_LINE Line); +#endif + + BOOL IMAGEAPI SymGetLineFromName64(HANDLE hProcess,PSTR ModuleName,PSTR FileName,DWORD dwLineNumber,PLONG plDisplacement,PIMAGEHLP_LINE64 Line); + +#ifdef _IMAGEHLP64 +#define SymGetLineFromName SymGetLineFromName64 +#else + BOOL IMAGEAPI SymGetLineFromName(HANDLE hProcess,PSTR ModuleName,PSTR FileName,DWORD dwLineNumber,PLONG plDisplacement,PIMAGEHLP_LINE Line); +#endif + + BOOL IMAGEAPI SymGetLineNext64(HANDLE hProcess,PIMAGEHLP_LINE64 Line); + +#ifdef _IMAGEHLP64 +#define SymGetLineNext SymGetLineNext64 +#else + BOOL IMAGEAPI SymGetLineNext(HANDLE hProcess,PIMAGEHLP_LINE Line); +#endif + + BOOL IMAGEAPI SymGetLinePrev64(HANDLE hProcess,PIMAGEHLP_LINE64 Line); + +#ifdef _IMAGEHLP64 +#define SymGetLinePrev SymGetLinePrev64 +#else + BOOL IMAGEAPI SymGetLinePrev(HANDLE hProcess,PIMAGEHLP_LINE Line); +#endif + + BOOL IMAGEAPI SymMatchFileName(PSTR FileName,PSTR Match,PSTR *FileNameStop,PSTR *MatchStop); + BOOL IMAGEAPI SymInitialize(HANDLE hProcess,PSTR UserSearchPath,BOOL fInvadeProcess); + BOOL IMAGEAPI SymGetSearchPath(HANDLE hProcess,PSTR SearchPath,DWORD SearchPathLength); + BOOL IMAGEAPI SymSetSearchPath(HANDLE hProcess,PSTR SearchPath); + DWORD64 IMAGEAPI SymLoadModule64(HANDLE hProcess,HANDLE hFile,PSTR ImageName,PSTR ModuleName,DWORD64 BaseOfDll,DWORD SizeOfDll); + +#define SLMFLAG_VIRTUAL 0x1 + + DWORD64 IMAGEAPI SymLoadModuleEx(HANDLE hProcess,HANDLE hFile,PSTR ImageName,PSTR ModuleName,DWORD64 BaseOfDll,DWORD DllSize,PMODLOAD_DATA Data,DWORD Flags); + +#ifdef _IMAGEHLP64 +#define SymLoadModule SymLoadModule64 +#else + DWORD IMAGEAPI SymLoadModule(HANDLE hProcess,HANDLE hFile,PSTR ImageName,PSTR ModuleName,DWORD BaseOfDll,DWORD SizeOfDll); +#endif + + BOOL IMAGEAPI SymUnloadModule64(HANDLE hProcess,DWORD64 BaseOfDll); + +#ifdef _IMAGEHLP64 +#define SymUnloadModule SymUnloadModule64 +#else + BOOL IMAGEAPI SymUnloadModule(HANDLE hProcess,DWORD BaseOfDll); +#endif + + BOOL IMAGEAPI SymUnDName64(PIMAGEHLP_SYMBOL64 sym,PSTR UnDecName,DWORD UnDecNameLength); + +#ifdef _IMAGEHLP64 +#define SymUnDName SymUnDName64 +#else + BOOL IMAGEAPI SymUnDName(PIMAGEHLP_SYMBOL sym,PSTR UnDecName,DWORD UnDecNameLength); +#endif + + BOOL IMAGEAPI SymRegisterCallback64(HANDLE hProcess,PSYMBOL_REGISTERED_CALLBACK64 CallbackFunction,ULONG64 UserContext); + + BOOL IMAGEAPI SymRegisterFunctionEntryCallback64(HANDLE hProcess,PSYMBOL_FUNCENTRY_CALLBACK64 CallbackFunction,ULONG64 UserContext); + +#ifdef _IMAGEHLP64 +#define SymRegisterCallback SymRegisterCallback64 +#define SymRegisterFunctionEntryCallback SymRegisterFunctionEntryCallback64 +#else + BOOL IMAGEAPI SymRegisterCallback(HANDLE hProcess,PSYMBOL_REGISTERED_CALLBACK CallbackFunction,PVOID UserContext); + BOOL IMAGEAPI SymRegisterFunctionEntryCallback(HANDLE hProcess,PSYMBOL_FUNCENTRY_CALLBACK CallbackFunction,PVOID UserContext); +#endif + + typedef struct _IMAGEHLP_SYMBOL_SRC { + DWORD sizeofstruct; + DWORD type; + char file[MAX_PATH]; + } IMAGEHLP_SYMBOL_SRC,*PIMAGEHLP_SYMBOL_SRC; + + typedef struct _MODULE_TYPE_INFO { + USHORT dataLength; + USHORT leaf; + BYTE data[1]; + } MODULE_TYPE_INFO,*PMODULE_TYPE_INFO; + + typedef struct _SYMBOL_INFO { + ULONG SizeOfStruct; + ULONG TypeIndex; + ULONG64 Reserved[2]; + ULONG info; + ULONG Size; + ULONG64 ModBase; + ULONG Flags; + ULONG64 Value; + ULONG64 Address; + ULONG Register; + ULONG Scope; + ULONG Tag; + ULONG NameLen; + ULONG MaxNameLen; + CHAR Name[1]; + } SYMBOL_INFO,*PSYMBOL_INFO; + + typedef struct _SYMBOL_INFO_PACKAGE { + SYMBOL_INFO si; + CHAR name[MAX_SYM_NAME + 1]; + } SYMBOL_INFO_PACKAGE,*PSYMBOL_INFO_PACKAGE; + + typedef struct _IMAGEHLP_STACK_FRAME + { + ULONG64 InstructionOffset; + ULONG64 ReturnOffset; + ULONG64 FrameOffset; + ULONG64 StackOffset; + ULONG64 BackingStoreOffset; + ULONG64 FuncTableEntry; + ULONG64 Params[4]; + ULONG64 Reserved[5]; + BOOL Virtual; + ULONG Reserved2; + } IMAGEHLP_STACK_FRAME,*PIMAGEHLP_STACK_FRAME; + + typedef VOID IMAGEHLP_CONTEXT,*PIMAGEHLP_CONTEXT; + + BOOL IMAGEAPI SymSetContext(HANDLE hProcess,PIMAGEHLP_STACK_FRAME StackFrame,PIMAGEHLP_CONTEXT Context); + BOOL IMAGEAPI SymFromAddr(HANDLE hProcess,DWORD64 Address,PDWORD64 Displacement,PSYMBOL_INFO Symbol); + BOOL IMAGEAPI SymFromToken(HANDLE hProcess,DWORD64 Base,DWORD Token,PSYMBOL_INFO Symbol); + BOOL IMAGEAPI SymFromName(HANDLE hProcess,LPSTR Name,PSYMBOL_INFO Symbol); + + typedef BOOL (CALLBACK *PSYM_ENUMERATESYMBOLS_CALLBACK)(PSYMBOL_INFO pSymInfo,ULONG SymbolSize,PVOID UserContext); + + BOOL IMAGEAPI SymEnumSymbols(HANDLE hProcess,ULONG64 BaseOfDll,PCSTR Mask,PSYM_ENUMERATESYMBOLS_CALLBACK EnumSymbolsCallback,PVOID UserContext); + BOOL IMAGEAPI SymEnumSymbolsForAddr(HANDLE hProcess,DWORD64 Address,PSYM_ENUMERATESYMBOLS_CALLBACK EnumSymbolsCallback,PVOID UserContext); + +#define SYMENUMFLAG_FULLSRCH 1 +#define SYMENUMFLAG_SPEEDSRCH 2 + + typedef enum _IMAGEHLP_SYMBOL_TYPE_INFO { + TI_GET_SYMTAG,TI_GET_SYMNAME,TI_GET_LENGTH,TI_GET_TYPE,TI_GET_TYPEID,TI_GET_BASETYPE,TI_GET_ARRAYINDEXTYPEID,TI_FINDCHILDREN, + TI_GET_DATAKIND,TI_GET_ADDRESSOFFSET,TI_GET_OFFSET,TI_GET_VALUE,TI_GET_COUNT,TI_GET_CHILDRENCOUNT,TI_GET_BITPOSITION,TI_GET_VIRTUALBASECLASS, + TI_GET_VIRTUALTABLESHAPEID,TI_GET_VIRTUALBASEPOINTEROFFSET,TI_GET_CLASSPARENTID,TI_GET_NESTED,TI_GET_SYMINDEX,TI_GET_LEXICALPARENT, + TI_GET_ADDRESS,TI_GET_THISADJUST,TI_GET_UDTKIND,TI_IS_EQUIV_TO,TI_GET_CALLING_CONVENTION + } IMAGEHLP_SYMBOL_TYPE_INFO; + + typedef struct _TI_FINDCHILDREN_PARAMS { + ULONG Count; + ULONG Start; + ULONG ChildId[1]; + } TI_FINDCHILDREN_PARAMS; + + BOOL IMAGEAPI SymGetTypeInfo(HANDLE hProcess,DWORD64 ModBase,ULONG TypeId,IMAGEHLP_SYMBOL_TYPE_INFO GetType,PVOID pInfo); + BOOL IMAGEAPI SymEnumTypes(HANDLE hProcess,ULONG64 BaseOfDll,PSYM_ENUMERATESYMBOLS_CALLBACK EnumSymbolsCallback,PVOID UserContext); + BOOL IMAGEAPI SymGetTypeFromName(HANDLE hProcess,ULONG64 BaseOfDll,LPSTR Name,PSYMBOL_INFO Symbol); + BOOL IMAGEAPI SymAddSymbol(HANDLE hProcess,ULONG64 BaseOfDll,PCSTR Name,DWORD64 Address,DWORD Size,DWORD Flags); + BOOL IMAGEAPI SymDeleteSymbol(HANDLE hProcess,ULONG64 BaseOfDll,PCSTR Name,DWORD64 Address,DWORD Flags); + + typedef BOOL (WINAPI *PDBGHELP_CREATE_USER_DUMP_CALLBACK)(DWORD DataType,PVOID *Data,LPDWORD DataLength,PVOID UserData); + + BOOL WINAPI DbgHelpCreateUserDump(LPSTR FileName,PDBGHELP_CREATE_USER_DUMP_CALLBACK Callback,PVOID UserData); + BOOL WINAPI DbgHelpCreateUserDumpW(LPWSTR FileName,PDBGHELP_CREATE_USER_DUMP_CALLBACK Callback,PVOID UserData); + BOOL IMAGEAPI SymGetSymFromAddr64(HANDLE hProcess,DWORD64 qwAddr,PDWORD64 pdwDisplacement,PIMAGEHLP_SYMBOL64 Symbol); + +#ifdef _IMAGEHLP64 +#define SymGetSymFromAddr SymGetSymFromAddr64 +#else + BOOL IMAGEAPI SymGetSymFromAddr(HANDLE hProcess,DWORD dwAddr,PDWORD pdwDisplacement,PIMAGEHLP_SYMBOL Symbol); +#endif + + BOOL IMAGEAPI SymGetSymFromName64(HANDLE hProcess,PSTR Name,PIMAGEHLP_SYMBOL64 Symbol); + +#ifdef _IMAGEHLP64 +#define SymGetSymFromName SymGetSymFromName64 +#else + BOOL IMAGEAPI SymGetSymFromName(HANDLE hProcess,PSTR Name,PIMAGEHLP_SYMBOL Symbol); +#endif + + DBHLP_DEPRECIATED BOOL IMAGEAPI FindFileInPath(HANDLE hprocess,LPSTR SearchPath,LPSTR FileName,PVOID id,DWORD two,DWORD three,DWORD flags,LPSTR FilePath); + DBHLP_DEPRECIATED BOOL IMAGEAPI FindFileInSearchPath(HANDLE hprocess,LPSTR SearchPath,LPSTR FileName,DWORD one,DWORD two,DWORD three,LPSTR FilePath); + DBHLP_DEPRECIATED BOOL IMAGEAPI SymEnumSym(HANDLE hProcess,ULONG64 BaseOfDll,PSYM_ENUMERATESYMBOLS_CALLBACK EnumSymbolsCallback,PVOID UserContext); + +#define SYMF_OMAP_GENERATED 0x00000001 +#define SYMF_OMAP_MODIFIED 0x00000002 +#define SYMF_REGISTER 0x00000008 +#define SYMF_REGREL 0x00000010 +#define SYMF_FRAMEREL 0x00000020 +#define SYMF_PARAMETER 0x00000040 +#define SYMF_LOCAL 0x00000080 +#define SYMF_CONSTANT 0x00000100 +#define SYMF_EXPORT 0x00000200 +#define SYMF_FORWARDER 0x00000400 +#define SYMF_FUNCTION 0x00000800 +#define SYMF_VIRTUAL 0x00001000 +#define SYMF_THUNK 0x00002000 +#define SYMF_TLSREL 0x00004000 + +#define IMAGEHLP_SYMBOL_INFO_VALUEPRESENT 1 +#define IMAGEHLP_SYMBOL_INFO_REGISTER SYMF_REGISTER +#define IMAGEHLP_SYMBOL_INFO_REGRELATIVE SYMF_REGREL +#define IMAGEHLP_SYMBOL_INFO_FRAMERELATIVE SYMF_FRAMEREL +#define IMAGEHLP_SYMBOL_INFO_PARAMETER SYMF_PARAMETER +#define IMAGEHLP_SYMBOL_INFO_LOCAL SYMF_LOCAL +#define IMAGEHLP_SYMBOL_INFO_CONSTANT SYMF_CONSTANT +#define IMAGEHLP_SYMBOL_FUNCTION SYMF_FUNCTION +#define IMAGEHLP_SYMBOL_VIRTUAL SYMF_VIRTUAL +#define IMAGEHLP_SYMBOL_THUNK SYMF_THUNK +#define IMAGEHLP_SYMBOL_INFO_TLSRELATIVE SYMF_TLSREL + +#include <pshpack4.h> + +#define MINIDUMP_SIGNATURE ('PMDM') +#define MINIDUMP_VERSION (42899) + typedef DWORD RVA; + typedef ULONG64 RVA64; + + typedef struct _MINIDUMP_LOCATION_DESCRIPTOR { + ULONG32 DataSize; + RVA Rva; + } MINIDUMP_LOCATION_DESCRIPTOR; + + typedef struct _MINIDUMP_LOCATION_DESCRIPTOR64 { + ULONG64 DataSize; + RVA64 Rva; + } MINIDUMP_LOCATION_DESCRIPTOR64; + + typedef struct _MINIDUMP_MEMORY_DESCRIPTOR { + ULONG64 StartOfMemoryRange; + MINIDUMP_LOCATION_DESCRIPTOR Memory; + } MINIDUMP_MEMORY_DESCRIPTOR,*PMINIDUMP_MEMORY_DESCRIPTOR; + + typedef struct _MINIDUMP_MEMORY_DESCRIPTOR64 { + ULONG64 StartOfMemoryRange; + ULONG64 DataSize; + } MINIDUMP_MEMORY_DESCRIPTOR64,*PMINIDUMP_MEMORY_DESCRIPTOR64; + + typedef struct _MINIDUMP_HEADER { + ULONG32 Signature; + ULONG32 Version; + ULONG32 NumberOfStreams; + RVA StreamDirectoryRva; + ULONG32 CheckSum; + union { + ULONG32 Reserved; + ULONG32 TimeDateStamp; + } DUMMYUNIONNAME; + ULONG64 Flags; + } MINIDUMP_HEADER,*PMINIDUMP_HEADER; + + typedef struct _MINIDUMP_DIRECTORY { + ULONG32 StreamType; + MINIDUMP_LOCATION_DESCRIPTOR Location; + } MINIDUMP_DIRECTORY,*PMINIDUMP_DIRECTORY; + + typedef struct _MINIDUMP_STRING { + ULONG32 Length; + WCHAR Buffer [0]; + } MINIDUMP_STRING,*PMINIDUMP_STRING; + + typedef enum _MINIDUMP_STREAM_TYPE { + UnusedStream = 0,ReservedStream0 = 1,ReservedStream1 = 2,ThreadListStream = 3,ModuleListStream = 4,MemoryListStream = 5, + ExceptionStream = 6,SystemInfoStream = 7,ThreadExListStream = 8,Memory64ListStream = 9,CommentStreamA = 10,CommentStreamW = 11, + HandleDataStream = 12,FunctionTableStream = 13,UnloadedModuleListStream = 14,MiscInfoStream = 15,LastReservedStream = 0xffff + } MINIDUMP_STREAM_TYPE; + + typedef union _CPU_INFORMATION { + struct { + ULONG32 VendorId [3 ]; + ULONG32 VersionInformation; + ULONG32 FeatureInformation; + ULONG32 AMDExtendedCpuFeatures; + } X86CpuInfo; + struct { + ULONG64 ProcessorFeatures [2 ]; + } OtherCpuInfo; + } CPU_INFORMATION,*PCPU_INFORMATION; + + typedef struct _MINIDUMP_SYSTEM_INFO { + USHORT ProcessorArchitecture; + USHORT ProcessorLevel; + USHORT ProcessorRevision; + union { + USHORT Reserved0; + struct { + UCHAR NumberOfProcessors; + UCHAR ProductType; + } DUMMYSTRUCTNAME; + } DUMMYUNIONNAME; + ULONG32 MajorVersion; + ULONG32 MinorVersion; + ULONG32 BuildNumber; + ULONG32 PlatformId; + RVA CSDVersionRva; + union { + ULONG32 Reserved1; + struct { + USHORT SuiteMask; + USHORT Reserved2; + } DUMMYSTRUCTNAME; + } DUMMYUNIONNAME1; + CPU_INFORMATION Cpu; + } MINIDUMP_SYSTEM_INFO,*PMINIDUMP_SYSTEM_INFO; + + C_ASSERT (sizeof (((PPROCESS_INFORMATION)0)->dwThreadId)==4); + + typedef struct _MINIDUMP_THREAD { + ULONG32 ThreadId; + ULONG32 SuspendCount; + ULONG32 PriorityClass; + ULONG32 Priority; + ULONG64 Teb; + MINIDUMP_MEMORY_DESCRIPTOR Stack; + MINIDUMP_LOCATION_DESCRIPTOR ThreadContext; + } MINIDUMP_THREAD,*PMINIDUMP_THREAD; + + typedef struct _MINIDUMP_THREAD_LIST { + ULONG32 NumberOfThreads; + MINIDUMP_THREAD Threads [0]; + } MINIDUMP_THREAD_LIST,*PMINIDUMP_THREAD_LIST; + + typedef struct _MINIDUMP_THREAD_EX { + ULONG32 ThreadId; + ULONG32 SuspendCount; + ULONG32 PriorityClass; + ULONG32 Priority; + ULONG64 Teb; + MINIDUMP_MEMORY_DESCRIPTOR Stack; + MINIDUMP_LOCATION_DESCRIPTOR ThreadContext; + MINIDUMP_MEMORY_DESCRIPTOR BackingStore; + } MINIDUMP_THREAD_EX,*PMINIDUMP_THREAD_EX; + + typedef struct _MINIDUMP_THREAD_EX_LIST { + ULONG32 NumberOfThreads; + MINIDUMP_THREAD_EX Threads [0]; + } MINIDUMP_THREAD_EX_LIST,*PMINIDUMP_THREAD_EX_LIST; + + typedef struct _MINIDUMP_EXCEPTION { + ULONG32 ExceptionCode; + ULONG32 ExceptionFlags; + ULONG64 ExceptionRecord; + ULONG64 ExceptionAddress; + ULONG32 NumberParameters; + ULONG32 __unusedAlignment; + ULONG64 ExceptionInformation [EXCEPTION_MAXIMUM_PARAMETERS ]; + } MINIDUMP_EXCEPTION,*PMINIDUMP_EXCEPTION; + + typedef struct MINIDUMP_EXCEPTION_STREAM { + ULONG32 ThreadId; + ULONG32 __alignment; + MINIDUMP_EXCEPTION ExceptionRecord; + MINIDUMP_LOCATION_DESCRIPTOR ThreadContext; + } MINIDUMP_EXCEPTION_STREAM,*PMINIDUMP_EXCEPTION_STREAM; + + typedef struct _MINIDUMP_MODULE { + ULONG64 BaseOfImage; + ULONG32 SizeOfImage; + ULONG32 CheckSum; + ULONG32 TimeDateStamp; + RVA ModuleNameRva; + VS_FIXEDFILEINFO VersionInfo; + MINIDUMP_LOCATION_DESCRIPTOR CvRecord; + MINIDUMP_LOCATION_DESCRIPTOR MiscRecord; + ULONG64 Reserved0; + ULONG64 Reserved1; + } MINIDUMP_MODULE,*PMINIDUMP_MODULE; + + typedef struct _MINIDUMP_MODULE_LIST { + ULONG32 NumberOfModules; + MINIDUMP_MODULE Modules [0 ]; + } MINIDUMP_MODULE_LIST,*PMINIDUMP_MODULE_LIST; + + typedef struct _MINIDUMP_MEMORY_LIST { + ULONG32 NumberOfMemoryRanges; + MINIDUMP_MEMORY_DESCRIPTOR MemoryRanges [0]; + } MINIDUMP_MEMORY_LIST,*PMINIDUMP_MEMORY_LIST; + + typedef struct _MINIDUMP_MEMORY64_LIST { + ULONG64 NumberOfMemoryRanges; + RVA64 BaseRva; + MINIDUMP_MEMORY_DESCRIPTOR64 MemoryRanges [0]; + } MINIDUMP_MEMORY64_LIST,*PMINIDUMP_MEMORY64_LIST; + + typedef struct _MINIDUMP_EXCEPTION_INFORMATION { + DWORD ThreadId; + PEXCEPTION_POINTERS ExceptionPointers; + BOOL ClientPointers; + } MINIDUMP_EXCEPTION_INFORMATION,*PMINIDUMP_EXCEPTION_INFORMATION; + + typedef struct _MINIDUMP_EXCEPTION_INFORMATION64 { + DWORD ThreadId; + ULONG64 ExceptionRecord; + ULONG64 ContextRecord; + BOOL ClientPointers; + } MINIDUMP_EXCEPTION_INFORMATION64,*PMINIDUMP_EXCEPTION_INFORMATION64; + + typedef struct _MINIDUMP_HANDLE_DESCRIPTOR { + ULONG64 Handle; + RVA TypeNameRva; + RVA ObjectNameRva; + ULONG32 Attributes; + ULONG32 GrantedAccess; + ULONG32 HandleCount; + ULONG32 PointerCount; + } MINIDUMP_HANDLE_DESCRIPTOR,*PMINIDUMP_HANDLE_DESCRIPTOR; + + typedef struct _MINIDUMP_HANDLE_DATA_STREAM { + ULONG32 SizeOfHeader; + ULONG32 SizeOfDescriptor; + ULONG32 NumberOfDescriptors; + ULONG32 Reserved; + } MINIDUMP_HANDLE_DATA_STREAM,*PMINIDUMP_HANDLE_DATA_STREAM; + + typedef struct _MINIDUMP_FUNCTION_TABLE_DESCRIPTOR { + ULONG64 MinimumAddress; + ULONG64 MaximumAddress; + ULONG64 BaseAddress; + ULONG32 EntryCount; + ULONG32 SizeOfAlignPad; + } MINIDUMP_FUNCTION_TABLE_DESCRIPTOR,*PMINIDUMP_FUNCTION_TABLE_DESCRIPTOR; + + typedef struct _MINIDUMP_FUNCTION_TABLE_STREAM { + ULONG32 SizeOfHeader; + ULONG32 SizeOfDescriptor; + ULONG32 SizeOfNativeDescriptor; + ULONG32 SizeOfFunctionEntry; + ULONG32 NumberOfDescriptors; + ULONG32 SizeOfAlignPad; + } MINIDUMP_FUNCTION_TABLE_STREAM,*PMINIDUMP_FUNCTION_TABLE_STREAM; + + typedef struct _MINIDUMP_UNLOADED_MODULE { + ULONG64 BaseOfImage; + ULONG32 SizeOfImage; + ULONG32 CheckSum; + ULONG32 TimeDateStamp; + RVA ModuleNameRva; + } MINIDUMP_UNLOADED_MODULE,*PMINIDUMP_UNLOADED_MODULE; + + typedef struct _MINIDUMP_UNLOADED_MODULE_LIST { + ULONG32 SizeOfHeader; + ULONG32 SizeOfEntry; + ULONG32 NumberOfEntries; + } MINIDUMP_UNLOADED_MODULE_LIST,*PMINIDUMP_UNLOADED_MODULE_LIST; + +#define MINIDUMP_MISC1_PROCESS_ID 0x00000001 +#define MINIDUMP_MISC1_PROCESS_TIMES 0x00000002 + + typedef struct _MINIDUMP_MISC_INFO { + ULONG32 SizeOfInfo; + ULONG32 Flags1; + ULONG32 ProcessId; + ULONG32 ProcessCreateTime; + ULONG32 ProcessUserTime; + ULONG32 ProcessKernelTime; + } MINIDUMP_MISC_INFO,*PMINIDUMP_MISC_INFO; + + typedef struct _MINIDUMP_USER_RECORD { + ULONG32 Type; + MINIDUMP_LOCATION_DESCRIPTOR Memory; + } MINIDUMP_USER_RECORD,*PMINIDUMP_USER_RECORD; + + typedef struct _MINIDUMP_USER_STREAM { + ULONG32 Type; + ULONG BufferSize; + PVOID Buffer; + } MINIDUMP_USER_STREAM,*PMINIDUMP_USER_STREAM; + + typedef struct _MINIDUMP_USER_STREAM_INFORMATION { + ULONG UserStreamCount; + PMINIDUMP_USER_STREAM UserStreamArray; + } MINIDUMP_USER_STREAM_INFORMATION,*PMINIDUMP_USER_STREAM_INFORMATION; + + typedef enum _MINIDUMP_CALLBACK_TYPE { + ModuleCallback,ThreadCallback,ThreadExCallback,IncludeThreadCallback,IncludeModuleCallback,MemoryCallback + } MINIDUMP_CALLBACK_TYPE; + + typedef struct _MINIDUMP_THREAD_CALLBACK { + ULONG ThreadId; + HANDLE ThreadHandle; + CONTEXT Context; + ULONG SizeOfContext; + ULONG64 StackBase; + ULONG64 StackEnd; + } MINIDUMP_THREAD_CALLBACK,*PMINIDUMP_THREAD_CALLBACK; + + typedef struct _MINIDUMP_THREAD_EX_CALLBACK { + ULONG ThreadId; + HANDLE ThreadHandle; + CONTEXT Context; + ULONG SizeOfContext; + ULONG64 StackBase; + ULONG64 StackEnd; + ULONG64 BackingStoreBase; + ULONG64 BackingStoreEnd; + } MINIDUMP_THREAD_EX_CALLBACK,*PMINIDUMP_THREAD_EX_CALLBACK; + + typedef struct _MINIDUMP_INCLUDE_THREAD_CALLBACK { + ULONG ThreadId; + } MINIDUMP_INCLUDE_THREAD_CALLBACK,*PMINIDUMP_INCLUDE_THREAD_CALLBACK; + + typedef enum _THREAD_WRITE_FLAGS { + ThreadWriteThread = 0x0001,ThreadWriteStack = 0x0002,ThreadWriteContext = 0x0004,ThreadWriteBackingStore = 0x0008, + ThreadWriteInstructionWindow = 0x0010,ThreadWriteThreadData = 0x0020 + } THREAD_WRITE_FLAGS; + + typedef struct _MINIDUMP_MODULE_CALLBACK { + PWCHAR FullPath; + ULONG64 BaseOfImage; + ULONG SizeOfImage; + ULONG CheckSum; + ULONG TimeDateStamp; + VS_FIXEDFILEINFO VersionInfo; + PVOID CvRecord; + ULONG SizeOfCvRecord; + PVOID MiscRecord; + ULONG SizeOfMiscRecord; + } MINIDUMP_MODULE_CALLBACK,*PMINIDUMP_MODULE_CALLBACK; + + typedef struct _MINIDUMP_INCLUDE_MODULE_CALLBACK { + ULONG64 BaseOfImage; + } MINIDUMP_INCLUDE_MODULE_CALLBACK,*PMINIDUMP_INCLUDE_MODULE_CALLBACK; + + typedef enum _MODULE_WRITE_FLAGS { + ModuleWriteModule = 0x0001,ModuleWriteDataSeg = 0x0002,ModuleWriteMiscRecord = 0x0004,ModuleWriteCvRecord = 0x0008, + ModuleReferencedByMemory = 0x0010 + } MODULE_WRITE_FLAGS; + + typedef struct _MINIDUMP_CALLBACK_INPUT { + ULONG ProcessId; + HANDLE ProcessHandle; + ULONG CallbackType; + union { + MINIDUMP_THREAD_CALLBACK Thread; + MINIDUMP_THREAD_EX_CALLBACK ThreadEx; + MINIDUMP_MODULE_CALLBACK Module; + MINIDUMP_INCLUDE_THREAD_CALLBACK IncludeThread; + MINIDUMP_INCLUDE_MODULE_CALLBACK IncludeModule; + } DUMMYUNIONNAME; + } MINIDUMP_CALLBACK_INPUT,*PMINIDUMP_CALLBACK_INPUT; + + typedef struct _MINIDUMP_CALLBACK_OUTPUT { + union { + ULONG ModuleWriteFlags; + ULONG ThreadWriteFlags; + struct { + ULONG64 MemoryBase; + ULONG MemorySize; + } DUMMYSTRUCTNAME; + } DUMMYUNIONNAME; + } MINIDUMP_CALLBACK_OUTPUT,*PMINIDUMP_CALLBACK_OUTPUT; + + typedef enum _MINIDUMP_TYPE { + MiniDumpNormal = 0x0000,MiniDumpWithDataSegs = 0x0001,MiniDumpWithFullMemory = 0x0002,MiniDumpWithHandleData = 0x0004, + MiniDumpFilterMemory = 0x0008,MiniDumpScanMemory = 0x0010,MiniDumpWithUnloadedModules = 0x0020,MiniDumpWithIndirectlyReferencedMemory = 0x0040, + MiniDumpFilterModulePaths = 0x0080,MiniDumpWithProcessThreadData = 0x0100,MiniDumpWithPrivateReadWriteMemory = 0x0200, + MiniDumpWithoutOptionalData = 0x0400 + } MINIDUMP_TYPE; + + typedef BOOL (WINAPI *MINIDUMP_CALLBACK_ROUTINE)(PVOID CallbackParam,CONST PMINIDUMP_CALLBACK_INPUT CallbackInput,PMINIDUMP_CALLBACK_OUTPUT CallbackOutput); + + typedef struct _MINIDUMP_CALLBACK_INFORMATION { + MINIDUMP_CALLBACK_ROUTINE CallbackRoutine; + PVOID CallbackParam; + } MINIDUMP_CALLBACK_INFORMATION,*PMINIDUMP_CALLBACK_INFORMATION; + +#define RVA_TO_ADDR(Mapping,Rva) ((PVOID)(((ULONG_PTR) (Mapping)) + (Rva))) + + BOOL WINAPI MiniDumpWriteDump(HANDLE hProcess,DWORD ProcessId,HANDLE hFile,MINIDUMP_TYPE DumpType,CONST PMINIDUMP_EXCEPTION_INFORMATION ExceptionParam,CONST PMINIDUMP_USER_STREAM_INFORMATION UserStreamParam,CONST PMINIDUMP_CALLBACK_INFORMATION CallbackParam); + BOOL WINAPI MiniDumpReadDumpStream(PVOID BaseOfDump,ULONG StreamNumber,PMINIDUMP_DIRECTORY *Dir,PVOID *StreamPointer,ULONG *StreamSize); + +#include <poppack.h> + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index c11f7d383d..421726b40e 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -128,21 +128,6 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; } - /* vertex shader - still required to provide the linkage between - * fragment shader input semantics and vertex_element/buffers. - */ - { - const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC }; - const uint semantic_indexes[] = { 0, 0 }; - ctx->vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names, - semantic_indexes); - } - - /* fragment shader */ - ctx->fs[TGSI_WRITEMASK_XYZW] = - util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D, - TGSI_INTERPOLATE_LINEAR); ctx->vbuf = NULL; /* init vertex data that doesn't change */ @@ -170,7 +155,8 @@ util_destroy_blit(struct blit_state *ctx) struct pipe_context *pipe = ctx->pipe; unsigned i; - pipe->delete_vs_state(pipe, ctx->vs); + if (ctx->vs) + pipe->delete_vs_state(pipe, ctx->vs); for (i = 0; i < Elements(ctx->fs); i++) if (ctx->fs[i]) @@ -186,6 +172,59 @@ util_destroy_blit(struct blit_state *ctx) /** + * Helper function to set the fragment shaders. + */ +static INLINE void +set_fragment_shader(struct blit_state *ctx, uint writemask) +{ + if (!ctx->fs[writemask]) + ctx->fs[writemask] = + util_make_fragment_tex_shader_writemask(ctx->pipe, TGSI_TEXTURE_2D, + TGSI_INTERPOLATE_LINEAR, + writemask); + + cso_set_fragment_shader_handle(ctx->cso, ctx->fs[writemask]); +} + + +/** + * Helper function to set the depthwrite shader. + */ +static INLINE void +set_depth_fragment_shader(struct blit_state *ctx) +{ + if (!ctx->fs_depth) + ctx->fs_depth = + util_make_fragment_tex_shader_writedepth(ctx->pipe, TGSI_TEXTURE_2D, + TGSI_INTERPOLATE_LINEAR); + + cso_set_fragment_shader_handle(ctx->cso, ctx->fs_depth); +} + + +/** + * Helper function to set the vertex shader. + */ +static INLINE void +set_vertex_shader(struct blit_state *ctx) +{ + /* vertex shader - still required to provide the linkage between + * fragment shader input semantics and vertex_element/buffers. + */ + if (!ctx->vs) { + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC }; + const uint semantic_indexes[] = { 0, 0 }; + ctx->vs = util_make_vertex_passthrough_shader(ctx->pipe, 2, + semantic_names, + semantic_indexes); + } + + cso_set_vertex_shader_handle(ctx->cso, ctx->vs); +} + + +/** * Get offset of next free slot in vertex buffer for quad vertices. */ static unsigned @@ -199,6 +238,7 @@ get_next_slot( struct blit_state *ctx ) if (!ctx->vbuf) { ctx->vbuf = pipe_buffer_create(ctx->pipe->screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, max_slots * sizeof ctx->vertices); } @@ -303,8 +343,10 @@ util_blit_pixels_writemask(struct blit_state *ctx, { struct pipe_context *pipe = ctx->pipe; struct pipe_screen *screen = pipe->screen; + enum pipe_format src_format, dst_format; struct pipe_sampler_view *sampler_view = NULL; struct pipe_sampler_view sv_templ; + struct pipe_surface *dst_surface; struct pipe_framebuffer_state fb; const int srcW = abs(srcX1 - srcX0); const int srcH = abs(srcY1 - srcY0); @@ -325,12 +367,15 @@ util_blit_pixels_writemask(struct blit_state *ctx, regions_overlap(srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1); + src_format = util_format_linear(src_tex->format); + dst_format = util_format_linear(dst->format); + /* * Check for simple case: no format conversion, no flipping, no stretching, * no overlapping. * Filter mode should not matter since there's no stretching. */ - if (dst->format == src_tex->format && + if (dst_format == src_format && srcX0 < srcX1 && dstX0 < dstX1 && srcY0 < srcY1 && @@ -353,6 +398,14 @@ util_blit_pixels_writemask(struct blit_state *ctx, return; } + if (dst_format == dst->format) { + dst_surface = dst; + } else { + struct pipe_surface templ = *dst; + templ.format = dst_format; + dst_surface = pipe->create_surface(pipe, dst->texture, &templ); + } + /* Create a temporary texture when src and dest alias or when src * is anything other than a 2d texture. * XXX should just use appropriate shader to access 1d / 3d slice / cube face, @@ -360,9 +413,9 @@ util_blit_pixels_writemask(struct blit_state *ctx, * * This can still be improved upon. */ - if ((src_tex == dst->texture && - dst->u.tex.level == src_level && - dst->u.tex.first_layer == srcZ0) || + if ((src_tex == dst_surface->texture && + dst_surface->u.tex.level == src_level && + dst_surface->u.tex.first_layer == srcZ0) || (src_tex->target != PIPE_TEXTURE_2D && src_tex->target != PIPE_TEXTURE_2D && src_tex->target != PIPE_TEXTURE_RECT)) @@ -391,7 +444,7 @@ util_blit_pixels_writemask(struct blit_state *ctx, /* create temp texture */ memset(&texTemp, 0, sizeof(texTemp)); texTemp.target = ctx->internal_target; - texTemp.format = src_tex->format; + texTemp.format = src_format; texTemp.last_level = 0; texTemp.width0 = srcW; texTemp.height0 = srcH; @@ -438,7 +491,7 @@ util_blit_pixels_writemask(struct blit_state *ctx, pipe_resource_reference(&tex, NULL); } else { - u_sampler_view_default_template(&sv_templ, src_tex, src_tex->format); + u_sampler_view_default_template(&sv_templ, src_tex, src_format); sampler_view = pipe->create_sampler_view(pipe, src_tex, &sv_templ); if (!sampler_view) { @@ -459,15 +512,15 @@ util_blit_pixels_writemask(struct blit_state *ctx, } } - dst_is_depth = util_format_is_depth_or_stencil(dst->format); + dst_is_depth = util_format_is_depth_or_stencil(dst_format); assert(screen->is_format_supported(screen, sampler_view->format, ctx->internal_target, sampler_view->texture->nr_samples, - PIPE_BIND_SAMPLER_VIEW, 0)); - assert(screen->is_format_supported(screen, dst->format, ctx->internal_target, - dst->texture->nr_samples, + PIPE_BIND_SAMPLER_VIEW)); + assert(screen->is_format_supported(screen, dst_format, ctx->internal_target, + dst_surface->texture->nr_samples, dst_is_depth ? PIPE_BIND_DEPTH_STENCIL : - PIPE_BIND_RENDER_TARGET, 0)); + PIPE_BIND_RENDER_TARGET)); /* save state (restored below) */ cso_save_blend(ctx->cso); cso_save_depth_stencil_alpha(ctx->cso); @@ -480,6 +533,7 @@ util_blit_pixels_writemask(struct blit_state *ctx, cso_save_vertex_shader(ctx->cso); cso_save_clip(ctx->cso); cso_save_vertex_elements(ctx->cso); + cso_save_vertex_buffers(ctx->cso); /* set misc state we care about */ cso_set_blend(ctx->cso, &ctx->blend); @@ -500,12 +554,12 @@ util_blit_pixels_writemask(struct blit_state *ctx, cso_single_sampler_done(ctx->cso); /* viewport */ - ctx->viewport.scale[0] = 0.5f * dst->width; - ctx->viewport.scale[1] = 0.5f * dst->height; + ctx->viewport.scale[0] = 0.5f * dst_surface->width; + ctx->viewport.scale[1] = 0.5f * dst_surface->height; ctx->viewport.scale[2] = 0.5f; ctx->viewport.scale[3] = 1.0f; - ctx->viewport.translate[0] = 0.5f * dst->width; - ctx->viewport.translate[1] = 0.5f * dst->height; + ctx->viewport.translate[0] = 0.5f * dst_surface->width; + ctx->viewport.translate[1] = 0.5f * dst_surface->height; ctx->viewport.translate[2] = 0.5f; ctx->viewport.translate[3] = 0.0f; cso_set_viewport(ctx->cso, &ctx->viewport); @@ -515,46 +569,35 @@ util_blit_pixels_writemask(struct blit_state *ctx, /* shaders */ if (dst_is_depth) { - if (ctx->fs_depth == NULL) - ctx->fs_depth = - util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_2D, - TGSI_INTERPOLATE_LINEAR); - - cso_set_fragment_shader_handle(ctx->cso, ctx->fs_depth); + set_depth_fragment_shader(ctx); } else { - if (ctx->fs[writemask] == NULL) - ctx->fs[writemask] = - util_make_fragment_tex_shader_writemask(pipe, TGSI_TEXTURE_2D, - TGSI_INTERPOLATE_LINEAR, - writemask); - - cso_set_fragment_shader_handle(ctx->cso, ctx->fs[writemask]); + set_fragment_shader(ctx, writemask); } - cso_set_vertex_shader_handle(ctx->cso, ctx->vs); + set_vertex_shader(ctx); /* drawing dest */ memset(&fb, 0, sizeof(fb)); - fb.width = dst->width; - fb.height = dst->height; + fb.width = dst_surface->width; + fb.height = dst_surface->height; if (dst_is_depth) { - fb.zsbuf = dst; + fb.zsbuf = dst_surface; } else { fb.nr_cbufs = 1; - fb.cbufs[0] = dst; + fb.cbufs[0] = dst_surface; } cso_set_framebuffer(ctx->cso, &fb); /* draw quad */ offset = setup_vertex_data_tex(ctx, - (float) dstX0 / dst->width * 2.0f - 1.0f, - (float) dstY0 / dst->height * 2.0f - 1.0f, - (float) dstX1 / dst->width * 2.0f - 1.0f, - (float) dstY1 / dst->height * 2.0f - 1.0f, + (float) dstX0 / dst_surface->width * 2.0f - 1.0f, + (float) dstY0 / dst_surface->height * 2.0f - 1.0f, + (float) dstX1 / dst_surface->width * 2.0f - 1.0f, + (float) dstY1 / dst_surface->height * 2.0f - 1.0f, s0, t0, s1, t1, z); - util_draw_vertex_buffer(ctx->pipe, ctx->vbuf, offset, + util_draw_vertex_buffer(ctx->pipe, ctx->cso, ctx->vbuf, offset, PIPE_PRIM_TRIANGLE_FAN, 4, /* verts */ 2); /* attribs/vert */ @@ -571,8 +614,11 @@ util_blit_pixels_writemask(struct blit_state *ctx, cso_restore_vertex_shader(ctx->cso); cso_restore_clip(ctx->cso); cso_restore_vertex_elements(ctx->cso); + cso_restore_vertex_buffers(ctx->cso); pipe_sampler_view_reference(&sampler_view, NULL); + if (dst_surface != dst) + pipe_surface_reference(&dst_surface, NULL); } @@ -657,8 +703,7 @@ util_blit_pixels_tex(struct blit_state *ctx, assert(ctx->pipe->screen->is_format_supported(ctx->pipe->screen, dst->format, PIPE_TEXTURE_2D, dst->texture->nr_samples, - PIPE_BIND_RENDER_TARGET, - 0)); + PIPE_BIND_RENDER_TARGET)); /* save state (restored below) */ cso_save_blend(ctx->cso); @@ -672,6 +717,7 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_save_vertex_shader(ctx->cso); cso_save_clip(ctx->cso); cso_save_vertex_elements(ctx->cso); + cso_save_vertex_buffers(ctx->cso); /* set misc state we care about */ cso_set_blend(ctx->cso, &ctx->blend); @@ -702,8 +748,8 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_set_fragment_sampler_views(ctx->cso, 1, &src_sampler_view); /* shaders */ - cso_set_fragment_shader_handle(ctx->cso, ctx->fs[TGSI_WRITEMASK_XYZW]); - cso_set_vertex_shader_handle(ctx->cso, ctx->vs); + set_fragment_shader(ctx, TGSI_WRITEMASK_XYZW); + set_vertex_shader(ctx); /* drawing dest */ memset(&fb, 0, sizeof(fb)); @@ -722,7 +768,7 @@ util_blit_pixels_tex(struct blit_state *ctx, s0, t0, s1, t1, z); - util_draw_vertex_buffer(ctx->pipe, + util_draw_vertex_buffer(ctx->pipe, ctx->cso, ctx->vbuf, offset, PIPE_PRIM_TRIANGLE_FAN, 4, /* verts */ @@ -740,4 +786,5 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_restore_vertex_shader(ctx->cso); cso_restore_clip(ctx->cso); cso_restore_vertex_elements(ctx->cso); + cso_restore_vertex_buffers(ctx->cso); } diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 4c986e3565..a4c399052f 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -86,7 +86,6 @@ struct blitter_context_priv void *dsa_write_depth_keep_stencil; void *dsa_keep_depth_stencil; void *dsa_keep_depth_write_stencil; - void *dsa_flush_depth_stencil; void *velem_state; @@ -156,10 +155,6 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) ctx->dsa_keep_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &dsa); - dsa.depth.writemask = 1; - ctx->dsa_flush_depth_stencil = - pipe->create_depth_stencil_alpha_state(pipe, &dsa); - dsa.depth.enabled = 1; dsa.depth.writemask = 1; dsa.depth.func = PIPE_FUNC_ALWAYS; @@ -225,9 +220,10 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) ctx->vertices[i][0][3] = 1; /*v.w*/ /* create the vertex buffer */ - ctx->vbuf = pipe_buffer_create(ctx->base.pipe->screen, - PIPE_BIND_VERTEX_BUFFER, - sizeof(ctx->vertices)); + ctx->vbuf = pipe_user_buffer_create(ctx->base.pipe->screen, + ctx->vertices, + sizeof(ctx->vertices), + PIPE_BIND_VERTEX_BUFFER); return &ctx->base; } @@ -245,7 +241,6 @@ void util_blitter_destroy(struct blitter_context *blitter) ctx->dsa_write_depth_keep_stencil); pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil); pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_write_stencil); - pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil); pipe->delete_rasterizer_state(pipe, ctx->rs_state); pipe->delete_vs_state(pipe, ctx->vs); @@ -272,6 +267,12 @@ void util_blitter_destroy(struct blitter_context *blitter) static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx) { + if (ctx->base.running) { + _debug_printf("u_blitter: Caught recursion on save. " + "This is a driver bug.\n"); + } + ctx->base.running = TRUE; + /* make sure these CSOs have been saved */ assert(ctx->base.saved_blend_state != INVALID_PTR && ctx->base.saved_dsa_state != INVALID_PTR && @@ -302,7 +303,6 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx) ctx->base.saved_velem_state = INVALID_PTR; pipe->set_stencil_ref(pipe, &ctx->base.saved_stencil_ref); - pipe->set_viewport_state(pipe, &ctx->base.saved_viewport); pipe->set_clip_state(pipe, &ctx->base.saved_clip); @@ -346,6 +346,12 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx) } ctx->base.saved_num_vertex_buffers = ~0; } + + if (!ctx->base.running) { + _debug_printf("u_blitter: Caught recursion on restore. " + "This is a driver bug.\n"); + } + ctx->base.running = FALSE; } static void blitter_set_rectangle(struct blitter_context_priv *ctx, @@ -509,22 +515,6 @@ static void blitter_set_dst_dimensions(struct blitter_context_priv *ctx, ctx->dst_height = height; } -static void blitter_draw_quad(struct blitter_context_priv *ctx) -{ - struct pipe_context *pipe = ctx->base.pipe; - struct pipe_box box; - - /* write vertices and draw them */ - u_box_1d(0, sizeof(ctx->vertices), &box); - pipe->transfer_inline_write(pipe, ctx->vbuf, 0, - PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, - &box, ctx->vertices, sizeof(ctx->vertices), 0); - - util_draw_vertex_buffer(pipe, ctx->vbuf, 0, PIPE_PRIM_TRIANGLE_FAN, - 4, /* verts */ - 2); /* attribs/vert */ -} - static INLINE void **blitter_get_sampler_state(struct blitter_context_priv *ctx, int miplevel, boolean normalized) @@ -649,15 +639,19 @@ static void blitter_draw_rectangle(struct blitter_context *blitter, } blitter_set_rectangle(ctx, x1, y1, x2, y2, depth); - blitter_draw_quad(ctx); + ctx->base.pipe->redefine_user_buffer(ctx->base.pipe, ctx->vbuf, + 0, ctx->vbuf->width0); + util_draw_vertex_buffer(ctx->base.pipe, NULL, ctx->vbuf, 0, + PIPE_PRIM_TRIANGLE_FAN, 4, 2); } -void util_blitter_clear(struct blitter_context *blitter, - unsigned width, unsigned height, - unsigned num_cbufs, - unsigned clear_buffers, - const float *rgba, - double depth, unsigned stencil) +static void util_blitter_clear_custom(struct blitter_context *blitter, + unsigned width, unsigned height, + unsigned num_cbufs, + unsigned clear_buffers, + const float *rgba, + double depth, unsigned stencil, + void *custom_blend, void *custom_dsa) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; struct pipe_context *pipe = ctx->base.pipe; @@ -668,26 +662,28 @@ void util_blitter_clear(struct blitter_context *blitter, blitter_check_saved_CSOs(ctx); /* bind CSOs */ - if (clear_buffers & PIPE_CLEAR_COLOR) + if (custom_blend) { + pipe->bind_blend_state(pipe, custom_blend); + } else if (clear_buffers & PIPE_CLEAR_COLOR) { pipe->bind_blend_state(pipe, ctx->blend_write_color); - else + } else { pipe->bind_blend_state(pipe, ctx->blend_keep_color); + } - if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) { - sr.ref_value[0] = stencil & 0xff; + if (custom_dsa) { + pipe->bind_depth_stencil_alpha_state(pipe, custom_dsa); + } else if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) { pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil); - pipe->set_stencil_ref(pipe, &sr); - } - else if (clear_buffers & PIPE_CLEAR_DEPTH) { + } else if (clear_buffers & PIPE_CLEAR_DEPTH) { pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_keep_stencil); - } - else if (clear_buffers & PIPE_CLEAR_STENCIL) { - sr.ref_value[0] = stencil & 0xff; + } else if (clear_buffers & PIPE_CLEAR_STENCIL) { pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_write_stencil); - pipe->set_stencil_ref(pipe, &sr); - } - else + } else { pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); + } + + sr.ref_value[0] = stencil & 0xff; + pipe->set_stencil_ref(pipe, &sr); pipe->bind_rasterizer_state(pipe, ctx->rs_state); pipe->bind_vertex_elements_state(pipe, ctx->velem_state); @@ -700,6 +696,27 @@ void util_blitter_clear(struct blitter_context *blitter, blitter_restore_CSOs(ctx); } +void util_blitter_clear(struct blitter_context *blitter, + unsigned width, unsigned height, + unsigned num_cbufs, + unsigned clear_buffers, + const float *rgba, + double depth, unsigned stencil) +{ + util_blitter_clear_custom(blitter, width, height, num_cbufs, + clear_buffers, rgba, depth, stencil, + NULL, NULL); +} + +void util_blitter_clear_depth_custom(struct blitter_context *blitter, + unsigned width, unsigned height, + double depth, void *custom_dsa) +{ + const float rgba[4] = {0, 0, 0, 0}; + util_blitter_clear_custom(blitter, width, height, 0, + 0, rgba, depth, 0, NULL, custom_dsa); +} + static boolean is_overlap(unsigned sx1, unsigned sx2, unsigned sy1, unsigned sy2, unsigned dx1, unsigned dx2, unsigned dy1, unsigned dy2) @@ -737,9 +754,6 @@ void util_blitter_copy_region(struct blitter_context *blitter, if (dst == src) { assert(!is_overlap(srcbox->x, srcbox->x + width, srcbox->y, srcbox->y + height, dstx, dstx + width, dsty, dsty + height)); - } else { - assert(util_is_format_compatible(util_format_description(dst->format), - util_format_description(src->format))); } assert(src->target < PIPE_MAX_TEXTURE_TYPES); /* XXX should handle 3d regions */ @@ -758,17 +772,20 @@ void util_blitter_copy_region(struct blitter_context *blitter, /* (assuming copying a stencil buffer is not possible) */ if ((!ignore_stencil && is_stencil) || !screen->is_format_supported(screen, dst->format, dst->target, - dst->nr_samples, bind, 0) || + dst->nr_samples, bind) || !screen->is_format_supported(screen, src->format, src->target, - src->nr_samples, PIPE_BIND_SAMPLER_VIEW, 0)) { + src->nr_samples, PIPE_BIND_SAMPLER_VIEW)) { + ctx->base.running = TRUE; util_resource_copy_region(pipe, dst, dstlevel, dstx, dsty, dstz, src, srclevel, srcbox); + ctx->base.running = FALSE; return; } /* Get surface. */ memset(&surf_templ, 0, sizeof(surf_templ)); u_surface_default_template(&surf_templ, dst, bind); + surf_templ.format = util_format_linear(dst->format); surf_templ.u.tex.level = dstlevel; surf_templ.u.tex.first_layer = dstz; surf_templ.u.tex.last_layer = dstz; @@ -807,7 +824,7 @@ void util_blitter_copy_region(struct blitter_context *blitter, normalized = src->target != PIPE_TEXTURE_RECT; /* Initialize sampler view. */ - u_sampler_view_default_template(&viewTempl, src, src->format); + u_sampler_view_default_template(&viewTempl, src, util_format_linear(src->format)); view = pipe->create_sampler_view(pipe, src, &viewTempl); /* Set rasterizer state, shaders, and textures. */ @@ -853,7 +870,10 @@ void util_blitter_copy_region(struct blitter_context *blitter, /* Draw. */ blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, 0); - blitter_draw_quad(ctx); + ctx->base.pipe->redefine_user_buffer(ctx->base.pipe, ctx->vbuf, + 0, ctx->vbuf->width0); + util_draw_vertex_buffer(ctx->base.pipe, NULL, ctx->vbuf, 0, + PIPE_PRIM_TRIANGLE_FAN, 4, 2); break; default: @@ -1014,12 +1034,3 @@ void util_blitter_custom_depth_stencil(struct blitter_context *blitter, UTIL_BLITTER_ATTRIB_NONE, NULL); blitter_restore_CSOs(ctx); } - -/* flush a region of a depth stencil surface for r300g */ -void util_blitter_flush_depth_stencil(struct blitter_context *blitter, - struct pipe_surface *dstsurf) -{ - struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; - util_blitter_custom_depth_stencil(blitter, dstsurf, NULL, - ctx->dsa_flush_depth_stencil, 0.0f); -} diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index c5660cf2d0..41470d92bb 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -55,13 +55,13 @@ struct blitter_context * \param y1 A Y coordinate of the top-left corner. * \param x2 An X coordinate of the bottom-right corner. * \param y2 A Y coordinate of the bottom-right corner. - * \param depth A depth which the rectangle is rendered at. + * \param depth A depth which the rectangle is rendered at. * * \param type Semantics of the attributes "attrib". * If type is UTIL_BLITTER_ATTRIB_NONE, ignore them. * If type is UTIL_BLITTER_ATTRIB_COLOR, the attributes - * make up a constant RGBA color, and should go to the COLOR0 - * varying slot of a fragment shader. + * make up a constant RGBA color, and should go + * to the GENERIC0 varying slot of a fragment shader. * If type is UTIL_BLITTER_ATTRIB_TEXCOORD, {a1, a2} and * {a3, a4} specify top-left and bottom-right texture * coordinates of the rectangle, respectively, and should go @@ -79,6 +79,9 @@ struct blitter_context enum blitter_attrib_type type, const float attrib[4]); + /* Whether the blitter is running. */ + boolean running; + /* Private members, really. */ struct pipe_context *pipe; /**< pipe context */ @@ -141,6 +144,10 @@ void util_blitter_clear(struct blitter_context *blitter, const float *rgba, double depth, unsigned stencil); +void util_blitter_clear_depth_custom(struct blitter_context *blitter, + unsigned width, unsigned height, + double depth, void *custom_dsa); + /** * Copy a block of pixels from one surface to another. * @@ -200,9 +207,6 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, unsigned dstx, unsigned dsty, unsigned width, unsigned height); -void util_blitter_flush_depth_stencil(struct blitter_context *blitter, - struct pipe_surface *dstsurf); - void util_blitter_custom_depth_stencil(struct blitter_context *blitter, struct pipe_surface *zsurf, struct pipe_surface *cbsurf, @@ -318,21 +322,13 @@ util_blitter_save_vertex_buffers(struct blitter_context *blitter, int num_vertex_buffers, struct pipe_vertex_buffer *vertex_buffers) { - unsigned i; assert(num_vertex_buffers <= Elements(blitter->saved_vertex_buffers)); - blitter->saved_num_vertex_buffers = num_vertex_buffers; - - for (i = 0; i < num_vertex_buffers; i++) { - if (vertex_buffers[i].buffer) { - pipe_resource_reference(&blitter->saved_vertex_buffers[i].buffer, - vertex_buffers[i].buffer); - } - } - - memcpy(blitter->saved_vertex_buffers, - vertex_buffers, - num_vertex_buffers * sizeof(struct pipe_vertex_buffer)); + blitter->saved_num_vertex_buffers = 0; + util_copy_vertex_buffers(blitter->saved_vertex_buffers, + (unsigned*)&blitter->saved_num_vertex_buffers, + vertex_buffers, + num_vertex_buffers); } #ifdef __cplusplus diff --git a/src/gallium/auxiliary/util/u_cache.c b/src/gallium/auxiliary/util/u_cache.c index 15f4a8831f..df08ec302a 100644 --- a/src/gallium/auxiliary/util/u_cache.c +++ b/src/gallium/auxiliary/util/u_cache.c @@ -27,9 +27,10 @@ /** * @file - * Simple cache implementation. + * Improved cache implementation. * - * We simply have fixed size array, and destroy previous values on collision. + * Fixed size array with linear probing on collision and LRU eviction + * on full. * * @author Jose Fonseca <jfonseca@vmware.com> */ @@ -41,10 +42,17 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_cache.h" +#include "util/u_simple_list.h" struct util_cache_entry { + enum { EMPTY = 0, FILLED, DELETED } state; + uint32_t hash; + + struct util_cache_entry *next; + struct util_cache_entry *prev; + void *key; void *value; @@ -69,11 +77,14 @@ struct util_cache struct util_cache_entry *entries; -#ifdef DEBUG unsigned count; -#endif + struct util_cache_entry lru; }; +static void +ensure_sanity(const struct util_cache *cache); + +#define CACHE_DEFAULT_ALPHA 2 struct util_cache * util_cache_create(uint32_t (*hash)(const void *key), @@ -90,6 +101,10 @@ util_cache_create(uint32_t (*hash)(const void *key), cache->hash = hash; cache->compare = compare; cache->destroy = destroy; + + make_empty_list(&cache->lru); + + size *= CACHE_DEFAULT_ALPHA; cache->size = size; cache->entries = CALLOC(size, sizeof(struct util_cache_entry)); @@ -98,19 +113,46 @@ util_cache_create(uint32_t (*hash)(const void *key), return NULL; } + ensure_sanity(cache); return cache; } -static INLINE struct util_cache_entry * +static struct util_cache_entry * util_cache_entry_get(struct util_cache *cache, + uint32_t hash, const void *key) { - uint32_t hash; - - hash = cache->hash(key); + struct util_cache_entry *first_unfilled = NULL; + uint32_t index = hash % cache->size; + uint32_t probe; + + /* Probe until we find either a matching FILLED entry or an EMPTY + * slot (which has never been occupied). + * + * Deleted or non-matching slots are not indicative of completion + * as a previous linear probe for the same key could have continued + * past this point. + */ + for (probe = 0; probe < cache->size; probe++) { + uint32_t i = (index + probe) % cache->size; + struct util_cache_entry *current = &cache->entries[i]; + + if (current->state == FILLED) { + if (current->hash == hash && + cache->compare(key, current->key) == 0) + return current; + } + else { + if (!first_unfilled) + first_unfilled = current; + + if (current->state == EMPTY) + return first_unfilled; + } + } - return &cache->entries[hash % cache->size]; + return NULL; } static INLINE void @@ -123,9 +165,15 @@ util_cache_entry_destroy(struct util_cache *cache, entry->key = NULL; entry->value = NULL; - if(key || value) + if (entry->state == FILLED) { + remove_from_list(entry); + cache->count--; + if(cache->destroy) cache->destroy(key, value); + + entry->state = DELETED; + } } @@ -135,21 +183,33 @@ util_cache_set(struct util_cache *cache, void *value) { struct util_cache_entry *entry; + uint32_t hash = cache->hash(key); assert(cache); if (!cache) return; - entry = util_cache_entry_get(cache, key); + entry = util_cache_entry_get(cache, hash, key); + if (!entry) + entry = cache->lru.prev; + + if (cache->count >= cache->size / CACHE_DEFAULT_ALPHA) + util_cache_entry_destroy(cache, cache->lru.prev); + util_cache_entry_destroy(cache, entry); #ifdef DEBUG ++entry->count; - ++cache->count; #endif entry->key = key; + entry->hash = hash; entry->value = value; + entry->state = FILLED; + insert_at_head(&cache->lru, entry); + cache->count++; + + ensure_sanity(cache); } @@ -158,17 +218,18 @@ util_cache_get(struct util_cache *cache, const void *key) { struct util_cache_entry *entry; + uint32_t hash = cache->hash(key); assert(cache); if (!cache) return NULL; - entry = util_cache_entry_get(cache, key); - if(!entry->key && !entry->value) - return NULL; - - if(cache->compare(key, entry->key) != 0) + entry = util_cache_entry_get(cache, hash, key); + if (!entry) return NULL; + + if (entry->state == FILLED) + move_to_head(&cache->lru, entry); return entry->value; } @@ -183,8 +244,14 @@ util_cache_clear(struct util_cache *cache) if (!cache) return; - for(i = 0; i < cache->size; ++i) + for(i = 0; i < cache->size; ++i) { util_cache_entry_destroy(cache, &cache->entries[i]); + cache->entries[i].state = EMPTY; + } + + assert(cache->count == 0); + assert(is_empty_list(&cache->lru)); + ensure_sanity(cache); } @@ -215,3 +282,70 @@ util_cache_destroy(struct util_cache *cache) FREE(cache->entries); FREE(cache); } + + +void +util_cache_remove(struct util_cache *cache, + const void *key) +{ + struct util_cache_entry *entry; + uint32_t hash; + + assert(cache); + if (!cache) + return; + + hash = cache->hash(key); + + entry = util_cache_entry_get(cache, hash, key); + if (!entry) + return; + + if (entry->state == FILLED) + util_cache_entry_destroy(cache, entry); + + ensure_sanity(cache); +} + + +static void +ensure_sanity(const struct util_cache *cache) +{ +#ifdef DEBUG + unsigned i, cnt = 0; + + assert(cache); + for (i = 0; i < cache->size; i++) { + struct util_cache_entry *header = &cache->entries[i]; + + assert(header); + assert(header->state == FILLED || + header->state == EMPTY || + header->state == DELETED); + if (header->state == FILLED) { + cnt++; + assert(header->hash == cache->hash(header->key)); + } + } + + assert(cnt == cache->count); + assert(cache->size >= cnt); + + if (cache->count == 0) { + assert (is_empty_list(&cache->lru)); + } + else { + struct util_cache_entry *header = cache->lru.next; + + assert (header); + assert (!is_empty_list(&cache->lru)); + + for (i = 0; i < cache->count; i++) + header = header->next; + + assert(header == &cache->lru); + } +#endif + + (void)cache; +} diff --git a/src/gallium/auxiliary/util/u_cache.h b/src/gallium/auxiliary/util/u_cache.h index 8a612c6585..be3631b725 100644 --- a/src/gallium/auxiliary/util/u_cache.h +++ b/src/gallium/auxiliary/util/u_cache.h @@ -79,6 +79,10 @@ util_cache_clear(struct util_cache *cache); void util_cache_destroy(struct util_cache *cache); +void +util_cache_remove(struct util_cache *cache, + const void *key); + #ifdef __cplusplus } diff --git a/src/gallium/auxiliary/util/u_caps.c b/src/gallium/auxiliary/util/u_caps.c index e209a98b70..75677b2b13 100644 --- a/src/gallium/auxiliary/util/u_caps.c +++ b/src/gallium/auxiliary/util/u_caps.c @@ -69,8 +69,7 @@ util_check_caps_out(struct pipe_screen *screen, const unsigned *list, int *out) list[i++], PIPE_TEXTURE_2D, 0, - PIPE_BIND_SAMPLER_VIEW, - 0)) { + PIPE_BIND_SAMPLER_VIEW)) { *out = i - 2; return FALSE; } diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 2ad2f95b13..36ce4b5771 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -44,9 +44,11 @@ #include "util/u_surface.h" #include <limits.h> /* CHAR_BIT */ +#include <ctype.h> /* isalnum */ void _debug_vprintf(const char *format, va_list ap) { +#if defined(PIPE_OS_WINDOWS) || defined(PIPE_OS_EMBEDDED) /* We buffer until we find a newline. */ static char buf[4096] = {'\0'}; size_t len = strlen(buf); @@ -55,6 +57,10 @@ void _debug_vprintf(const char *format, va_list ap) os_log_message(buf); buf[0] = '\0'; } +#else + /* Just print as-is to stderr */ + vfprintf(stderr, format, ap); +#endif } @@ -175,6 +181,48 @@ debug_get_num_option(const char *name, long dfault) return result; } +static boolean str_has_option(const char *str, const char *name) +{ + /* Empty string. */ + if (!*str) { + return FALSE; + } + + /* OPTION=all */ + if (!util_strcmp(str, "all")) { + return TRUE; + } + + /* Find 'name' in 'str' surrounded by non-alphanumeric characters. */ + { + const char *start = str; + unsigned name_len = strlen(name); + + /* 'start' is the beginning of the currently-parsed word, + * we increment 'str' each iteration. + * if we find either the end of string or a non-alphanumeric character, + * we compare 'start' up to 'str-1' with 'name'. */ + + while (1) { + if (!*str || !isalnum(*str)) { + if (str-start == name_len && + !memcmp(start, name, name_len)) { + return TRUE; + } + + if (!*str) { + return FALSE; + } + + start = str+1; + } + + str++; + } + } + + return FALSE; +} unsigned long debug_get_flags_option(const char *name, @@ -202,7 +250,7 @@ debug_get_flags_option(const char *name, else { result = 0; while( flags->name ) { - if (!util_strcmp(str, "all") || util_strstr(str, flags->name )) + if (str_has_option(str, flags->name)) result |= flags->value; ++flags; } @@ -354,6 +402,41 @@ const char *u_prim_name( unsigned prim ) +#ifdef DEBUG +int fl_indent = 0; +const char* fl_function[1024]; + +int debug_funclog_enter(const char* f, const int line, const char* file) +{ + int i; + + for (i = 0; i < fl_indent; i++) + debug_printf(" "); + debug_printf("%s\n", f); + + assert(fl_indent < 1023); + fl_function[fl_indent++] = f; + + return 0; +} + +void debug_funclog_exit(const char* f, const int line, const char* file) +{ + --fl_indent; + assert(fl_indent >= 0); + assert(fl_function[fl_indent] == f); +} + +void debug_funclog_enter_exit(const char* f, const int line, const char* file) +{ + int i; + for (i = 0; i < fl_indent; i++) + debug_printf(" "); + debug_printf("%s\n", f); +} +#endif + + #ifdef DEBUG /** diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h index 1c9624ea3e..c47c13c64c 100644 --- a/src/gallium/auxiliary/util/u_debug.h +++ b/src/gallium/auxiliary/util/u_debug.h @@ -280,6 +280,43 @@ debug_dump_flags(const struct debug_named_value *names, /** + * Function enter exit loggers + */ +#ifdef DEBUG +int debug_funclog_enter(const char* f, const int line, const char* file); +void debug_funclog_exit(const char* f, const int line, const char* file); +void debug_funclog_enter_exit(const char* f, const int line, const char* file); + +#define DEBUG_FUNCLOG_ENTER() \ + int __debug_decleration_work_around = \ + debug_funclog_enter(__FUNCTION__, __LINE__, __FILE__) +#define DEBUG_FUNCLOG_EXIT() \ + do { \ + (void)__debug_decleration_work_around; \ + debug_funclog_exit(__FUNCTION__, __LINE__, __FILE__); \ + return; \ + } while(0) +#define DEBUG_FUNCLOG_EXIT_RET(ret) \ + do { \ + (void)__debug_decleration_work_around; \ + debug_funclog_exit(__FUNCTION__, __LINE__, __FILE__); \ + return ret; \ + } while(0) +#define DEBUG_FUNCLOG_ENTER_EXIT() \ + debug_funclog_enter_exit(__FUNCTION__, __LINE__, __FILE__) + +#else +#define DEBUG_FUNCLOG_ENTER() \ + int __debug_decleration_work_around +#define DEBUG_FUNCLOG_EXIT() \ + do { (void)__debug_decleration_work_around; return; } while(0) +#define DEBUG_FUNCLOG_EXIT_RET(ret) \ + do { (void)__debug_decleration_work_around; return ret; } while(0) +#define DEBUG_FUNCLOG_ENTER_EXIT() +#endif + + +/** * Get option. * * It is an alias for getenv on Linux. diff --git a/src/gallium/auxiliary/util/u_debug_refcnt.c b/src/gallium/auxiliary/util/u_debug_refcnt.c index 40a26c9c69..6f706a35fd 100644 --- a/src/gallium/auxiliary/util/u_debug_refcnt.c +++ b/src/gallium/auxiliary/util/u_debug_refcnt.c @@ -43,7 +43,8 @@ int debug_refcnt_state; struct os_stream* stream; /* TODO: maybe move this serial machinery to a stand-alone module and expose it? */ -static pipe_mutex serials_mutex; +pipe_static_mutex(serials_mutex); + static struct util_hash_table* serials_hash; static unsigned serials_last; @@ -66,6 +67,15 @@ static boolean debug_serial(void* p, unsigned* pserial) { unsigned serial; boolean found = TRUE; +#ifdef PIPE_SUBSYSTEM_WINDOWS_USER + static boolean first = TRUE; + + if (first) { + pipe_mutex_init(serials_mutex); + first = FALSE; + } +#endif + pipe_mutex_lock(serials_mutex); if(!serials_hash) serials_hash = util_hash_table_create(hash_ptr, compare_ptr); diff --git a/src/gallium/auxiliary/util/u_debug_symbol.c b/src/gallium/auxiliary/util/u_debug_symbol.c index 44d437747a..bae9be87a2 100644 --- a/src/gallium/auxiliary/util/u_debug_symbol.c +++ b/src/gallium/auxiliary/util/u_debug_symbol.c @@ -40,20 +40,43 @@ #include "u_debug_symbol.h" #include "u_hash_table.h" -#if defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) +#if defined(PIPE_OS_WINDOWS) && defined(PIPE_ARCH_X86) #include <windows.h> #include <stddef.h> -#include <imagehlp.h> -/* - * TODO: Cleanup code. - * TODO: Support x86_64 - */ +#include "dbghelp.h" + static BOOL bSymInitialized = FALSE; -static HMODULE hModule_Imagehlp = NULL; +static HMODULE hModule_Dbghelp = NULL; + + +static +FARPROC WINAPI __GetProcAddress(LPCSTR lpProcName) +{ +#ifdef PIPE_CC_GCC + if (!hModule_Dbghelp) { + /* + * bfdhelp.dll is a dbghelp.dll look-alike replacement, which is able to + * understand MinGW symbols using BFD library. It is available from + * http://people.freedesktop.org/~jrfonseca/bfdhelp/ for now. + */ + hModule_Dbghelp = LoadLibraryA("bfdhelp.dll"); + } +#endif + + if (!hModule_Dbghelp) { + hModule_Dbghelp = LoadLibraryA("dbghelp.dll"); + if (!hModule_Dbghelp) { + return NULL; + } + } + + return GetProcAddress(hModule_Dbghelp, lpProcName); +} + typedef BOOL (WINAPI *PFNSYMINITIALIZE)(HANDLE, LPSTR, BOOL); static PFNSYMINITIALIZE pfnSymInitialize = NULL; @@ -62,8 +85,7 @@ static BOOL WINAPI j_SymInitialize(HANDLE hProcess, PSTR UserSearchPath, BOOL fInvadeProcess) { if( - (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) && - (pfnSymInitialize || (pfnSymInitialize = (PFNSYMINITIALIZE) GetProcAddress(hModule_Imagehlp, "SymInitialize"))) + (pfnSymInitialize || (pfnSymInitialize = (PFNSYMINITIALIZE) __GetProcAddress("SymInitialize"))) ) return pfnSymInitialize(hProcess, UserSearchPath, fInvadeProcess); else @@ -77,57 +99,41 @@ static DWORD WINAPI j_SymSetOptions(DWORD SymOptions) { if( - (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) && - (pfnSymSetOptions || (pfnSymSetOptions = (PFNSYMSETOPTIONS) GetProcAddress(hModule_Imagehlp, "SymSetOptions"))) + (pfnSymSetOptions || (pfnSymSetOptions = (PFNSYMSETOPTIONS) __GetProcAddress("SymSetOptions"))) ) return pfnSymSetOptions(SymOptions); else return FALSE; } -typedef PGET_MODULE_BASE_ROUTINE PFNSYMGETMODULEBASE; -static PFNSYMGETMODULEBASE pfnSymGetModuleBase = NULL; +typedef BOOL (WINAPI *PFNSYMGETSYMFROMADDR)(HANDLE, DWORD64, PDWORD64, PSYMBOL_INFO); +static PFNSYMGETSYMFROMADDR pfnSymFromAddr = NULL; static -DWORD WINAPI j_SymGetModuleBase(HANDLE hProcess, DWORD dwAddr) +BOOL WINAPI j_SymFromAddr(HANDLE hProcess, DWORD64 Address, PDWORD64 Displacement, PSYMBOL_INFO Symbol) { if( - (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) && - (pfnSymGetModuleBase || (pfnSymGetModuleBase = (PFNSYMGETMODULEBASE) GetProcAddress(hModule_Imagehlp, "SymGetModuleBase"))) + (pfnSymFromAddr || (pfnSymFromAddr = (PFNSYMGETSYMFROMADDR) __GetProcAddress("SymFromAddr"))) ) - return pfnSymGetModuleBase(hProcess, dwAddr); - else - return 0; -} - -typedef BOOL (WINAPI *PFNSYMGETSYMFROMADDR)(HANDLE, DWORD, LPDWORD, PIMAGEHLP_SYMBOL); -static PFNSYMGETSYMFROMADDR pfnSymGetSymFromAddr = NULL; - -static -BOOL WINAPI j_SymGetSymFromAddr(HANDLE hProcess, DWORD Address, PDWORD Displacement, PIMAGEHLP_SYMBOL Symbol) -{ - if( - (hModule_Imagehlp || (hModule_Imagehlp = LoadLibraryA("IMAGEHLP.DLL"))) && - (pfnSymGetSymFromAddr || (pfnSymGetSymFromAddr = (PFNSYMGETSYMFROMADDR) GetProcAddress(hModule_Imagehlp, "SymGetSymFromAddr"))) - ) - return pfnSymGetSymFromAddr(hProcess, Address, Displacement, Symbol); + return pfnSymFromAddr(hProcess, Address, Displacement, Symbol); else return FALSE; } static INLINE void -debug_symbol_name_imagehlp(const void *addr, char* buf, unsigned size) +debug_symbol_name_dbghelp(const void *addr, char* buf, unsigned size) { HANDLE hProcess; BYTE symbolBuffer[1024]; - PIMAGEHLP_SYMBOL pSymbol = (PIMAGEHLP_SYMBOL) symbolBuffer; - DWORD dwDisplacement = 0; /* Displacement of the input address, relative to the start of the symbol */ + PSYMBOL_INFO pSymbol = (PSYMBOL_INFO) symbolBuffer; + DWORD64 dwDisplacement = 0; /* Displacement of the input address, relative to the start of the symbol */ hProcess = GetCurrentProcess(); + memset(pSymbol, 0, sizeof *pSymbol); pSymbol->SizeOfStruct = sizeof(symbolBuffer); - pSymbol->MaxNameLength = sizeof(symbolBuffer) - offsetof(IMAGEHLP_SYMBOL, Name); + pSymbol->MaxNameLen = sizeof(symbolBuffer) - offsetof(SYMBOL_INFO, Name); if(!bSymInitialized) { j_SymSetOptions(/* SYMOPT_UNDNAME | */ SYMOPT_LOAD_LINES); @@ -135,7 +141,7 @@ debug_symbol_name_imagehlp(const void *addr, char* buf, unsigned size) bSymInitialized = TRUE; } - if(!j_SymGetSymFromAddr(hProcess, (DWORD)addr, &dwDisplacement, pSymbol)) + if(!j_SymFromAddr(hProcess, (DWORD64)(uintptr_t)addr, &dwDisplacement, pSymbol)) buf[0] = 0; else { @@ -165,8 +171,8 @@ debug_symbol_name_glibc(const void *addr, char* buf, unsigned size) void debug_symbol_name(const void *addr, char* buf, unsigned size) { -#if defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) - debug_symbol_name_imagehlp(addr, buf, size); +#if defined(PIPE_OS_WINDOWS) && defined(PIPE_ARCH_X86) + debug_symbol_name_dbghelp(addr, buf, size); if(buf[0]) return; #endif @@ -190,7 +196,7 @@ debug_symbol_print(const void *addr) } struct util_hash_table* symbols_hash; -pipe_mutex symbols_mutex; +pipe_static_mutex(symbols_mutex); static unsigned hash_ptr(void* p) { @@ -211,6 +217,15 @@ const char* debug_symbol_name_cached(const void *addr) { const char* name; +#ifdef PIPE_SUBSYSTEM_WINDOWS_USER + static boolean first = TRUE; + + if (first) { + pipe_mutex_init(symbols_mutex); + first = FALSE; + } +#endif + pipe_mutex_lock(symbols_mutex); if(!symbols_hash) symbols_hash = util_hash_table_create(hash_ptr, compare_ptr); diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index 0b6dc5880f..8ed3b3c095 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -31,6 +31,7 @@ #include "util/u_inlines.h" #include "util/u_draw_quad.h" #include "util/u_memory.h" +#include "cso_cache/cso_context.h" /** @@ -39,6 +40,7 @@ */ void util_draw_vertex_buffer(struct pipe_context *pipe, + struct cso_context *cso, struct pipe_resource *vbuf, uint offset, uint prim_type, @@ -54,8 +56,12 @@ util_draw_vertex_buffer(struct pipe_context *pipe, vbuffer.buffer = vbuf; vbuffer.stride = num_attribs * 4 * sizeof(float); /* vertex size */ vbuffer.buffer_offset = offset; - vbuffer.max_index = num_verts - 1; - pipe->set_vertex_buffers(pipe, 1, &vbuffer); + + if (cso) { + cso_set_vertex_buffers(cso, 1, &vbuffer); + } else { + pipe->set_vertex_buffers(pipe, 1, &vbuffer); + } /* note: vertex elements already set by caller */ @@ -70,13 +76,13 @@ util_draw_vertex_buffer(struct pipe_context *pipe, * Note: this isn't especially efficient. */ void -util_draw_texquad(struct pipe_context *pipe, +util_draw_texquad(struct pipe_context *pipe, struct cso_context *cso, float x0, float y0, float x1, float y1, float z) { uint numAttribs = 2, i, j; uint vertexBytes = 4 * (4 * numAttribs * sizeof(float)); struct pipe_resource *vbuf = NULL; - uint *v = NULL; + float *v = NULL; v = MALLOC(vertexBytes); if (v == NULL) @@ -115,10 +121,10 @@ util_draw_texquad(struct pipe_context *pipe, vbuf = pipe_user_buffer_create(pipe->screen, v, vertexBytes, PIPE_BIND_VERTEX_BUFFER); - if (!vbuf) + if (!vbuf) goto out; - util_draw_vertex_buffer(pipe, vbuf, 0, PIPE_PRIM_TRIANGLE_FAN, 4, 2); + util_draw_vertex_buffer(pipe, cso, vbuf, 0, PIPE_PRIM_TRIANGLE_FAN, 4, 2); out: if (vbuf) diff --git a/src/gallium/auxiliary/util/u_draw_quad.h b/src/gallium/auxiliary/util/u_draw_quad.h index 52994fe05c..f1167786f0 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.h +++ b/src/gallium/auxiliary/util/u_draw_quad.h @@ -38,17 +38,18 @@ extern "C" { #endif struct pipe_resource; +struct cso_context; #include "util/u_draw.h" extern void -util_draw_vertex_buffer(struct pipe_context *pipe, +util_draw_vertex_buffer(struct pipe_context *pipe, struct cso_context *cso, struct pipe_resource *vbuf, uint offset, uint num_attribs, uint num_verts, uint prim_type); extern void -util_draw_texquad(struct pipe_context *pipe, +util_draw_texquad(struct pipe_context *pipe, struct cso_context *cso, float x0, float y0, float x1, float y1, float z); diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c index b471d59eeb..5ecf8cbb06 100644 --- a/src/gallium/auxiliary/util/u_dump_state.c +++ b/src/gallium/auxiliary/util/u_dump_state.c @@ -681,7 +681,6 @@ util_dump_vertex_buffer(struct os_stream *stream, const struct pipe_vertex_buffe util_dump_struct_begin(stream, "pipe_vertex_buffer"); util_dump_member(stream, uint, state, stride); - util_dump_member(stream, uint, state, max_index); util_dump_member(stream, uint, state, buffer_offset); util_dump_member(stream, ptr, state, buffer); diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 1fbd83841c..6f5cc618ff 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -76,6 +76,7 @@ PIPE_FORMAT_B4G4R4X4_UNORM , plain, 1, 1, un4 , un4 , un4 , x4 , zyx1, r PIPE_FORMAT_B5G6R5_UNORM , plain, 1, 1, un5 , un6 , un5 , , zyx1, rgb PIPE_FORMAT_R10G10B10A2_UNORM , plain, 1, 1, un10, un10, un10, un2 , xyzw, rgb PIPE_FORMAT_B10G10R10A2_UNORM , plain, 1, 1, un10, un10, un10, un2 , zyxw, rgb +PIPE_FORMAT_B2G3R3_UNORM , plain, 1, 1, un2 , un3 , un3 , , zyx1, rgb # Luminance/Intensity/Alpha formats PIPE_FORMAT_L8_UNORM , plain, 1, 1, un8 , , , , xxx1, rgb @@ -84,6 +85,9 @@ PIPE_FORMAT_I8_UNORM , plain, 1, 1, un8 , , , , xxxx, r PIPE_FORMAT_L4A4_UNORM , plain, 1, 1, un4 , un4 , , , xxxy, rgb PIPE_FORMAT_L8A8_UNORM , plain, 1, 1, un8 , un8 , , , xxxy, rgb PIPE_FORMAT_L16_UNORM , plain, 1, 1, un16, , , , xxx1, rgb +PIPE_FORMAT_A16_UNORM , plain, 1, 1, un16, , , , 000x, rgb +PIPE_FORMAT_I16_UNORM , plain, 1, 1, un16, , , , xxxx, rgb +PIPE_FORMAT_L16A16_UNORM , plain, 1, 1, un16, un16, , , xxxy, rgb # SRGB formats PIPE_FORMAT_L8_SRGB , plain, 1, 1, un8 , , , , xxx1, srgb @@ -137,6 +141,7 @@ PIPE_FORMAT_R8G8Bx_SNORM , other, 1, 1, sn8 , sn8 , , , x # - http://en.wikipedia.org/wiki/S3_Texture_Compression # - http://www.opengl.org/registry/specs/EXT/texture_compression_s3tc.txt # - http://www.opengl.org/registry/specs/ARB/texture_compression_rgtc.txt +# - http://www.opengl.org/registry/specs/EXT/texture_compression_latc.txt # - http://msdn.microsoft.com/en-us/library/bb694531.aspx PIPE_FORMAT_DXT1_RGB , s3tc, 4, 4, x64 , , , , xyz1, rgb PIPE_FORMAT_DXT1_RGBA , s3tc, 4, 4, x64 , , , , xyzw, rgb @@ -152,6 +157,11 @@ PIPE_FORMAT_RGTC1_SNORM , rgtc, 4, 4, x64, , , , x001, rg PIPE_FORMAT_RGTC2_UNORM , rgtc, 4, 4, x128, , , , xy01, rgb PIPE_FORMAT_RGTC2_SNORM , rgtc, 4, 4, x128, , , , xy01, rgb +PIPE_FORMAT_LATC1_UNORM , rgtc, 4, 4, x64, , , , xxx1, rgb +PIPE_FORMAT_LATC1_SNORM , rgtc, 4, 4, x64, , , , xxx1, rgb +PIPE_FORMAT_LATC2_UNORM , rgtc, 4, 4, x128, , , , xxxy, rgb +PIPE_FORMAT_LATC2_SNORM , rgtc, 4, 4, x128, , , , xxxy, rgb + # Straightforward D3D10-like formats (also used for # vertex buffer element description) # diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index 03b73c0e98..7659a802a4 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -673,7 +673,8 @@ util_format_has_alpha(enum pipe_format format) } /** - * Return the matching SRGB format, or PIPE_FORMAT_NONE if none. + * Given a linear RGB colorspace format, return the corresponding SRGB + * format, or PIPE_FORMAT_NONE if none. */ static INLINE enum pipe_format util_format_srgb(enum pipe_format format) @@ -711,6 +712,45 @@ util_format_srgb(enum pipe_format format) } /** + * Given an sRGB format, return the corresponding linear colorspace format. + * For non sRGB formats, return the format unchanged. + */ +static INLINE enum pipe_format +util_format_linear(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_L8_SRGB: + return PIPE_FORMAT_L8_UNORM; + case PIPE_FORMAT_L8A8_SRGB: + return PIPE_FORMAT_L8A8_UNORM; + case PIPE_FORMAT_R8G8B8_SRGB: + return PIPE_FORMAT_R8G8B8_UNORM; + case PIPE_FORMAT_A8B8G8R8_SRGB: + return PIPE_FORMAT_A8B8G8R8_UNORM; + case PIPE_FORMAT_X8B8G8R8_SRGB: + return PIPE_FORMAT_X8B8G8R8_UNORM; + case PIPE_FORMAT_B8G8R8A8_SRGB: + return PIPE_FORMAT_B8G8R8A8_UNORM; + case PIPE_FORMAT_B8G8R8X8_SRGB: + return PIPE_FORMAT_B8G8R8X8_UNORM; + case PIPE_FORMAT_A8R8G8B8_SRGB: + return PIPE_FORMAT_A8R8G8B8_UNORM; + case PIPE_FORMAT_X8R8G8B8_SRGB: + return PIPE_FORMAT_X8R8G8B8_UNORM; + case PIPE_FORMAT_DXT1_SRGB: + return PIPE_FORMAT_DXT1_RGB; + case PIPE_FORMAT_DXT1_SRGBA: + return PIPE_FORMAT_DXT1_RGBA; + case PIPE_FORMAT_DXT3_SRGBA: + return PIPE_FORMAT_DXT3_RGBA; + case PIPE_FORMAT_DXT5_SRGBA: + return PIPE_FORMAT_DXT5_RGBA; + default: + return format; + } +} + +/** * Return the number of components stored. * Formats with block size != 1x1 will always have 1 component (the block). */ diff --git a/src/gallium/auxiliary/util/u_format_latc.c b/src/gallium/auxiliary/util/u_format_latc.c new file mode 100644 index 0000000000..e84c493bb1 --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_latc.c @@ -0,0 +1,328 @@ +/************************************************************************** + * + * Copyright (C) 2011 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <stdio.h> +#include "u_math.h" +#include "u_format.h" +#include "u_format_rgtc.h" +#include "u_format_latc.h" + +static void u_format_unsigned_encode_rgtc_chan(uint8_t *blkaddr, uint8_t srccolors[4][4], + int numxpixels, int numypixels); + +static void u_format_unsigned_fetch_texel_rgtc(unsigned srcRowStride, const uint8_t *pixdata, + unsigned i, unsigned j, uint8_t *value, unsigned comps); + +static void u_format_signed_encode_rgtc_chan(int8_t *blkaddr, int8_t srccolors[4][4], + int numxpixels, int numypixels); + +static void u_format_signed_fetch_texel_rgtc(unsigned srcRowStride, const int8_t *pixdata, + unsigned i, unsigned j, int8_t *value, unsigned comps); + +void +util_format_latc1_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + /* Fix warnings here: */ + (void) u_format_unsigned_encode_rgtc_chan; + (void) u_format_signed_encode_rgtc_chan; + + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, dst, 1); +} + +void +util_format_latc1_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_rgtc1_unorm_unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_latc1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, + unsigned src_stride, unsigned width, unsigned height) +{ + util_format_rgtc1_unorm_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_latc1_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j; + int block_size = 8; + + for(y = 0; y < height; y += 4) { + const uint8_t *src = src_row; + for(x = 0; x < width; x += 4) { + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + uint8_t tmp_r; + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 1); + dst[0] = + dst[1] = + dst[2] = ubyte_to_float(tmp_r); + dst[3] = 1.0; + } + } + src += block_size; + } + src_row += src_stride; + } +} + +void +util_format_latc1_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_rgtc1_unorm_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_latc1_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp_r; + + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 1); + dst[0] = + dst[1] = + dst[2] = ubyte_to_float(tmp_r); + dst[3] = 1.0; +} + +void +util_format_latc1_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_latc1_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_latc1_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_latc1_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_rgtc1_snorm_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_latc1_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j; + int block_size = 8; + + for(y = 0; y < height; y += 4) { + const int8_t *src = (int8_t *)src_row; + for(x = 0; x < width; x += 4) { + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + int8_t tmp_r; + u_format_signed_fetch_texel_rgtc(0, src, i, j, &tmp_r, 1); + dst[0] = + dst[1] = + dst[2] = byte_to_float_tex(tmp_r); + dst[3] = 1.0; + } + } + src += block_size; + } + src_row += src_stride; + } +} + +void +util_format_latc1_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + int8_t tmp_r; + + u_format_signed_fetch_texel_rgtc(0, (int8_t *)src, i, j, &tmp_r, 1); + dst[0] = + dst[1] = + dst[2] = byte_to_float_tex(tmp_r); + dst[3] = 1.0; +} + + +void +util_format_latc2_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + puts(__func__); + + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, dst, 2); + u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, dst + 1, 2); +} + +void +util_format_latc2_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_rgtc2_unorm_unpack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_latc2_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_rgtc2_unorm_pack_rgba_8unorm(dst_row, dst_stride, src_row, src_stride, width, height); +} + +void +util_format_latc2_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_rxtc2_unorm_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height, 3); +} + +void +util_format_latc2_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j; + int block_size = 16; + + for(y = 0; y < height; y += 4) { + const uint8_t *src = src_row; + for(x = 0; x < width; x += 4) { + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + uint8_t tmp_r, tmp_g; + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 2); + u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, &tmp_g, 2); + dst[0] = + dst[1] = + dst[2] = ubyte_to_float(tmp_r); + dst[3] = ubyte_to_float(tmp_g); + } + } + src += block_size; + } + src_row += src_stride; + } +} + +void +util_format_latc2_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp_r, tmp_g; + + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 2); + u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, &tmp_g, 2); + dst[0] = + dst[1] = + dst[2] = ubyte_to_float(tmp_r); + dst[3] = ubyte_to_float(tmp_g); +} + + +void +util_format_latc2_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_latc2_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_latc2_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_latc2_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j; + int block_size = 16; + + for(y = 0; y < height; y += 4) { + const int8_t *src = (int8_t *)src_row; + for(x = 0; x < width; x += 4) { + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + int8_t tmp_r, tmp_g; + u_format_signed_fetch_texel_rgtc(0, src, i, j, &tmp_r, 2); + u_format_signed_fetch_texel_rgtc(0, src + 8, i, j, &tmp_g, 2); + dst[0] = + dst[1] = + dst[2] = byte_to_float_tex(tmp_r); + dst[3] = byte_to_float_tex(tmp_g); + } + } + src += block_size; + } + src_row += src_stride; + } +} + +void +util_format_latc2_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_rxtc2_snorm_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height, 3); +} + +void +util_format_latc2_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + int8_t tmp_r, tmp_g; + + u_format_signed_fetch_texel_rgtc(0, (int8_t *)src, i, j, &tmp_r, 2); + u_format_signed_fetch_texel_rgtc(0, (int8_t *)src + 8, i, j, &tmp_g, 2); + dst[0] = + dst[1] = + dst[2] = byte_to_float_tex(tmp_r); + dst[3] = byte_to_float_tex(tmp_g); +} + + +#define TAG(x) u_format_unsigned_##x +#define TYPE uint8_t +#define T_MIN 0 +#define T_MAX 255 + +#include "../../../mesa/main/texcompress_rgtc_tmp.h" + +#undef TYPE +#undef TAG +#undef T_MIN +#undef T_MAX + + +#define TAG(x) u_format_signed_##x +#define TYPE int8_t +#define T_MIN (int8_t)-128 +#define T_MAX (int8_t)127 + +#include "../../../mesa/main/texcompress_rgtc_tmp.h" + +#undef TYPE +#undef TAG +#undef T_MIN +#undef T_MAX diff --git a/src/gallium/auxiliary/util/u_format_latc.h b/src/gallium/auxiliary/util/u_format_latc.h new file mode 100644 index 0000000000..1f08887533 --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_latc.h @@ -0,0 +1,108 @@ +/************************************************************************** + * + * Copyright 2011 Red Hat Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + +#ifndef U_FORMAT_LATC_H_ +#define U_FORMAT_LATC_H_ + +void +util_format_latc1_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_latc1_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_latc1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_latc1_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_latc1_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_latc1_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); + + + +void +util_format_latc1_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_latc1_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_latc1_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_latc1_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_latc1_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_latc1_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); + + +void +util_format_latc2_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_latc2_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_latc2_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_latc2_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_latc2_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_latc2_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); + + +void +util_format_latc2_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_latc2_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_latc2_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_latc2_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_latc2_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_latc2_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); + + +#endif diff --git a/src/gallium/auxiliary/util/u_format_pack.py b/src/gallium/auxiliary/util/u_format_pack.py index 6d0016c0ad..cc173f808a 100644 --- a/src/gallium/auxiliary/util/u_format_pack.py +++ b/src/gallium/auxiliary/util/u_format_pack.py @@ -632,7 +632,7 @@ def generate_format_fetch(format, dst_channel, dst_native_type, dst_suffix): def is_format_hand_written(format): - return format.layout in ('s3tc', 'subsampled', 'other') or format.colorspace == ZS + return format.layout in ('s3tc', 'rgtc', 'subsampled', 'other') or format.colorspace == ZS def generate(formats): diff --git a/src/gallium/auxiliary/util/u_format_rgtc.c b/src/gallium/auxiliary/util/u_format_rgtc.c new file mode 100644 index 0000000000..c929fd47e9 --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_rgtc.c @@ -0,0 +1,464 @@ +/************************************************************************** + * + * Copyright (C) 2011 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <stdio.h> +#include "u_math.h" +#include "u_format.h" +#include "u_format_rgtc.h" + +static void u_format_unsigned_encode_rgtc_chan(uint8_t *blkaddr, uint8_t srccolors[4][4], + int numxpixels, int numypixels); + +static void u_format_unsigned_fetch_texel_rgtc(unsigned srcRowStride, const uint8_t *pixdata, + unsigned i, unsigned j, uint8_t *value, unsigned comps); + +static void u_format_signed_encode_rgtc_chan(int8_t *blkaddr, int8_t srccolors[4][4], + int numxpixels, int numypixels); + +static void u_format_signed_fetch_texel_rgtc(unsigned srcRowStride, const int8_t *pixdata, + unsigned i, unsigned j, int8_t *value, unsigned comps); + +void +util_format_rgtc1_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, dst, 1); +} + +void +util_format_rgtc1_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + const unsigned bw = 4, bh = 4, comps = 4; + unsigned x, y, i, j; + unsigned block_size = 8; + + for(y = 0; y < height; y += bh) { + const uint8_t *src = src_row; + for(x = 0; x < width; x += bw) { + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*comps; + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, dst, 1); + } + } + src += block_size; + } + src_row += src_stride; + } +} + +void +util_format_rgtc1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, + unsigned src_stride, unsigned width, unsigned height) +{ + const unsigned bw = 4, bh = 4, bytes_per_block = 8; + unsigned x, y, i, j; + + for(y = 0; y < height; y += bh) { + uint8_t *dst = dst_row; + for(x = 0; x < width; x += bw) { + uint8_t tmp[4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + tmp[j][i] = src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4]; + } + } + u_format_unsigned_encode_rgtc_chan(dst, tmp, 4, 4); + dst += bytes_per_block; + } + dst_row += dst_stride / sizeof(*dst_row); + } +} + +void +util_format_rgtc1_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j; + int block_size = 8; + for(y = 0; y < height; y += 4) { + const uint8_t *src = src_row; + for(x = 0; x < width; x += 4) { + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + uint8_t tmp_r; + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 1); + dst[0] = ubyte_to_float(tmp_r); + dst[1] = 0.0; + dst[2] = 0.0; + dst[3] = 1.0; + } + } + src += block_size; + } + src_row += src_stride; + } +} + +void +util_format_rgtc1_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + const unsigned bw = 4, bh = 4, bytes_per_block = 8; + unsigned x, y, i, j; + + for(y = 0; y < height; y += bh) { + uint8_t *dst = dst_row; + for(x = 0; x < width; x += bw) { + uint8_t tmp[4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + tmp[j][i] = float_to_ubyte(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4]); + } + } + u_format_unsigned_encode_rgtc_chan(dst, tmp, 4, 4); + dst += bytes_per_block; + } + dst_row += dst_stride / sizeof(*dst_row); + } +} + +void +util_format_rgtc1_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp_r; + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 1); + dst[0] = ubyte_to_float(tmp_r); + dst[1] = 0.0; + dst[2] = 0.0; + dst[3] = 1.0; +} + +void +util_format_rgtc1_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_rgtc1_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_rgtc1_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_rgtc1_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + const unsigned bw = 4, bh = 4, bytes_per_block = 8; + unsigned x, y, i, j; + + for(y = 0; y < height; y += bh) { + int8_t *dst = (int8_t *)dst_row; + for(x = 0; x < width; x += bw) { + int8_t tmp[4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + tmp[j][i] = float_to_byte_tex(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4]); + } + } + u_format_signed_encode_rgtc_chan(dst, tmp, 4, 4); + dst += bytes_per_block; + } + dst_row += dst_stride / sizeof(*dst_row); + } +} + +void +util_format_rgtc1_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j; + int block_size = 8; + for(y = 0; y < height; y += 4) { + const int8_t *src = (int8_t *)src_row; + for(x = 0; x < width; x += 4) { + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + int8_t tmp_r; + u_format_signed_fetch_texel_rgtc(0, src, i, j, &tmp_r, 1); + dst[0] = byte_to_float_tex(tmp_r); + dst[1] = 0.0; + dst[2] = 0.0; + dst[3] = 1.0; + } + } + src += block_size; + } + src_row += src_stride; + } +} + +void +util_format_rgtc1_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + int8_t tmp_r; + u_format_signed_fetch_texel_rgtc(0, (int8_t *)src, i, j, &tmp_r, 1); + dst[0] = byte_to_float_tex(tmp_r); + dst[1] = 0.0; + dst[2] = 0.0; + dst[3] = 1.0; +} + + +void +util_format_rgtc2_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, dst, 2); + u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, dst + 1, 2); +} + +void +util_format_rgtc2_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + const unsigned bw = 4, bh = 4, comps = 4; + unsigned x, y, i, j; + unsigned block_size = 16; + + for(y = 0; y < height; y += bh) { + const uint8_t *src = src_row; + for(x = 0; x < width; x += bw) { + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*comps; + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, dst, 2); + u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, dst + 1, 2); + + } + } + src += block_size; + } + src_row += src_stride; + } +} + +void +util_format_rgtc2_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + const unsigned bw = 4, bh = 4, bytes_per_block = 16; + unsigned x, y, i, j; + + for(y = 0; y < height; y += bh) { + uint8_t *dst = dst_row; + for(x = 0; x < width; x += bw) { + uint8_t tmp_r[4][4]; /* [bh][bw] */ + uint8_t tmp_g[4][4]; /* [bh][bw] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + tmp_r[j][i] = src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4]; + tmp_g[j][i] = src_row[((y + j)*src_stride/sizeof(*src_row) + (x + i)*4) + 1]; + } + } + u_format_unsigned_encode_rgtc_chan(dst, tmp_r, 4, 4); + u_format_unsigned_encode_rgtc_chan(dst + 8, tmp_g, 4, 4); + dst += bytes_per_block; + } + dst_row += dst_stride / sizeof(*dst_row); + } +} + +void +util_format_rxtc2_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height, unsigned chan2off) +{ + const unsigned bw = 4, bh = 4, bytes_per_block = 16; + unsigned x, y, i, j; + + for(y = 0; y < height; y += bh) { + uint8_t *dst = dst_row; + for(x = 0; x < width; x += bw) { + uint8_t tmp_r[4][4]; /* [bh][bw][comps] */ + uint8_t tmp_g[4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + tmp_r[j][i] = float_to_ubyte(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4]); + tmp_g[j][i] = float_to_ubyte(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4 + chan2off]); + } + } + u_format_unsigned_encode_rgtc_chan(dst, tmp_r, 4, 4); + u_format_unsigned_encode_rgtc_chan(dst + 8, tmp_g, 4, 4); + dst += bytes_per_block; + } + dst_row += dst_stride / sizeof(*dst_row); + } +} + +void +util_format_rgtc2_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_rxtc2_unorm_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height, 1); +} + +void +util_format_rgtc2_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j; + int block_size = 16; + for(y = 0; y < height; y += 4) { + const uint8_t *src = src_row; + for(x = 0; x < width; x += 4) { + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + uint8_t tmp_r, tmp_g; + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 2); + u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, &tmp_g, 2); + dst[0] = ubyte_to_float(tmp_r); + dst[1] = ubyte_to_float(tmp_g); + dst[2] = 0.0; + dst[3] = 1.0; + } + } + src += block_size; + } + src_row += src_stride; + } +} + +void +util_format_rgtc2_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + uint8_t tmp_r, tmp_g; + u_format_unsigned_fetch_texel_rgtc(0, src, i, j, &tmp_r, 2); + u_format_unsigned_fetch_texel_rgtc(0, src + 8, i, j, &tmp_g, 2); + dst[0] = ubyte_to_float(tmp_r); + dst[1] = ubyte_to_float(tmp_g); + dst[2] = 0.0; + dst[3] = 1.0; +} + + +void +util_format_rgtc2_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_rgtc2_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_rgtc2_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + fprintf(stderr,"%s\n", __func__); +} + +void +util_format_rgtc2_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + unsigned x, y, i, j; + int block_size = 16; + for(y = 0; y < height; y += 4) { + const int8_t *src = (int8_t *)src_row; + for(x = 0; x < width; x += 4) { + for(j = 0; j < 4; ++j) { + for(i = 0; i < 4; ++i) { + float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; + int8_t tmp_r, tmp_g; + u_format_signed_fetch_texel_rgtc(0, src, i, j, &tmp_r, 2); + u_format_signed_fetch_texel_rgtc(0, src + 8, i, j, &tmp_g, 2); + dst[0] = byte_to_float_tex(tmp_r); + dst[1] = byte_to_float_tex(tmp_g); + dst[2] = 0.0; + dst[3] = 1.0; + } + } + src += block_size; + } + src_row += src_stride; + } +} + +void +util_format_rxtc2_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height, unsigned chan2off) +{ + const unsigned bw = 4, bh = 4, bytes_per_block = 16; + unsigned x, y, i, j; + + for(y = 0; y < height; y += bh) { + int8_t *dst = (int8_t *)dst_row; + for(x = 0; x < width; x += bw) { + int8_t tmp_r[4][4]; /* [bh][bw][comps] */ + int8_t tmp_g[4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + tmp_r[j][i] = float_to_byte_tex(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4]); + tmp_g[j][i] = float_to_byte_tex(src_row[(y + j)*src_stride/sizeof(*src_row) + (x + i)*4 + chan2off]); + } + } + u_format_signed_encode_rgtc_chan(dst, tmp_r, 4, 4); + u_format_signed_encode_rgtc_chan(dst + 8, tmp_g, 4, 4); + dst += bytes_per_block; + } + dst_row += dst_stride / sizeof(*dst_row); + } +} + +void +util_format_rgtc2_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_rxtc2_snorm_pack_rgba_float(dst_row, dst_stride, src_row, src_stride, width, height, 1); +} + +void +util_format_rgtc2_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j) +{ + int8_t tmp_r, tmp_g; + u_format_signed_fetch_texel_rgtc(0, (int8_t *)src, i, j, &tmp_r, 2); + u_format_signed_fetch_texel_rgtc(0, (int8_t *)src + 8, i, j, &tmp_g, 2); + dst[0] = byte_to_float_tex(tmp_r); + dst[1] = byte_to_float_tex(tmp_g); + dst[2] = 0.0; + dst[3] = 1.0; +} + + +#define TAG(x) u_format_unsigned_##x +#define TYPE uint8_t +#define T_MIN 0 +#define T_MAX 255 + +#include "../../../mesa/main/texcompress_rgtc_tmp.h" + +#undef TYPE +#undef TAG +#undef T_MIN +#undef T_MAX + + +#define TAG(x) u_format_signed_##x +#define TYPE int8_t +#define T_MIN (int8_t)-128 +#define T_MAX (int8_t)127 + +#include "../../../mesa/main/texcompress_rgtc_tmp.h" + +#undef TYPE +#undef TAG +#undef T_MIN +#undef T_MAX diff --git a/src/gallium/auxiliary/util/u_format_rgtc.h b/src/gallium/auxiliary/util/u_format_rgtc.h new file mode 100644 index 0000000000..67ac4728e5 --- /dev/null +++ b/src/gallium/auxiliary/util/u_format_rgtc.h @@ -0,0 +1,114 @@ +/************************************************************************** + * + * Copyright 2011 Red Hat Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + +#ifndef U_FORMAT_RGTC_H_ +#define U_FORMAT_RGTC_H_ + +void +util_format_rgtc1_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_rgtc1_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc1_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc1_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc1_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc1_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); + + + +void +util_format_rgtc1_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_rgtc1_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc1_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc1_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc1_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc1_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); + + +void +util_format_rgtc2_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_rgtc2_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc2_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rxtc2_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height, unsigned chan2off); + +void +util_format_rgtc2_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc2_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc2_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); + + +void +util_format_rgtc2_snorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); + +void +util_format_rgtc2_snorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc2_snorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc2_snorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rxtc2_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height, unsigned chan2off); + +void +util_format_rgtc2_snorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_rgtc2_snorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j); + + +#endif diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py index 8cc22a5637..55e0f7827e 100755 --- a/src/gallium/auxiliary/util/u_format_table.py +++ b/src/gallium/auxiliary/util/u_format_table.py @@ -87,6 +87,8 @@ def write_format_table(formats): print print '#include "u_format.h"' print '#include "u_format_s3tc.h"' + print '#include "u_format_rgtc.h"' + print '#include "u_format_latc.h"' print u_format_pack.generate(formats) @@ -132,7 +134,7 @@ def write_format_table(formats): if format.colorspace != ZS: print " &util_format_%s_unpack_rgba_8unorm," % format.short_name() print " &util_format_%s_pack_rgba_8unorm," % format.short_name() - if format.layout == 's3tc': + if format.layout == 's3tc' or format.layout == 'rgtc': print " &util_format_%s_fetch_rgba_8unorm," % format.short_name() else: print " NULL, /* fetch_rgba_8unorm */" diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index d22ae8b375..4a1662462e 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -67,7 +67,7 @@ struct gen_mipmap_state struct pipe_vertex_element velem[2]; void *vs; - void *fs1d, *fs2d, *fs3d, *fsCube; + void *fs[TGSI_TEXTURE_COUNT]; /**< Not all are used, but simplifies code */ struct pipe_resource *vbuf; /**< quad vertices */ unsigned vbuf_slot; @@ -1301,27 +1301,6 @@ util_create_gen_mipmap(struct pipe_context *pipe, ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; } - /* vertex shader - still needed to specify mapping from fragment - * shader input semantics to vertex elements - */ - { - const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC }; - const uint semantic_indexes[] = { 0, 0 }; - ctx->vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names, - semantic_indexes); - } - - /* fragment shader */ - ctx->fs1d = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_1D, - TGSI_INTERPOLATE_LINEAR); - ctx->fs2d = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_2D, - TGSI_INTERPOLATE_LINEAR); - ctx->fs3d = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_3D, - TGSI_INTERPOLATE_LINEAR); - ctx->fsCube = util_make_fragment_tex_shader(pipe, TGSI_TEXTURE_CUBE, - TGSI_INTERPOLATE_LINEAR); - /* vertex data that doesn't change */ for (i = 0; i < 4; i++) { ctx->vertices[i][0][2] = 0.0f; /* z */ @@ -1336,6 +1315,44 @@ util_create_gen_mipmap(struct pipe_context *pipe, /** + * Helper function to set the fragment shaders. + */ +static INLINE void +set_fragment_shader(struct gen_mipmap_state *ctx, uint type) +{ + if (!ctx->fs[type]) + ctx->fs[type] = + util_make_fragment_tex_shader(ctx->pipe, type, + TGSI_INTERPOLATE_LINEAR); + + cso_set_fragment_shader_handle(ctx->cso, ctx->fs[type]); +} + + +/** + * Helper function to set the vertex shader. + */ +static INLINE void +set_vertex_shader(struct gen_mipmap_state *ctx) +{ + /* vertex shader - still required to provide the linkage between + * fragment shader input semantics and vertex_element/buffers. + */ + if (!ctx->vs) + { + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC }; + const uint semantic_indexes[] = { 0, 0 }; + ctx->vs = util_make_vertex_passthrough_shader(ctx->pipe, 2, + semantic_names, + semantic_indexes); + } + + cso_set_vertex_shader_handle(ctx->cso, ctx->vs); +} + + +/** * Get next "slot" of vertex space in the vertex buffer. * We're allocating one large vertex buffer and using it piece by piece. */ @@ -1350,6 +1367,7 @@ get_next_slot(struct gen_mipmap_state *ctx) if (!ctx->vbuf) { ctx->vbuf = pipe_buffer_create(ctx->pipe->screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, max_slots * sizeof ctx->vertices); } @@ -1389,8 +1407,25 @@ set_vertex_data(struct gen_mipmap_state *ctx, util_map_texcoords2d_onto_cubemap(layer, &st[0][0], 2, &ctx->vertices[0][1][0], 8); } - else { - /* 1D/2D/3D */ + else if (tex_target == PIPE_TEXTURE_1D_ARRAY) { + /* 1D texture array */ + ctx->vertices[0][1][0] = 0.0f; /*s*/ + ctx->vertices[0][1][1] = r; /*t*/ + ctx->vertices[0][1][2] = 0.0f; /*r*/ + + ctx->vertices[1][1][0] = 1.0f; + ctx->vertices[1][1][1] = r; + ctx->vertices[1][1][2] = 0.0f; + + ctx->vertices[2][1][0] = 1.0f; + ctx->vertices[2][1][1] = r; + ctx->vertices[2][1][2] = 0.0f; + + ctx->vertices[3][1][0] = 0.0f; + ctx->vertices[3][1][1] = r; + ctx->vertices[3][1][2] = 0.0f; + } else { + /* 1D/2D/3D/2D array */ ctx->vertices[0][1][0] = 0.0f; /*s*/ ctx->vertices[0][1][1] = 0.0f; /*t*/ ctx->vertices[0][1][2] = r; /*r*/ @@ -1425,12 +1460,14 @@ void util_destroy_gen_mipmap(struct gen_mipmap_state *ctx) { struct pipe_context *pipe = ctx->pipe; + unsigned i; + + for (i = 0; i < Elements(ctx->fs); i++) + if (ctx->fs[i]) + pipe->delete_fs_state(pipe, ctx->fs[i]); - pipe->delete_fs_state(pipe, ctx->fsCube); - pipe->delete_fs_state(pipe, ctx->fs3d); - pipe->delete_fs_state(pipe, ctx->fs2d); - pipe->delete_fs_state(pipe, ctx->fs1d); - pipe->delete_vs_state(pipe, ctx->vs); + if (ctx->vs) + pipe->delete_vs_state(pipe, ctx->vs); pipe_resource_reference(&ctx->vbuf, NULL); @@ -1469,9 +1506,9 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, struct pipe_screen *screen = pipe->screen; struct pipe_framebuffer_state fb; struct pipe_resource *pt = psv->texture; - void *fs; uint dstLevel; uint offset; + uint type; /* The texture object should have room for the levels which we're * about to generate. @@ -1486,27 +1523,31 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, switch (pt->target) { case PIPE_TEXTURE_1D: - fs = ctx->fs1d; + type = TGSI_TEXTURE_1D; break; case PIPE_TEXTURE_2D: - fs = ctx->fs2d; + type = TGSI_TEXTURE_2D; break; case PIPE_TEXTURE_3D: - fs = ctx->fs3d; + type = TGSI_TEXTURE_3D; break; case PIPE_TEXTURE_CUBE: - fs = ctx->fsCube; + type = TGSI_TEXTURE_CUBE; break; case PIPE_TEXTURE_1D_ARRAY: + type = TGSI_TEXTURE_1D_ARRAY; + break; case PIPE_TEXTURE_2D_ARRAY: + type = TGSI_TEXTURE_2D_ARRAY; + break; default: assert(0); - fs = ctx->fs2d; + type = TGSI_TEXTURE_2D; } /* check if we can render in the texture's format */ if (!screen->is_format_supported(screen, psv->format, pt->target, - pt->nr_samples, PIPE_BIND_RENDER_TARGET, 0)) { + pt->nr_samples, PIPE_BIND_RENDER_TARGET)) { fallback_gen_mipmap(ctx, pt, face, baseLevel, lastLevel); return; } @@ -1531,8 +1572,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, cso_set_clip(ctx->cso, &ctx->clip); cso_set_vertex_elements(ctx->cso, 2, ctx->velem); - cso_set_fragment_shader_handle(ctx->cso, fs); - cso_set_vertex_shader_handle(ctx->cso, ctx->vs); + set_fragment_shader(ctx, type); + set_vertex_shader(ctx); /* init framebuffer state */ memset(&fb, 0, sizeof(fb)); @@ -1554,6 +1595,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, if (pt->target == PIPE_TEXTURE_3D) nr_layers = u_minify(pt->depth0, dstLevel); + else if (pt->target == PIPE_TEXTURE_2D_ARRAY || pt->target == PIPE_TEXTURE_1D_ARRAY) + nr_layers = pt->array_size; else nr_layers = 1; @@ -1563,11 +1606,12 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, /* in theory with geom shaders and driver with full layer support could do that in one go. */ layer = i; - offset = 1.0f / (float)(nr_layers * 2); /* XXX hmm really? */ rcoord = (float)layer / (float)nr_layers + 1.0f / (float)(nr_layers * 2); - } - else + } else if (pt->target == PIPE_TEXTURE_2D_ARRAY || pt->target == PIPE_TEXTURE_1D_ARRAY) { + layer = i; + rcoord = (float)layer; + } else layer = face; memset(&surf_templ, 0, sizeof(surf_templ)); @@ -1616,15 +1660,14 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, face, rcoord); - util_draw_vertex_buffer(ctx->pipe, + util_draw_vertex_buffer(ctx->pipe, + ctx->cso, ctx->vbuf, offset, PIPE_PRIM_TRIANGLE_FAN, 4, /* verts */ 2); /* attribs/vert */ - pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); - /* need to signal that the texture has changed _after_ rendering to it */ pipe_surface_reference( &surf, NULL ); } diff --git a/src/gallium/auxiliary/util/u_index_modify.c b/src/gallium/auxiliary/util/u_index_modify.c index 65b079ed53..d0a28b5fdf 100644 --- a/src/gallium/auxiliary/util/u_index_modify.c +++ b/src/gallium/auxiliary/util/u_index_modify.c @@ -24,26 +24,77 @@ #include "util/u_index_modify.h" #include "util/u_inlines.h" +/* Ubyte indices. */ + +void util_shorten_ubyte_elts_to_userptr(struct pipe_context *context, + struct pipe_resource *elts, + int index_bias, + unsigned start, + unsigned count, + void *out) +{ + struct pipe_transfer *src_transfer; + unsigned char *in_map; + unsigned short *out_map = out; + unsigned i; + + in_map = pipe_buffer_map(context, elts, + PIPE_TRANSFER_READ | + PIPE_TRANSFER_UNSYNCHRONIZED, + &src_transfer); + in_map += start; + + for (i = 0; i < count; i++) { + *out_map = (unsigned short)(*in_map + index_bias); + in_map++; + out_map++; + } + + pipe_buffer_unmap(context, src_transfer); +} + void util_shorten_ubyte_elts(struct pipe_context *context, struct pipe_resource **elts, int index_bias, unsigned start, unsigned count) { - struct pipe_screen* screen = context->screen; struct pipe_resource* new_elts; - unsigned char *in_map; unsigned short *out_map; - struct pipe_transfer *src_transfer, *dst_transfer; - unsigned i; + struct pipe_transfer *dst_transfer; - new_elts = pipe_buffer_create(screen, + new_elts = pipe_buffer_create(context->screen, PIPE_BIND_INDEX_BUFFER, + PIPE_USAGE_STATIC, 2 * count); - in_map = pipe_buffer_map(context, *elts, PIPE_TRANSFER_READ, &src_transfer); - out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &dst_transfer); + out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, + &dst_transfer); + util_shorten_ubyte_elts_to_userptr(context, *elts, index_bias, + start, count, out_map); + pipe_buffer_unmap(context, dst_transfer); + + *elts = new_elts; +} + + +/* Ushort indices. */ +void util_rebuild_ushort_elts_to_userptr(struct pipe_context *context, + struct pipe_resource *elts, + int index_bias, + unsigned start, unsigned count, + void *out) +{ + struct pipe_transfer *in_transfer = NULL; + unsigned short *in_map; + unsigned short *out_map = out; + unsigned i; + + in_map = pipe_buffer_map(context, elts, + PIPE_TRANSFER_READ | + PIPE_TRANSFER_UNSYNCHRONIZED, + &in_transfer); in_map += start; for (i = 0; i < count; i++) { @@ -52,10 +103,7 @@ void util_shorten_ubyte_elts(struct pipe_context *context, out_map++; } - pipe_buffer_unmap(context, *elts, src_transfer); - pipe_buffer_unmap(context, new_elts, dst_transfer); - - *elts = new_elts; + pipe_buffer_unmap(context, in_transfer); } void util_rebuild_ushort_elts(struct pipe_context *context, @@ -63,33 +111,51 @@ void util_rebuild_ushort_elts(struct pipe_context *context, int index_bias, unsigned start, unsigned count) { - struct pipe_transfer *in_transfer = NULL; struct pipe_transfer *out_transfer = NULL; struct pipe_resource *new_elts; - unsigned short *in_map; unsigned short *out_map; - unsigned i; new_elts = pipe_buffer_create(context->screen, PIPE_BIND_INDEX_BUFFER, + PIPE_USAGE_STATIC, 2 * count); - in_map = pipe_buffer_map(context, *elts, - PIPE_TRANSFER_READ, &in_transfer); out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &out_transfer); + util_rebuild_ushort_elts_to_userptr(context, *elts, index_bias, + start, count, out_map); + pipe_buffer_unmap(context, out_transfer); + + *elts = new_elts; +} + + +/* Uint indices. */ +void util_rebuild_uint_elts_to_userptr(struct pipe_context *context, + struct pipe_resource *elts, + int index_bias, + unsigned start, unsigned count, + void *out) +{ + struct pipe_transfer *in_transfer = NULL; + unsigned int *in_map; + unsigned int *out_map = out; + unsigned i; + + in_map = pipe_buffer_map(context, elts, + PIPE_TRANSFER_READ | + PIPE_TRANSFER_UNSYNCHRONIZED, + &in_transfer); in_map += start; + for (i = 0; i < count; i++) { - *out_map = (unsigned short)(*in_map + index_bias); + *out_map = (unsigned int)(*in_map + index_bias); in_map++; out_map++; } - pipe_buffer_unmap(context, *elts, in_transfer); - pipe_buffer_unmap(context, new_elts, out_transfer); - - *elts = new_elts; + pipe_buffer_unmap(context, in_transfer); } void util_rebuild_uint_elts(struct pipe_context *context, @@ -97,31 +163,20 @@ void util_rebuild_uint_elts(struct pipe_context *context, int index_bias, unsigned start, unsigned count) { - struct pipe_transfer *in_transfer = NULL; struct pipe_transfer *out_transfer = NULL; struct pipe_resource *new_elts; - unsigned int *in_map; unsigned int *out_map; - unsigned i; new_elts = pipe_buffer_create(context->screen, PIPE_BIND_INDEX_BUFFER, + PIPE_USAGE_STATIC, 2 * count); - in_map = pipe_buffer_map(context, *elts, - PIPE_TRANSFER_READ, &in_transfer); out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &out_transfer); - - in_map += start; - for (i = 0; i < count; i++) { - *out_map = (unsigned int)(*in_map + index_bias); - in_map++; - out_map++; - } - - pipe_buffer_unmap(context, *elts, in_transfer); - pipe_buffer_unmap(context, new_elts, out_transfer); + util_rebuild_uint_elts_to_userptr(context, *elts, index_bias, + start, count, out_map); + pipe_buffer_unmap(context, out_transfer); *elts = new_elts; } diff --git a/src/gallium/auxiliary/util/u_index_modify.h b/src/gallium/auxiliary/util/u_index_modify.h index 01a6cae94f..1e9de3dfac 100644 --- a/src/gallium/auxiliary/util/u_index_modify.h +++ b/src/gallium/auxiliary/util/u_index_modify.h @@ -23,19 +23,46 @@ #ifndef UTIL_INDEX_MODIFY_H #define UTIL_INDEX_MODIFY_H +struct pipe_context; +struct pipe_resource; + +void util_shorten_ubyte_elts_to_userptr(struct pipe_context *context, + struct pipe_resource *elts, + int index_bias, + unsigned start, + unsigned count, + void *out); + void util_shorten_ubyte_elts(struct pipe_context *context, struct pipe_resource **elts, int index_bias, unsigned start, unsigned count); + + +void util_rebuild_ushort_elts_to_userptr(struct pipe_context *context, + struct pipe_resource *elts, + int index_bias, + unsigned start, unsigned count, + void *out); + void util_rebuild_ushort_elts(struct pipe_context *context, struct pipe_resource **elts, int index_bias, unsigned start, unsigned count); + + +void util_rebuild_uint_elts_to_userptr(struct pipe_context *context, + struct pipe_resource *elts, + int index_bias, + unsigned start, unsigned count, + void *out); + void util_rebuild_uint_elts(struct pipe_context *context, struct pipe_resource **elts, int index_bias, unsigned start, unsigned count); + #endif diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h index e55aafe90f..ddb81b5b95 100644 --- a/src/gallium/auxiliary/util/u_inlines.h +++ b/src/gallium/auxiliary/util/u_inlines.h @@ -160,6 +160,21 @@ pipe_surface_init(struct pipe_context *ctx, struct pipe_surface* ps, pipe_surface_reset(ctx, ps, pt, level, layer, flags); } +/* Return true if the surfaces are equal. */ +static INLINE boolean +pipe_surface_equal(struct pipe_surface *s1, struct pipe_surface *s2) +{ + return s1->texture == s2->texture && + s1->format == s2->format && + (s1->texture->target != PIPE_BUFFER || + (s1->u.buf.first_element == s2->u.buf.first_element && + s1->u.buf.last_element == s2->u.buf.last_element)) && + (s1->texture->target == PIPE_BUFFER || + (s1->u.tex.level == s2->u.tex.level && + s1->u.tex.first_layer == s2->u.tex.first_layer && + s1->u.tex.last_layer == s2->u.tex.last_layer)); +} + /* * Convenience wrappers for screen buffer functions. */ @@ -167,6 +182,7 @@ pipe_surface_init(struct pipe_context *ctx, struct pipe_surface* ps, static INLINE struct pipe_resource * pipe_buffer_create( struct pipe_screen *screen, unsigned bind, + unsigned usage, unsigned size ) { struct pipe_resource buffer; @@ -174,7 +190,7 @@ pipe_buffer_create( struct pipe_screen *screen, buffer.target = PIPE_BUFFER; buffer.format = PIPE_FORMAT_R8_UNORM; /* want TYPELESS or similar */ buffer.bind = bind; - buffer.usage = PIPE_USAGE_DEFAULT; + buffer.usage = usage; buffer.flags = 0; buffer.width0 = size; buffer.height0 = 1; @@ -220,6 +236,7 @@ pipe_buffer_map_range(struct pipe_context *pipe, map = pipe->transfer_map( pipe, *transfer ); if (map == NULL) { pipe->transfer_destroy( pipe, *transfer ); + *transfer = NULL; return NULL; } @@ -242,7 +259,6 @@ pipe_buffer_map(struct pipe_context *pipe, static INLINE void pipe_buffer_unmap(struct pipe_context *pipe, - struct pipe_resource *buf, struct pipe_transfer *transfer) { if (transfer) { @@ -283,13 +299,20 @@ pipe_buffer_write(struct pipe_context *pipe, const void *data) { struct pipe_box box; + unsigned usage = PIPE_TRANSFER_WRITE; + + if (offset == 0 && size == buf->width0) { + usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; + } else { + usage |= PIPE_TRANSFER_DISCARD_RANGE; + } u_box_1d(offset, size, &box); pipe->transfer_inline_write( pipe, buf, 0, - PIPE_TRANSFER_WRITE, + usage, &box, data, size, @@ -341,7 +364,7 @@ pipe_buffer_read(struct pipe_context *pipe, if (map) memcpy(data, map + offset, size); - pipe_buffer_unmap(pipe, buf, src_transfer); + pipe_buffer_unmap(pipe, src_transfer); } static INLINE struct pipe_transfer * @@ -401,6 +424,34 @@ static INLINE boolean util_get_offset( } } +/** + * This function is used to copy an array of pipe_vertex_buffer structures, + * while properly referencing the pipe_vertex_buffer::buffer member. + * + * \sa util_copy_framebuffer_state + */ +static INLINE void util_copy_vertex_buffers(struct pipe_vertex_buffer *dst, + unsigned *dst_count, + const struct pipe_vertex_buffer *src, + unsigned src_count) +{ + unsigned i; + + /* Reference the buffers of 'src' in 'dst'. */ + for (i = 0; i < src_count; i++) { + pipe_resource_reference(&dst[i].buffer, src[i].buffer); + } + /* Unreference the rest of the buffers in 'dst'. */ + for (; i < *dst_count; i++) { + pipe_resource_reference(&dst[i].buffer, NULL); + } + + /* Update the size of 'dst' and copy over the other members + * of pipe_vertex_buffer. */ + *dst_count = src_count; + memcpy(dst, src, src_count * sizeof(struct pipe_vertex_buffer)); +} + #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index cfcb9cd857..00653562ad 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -176,7 +176,7 @@ static INLINE float logf( float f ) #define isfinite(x) _finite((double)(x)) #define isnan(x) _isnan((double)(x)) -#endif +#endif /* _MSC_VER < 1400 && !defined(__cplusplus) */ static INLINE double log2( double x ) { @@ -184,6 +184,18 @@ static INLINE double log2( double x ) return log( x ) * invln2; } +static INLINE double +round(double x) +{ + return x >= 0.0 ? floor(x + 0.5) : ceil(x - 0.5); +} + +static INLINE float +roundf(float x) +{ + return x >= 0.0f ? floorf(x + 0.5f) : ceilf(x - 0.5f); +} + #endif /* _MSC_VER */ @@ -473,6 +485,17 @@ float_to_ubyte(float f) } } +static INLINE float +byte_to_float_tex(int8_t b) +{ + return (b == -128) ? -1.0F : b * 1.0F / 127.0F; +} + +static INLINE int8_t +float_to_byte_tex(float f) +{ + return (int8_t) (127.0F * f); +} /** * Calc log base 2 diff --git a/src/gallium/auxiliary/util/u_pstipple.c b/src/gallium/auxiliary/util/u_pstipple.c new file mode 100644 index 0000000000..f79a6938d1 --- /dev/null +++ b/src/gallium/auxiliary/util/u_pstipple.c @@ -0,0 +1,434 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Polygon stipple helper module. Drivers/GPUs which don't support polygon + * stipple natively can use this module to simulate it. + * + * Basically, modify fragment shader to sample the 32x32 stipple pattern + * texture and do a fragment kill for the 'off' bits. + * + * This was originally a 'draw' module stage, but since we don't need + * vertex window coords or anything, it can be a stand-alone utility module. + * + * Authors: Brian Paul + */ + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_inlines.h" + +#include "util/u_format.h" +#include "util/u_memory.h" +#include "util/u_pstipple.h" +#include "util/u_sampler.h" + +#include "tgsi/tgsi_transform.h" +#include "tgsi/tgsi_dump.h" + +/** Approx number of new tokens for instructions in pstip_transform_inst() */ +#define NUM_NEW_TOKENS 50 + + +static void +util_pstipple_update_stipple_texture(struct pipe_context *pipe, + struct pipe_resource *tex, + const uint32_t pattern[32]) +{ + static const uint bit31 = 1 << 31; + struct pipe_transfer *transfer; + ubyte *data; + int i, j; + + /* map texture memory */ + transfer = pipe_get_transfer(pipe, tex, 0, 0, + PIPE_TRANSFER_WRITE, 0, 0, 32, 32); + data = pipe->transfer_map(pipe, transfer); + + /* + * Load alpha texture. + * Note: 0 means keep the fragment, 255 means kill it. + * We'll negate the texel value and use KILP which kills if value + * is negative. + */ + for (i = 0; i < 32; i++) { + for (j = 0; j < 32; j++) { + if (pattern[i] & (bit31 >> j)) { + /* fragment "on" */ + data[i * transfer->stride + j] = 0; + } + else { + /* fragment "off" */ + data[i * transfer->stride + j] = 255; + } + } + } + + /* unmap */ + pipe->transfer_unmap(pipe, transfer); + pipe->transfer_destroy(pipe, transfer); +} + + +/** + * Create a 32x32 alpha8 texture that encodes the given stipple pattern. + */ +struct pipe_resource * +util_pstipple_create_stipple_texture(struct pipe_context *pipe, + const uint32_t pattern[32]) +{ + struct pipe_screen *screen = pipe->screen; + struct pipe_resource templat, *tex; + + memset(&templat, 0, sizeof(templat)); + templat.target = PIPE_TEXTURE_2D; + templat.format = PIPE_FORMAT_A8_UNORM; + templat.last_level = 0; + templat.width0 = 32; + templat.height0 = 32; + templat.depth0 = 1; + templat.array_size = 1; + templat.bind = PIPE_BIND_SAMPLER_VIEW; + + tex = screen->resource_create(screen, &templat); + + if (tex) + util_pstipple_update_stipple_texture(pipe, tex, pattern); + + return tex; +} + + +/** + * Create sampler view to sample the stipple texture. + */ +struct pipe_sampler_view * +util_pstipple_create_sampler_view(struct pipe_context *pipe, + struct pipe_resource *tex) +{ + struct pipe_sampler_view templat, *sv; + + u_sampler_view_default_template(&templat, tex, tex->format); + sv = pipe->create_sampler_view(pipe, tex, &templat); + + return sv; +} + + +/** + * Create the sampler CSO that'll be used for stippling. + */ +void * +util_pstipple_create_sampler(struct pipe_context *pipe) +{ + struct pipe_sampler_state templat; + void *s; + + memset(&templat, 0, sizeof(templat)); + templat.wrap_s = PIPE_TEX_WRAP_REPEAT; + templat.wrap_t = PIPE_TEX_WRAP_REPEAT; + templat.wrap_r = PIPE_TEX_WRAP_REPEAT; + templat.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + templat.min_img_filter = PIPE_TEX_FILTER_NEAREST; + templat.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + templat.normalized_coords = 1; + templat.min_lod = 0.0f; + templat.max_lod = 0.0f; + + s = pipe->create_sampler_state(pipe, &templat); + return s; +} + + + +/** + * Subclass of tgsi_transform_context, used for transforming the + * user's fragment shader to add the extra texture sample and fragment kill + * instructions. + */ +struct pstip_transform_context { + struct tgsi_transform_context base; + uint tempsUsed; /**< bitmask */ + int wincoordInput; + int maxInput; + uint samplersUsed; /**< bitfield of samplers used */ + int freeSampler; /** an available sampler for the pstipple */ + int texTemp; /**< temp registers */ + int numImmed; + boolean firstInstruction; +}; + + +/** + * TGSI declaration transform callback. + * Look for a free sampler, a free input attrib, and two free temp regs. + */ +static void +pstip_transform_decl(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl) +{ + struct pstip_transform_context *pctx = + (struct pstip_transform_context *) ctx; + + if (decl->Declaration.File == TGSI_FILE_SAMPLER) { + uint i; + for (i = decl->Range.First; + i <= decl->Range.Last; i++) { + pctx->samplersUsed |= 1 << i; + } + } + else if (decl->Declaration.File == TGSI_FILE_INPUT) { + pctx->maxInput = MAX2(pctx->maxInput, (int) decl->Range.Last); + if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) + pctx->wincoordInput = (int) decl->Range.First; + } + else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { + uint i; + for (i = decl->Range.First; + i <= decl->Range.Last; i++) { + pctx->tempsUsed |= (1 << i); + } + } + + ctx->emit_declaration(ctx, decl); +} + + +static void +pstip_transform_immed(struct tgsi_transform_context *ctx, + struct tgsi_full_immediate *immed) +{ + struct pstip_transform_context *pctx = + (struct pstip_transform_context *) ctx; + pctx->numImmed++; +} + + +/** + * Find the lowest zero bit in the given word, or -1 if bitfield is all ones. + */ +static int +free_bit(uint bitfield) +{ + return ffs(~bitfield) - 1; +} + + +/** + * TGSI instruction transform callback. + * Replace writes to result.color w/ a temp reg. + * Upon END instruction, insert texture sampling code for antialiasing. + */ +static void +pstip_transform_inst(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct pstip_transform_context *pctx = + (struct pstip_transform_context *) ctx; + + if (pctx->firstInstruction) { + /* emit our new declarations before the first instruction */ + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction newInst; + uint i; + int wincoordInput; + + /* find free sampler */ + pctx->freeSampler = free_bit(pctx->samplersUsed); + if (pctx->freeSampler >= PIPE_MAX_SAMPLERS) + pctx->freeSampler = PIPE_MAX_SAMPLERS - 1; + + if (pctx->wincoordInput < 0) + wincoordInput = pctx->maxInput + 1; + else + wincoordInput = pctx->wincoordInput; + + /* find one free temp reg */ + for (i = 0; i < 32; i++) { + if ((pctx->tempsUsed & (1 << i)) == 0) { + /* found a free temp */ + if (pctx->texTemp < 0) + pctx->texTemp = i; + else + break; + } + } + assert(pctx->texTemp >= 0); + + if (pctx->wincoordInput < 0) { + /* declare new position input reg */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Interpolate = TGSI_INTERPOLATE_LINEAR; + decl.Declaration.Semantic = 1; + decl.Semantic.Name = TGSI_SEMANTIC_POSITION; + decl.Semantic.Index = 0; + decl.Range.First = + decl.Range.Last = wincoordInput; + ctx->emit_declaration(ctx, &decl); + } + + /* declare new sampler */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.Range.First = + decl.Range.Last = pctx->freeSampler; + ctx->emit_declaration(ctx, &decl); + + /* declare new temp regs */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.Range.First = + decl.Range.Last = pctx->texTemp; + ctx->emit_declaration(ctx, &decl); + + /* emit immediate = {1/32, 1/32, 1, 1} + * The index/position of this immediate will be pctx->numImmed + */ + { + static const float value[4] = { 1.0/32, 1.0/32, 1.0, 1.0 }; + struct tgsi_full_immediate immed; + uint size = 4; + immed = tgsi_default_full_immediate(); + immed.Immediate.NrTokens = 1 + size; /* one for the token itself */ + immed.u[0].Float = value[0]; + immed.u[1].Float = value[1]; + immed.u[2].Float = value[2]; + immed.u[3].Float = value[3]; + ctx->emit_immediate(ctx, &immed); + } + + pctx->firstInstruction = FALSE; + + + /* + * Insert new MUL/TEX/KILP instructions at start of program + * Take gl_FragCoord, divide by 32 (stipple size), sample the + * texture and kill fragment if needed. + * + * We'd like to use non-normalized texcoords to index into a RECT + * texture, but we can only use REPEAT wrap mode with normalized + * texcoords. Darn. + */ + + /* XXX invert wincoord if origin isn't lower-left... */ + + /* MUL texTemp, INPUT[wincoord], 1/32; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = pctx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.Src[0].Register.File = TGSI_FILE_INPUT; + newInst.Src[0].Register.Index = wincoordInput; + newInst.Src[1].Register.File = TGSI_FILE_IMMEDIATE; + newInst.Src[1].Register.Index = pctx->numImmed; + ctx->emit_instruction(ctx, &newInst); + + /* TEX texTemp, texTemp, sampler; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_TEX; + newInst.Instruction.NumDstRegs = 1; + newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Dst[0].Register.Index = pctx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.Instruction.Texture = TRUE; + newInst.Texture.Texture = TGSI_TEXTURE_2D; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = pctx->texTemp; + newInst.Src[1].Register.File = TGSI_FILE_SAMPLER; + newInst.Src[1].Register.Index = pctx->freeSampler; + ctx->emit_instruction(ctx, &newInst); + + /* KIL -texTemp; # if -texTemp < 0, KILL fragment */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_KIL; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 1; + newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY; + newInst.Src[0].Register.Index = pctx->texTemp; + newInst.Src[0].Register.Negate = 1; + ctx->emit_instruction(ctx, &newInst); + } + + /* emit this instruction */ + ctx->emit_instruction(ctx, inst); +} + + +/** + * Given a fragment shader, return a new fragment shader which + * samples a stipple texture and executes KILL. + */ +struct pipe_shader_state * +util_pstipple_create_fragment_shader(struct pipe_context *pipe, + struct pipe_shader_state *fs, + unsigned *samplerUnitOut) +{ + struct pipe_shader_state *new_fs; + struct pstip_transform_context transform; + const uint newLen = tgsi_num_tokens(fs->tokens) + NUM_NEW_TOKENS; + + new_fs = MALLOC(sizeof(*new_fs)); + if (!new_fs) + return NULL; + + new_fs->tokens = tgsi_alloc_tokens(newLen); + if (!new_fs->tokens) { + FREE(new_fs); + return NULL; + } + + memset(&transform, 0, sizeof(transform)); + transform.wincoordInput = -1; + transform.maxInput = -1; + transform.texTemp = -1; + transform.firstInstruction = TRUE; + transform.base.transform_instruction = pstip_transform_inst; + transform.base.transform_declaration = pstip_transform_decl; + transform.base.transform_immediate = pstip_transform_immed; + + tgsi_transform_shader(fs->tokens, + (struct tgsi_token *) new_fs->tokens, + newLen, &transform.base); + +#if 0 /* DEBUG */ + tgsi_dump(fs->tokens, 0); + tgsi_dump(pstip_fs.tokens, 0); +#endif + + assert(transform.freeSampler < PIPE_MAX_SAMPLERS); + *samplerUnitOut = transform.freeSampler; + + return new_fs; +} + diff --git a/src/gallium/auxiliary/util/u_pstipple.h b/src/gallium/auxiliary/util/u_pstipple.h new file mode 100644 index 0000000000..1c2f5f48af --- /dev/null +++ b/src/gallium/auxiliary/util/u_pstipple.h @@ -0,0 +1,56 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_PSTIPPLE_H +#define U_PSTIPPLE_H + +#include "pipe/p_compiler.h" + +struct pipe_context; +struct pipe_resource; +struct pipe_shader_state; + + +extern struct pipe_resource * +util_pstipple_create_stipple_texture(struct pipe_context *pipe, + const uint32_t pattern[32]); + +extern struct pipe_sampler_view * +util_pstipple_create_sampler_view(struct pipe_context *pipe, + struct pipe_resource *tex); + +extern void * +util_pstipple_create_sampler(struct pipe_context *pipe); + +extern struct pipe_shader_state * +util_pstipple_create_fragment_shader(struct pipe_context *pipe, + struct pipe_shader_state *fs, + unsigned *samplerUnitOut); + + +#endif diff --git a/src/gallium/auxiliary/util/u_resource.c b/src/gallium/auxiliary/util/u_resource.c index 443c9f8067..50a7cd4d55 100644 --- a/src/gallium/auxiliary/util/u_resource.c +++ b/src/gallium/auxiliary/util/u_resource.c @@ -24,18 +24,10 @@ void u_resource_destroy_vtbl(struct pipe_screen *screen, ur->vtbl->resource_destroy(screen, resource); } -unsigned u_is_resource_referenced_vtbl( struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned level, int layer) -{ - struct u_resource *ur = u_resource(resource); - return ur->vtbl->is_resource_referenced(pipe, resource, level, layer); -} - struct pipe_transfer *u_get_transfer_vtbl(struct pipe_context *context, struct pipe_resource *resource, unsigned level, - enum pipe_transfer_usage usage, + unsigned usage, const struct pipe_box *box) { struct u_resource *ur = u_resource(resource); diff --git a/src/gallium/auxiliary/util/u_simple_screen.h b/src/gallium/auxiliary/util/u_simple_screen.h index 7139aaabc5..7caeb75cd2 100644 --- a/src/gallium/auxiliary/util/u_simple_screen.h +++ b/src/gallium/auxiliary/util/u_simple_screen.h @@ -159,7 +159,8 @@ struct pipe_winsys */ int (*fence_finish)( struct pipe_winsys *ws, struct pipe_fence_handle *fence, - unsigned flag ); + unsigned flags, + uint64_t timeout ); }; diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index 4eddd3f519..9caf76c802 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -84,7 +84,7 @@ util_create_rgba_surface(struct pipe_context *pipe, /* Choose surface format */ for (i = 0; rgbaFormats[i]; i++) { if (screen->is_format_supported(screen, rgbaFormats[i], - target, 0, bind, 0)) { + target, 0, bind)) { format = rgbaFormats[i]; break; } diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 44cadbfcdd..e3c7085ba9 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -367,47 +367,19 @@ pipe_get_tile_rgba(struct pipe_context *pipe, uint x, uint y, uint w, uint h, float *p) { - unsigned dst_stride = w * 4; - void *packed; - enum pipe_format format = pt->resource->format; - - if (u_clip_tile(x, y, &w, &h, &pt->box)) - return; - - packed = MALLOC(util_format_get_nblocks(format, w, h) * util_format_get_blocksize(format)); - - if (!packed) - return; - - if(format == PIPE_FORMAT_UYVY || format == PIPE_FORMAT_YUYV) - assert((x & 1) == 0); - - pipe_get_tile_raw(pipe, pt, x, y, w, h, packed, 0); - - pipe_tile_raw_to_rgba(format, packed, w, h, p, dst_stride); - - FREE(packed); + pipe_get_tile_rgba_format(pipe, pt, x, y, w, h, pt->resource->format, p); } void -pipe_get_tile_swizzle(struct pipe_context *pipe, - struct pipe_transfer *pt, - uint x, - uint y, - uint w, - uint h, - uint swizzle_r, - uint swizzle_g, - uint swizzle_b, - uint swizzle_a, - enum pipe_format format, - float *p) +pipe_get_tile_rgba_format(struct pipe_context *pipe, + struct pipe_transfer *pt, + uint x, uint y, uint w, uint h, + enum pipe_format format, + float *p) { unsigned dst_stride = w * 4; void *packed; - uint iy; - float rgba01[6]; if (u_clip_tile(x, y, &w, &h, &pt->box)) { return; @@ -427,35 +399,6 @@ pipe_get_tile_swizzle(struct pipe_context *pipe, pipe_tile_raw_to_rgba(format, packed, w, h, p, dst_stride); FREE(packed); - - if (swizzle_r == PIPE_SWIZZLE_RED && - swizzle_g == PIPE_SWIZZLE_GREEN && - swizzle_b == PIPE_SWIZZLE_BLUE && - swizzle_a == PIPE_SWIZZLE_ALPHA) { - /* no-op, skip */ - return; - } - - rgba01[PIPE_SWIZZLE_ZERO] = 0.0f; - rgba01[PIPE_SWIZZLE_ONE] = 1.0f; - - for (iy = 0; iy < h; iy++) { - float *row = p; - uint ix; - - for (ix = 0; ix < w; ix++) { - rgba01[PIPE_SWIZZLE_RED] = row[0]; - rgba01[PIPE_SWIZZLE_GREEN] = row[1]; - rgba01[PIPE_SWIZZLE_BLUE] = row[2]; - rgba01[PIPE_SWIZZLE_ALPHA] = row[3]; - - *row++ = rgba01[swizzle_r]; - *row++ = rgba01[swizzle_g]; - *row++ = rgba01[swizzle_b]; - *row++ = rgba01[swizzle_a]; - } - p += dst_stride; - } } @@ -465,9 +408,19 @@ pipe_put_tile_rgba(struct pipe_context *pipe, uint x, uint y, uint w, uint h, const float *p) { + pipe_put_tile_rgba_format(pipe, pt, x, y, w, h, pt->resource->format, p); +} + + +void +pipe_put_tile_rgba_format(struct pipe_context *pipe, + struct pipe_transfer *pt, + uint x, uint y, uint w, uint h, + enum pipe_format format, + const float *p) +{ unsigned src_stride = w * 4; void *packed; - enum pipe_format format = pt->resource->format; if (u_clip_tile(x, y, &w, &h, &pt->box)) return; diff --git a/src/gallium/auxiliary/util/u_tile.h b/src/gallium/auxiliary/util/u_tile.h index 558351d0ce..0ba274308f 100644 --- a/src/gallium/auxiliary/util/u_tile.h +++ b/src/gallium/auxiliary/util/u_tile.h @@ -80,18 +80,11 @@ pipe_get_tile_rgba(struct pipe_context *pipe, float *p); void -pipe_get_tile_swizzle(struct pipe_context *pipe, - struct pipe_transfer *pt, - uint x, - uint y, - uint w, - uint h, - uint swizzle_r, - uint swizzle_g, - uint swizzle_b, - uint swizzle_a, - enum pipe_format format, - float *p); +pipe_get_tile_rgba_format(struct pipe_context *pipe, + struct pipe_transfer *pt, + uint x, uint y, uint w, uint h, + enum pipe_format format, + float *p); void pipe_put_tile_rgba(struct pipe_context *pipe, @@ -99,6 +92,13 @@ pipe_put_tile_rgba(struct pipe_context *pipe, uint x, uint y, uint w, uint h, const float *p); +void +pipe_put_tile_rgba_format(struct pipe_context *pipe, + struct pipe_transfer *pt, + uint x, uint y, uint w, uint h, + enum pipe_format format, + const float *p); + void pipe_get_tile_z(struct pipe_context *pipe, diff --git a/src/gallium/auxiliary/util/u_transfer.c b/src/gallium/auxiliary/util/u_transfer.c index e2828cfd99..7e72a59ee0 100644 --- a/src/gallium/auxiliary/util/u_transfer.c +++ b/src/gallium/auxiliary/util/u_transfer.c @@ -73,13 +73,6 @@ void u_default_transfer_flush_region( struct pipe_context *pipe, */ } -unsigned u_default_is_resource_referenced( struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned level, int layer) -{ - return 0; -} - struct pipe_transfer * u_default_get_transfer(struct pipe_context *context, struct pipe_resource *resource, unsigned level, @@ -112,3 +105,10 @@ void u_default_transfer_destroy(struct pipe_context *pipe, FREE(transfer); } +void u_default_redefine_user_buffer(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned offset, + unsigned size) +{ + resource->width0 = MAX2(resource->width0, offset + size); +} diff --git a/src/gallium/auxiliary/util/u_transfer.h b/src/gallium/auxiliary/util/u_transfer.h index 3412e13c3c..5b5ddeb4ab 100644 --- a/src/gallium/auxiliary/util/u_transfer.h +++ b/src/gallium/auxiliary/util/u_transfer.h @@ -27,10 +27,6 @@ void u_default_transfer_flush_region( struct pipe_context *pipe, struct pipe_transfer *transfer, const struct pipe_box *box); -unsigned u_default_is_resource_referenced( struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned level, int layer); - struct pipe_transfer * u_default_get_transfer(struct pipe_context *context, struct pipe_resource *resource, unsigned level, @@ -57,10 +53,6 @@ struct u_resource_vtbl { void (*resource_destroy)(struct pipe_screen *, struct pipe_resource *pt); - unsigned (*is_resource_referenced)(struct pipe_context *pipe, - struct pipe_resource *texture, - unsigned level, int layer); - struct pipe_transfer *(*get_transfer)(struct pipe_context *, struct pipe_resource *resource, unsigned level, @@ -93,7 +85,7 @@ struct u_resource_vtbl { struct u_resource { struct pipe_resource b; - struct u_resource_vtbl *vtbl; + const struct u_resource_vtbl *vtbl; }; @@ -104,10 +96,6 @@ boolean u_resource_get_handle_vtbl(struct pipe_screen *screen, void u_resource_destroy_vtbl(struct pipe_screen *screen, struct pipe_resource *resource); -unsigned u_is_resource_referenced_vtbl( struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned level, int layer); - struct pipe_transfer *u_get_transfer_vtbl(struct pipe_context *context, struct pipe_resource *resource, unsigned level, @@ -136,11 +124,9 @@ void u_transfer_inline_write_vtbl( struct pipe_context *rm_ctx, unsigned stride, unsigned layer_stride); - - - - - - +void u_default_redefine_user_buffer(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned offset, + unsigned size); #endif diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c index af229e61a0..9562acb821 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.c +++ b/src/gallium/auxiliary/util/u_upload_mgr.c @@ -41,22 +41,23 @@ struct u_upload_mgr { struct pipe_context *pipe; - unsigned default_size; - unsigned alignment; - unsigned usage; - - /* The active buffer: - */ - struct pipe_resource *buffer; - unsigned size; - unsigned offset; + unsigned default_size; /* Minimum size of the upload buffer, in bytes. */ + unsigned alignment; /* Alignment of each sub-allocation. */ + unsigned bind; /* Bitmask of PIPE_BIND_* flags. */ + + struct pipe_resource *buffer; /* Upload buffer. */ + struct pipe_transfer *transfer; /* Transfer object for the upload buffer. */ + uint8_t *map; /* Pointer to the mapped upload buffer. */ + unsigned size; /* Actual size of the upload buffer. */ + unsigned offset; /* Aligned offset to the upload buffer, pointing + * at the first unused byte. */ }; struct u_upload_mgr *u_upload_create( struct pipe_context *pipe, unsigned default_size, unsigned alignment, - unsigned usage ) + unsigned bind ) { struct u_upload_mgr *upload = CALLOC_STRUCT( u_upload_mgr ); if (!upload) @@ -65,54 +66,12 @@ struct u_upload_mgr *u_upload_create( struct pipe_context *pipe, upload->pipe = pipe; upload->default_size = default_size; upload->alignment = alignment; - upload->usage = usage; + upload->bind = bind; upload->buffer = NULL; return upload; } -/* Slightly specialized version of buffer_write designed to maximize - * chances of the driver consolidating successive writes into a single - * upload. - * - * dirty_size may be slightly greater than size to cope with - * alignment. We don't want to leave holes between succesively mapped - * regions as that may prevent the driver from consolidating uploads. - * - * Note that the 'data' pointer has probably come from the application - * and we cannot read even a byte past its end without risking - * segfaults, or at least complaints from valgrind.. - */ -static INLINE enum pipe_error -my_buffer_write(struct pipe_context *pipe, - struct pipe_resource *buf, - unsigned offset, unsigned size, unsigned dirty_size, - const void *data) -{ - struct pipe_transfer *transfer = NULL; - uint8_t *map; - - assert(offset < buf->width0); - assert(offset + size <= buf->width0); - assert(dirty_size >= size); - assert(size); - - map = pipe_buffer_map_range(pipe, buf, offset, dirty_size, - PIPE_TRANSFER_WRITE | - PIPE_TRANSFER_FLUSH_EXPLICIT | - PIPE_TRANSFER_DISCARD | - PIPE_TRANSFER_UNSYNCHRONIZED, - &transfer); - if (map == NULL) - return PIPE_ERROR_OUT_OF_MEMORY; - - memcpy(map + offset, data, size); - pipe_buffer_flush_mapped_range(pipe, transfer, offset, dirty_size); - pipe_buffer_unmap(pipe, buf, transfer); - - return PIPE_OK; -} - /* Release old buffer. * * This must usually be called prior to firing the command stream @@ -124,6 +83,16 @@ my_buffer_write(struct pipe_context *pipe, */ void u_upload_flush( struct u_upload_mgr *upload ) { + /* Unmap and unreference the upload buffer. */ + if (upload->transfer) { + if (upload->offset) { + pipe_buffer_flush_mapped_range(upload->pipe, upload->transfer, + 0, upload->offset); + } + pipe_transfer_unmap(upload->pipe, upload->transfer); + pipe_transfer_destroy(upload->pipe, upload->transfer); + upload->transfer = NULL; + } pipe_resource_reference( &upload->buffer, NULL ); upload->size = 0; } @@ -142,7 +111,7 @@ u_upload_alloc_buffer( struct u_upload_mgr *upload, { unsigned size; - /* Release old buffer, if present: + /* Release the old buffer, if present: */ u_upload_flush( upload ); @@ -151,10 +120,18 @@ u_upload_alloc_buffer( struct u_upload_mgr *upload, size = align(MAX2(upload->default_size, min_size), 4096); upload->buffer = pipe_buffer_create( upload->pipe->screen, - upload->usage, + upload->bind, + PIPE_USAGE_STREAM, size ); if (upload->buffer == NULL) goto fail; + + /* Map the new buffer. */ + upload->map = pipe_buffer_map_range(upload->pipe, upload->buffer, + 0, size, + PIPE_TRANSFER_WRITE | + PIPE_TRANSFER_FLUSH_EXPLICIT, + &upload->transfer); upload->size = size; @@ -168,38 +145,62 @@ fail: return PIPE_ERROR_OUT_OF_MEMORY; } - -enum pipe_error u_upload_data( struct u_upload_mgr *upload, - unsigned size, - const void *data, - unsigned *out_offset, - struct pipe_resource **outbuf ) +enum pipe_error u_upload_alloc( struct u_upload_mgr *upload, + unsigned min_out_offset, + unsigned size, + unsigned *out_offset, + struct pipe_resource **outbuf, + boolean *flushed, + void **ptr ) { unsigned alloc_size = align( size, upload->alignment ); - enum pipe_error ret = PIPE_OK; + unsigned alloc_offset = align(min_out_offset, upload->alignment); + unsigned offset; - if (upload->offset + alloc_size > upload->size) { - ret = u_upload_alloc_buffer( upload, alloc_size ); + /* Make sure we have enough space in the upload buffer + * for the sub-allocation. */ + if (MAX2(upload->offset, alloc_offset) + alloc_size > upload->size) { + enum pipe_error ret = u_upload_alloc_buffer(upload, + alloc_offset + alloc_size); if (ret) return ret; + + *flushed = TRUE; + } else { + *flushed = FALSE; } - /* Copy the data, using map_range if available: - */ - ret = my_buffer_write( upload->pipe, - upload->buffer, - upload->offset, - size, - alloc_size, - data ); + offset = MAX2(upload->offset, alloc_offset); + + assert(offset < upload->buffer->width0); + assert(offset + size <= upload->buffer->width0); + assert(size); + + /* Emit the return values: */ + *ptr = upload->map + offset; + pipe_resource_reference( outbuf, upload->buffer ); + *out_offset = offset; + + upload->offset = offset + alloc_size; + return PIPE_OK; +} + +enum pipe_error u_upload_data( struct u_upload_mgr *upload, + unsigned min_out_offset, + unsigned size, + const void *data, + unsigned *out_offset, + struct pipe_resource **outbuf, + boolean *flushed ) +{ + uint8_t *ptr; + enum pipe_error ret = u_upload_alloc(upload, min_out_offset, size, + out_offset, outbuf, flushed, + (void**)&ptr); if (ret) return ret; - /* Emit the return values: - */ - pipe_resource_reference( outbuf, upload->buffer ); - *out_offset = upload->offset; - upload->offset += alloc_size; + memcpy(ptr, data, size); return PIPE_OK; } @@ -210,11 +211,13 @@ enum pipe_error u_upload_data( struct u_upload_mgr *upload, * renders or DrawElements calls. */ enum pipe_error u_upload_buffer( struct u_upload_mgr *upload, + unsigned min_out_offset, unsigned offset, unsigned size, struct pipe_resource *inbuf, unsigned *out_offset, - struct pipe_resource **outbuf ) + struct pipe_resource **outbuf, + boolean *flushed ) { enum pipe_error ret = PIPE_OK; struct pipe_transfer *transfer = NULL; @@ -233,17 +236,16 @@ enum pipe_error u_upload_buffer( struct u_upload_mgr *upload, if (0) debug_printf("upload ptr %p ofs %d sz %d\n", map, offset, size); - ret = u_upload_data( upload, + ret = u_upload_data( upload, + min_out_offset, size, map + offset, out_offset, - outbuf ); - if (ret) - goto done; + outbuf, flushed ); done: if (map) - pipe_buffer_unmap( upload->pipe, inbuf, transfer ); + pipe_buffer_unmap( upload->pipe, transfer ); return ret; } diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h index de016df02e..c9a2ffeb57 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.h +++ b/src/gallium/auxiliary/util/u_upload_mgr.h @@ -32,15 +32,28 @@ #ifndef U_UPLOAD_MGR_H #define U_UPLOAD_MGR_H +#include "pipe/p_compiler.h" + struct pipe_context; struct pipe_resource; +/** + * Create the upload manager. + * + * \param pipe Pipe driver. + * \param default_size Minimum size of the upload buffer, in bytes. + * \param alignment Alignment of each suballocation in the upload buffer. + * \param bind Bitmask of PIPE_BIND_* flags. + */ struct u_upload_mgr *u_upload_create( struct pipe_context *pipe, unsigned default_size, unsigned alignment, - unsigned usage ); + unsigned bind ); +/** + * Destroy the upload manager. + */ void u_upload_destroy( struct u_upload_mgr *upload ); /* Unmap and release old buffer. @@ -53,20 +66,55 @@ void u_upload_destroy( struct u_upload_mgr *upload ); */ void u_upload_flush( struct u_upload_mgr *upload ); +/** + * Sub-allocate new memory from the upload buffer. + * + * \param upload Upload manager + * \param min_out_offset Minimum offset that should be returned in out_offset. + * \param size Size of the allocation. + * \param out_offset Pointer to where the new buffer offset will be returned. + * \param outbuf Pointer to where the upload buffer will be returned. + * \param flushed Whether the upload buffer was flushed. + * \param ptr Pointer to the allocated memory that is returned. + */ +enum pipe_error u_upload_alloc( struct u_upload_mgr *upload, + unsigned min_out_offset, + unsigned size, + unsigned *out_offset, + struct pipe_resource **outbuf, + boolean *flushed, + void **ptr ); + +/** + * Allocate and write data to the upload buffer. + * + * Same as u_upload_alloc, but in addition to that, it copies "data" + * to the pointer returned from u_upload_alloc. + */ enum pipe_error u_upload_data( struct u_upload_mgr *upload, + unsigned min_out_offset, unsigned size, const void *data, unsigned *out_offset, - struct pipe_resource **outbuf ); + struct pipe_resource **outbuf, + boolean *flushed ); +/** + * Allocate and copy an input buffer to the upload buffer. + * + * Same as u_upload_data, except that the input data comes from a buffer + * instead of a user pointer. + */ enum pipe_error u_upload_buffer( struct u_upload_mgr *upload, + unsigned min_out_offset, unsigned offset, unsigned size, struct pipe_resource *inbuf, unsigned *out_offset, - struct pipe_resource **outbuf ); + struct pipe_resource **outbuf, + boolean *flushed ); diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.c b/src/gallium/auxiliary/util/u_vbuf_mgr.c new file mode 100644 index 0000000000..521ac07747 --- /dev/null +++ b/src/gallium/auxiliary/util/u_vbuf_mgr.c @@ -0,0 +1,609 @@ +/************************************************************************** + * + * Copyright 2011 Marek Olšák <maraeo@gmail.com> + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_vbuf_mgr.h" + +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_upload_mgr.h" +#include "translate/translate.h" +#include "translate/translate_cache.h" + +/* Hardware vertex fetcher limitations can be described by this structure. */ +struct u_vbuf_caps { + /* Vertex format CAPs. */ + /* TRUE if hardware supports it. */ + unsigned format_fixed32:1; /* PIPE_FORMAT_*32*_FIXED */ + unsigned format_float16:1; /* PIPE_FORMAT_*16*_FLOAT */ + unsigned format_float64:1; /* PIPE_FORMAT_*64*_FLOAT */ + unsigned format_norm32:1; /* PIPE_FORMAT_*32*NORM */ + unsigned format_scaled32:1; /* PIPE_FORMAT_*32*SCALED */ + + /* Whether vertex fetches don't have to be dword-aligned. */ + /* TRUE if hardware supports it. */ + unsigned fetch_dword_unaligned:1; +}; + +struct u_vbuf_mgr_elements { + unsigned count; + struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS]; + + /* If (velem[i].src_format != real_format[i]), the vertex buffer + * referenced by the vertex element cannot be used for rendering and + * its vertex data must be translated to real_format[i]. */ + enum pipe_format native_format[PIPE_MAX_ATTRIBS]; + unsigned native_format_size[PIPE_MAX_ATTRIBS]; + + /* This might mean two things: + * - src_format != native_format, as discussed above. + * - src_offset % 4 != 0 (if the caps don't allow such an offset). */ + boolean incompatible_layout; +}; + +struct u_vbuf_mgr_priv { + struct u_vbuf_mgr b; + struct u_vbuf_caps caps; + struct pipe_context *pipe; + + struct translate_cache *translate_cache; + unsigned translate_vb_slot; + + struct u_vbuf_mgr_elements *ve; + void *saved_ve, *fallback_ve; + boolean ve_binding_lock; + + boolean any_user_vbs; + boolean incompatible_vb_layout; +}; + +static void u_vbuf_mgr_init_format_caps(struct u_vbuf_mgr_priv *mgr) +{ + struct pipe_screen *screen = mgr->pipe->screen; + + mgr->caps.format_fixed32 = + screen->is_format_supported(screen, PIPE_FORMAT_R32_FIXED, PIPE_BUFFER, + 0, PIPE_BIND_VERTEX_BUFFER); + + mgr->caps.format_float16 = + screen->is_format_supported(screen, PIPE_FORMAT_R16_FLOAT, PIPE_BUFFER, + 0, PIPE_BIND_VERTEX_BUFFER); + + mgr->caps.format_float64 = + screen->is_format_supported(screen, PIPE_FORMAT_R64_FLOAT, PIPE_BUFFER, + 0, PIPE_BIND_VERTEX_BUFFER); + + mgr->caps.format_norm32 = + screen->is_format_supported(screen, PIPE_FORMAT_R32_UNORM, PIPE_BUFFER, + 0, PIPE_BIND_VERTEX_BUFFER) && + screen->is_format_supported(screen, PIPE_FORMAT_R32_SNORM, PIPE_BUFFER, + 0, PIPE_BIND_VERTEX_BUFFER); + + mgr->caps.format_scaled32 = + screen->is_format_supported(screen, PIPE_FORMAT_R32_USCALED, PIPE_BUFFER, + 0, PIPE_BIND_VERTEX_BUFFER) && + screen->is_format_supported(screen, PIPE_FORMAT_R32_SSCALED, PIPE_BUFFER, + 0, PIPE_BIND_VERTEX_BUFFER); +} + +struct u_vbuf_mgr * +u_vbuf_mgr_create(struct pipe_context *pipe, + unsigned upload_buffer_size, + unsigned upload_buffer_alignment, + unsigned upload_buffer_bind, + enum u_fetch_alignment fetch_alignment) +{ + struct u_vbuf_mgr_priv *mgr = CALLOC_STRUCT(u_vbuf_mgr_priv); + + mgr->pipe = pipe; + mgr->translate_cache = translate_cache_create(); + + mgr->b.uploader = u_upload_create(pipe, upload_buffer_size, + upload_buffer_alignment, + upload_buffer_bind); + + mgr->caps.fetch_dword_unaligned = + fetch_alignment == U_VERTEX_FETCH_BYTE_ALIGNED; + + u_vbuf_mgr_init_format_caps(mgr); + + return &mgr->b; +} + +void u_vbuf_mgr_destroy(struct u_vbuf_mgr *mgrb) +{ + struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb; + unsigned i; + + for (i = 0; i < mgr->b.nr_real_vertex_buffers; i++) { + pipe_resource_reference(&mgr->b.vertex_buffer[i].buffer, NULL); + pipe_resource_reference(&mgr->b.real_vertex_buffer[i], NULL); + } + + translate_cache_destroy(mgr->translate_cache); + u_upload_destroy(mgr->b.uploader); + FREE(mgr); +} + + +static void u_vbuf_translate_begin(struct u_vbuf_mgr_priv *mgr, + int min_index, int max_index, + boolean *upload_flushed) +{ + struct translate_key key; + struct translate_element *te; + unsigned tr_elem_index[PIPE_MAX_ATTRIBS]; + struct translate *tr; + boolean vb_translated[PIPE_MAX_ATTRIBS] = {0}; + uint8_t *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; + struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}; + struct pipe_resource *out_buffer = NULL; + unsigned i, num_verts, out_offset; + struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; + + memset(&key, 0, sizeof(key)); + memset(tr_elem_index, 0xff, sizeof(tr_elem_index)); + + /* Initialize the translate key, i.e. the recipe how vertices should be + * translated. */ + memset(&key, 0, sizeof key); + for (i = 0; i < mgr->ve->count; i++) { + struct pipe_vertex_buffer *vb = + &mgr->b.vertex_buffer[mgr->ve->ve[i].vertex_buffer_index]; + enum pipe_format output_format = mgr->ve->native_format[i]; + unsigned output_format_size = mgr->ve->native_format_size[i]; + + /* Check for support. */ + if (mgr->ve->ve[i].src_format == mgr->ve->native_format[i] && + (mgr->caps.fetch_dword_unaligned || + (vb->buffer_offset % 4 == 0 && + vb->stride % 4 == 0 && + mgr->ve->ve[i].src_offset % 4 == 0))) { + continue; + } + + /* Workaround for translate: output floats instead of halfs. */ + switch (output_format) { + case PIPE_FORMAT_R16_FLOAT: + output_format = PIPE_FORMAT_R32_FLOAT; + output_format_size = 4; + break; + case PIPE_FORMAT_R16G16_FLOAT: + output_format = PIPE_FORMAT_R32G32_FLOAT; + output_format_size = 8; + break; + case PIPE_FORMAT_R16G16B16_FLOAT: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + output_format_size = 12; + break; + case PIPE_FORMAT_R16G16B16A16_FLOAT: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + output_format_size = 16; + break; + default:; + } + + /* Add this vertex element. */ + te = &key.element[key.nr_elements]; + /*te->type; + te->instance_divisor;*/ + te->input_buffer = mgr->ve->ve[i].vertex_buffer_index; + te->input_format = mgr->ve->ve[i].src_format; + te->input_offset = mgr->ve->ve[i].src_offset; + te->output_format = output_format; + te->output_offset = key.output_stride; + + key.output_stride += output_format_size; + vb_translated[mgr->ve->ve[i].vertex_buffer_index] = TRUE; + tr_elem_index[i] = key.nr_elements; + key.nr_elements++; + } + + /* Get a translate object. */ + tr = translate_cache_find(mgr->translate_cache, &key); + + /* Map buffers we want to translate. */ + for (i = 0; i < mgr->b.nr_vertex_buffers; i++) { + if (vb_translated[i]) { + struct pipe_vertex_buffer *vb = &mgr->b.vertex_buffer[i]; + + vb_map[i] = pipe_buffer_map(mgr->pipe, vb->buffer, + PIPE_TRANSFER_READ, &vb_transfer[i]); + + tr->set_buffer(tr, i, + vb_map[i] + vb->buffer_offset + vb->stride * min_index, + vb->stride, ~0); + } + } + + /* Create and map the output buffer. */ + num_verts = max_index + 1 - min_index; + + u_upload_alloc(mgr->b.uploader, + key.output_stride * min_index, + key.output_stride * num_verts, + &out_offset, &out_buffer, upload_flushed, + (void**)&out_map); + + out_offset -= key.output_stride * min_index; + + /* Translate. */ + tr->run(tr, 0, num_verts, 0, out_map); + + /* Unmap all buffers. */ + for (i = 0; i < mgr->b.nr_vertex_buffers; i++) { + if (vb_translated[i]) { + pipe_buffer_unmap(mgr->pipe, vb_transfer[i]); + } + } + + /* Setup the new vertex buffer in the first free slot. */ + mgr->translate_vb_slot = ~0; + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (!mgr->b.vertex_buffer[i].buffer) { + mgr->translate_vb_slot = i; + + if (i >= mgr->b.nr_vertex_buffers) { + mgr->b.nr_real_vertex_buffers = i+1; + } + break; + } + } + + if (mgr->translate_vb_slot != ~0) { + /* Setup the new vertex buffer. */ + pipe_resource_reference( + &mgr->b.real_vertex_buffer[mgr->translate_vb_slot], out_buffer); + mgr->b.vertex_buffer[mgr->translate_vb_slot].buffer_offset = out_offset; + mgr->b.vertex_buffer[mgr->translate_vb_slot].stride = key.output_stride; + + /* Setup new vertex elements. */ + for (i = 0; i < mgr->ve->count; i++) { + if (tr_elem_index[i] < key.nr_elements) { + te = &key.element[tr_elem_index[i]]; + new_velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor; + new_velems[i].src_format = te->output_format; + new_velems[i].src_offset = te->output_offset; + new_velems[i].vertex_buffer_index = mgr->translate_vb_slot; + } else { + memcpy(&new_velems[i], &mgr->ve->ve[i], + sizeof(struct pipe_vertex_element)); + } + } + + mgr->fallback_ve = + mgr->pipe->create_vertex_elements_state(mgr->pipe, mgr->ve->count, + new_velems); + + /* Preserve saved_ve. */ + mgr->ve_binding_lock = TRUE; + mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->fallback_ve); + mgr->ve_binding_lock = FALSE; + } + + pipe_resource_reference(&out_buffer, NULL); +} + +static void u_vbuf_translate_end(struct u_vbuf_mgr_priv *mgr) +{ + if (mgr->fallback_ve == NULL) { + return; + } + + /* Restore vertex elements. */ + /* Note that saved_ve will be overwritten in bind_vertex_elements_state. */ + mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->saved_ve); + mgr->pipe->delete_vertex_elements_state(mgr->pipe, mgr->fallback_ve); + mgr->fallback_ve = NULL; + + /* Delete the now-unused VBO. */ + pipe_resource_reference(&mgr->b.real_vertex_buffer[mgr->translate_vb_slot], + NULL); + mgr->b.nr_real_vertex_buffers = mgr->b.nr_vertex_buffers; +} + +#define FORMAT_REPLACE(what, withwhat) \ + case PIPE_FORMAT_##what: format = PIPE_FORMAT_##withwhat; break + +struct u_vbuf_mgr_elements * +u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb, + unsigned count, + const struct pipe_vertex_element *attribs, + struct pipe_vertex_element *native_attribs) +{ + struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb; + unsigned i; + struct u_vbuf_mgr_elements *ve = CALLOC_STRUCT(u_vbuf_mgr_elements); + + ve->count = count; + + if (!count) { + return ve; + } + + memcpy(ve->ve, attribs, sizeof(struct pipe_vertex_element) * count); + memcpy(native_attribs, attribs, sizeof(struct pipe_vertex_element) * count); + + /* Set the best native format in case the original format is not + * supported. */ + for (i = 0; i < count; i++) { + enum pipe_format format = ve->ve[i].src_format; + + /* Choose a native format. + * For now we don't care about the alignment, that's going to + * be sorted out later. */ + if (!mgr->caps.format_fixed32) { + switch (format) { + FORMAT_REPLACE(R32_FIXED, R32_FLOAT); + FORMAT_REPLACE(R32G32_FIXED, R32G32_FLOAT); + FORMAT_REPLACE(R32G32B32_FIXED, R32G32B32_FLOAT); + FORMAT_REPLACE(R32G32B32A32_FIXED, R32G32B32A32_FLOAT); + default:; + } + } + if (!mgr->caps.format_float16) { + switch (format) { + FORMAT_REPLACE(R16_FLOAT, R32_FLOAT); + FORMAT_REPLACE(R16G16_FLOAT, R32G32_FLOAT); + FORMAT_REPLACE(R16G16B16_FLOAT, R32G32B32_FLOAT); + FORMAT_REPLACE(R16G16B16A16_FLOAT, R32G32B32A32_FLOAT); + default:; + } + } + if (!mgr->caps.format_float64) { + switch (format) { + FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); + FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); + FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); + FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); + default:; + } + } + if (!mgr->caps.format_norm32) { + switch (format) { + FORMAT_REPLACE(R32_UNORM, R32_FLOAT); + FORMAT_REPLACE(R32G32_UNORM, R32G32_FLOAT); + FORMAT_REPLACE(R32G32B32_UNORM, R32G32B32_FLOAT); + FORMAT_REPLACE(R32G32B32A32_UNORM, R32G32B32A32_FLOAT); + FORMAT_REPLACE(R32_SNORM, R32_FLOAT); + FORMAT_REPLACE(R32G32_SNORM, R32G32_FLOAT); + FORMAT_REPLACE(R32G32B32_SNORM, R32G32B32_FLOAT); + FORMAT_REPLACE(R32G32B32A32_SNORM, R32G32B32A32_FLOAT); + default:; + } + } + if (!mgr->caps.format_scaled32) { + switch (format) { + FORMAT_REPLACE(R32_USCALED, R32_FLOAT); + FORMAT_REPLACE(R32G32_USCALED, R32G32_FLOAT); + FORMAT_REPLACE(R32G32B32_USCALED, R32G32B32_FLOAT); + FORMAT_REPLACE(R32G32B32A32_USCALED,R32G32B32A32_FLOAT); + FORMAT_REPLACE(R32_SSCALED, R32_FLOAT); + FORMAT_REPLACE(R32G32_SSCALED, R32G32_FLOAT); + FORMAT_REPLACE(R32G32B32_SSCALED, R32G32B32_FLOAT); + FORMAT_REPLACE(R32G32B32A32_SSCALED,R32G32B32A32_FLOAT); + default:; + } + } + + native_attribs[i].src_format = format; + ve->native_format[i] = format; + ve->native_format_size[i] = + util_format_get_blocksize(ve->native_format[i]); + + ve->incompatible_layout = + ve->incompatible_layout || + ve->ve[i].src_format != ve->native_format[i] || + (!mgr->caps.fetch_dword_unaligned && ve->ve[i].src_offset % 4 != 0); + } + + /* Align the formats to the size of DWORD if needed. */ + if (!mgr->caps.fetch_dword_unaligned) { + for (i = 0; i < count; i++) { + ve->native_format_size[i] = align(ve->native_format_size[i], 4); + } + } + + return ve; +} + +void u_vbuf_mgr_bind_vertex_elements(struct u_vbuf_mgr *mgrb, + void *cso, + struct u_vbuf_mgr_elements *ve) +{ + struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb; + + if (!cso) { + return; + } + + if (!mgr->ve_binding_lock) { + mgr->saved_ve = cso; + mgr->ve = ve; + } +} + +void u_vbuf_mgr_destroy_vertex_elements(struct u_vbuf_mgr *mgr, + struct u_vbuf_mgr_elements *ve) +{ + FREE(ve); +} + +void u_vbuf_mgr_set_vertex_buffers(struct u_vbuf_mgr *mgrb, + unsigned count, + const struct pipe_vertex_buffer *bufs) +{ + struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb; + unsigned i; + + mgr->b.max_index = ~0; + mgr->any_user_vbs = FALSE; + mgr->incompatible_vb_layout = FALSE; + + if (!mgr->caps.fetch_dword_unaligned) { + /* Check if the strides and offsets are aligned to the size of DWORD. */ + for (i = 0; i < count; i++) { + if (bufs[i].buffer) { + if (bufs[i].stride % 4 != 0 || + bufs[i].buffer_offset % 4 != 0) { + mgr->incompatible_vb_layout = TRUE; + break; + } + } + } + } + + for (i = 0; i < count; i++) { + const struct pipe_vertex_buffer *vb = &bufs[i]; + + pipe_resource_reference(&mgr->b.vertex_buffer[i].buffer, vb->buffer); + pipe_resource_reference(&mgr->b.real_vertex_buffer[i], NULL); + + if (u_vbuf_resource(vb->buffer)->user_ptr) { + mgr->any_user_vbs = TRUE; + continue; + } + + pipe_resource_reference(&mgr->b.real_vertex_buffer[i], vb->buffer); + + /* The stride of zero means we will be fetching only the first + * vertex, so don't care about max_index. */ + if (!vb->stride) { + continue; + } + + /* Update the maximum index. */ + mgr->b.max_index = + MIN2(mgr->b.max_index, + (vb->buffer->width0 - vb->buffer_offset) / vb->stride); + } + + for (; i < mgr->b.nr_real_vertex_buffers; i++) { + pipe_resource_reference(&mgr->b.vertex_buffer[i].buffer, NULL); + pipe_resource_reference(&mgr->b.real_vertex_buffer[i], NULL); + } + + memcpy(mgr->b.vertex_buffer, bufs, + sizeof(struct pipe_vertex_buffer) * count); + + mgr->b.nr_vertex_buffers = count; + mgr->b.nr_real_vertex_buffers = count; +} + +static void u_vbuf_upload_buffers(struct u_vbuf_mgr_priv *mgr, + int min_index, int max_index, + unsigned instance_count, + boolean *upload_flushed) +{ + int i, nr = mgr->ve->count; + unsigned count = max_index + 1 - min_index; + boolean uploaded[PIPE_MAX_ATTRIBS] = {0}; + + for (i = 0; i < nr; i++) { + unsigned index = mgr->ve->ve[i].vertex_buffer_index; + struct pipe_vertex_buffer *vb = &mgr->b.vertex_buffer[index]; + + if (vb->buffer && + u_vbuf_resource(vb->buffer)->user_ptr && + !uploaded[index]) { + unsigned first, size; + boolean flushed; + unsigned instance_div = mgr->ve->ve[i].instance_divisor; + + if (instance_div) { + first = 0; + size = vb->stride * + ((instance_count + instance_div - 1) / instance_div); + } else if (vb->stride) { + first = vb->stride * min_index; + size = vb->stride * count; + } else { + first = 0; + size = mgr->ve->native_format_size[i]; + } + + u_upload_data(mgr->b.uploader, first, size, + u_vbuf_resource(vb->buffer)->user_ptr + first, + &vb->buffer_offset, + &mgr->b.real_vertex_buffer[index], + &flushed); + + vb->buffer_offset -= first; + + uploaded[index] = TRUE; + *upload_flushed = *upload_flushed || flushed; + } else { + assert(mgr->b.real_vertex_buffer[index]); + } + } +} + +void u_vbuf_mgr_draw_begin(struct u_vbuf_mgr *mgrb, + const struct pipe_draw_info *info, + boolean *buffers_updated, + boolean *uploader_flushed) +{ + struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb; + boolean bufs_updated = FALSE, upload_flushed = FALSE; + int min_index, max_index; + + min_index = info->min_index - info->index_bias; + max_index = info->max_index - info->index_bias; + + /* Translate vertices with non-native layouts or formats. */ + if (mgr->incompatible_vb_layout || mgr->ve->incompatible_layout) { + u_vbuf_translate_begin(mgr, min_index, max_index, &upload_flushed); + + if (mgr->fallback_ve) { + bufs_updated = TRUE; + } + } + + /* Upload user buffers. */ + if (mgr->any_user_vbs) { + u_vbuf_upload_buffers(mgr, min_index, max_index, info->instance_count, + &upload_flushed); + bufs_updated = TRUE; + } + + /* Set the return values. */ + if (buffers_updated) { + *buffers_updated = bufs_updated; + } + if (uploader_flushed) { + *uploader_flushed = upload_flushed; + } +} + +void u_vbuf_mgr_draw_end(struct u_vbuf_mgr *mgrb) +{ + struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb; + + if (mgr->fallback_ve) { + u_vbuf_translate_end(mgr); + } +} diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.h b/src/gallium/auxiliary/util/u_vbuf_mgr.h new file mode 100644 index 0000000000..8b241854c8 --- /dev/null +++ b/src/gallium/auxiliary/util/u_vbuf_mgr.h @@ -0,0 +1,121 @@ +/************************************************************************** + * + * Copyright 2011 Marek Olšák <maraeo@gmail.com> + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_VBUF_MGR_H +#define U_VBUF_MGR_H + +/* This module builds upon u_upload_mgr and translate_cache and takes care of + * user buffer uploads and vertex format fallbacks. It's designed + * for the drivers which don't always use the Draw module. (e.g. for HWTCL) + */ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_transfer.h" + +/* The manager. + * This structure should also be used to access vertex buffers + * from a driver. */ +struct u_vbuf_mgr { + /* This is what was set in set_vertex_buffers. + * May contain user buffers. */ + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_buffers; + + /* Contains only real vertex buffers. + * Hardware drivers should use real_vertex_buffers[i] + * instead of vertex_buffers[i].buffer. */ + struct pipe_resource *real_vertex_buffer[PIPE_MAX_ATTRIBS]; + int nr_real_vertex_buffers; + + /* Precomputed max_index for hardware vertex buffers. */ + int max_index; + + /* This uploader can optionally be used by the driver. + * + * Allowed functions: + * - u_upload_alloc + * - u_upload_data + * - u_upload_buffer + * - u_upload_flush */ + struct u_upload_mgr *uploader; +}; + +struct u_vbuf_resource { + struct u_resource b; + uint8_t *user_ptr; +}; + +/* Opaque type containing information about vertex elements for the manager. */ +struct u_vbuf_mgr_elements; + +enum u_fetch_alignment { + U_VERTEX_FETCH_BYTE_ALIGNED, + U_VERTEX_FETCH_DWORD_ALIGNED +}; + + +struct u_vbuf_mgr * +u_vbuf_mgr_create(struct pipe_context *pipe, + unsigned upload_buffer_size, + unsigned upload_buffer_alignment, + unsigned upload_buffer_bind, + enum u_fetch_alignment fetch_alignment); + +void u_vbuf_mgr_destroy(struct u_vbuf_mgr *mgr); + +struct u_vbuf_mgr_elements * +u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgr, + unsigned count, + const struct pipe_vertex_element *attrs, + struct pipe_vertex_element *native_attrs); + +void u_vbuf_mgr_bind_vertex_elements(struct u_vbuf_mgr *mgr, + void *cso, + struct u_vbuf_mgr_elements *ve); + +void u_vbuf_mgr_destroy_vertex_elements(struct u_vbuf_mgr *mgr, + struct u_vbuf_mgr_elements *ve); + +void u_vbuf_mgr_set_vertex_buffers(struct u_vbuf_mgr *mgr, + unsigned count, + const struct pipe_vertex_buffer *bufs); + +void u_vbuf_mgr_draw_begin(struct u_vbuf_mgr *mgr, + const struct pipe_draw_info *info, + boolean *buffers_updated, + boolean *uploader_flushed); + +void u_vbuf_mgr_draw_end(struct u_vbuf_mgr *mgr); + + +static INLINE struct u_vbuf_resource *u_vbuf_resource(struct pipe_resource *r) +{ + return (struct u_vbuf_resource*)r; +} + +#endif |