diff options
Diffstat (limited to 'src/gallium/auxiliary')
53 files changed, 1306 insertions, 667 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index f8e65cf6c6..2daed382cf 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -101,6 +101,7 @@ C_SOURCES = \ util/u_blit.c \ util/u_blitter.c \ util/u_cache.c \ + util/u_caps.c \ util/u_cpu_detect.c \ util/u_dl.c \ util/u_draw_quad.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index db3a1e7311..a0673df8a8 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -144,6 +144,7 @@ source = [ 'util/u_blit.c', 'util/u_blitter.c', 'util/u_cache.c', + 'util/u_caps.c', 'util/u_cpu_detect.c', 'util/u_debug.c', 'util/u_debug_memory.c', diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 6fd4bd3642..c5fe7efa02 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -313,10 +313,13 @@ void cso_release_all( struct cso_context *ctx ) } +/** + * Free the CSO context. NOTE: the state tracker should have previously called + * cso_release_all(). + */ void cso_destroy_context( struct cso_context *ctx ) { if (ctx) { - /*cso_release_all( ctx );*/ FREE( ctx ); } } @@ -349,6 +352,7 @@ enum pipe_error cso_set_blend(struct cso_context *ctx, if (!cso) return PIPE_ERROR_OUT_OF_MEMORY; + memset(&cso->state, 0, sizeof cso->state); memcpy(&cso->state, templ, key_size); cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_blend_state; diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 9d11031769..2c234285b5 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -24,6 +24,8 @@ /* generates the draw jit function */ static void draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var); +static void +draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var); static void init_globals(struct draw_llvm *llvm) @@ -205,7 +207,9 @@ draw_llvm_create(struct draw_context *draw) void draw_llvm_destroy(struct draw_llvm *llvm) { - free(llvm); + LLVMDisposePassManager(llvm->pass); + + FREE(llvm); } struct draw_llvm_variant * @@ -218,6 +222,7 @@ draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs) llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs); draw_llvm_generate(llvm, variant); + draw_llvm_generate_elts(llvm, variant); return variant; } @@ -252,7 +257,8 @@ generate_vs(struct draw_llvm *llvm, NULL /*pos*/, inputs, outputs, - NULL/*sampler*/); + NULL/*sampler*/, + &llvm->draw->vs.vertex_shader->info); } #if DEBUG_STORE @@ -285,10 +291,16 @@ generate_fetch(LLVMBuilderRef builder, LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, &indices, 1, ""); LLVMValueRef vb_stride = draw_jit_vbuffer_stride(builder, vbuf); + LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(builder, vbuf); LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(builder, vbuf); - LLVMValueRef stride = LLVMBuildMul(builder, - vb_stride, - index, ""); + LLVMValueRef cond; + LLVMValueRef stride; + + cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, ""); + + index = LLVMBuildSelect(builder, cond, index, vb_max_index, ""); + + stride = LLVMBuildMul(builder, vb_stride, index, ""); vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer"); @@ -689,6 +701,158 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) lp_disassemble(variant->jit_func); } + +static void +draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant) +{ + LLVMTypeRef arg_types[7]; + LLVMTypeRef func_type; + LLVMValueRef context_ptr; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr; + LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; + struct draw_context *draw = llvm->draw; + unsigned i, j; + struct lp_build_context bld; + struct lp_build_context bld_int; + struct lp_build_loop_state lp_loop; + struct lp_type vs_type = lp_type_float_vec(32); + const int max_vertices = 4; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; + LLVMValueRef fetch_max; + + arg_types[0] = llvm->context_ptr_type; /* context */ + arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ + arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ + arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0); /* fetch_elts * */ + arg_types[4] = LLVMInt32Type(); /* fetch_count */ + arg_types[5] = LLVMInt32Type(); /* stride */ + arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ + + func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + + variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type); + LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv); + for(i = 0; i < Elements(arg_types); ++i) + if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) + LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), LLVMNoAliasAttribute); + + context_ptr = LLVMGetParam(variant->function_elts, 0); + io_ptr = LLVMGetParam(variant->function_elts, 1); + vbuffers_ptr = LLVMGetParam(variant->function_elts, 2); + fetch_elts = LLVMGetParam(variant->function_elts, 3); + fetch_count = LLVMGetParam(variant->function_elts, 4); + stride = LLVMGetParam(variant->function_elts, 5); + vb_ptr = LLVMGetParam(variant->function_elts, 6); + + lp_build_name(context_ptr, "context"); + lp_build_name(io_ptr, "io"); + lp_build_name(vbuffers_ptr, "vbuffers"); + lp_build_name(fetch_elts, "fetch_elts"); + lp_build_name(fetch_count, "fetch_count"); + lp_build_name(stride, "stride"); + lp_build_name(vb_ptr, "vb"); + + /* + * Function body + */ + + block = LLVMAppendBasicBlock(variant->function_elts, "entry"); + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, block); + + lp_build_context_init(&bld, builder, vs_type); + lp_build_context_init(&bld_int, builder, lp_type_int(32)); + + step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); + + fetch_max = LLVMBuildSub(builder, fetch_count, + LLVMConstInt(LLVMInt32Type(), 1, 0), + "fetch_max"); + + lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop); + { + LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; + LLVMValueRef io; + const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; + + io_itr = lp_loop.counter; + io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); +#if DEBUG_STORE + lp_build_printf(builder, " --- io %d = %p, loop counter %d\n", + io_itr, io, lp_loop.counter); +#endif + for (i = 0; i < NUM_CHANNELS; ++i) { + LLVMValueRef true_index = LLVMBuildAdd( + builder, + lp_loop.counter, + LLVMConstInt(LLVMInt32Type(), i, 0), ""); + LLVMValueRef fetch_ptr; + + /* make sure we're not out of bounds which can happen + * if fetch_count % 4 != 0, because on the last iteration + * a few of the 4 vertex fetches will be out of bounds */ + true_index = lp_build_min(&bld_int, true_index, fetch_max); + + fetch_ptr = LLVMBuildGEP(builder, fetch_elts, + &true_index, 1, ""); + true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt"); + for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { + struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; + LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(), + velem->vertex_buffer_index, + 0); + LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, + &vb_index, 1, ""); + generate_fetch(builder, vbuffers_ptr, + &aos_attribs[j][i], velem, vb, true_index); + } + } + convert_to_soa(builder, aos_attribs, inputs, + draw->pt.nr_vertex_elements); + + ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; + generate_vs(llvm, + builder, + outputs, + ptr_aos, + context_ptr); + + convert_to_aos(builder, io, outputs, + draw->vs.vertex_shader->info.num_outputs, + max_vertices); + } + lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop); + + LLVMBuildRetVoid(builder); + + LLVMDisposeBuilder(builder); + + /* + * Translate the LLVM IR into machine code. + */ +#ifdef DEBUG + if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) { + LLVMDumpValue(variant->function_elts); + assert(0); + } +#endif + + LLVMRunFunctionPassManager(llvm->pass, variant->function_elts); + + if (0) { + LLVMDumpValue(variant->function_elts); + debug_printf("\n"); + } + variant->jit_func_elts = (draw_jit_vert_func_elts)LLVMGetPointerToGlobal( + llvm->draw->engine, variant->function_elts); + + if (0) + lp_disassemble(variant->jit_func_elts); +} + void draw_llvm_make_variant_key(struct draw_llvm *llvm, struct draw_llvm_variant_key *key) diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index 28b9044a81..58fee7f9d6 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -78,7 +78,10 @@ struct draw_jit_context #define draw_jit_vbuffer_stride(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 0, "stride") -#define draw_jit_vbuffer_offset(_builder, _ptr) \ +#define draw_jit_vbuffer_max_index(_builder, _ptr) \ + lp_build_struct_get(_builder, _ptr, 1, "max_index") + +#define draw_jit_vbuffer_offset(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 2, "buffer_offset") @@ -91,6 +94,16 @@ typedef void unsigned stride, struct pipe_vertex_buffer *vertex_buffers); + +typedef void +(*draw_jit_vert_func_elts)(struct draw_jit_context *context, + struct vertex_header *io, + const char *vbuffers[PIPE_MAX_ATTRIBS], + const unsigned *fetch_elts, + unsigned fetch_count, + unsigned stride, + struct pipe_vertex_buffer *vertex_buffers); + struct draw_llvm { struct draw_context *draw; @@ -119,7 +132,9 @@ struct draw_llvm_variant { struct draw_llvm_variant_key key; LLVMValueRef function; + LLVMValueRef function_elts; draw_jit_vert_func jit_func; + draw_jit_vert_func_elts jit_func_elts; struct draw_llvm_variant *next; }; diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index ee2b811603..abbf6247ab 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -137,7 +137,7 @@ emit_vertex( struct vbuf_stage *vbuf, */ /* Note: we really do want data[0] here, not data[pos]: */ - vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0); + vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0, ~0); vbuf->translate->run(vbuf->translate, 0, 1, 0, vbuf->vertex_ptr); if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); @@ -271,7 +271,7 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim ) translate_key_sanitize(&hw_key); vbuf->translate = translate_cache_find(vbuf->cache, &hw_key); - vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0); + vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0, ~0); } vbuf->point_size = vbuf->stage.draw->rasterizer->point_size; diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c index 265a420d01..ab16706581 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c @@ -54,7 +54,6 @@ static INLINE struct wideline_stage *wideline_stage( struct draw_stage *stage ) /** * Draw a wide line by drawing a quad (two triangles). - * XXX need to disable polygon stipple. */ static void wideline_line( struct draw_stage *stage, struct prim_header *header ) diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 4bb3282f62..a2bfb693c0 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -132,6 +132,7 @@ struct draw_context struct draw_pt_middle_end *fetch_emit; struct draw_pt_middle_end *fetch_shade_emit; struct draw_pt_middle_end *general; + struct draw_pt_middle_end *llvm; } middle; struct { @@ -253,8 +254,8 @@ struct draw_context #ifdef HAVE_LLVM LLVMExecutionEngineRef engine; - boolean use_llvm; #endif + void *driver_private; }; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index b5876bb1bd..b853f3a89f 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -37,6 +37,13 @@ #include "util/u_math.h" #include "util/u_prim.h" + +DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE) +DEBUG_GET_ONCE_BOOL_OPTION(draw_no_fse, "DRAW_NO_FSE", FALSE) +#ifdef HAVE_LLVM +DEBUG_GET_ONCE_BOOL_OPTION(draw_use_llvm, "DRAW_USE_LLVM", TRUE) +#endif + static unsigned trim( unsigned count, unsigned first, unsigned incr ) { if (count < first) @@ -90,12 +97,16 @@ draw_pt_arrays(struct draw_context *draw, opt |= PT_SHADE; } - if (opt == 0) - middle = draw->pt.middle.fetch_emit; - else if (opt == PT_SHADE && !draw->pt.no_fse) - middle = draw->pt.middle.fetch_shade_emit; - else - middle = draw->pt.middle.general; + if (draw->pt.middle.llvm) { + middle = draw->pt.middle.llvm; + } else { + if (opt == 0) + middle = draw->pt.middle.fetch_emit; + else if (opt == PT_SHADE && !draw->pt.no_fse) + middle = draw->pt.middle.fetch_shade_emit; + else + middle = draw->pt.middle.general; + } /* Pick the right frontend @@ -122,8 +133,8 @@ draw_pt_arrays(struct draw_context *draw, boolean draw_pt_init( struct draw_context *draw ) { - draw->pt.test_fse = debug_get_bool_option("DRAW_FSE", FALSE); - draw->pt.no_fse = debug_get_bool_option("DRAW_NO_FSE", FALSE); + draw->pt.test_fse = debug_get_option_draw_fse(); + draw->pt.no_fse = debug_get_option_draw_no_fse(); draw->pt.front.vcache = draw_pt_vcache( draw ); if (!draw->pt.front.vcache) @@ -141,25 +152,26 @@ boolean draw_pt_init( struct draw_context *draw ) if (!draw->pt.middle.fetch_shade_emit) return FALSE; -#if HAVE_LLVM - draw->use_llvm = debug_get_bool_option("DRAW_USE_LLVM", TRUE); - if (draw->use_llvm) - draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit_llvm( draw ); -#else - draw->pt.middle.general = NULL; -#endif - - if (!draw->pt.middle.general) - draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); + draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); if (!draw->pt.middle.general) return FALSE; +#if HAVE_LLVM + if (debug_get_option_draw_use_llvm()) + draw->pt.middle.llvm = draw_pt_fetch_pipeline_or_emit_llvm( draw ); +#endif + return TRUE; } void draw_pt_destroy( struct draw_context *draw ) { + if (draw->pt.middle.llvm) { + draw->pt.middle.llvm->destroy( draw->pt.middle.llvm ); + draw->pt.middle.llvm = NULL; + } + if (draw->pt.middle.general) { draw->pt.middle.general->destroy( draw->pt.middle.general ); draw->pt.middle.general = NULL; diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index a7917f54b0..ad48fa39a4 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -171,12 +171,14 @@ void draw_pt_emit( struct pt_emit *emit, translate->set_buffer(translate, 0, vertex_data, - stride ); + stride, + ~0); translate->set_buffer(translate, 1, &draw->rasterizer->point_size, - 0); + 0, + ~0); translate->run( translate, 0, @@ -232,11 +234,11 @@ void draw_pt_emit_linear(struct pt_emit *emit, goto fail; translate->set_buffer(translate, 0, - vertex_data, stride); + vertex_data, stride, count - 1); translate->set_buffer(translate, 1, &draw->rasterizer->point_size, - 0); + 0, ~0); translate->run(translate, 0, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 252be5053e..a1347221b5 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -149,7 +149,8 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, fetch->translate->set_buffer(fetch->translate, draw->pt.nr_vertex_buffers, &vh, - 0); + 0, + ~0); } } @@ -172,7 +173,8 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, i, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].stride ); + draw->pt.vertex_buffer[i].stride, + draw->pt.vertex_buffer[i].max_index); } translate->run_elts( translate, @@ -198,7 +200,8 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch, i, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].stride ); + draw->pt.vertex_buffer[i].stride, + draw->pt.vertex_buffer[i].max_index); } translate->run( translate, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 1994ddf2bc..d7735bf1ac 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -168,7 +168,8 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, feme->translate->set_buffer(feme->translate, draw->pt.nr_vertex_buffers, &feme->point_size, - 0); + 0, + ~0); } feme->point_size = draw->rasterizer->point_size; @@ -178,7 +179,8 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, i, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].stride ); + draw->pt.vertex_buffer[i].stride, + draw->pt.vertex_buffer[i].max_index); } *max_vertices = (draw->render->max_vertex_buffer_bytes / diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 389e2b105e..cbb5b6c960 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -167,7 +167,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle, i, ((const ubyte *) draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].stride ); + draw->pt.vertex_buffer[i].stride, + draw->pt.vertex_buffer[i].max_index ); } *max_vertices = (draw->render->max_vertex_buffer_bytes / diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index f71271bd91..35913a5995 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -167,8 +167,6 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle, { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *vshader = draw->vs.vertex_shader; - struct draw_geometry_shader *gshader = draw->gs.geometry_shader; unsigned opt = fpme->opt; unsigned alloc_count = align( fetch_count, 4 ); @@ -182,35 +180,13 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle, return; } - /* Fetch into our vertex buffer - */ - draw_pt_fetch_run( fpme->fetch, - fetch_elts, - fetch_count, - (char *)pipeline_verts ); - - /* Run the shader, note that this overwrites the data[] parts of - * the pipeline verts. If there is no shader, eg if - * bypass_vs_clip_and_viewport, then the inputs == outputs, and are - * already in the correct place.*/ - if (opt & PT_SHADE) - { - vshader->run_linear(vshader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.vs_constants, - fetch_count, - fpme->vertex_size, - fpme->vertex_size); - if (gshader) - draw_geometry_shader_run(gshader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.gs_constants, - fetch_count, - fpme->vertex_size, - fpme->vertex_size); - } + fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context, + pipeline_verts, + (const char **)draw->pt.user.vbuffer, + fetch_elts, + fetch_count, + fpme->vertex_size, + draw->pt.vertex_buffer ); if (draw_pt_post_vs_run( fpme->post_vs, pipeline_verts, @@ -373,7 +349,31 @@ static void llvm_middle_end_finish( struct draw_pt_middle_end *middle ) static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle ) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_llvm_variant *variant = NULL; + variant = fpme->variants; + while(variant) { + struct draw_llvm_variant *next = variant->next; + + if (variant->function_elts) { + if (variant->function_elts) + LLVMFreeMachineCodeForFunction(draw->engine, + variant->function_elts); + LLVMDeleteFunction(variant->function_elts); + } + + if (variant->function) { + if (variant->function) + LLVMFreeMachineCodeForFunction(draw->engine, + variant->function); + LLVMDeleteFunction(variant->function); + } + + FREE(variant); + + variant = next; + } if (fpme->fetch) draw_pt_fetch_destroy( fpme->fetch ); diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index cfd5154024..c2832eefa2 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -46,7 +46,7 @@ #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_exec.h" - +DEBUG_GET_ONCE_BOOL_OPTION(gallium_dump_vs, "GALLIUM_DUMP_VS", FALSE) void draw_vs_set_constants(struct draw_context *draw, @@ -157,7 +157,7 @@ draw_delete_vertex_shader(struct draw_context *draw, boolean draw_vs_init( struct draw_context *draw ) { - draw->dump_vs = debug_get_bool_option("GALLIUM_DUMP_VS", FALSE); + draw->dump_vs = debug_get_option_gallium_dump_vs(); draw->vs.machine = tgsi_exec_machine_create(); if (!draw->vs.machine) diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index f49332352b..6c7e94db43 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -80,7 +80,8 @@ struct draw_vs_varient { void (*set_buffer)( struct draw_vs_varient *, unsigned i, const void *ptr, - unsigned stride ); + unsigned stride, + unsigned max_stride ); void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader, unsigned start, @@ -168,8 +169,9 @@ draw_create_vs_ppc(struct draw_context *draw, struct draw_vs_varient_key; struct draw_vertex_shader; -struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ); +struct draw_vs_varient * +draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ); @@ -187,8 +189,9 @@ struct translate *draw_vs_get_fetch( struct draw_context *draw, struct translate *draw_vs_get_emit( struct draw_context *draw, struct translate_key *key ); -struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ); +struct draw_vs_varient * +draw_vs_create_varient_generic( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index e7121f3654..19f49e34c8 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -2089,13 +2089,21 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, } +/** cast wrapper */ +static INLINE struct draw_vs_varient_aos_sse * +draw_vs_varient_aos_sse(struct draw_vs_varient *varient) +{ + return (struct draw_vs_varient_aos_sse *) varient; +} + static void vaos_set_buffer( struct draw_vs_varient *varient, unsigned buf, const void *ptr, - unsigned stride ) + unsigned stride, + unsigned max_stride) { - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); if (buf < vaos->nr_vb) { vaos->buffer[buf].base_ptr = (char *)ptr; @@ -2112,7 +2120,7 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, unsigned count, void *output_buffer ) { - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); struct aos_machine *machine = vaos->draw->vs.aos_machine; unsigned i; @@ -2136,7 +2144,7 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, unsigned count, void *output_buffer ) { - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); struct aos_machine *machine = vaos->draw->vs.aos_machine; unsigned i; @@ -2165,7 +2173,7 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, static void vaos_destroy( struct draw_vs_varient *varient ) { - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct draw_vs_varient_aos_sse *vaos = draw_vs_varient_aos_sse(varient); FREE( vaos->buffer ); @@ -2241,13 +2249,14 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, } -struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ) +struct draw_vs_varient * +draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ) { struct draw_vs_varient *varient = varient_aos_sse( vs, key ); if (varient == NULL) { - varient = draw_vs_varient_generic( vs, key ); + varient = draw_vs_create_varient_generic( vs, key ); } return varient; diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 7deca2b69d..bc34d390da 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -203,7 +203,7 @@ draw_create_vs_exec(struct draw_context *draw, vs->base.prepare = vs_exec_prepare; vs->base.run_linear = vs_exec_run_linear; vs->base.delete = vs_exec_delete; - vs->base.create_varient = draw_vs_varient_generic; + vs->base.create_varient = draw_vs_create_varient_generic; vs->machine = draw->vs.machine; return &vs->base; diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index d869eecec5..5df84916c5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -125,7 +125,7 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, */ shader->func(inputs_soa, outputs_soa, temps_soa, (float (*)[4]) shader->base.immediates, - (const float (*)[4])constants[0], + (float (*)[4])constants[0], ppc_builtin_constants); /* convert (up to) four output verts from SoA back to AoS format */ @@ -190,7 +190,7 @@ draw_create_vs_ppc(struct draw_context *draw, vs->base.create_varient = draw_vs_varient_aos_ppc; else #endif - vs->base.create_varient = draw_vs_varient_generic; + vs->base.create_varient = draw_vs_create_varient_generic; vs->base.prepare = vs_ppc_prepare; vs->base.run_linear = vs_ppc_run_linear; vs->base.delete = vs_ppc_delete; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 54e6423388..14c95082a9 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -165,9 +165,9 @@ draw_create_vs_sse(struct draw_context *draw, vs->base.draw = draw; if (1) - vs->base.create_varient = draw_vs_varient_aos_sse; + vs->base.create_varient = draw_vs_create_varient_aos_sse; else - vs->base.create_varient = draw_vs_varient_generic; + vs->base.create_varient = draw_vs_create_varient_generic; vs->base.prepare = vs_sse_prepare; vs->base.run_linear = vs_sse_run_linear; vs->base.delete = vs_sse_delete; diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 5ed706cb4f..6eb26927f2 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -66,14 +66,16 @@ struct draw_vs_varient_generic { static void vsvg_set_buffer( struct draw_vs_varient *varient, unsigned buffer, const void *ptr, - unsigned stride ) + unsigned stride, + unsigned max_index ) { struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; vsvg->fetch->set_buffer(vsvg->fetch, buffer, ptr, - stride); + stride, + max_index ); } @@ -172,12 +174,14 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->emit->set_buffer( vsvg->emit, 0, temp_buffer, - temp_vertex_stride ); + temp_vertex_stride, + ~0 ); vsvg->emit->set_buffer( vsvg->emit, 1, &vsvg->draw->rasterizer->point_size, - 0); + 0, + ~0 ); vsvg->emit->run( vsvg->emit, 0, count, @@ -232,12 +236,14 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->emit->set_buffer( vsvg->emit, 0, temp_buffer, - temp_vertex_stride ); + temp_vertex_stride, + ~0 ); vsvg->emit->set_buffer( vsvg->emit, 1, &vsvg->draw->rasterizer->point_size, - 0); + 0, + ~0 ); vsvg->emit->run( vsvg->emit, 0, count, @@ -257,8 +263,9 @@ static void vsvg_destroy( struct draw_vs_varient *varient ) } -struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, - const struct draw_vs_varient_key *key ) +struct draw_vs_varient * +draw_vs_create_varient_generic( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ) { unsigned i; struct translate_key fetch, emit; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 8e8fcccf56..20ae958714 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1210,6 +1210,14 @@ LLVMValueRef lp_build_cos(struct lp_build_context *bld, LLVMValueRef a) { +#ifdef PIPE_OS_WINDOWS + /* + * FIXME: X86 backend translates llvm.cos.v4f32 to 4 calls to CRT's cosf() + * which is neither efficient nor does the CRT linkage work on Windows + * causing segmentation fault. So simply disable the code for now. + */ + return bld->one; +#else const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -1220,6 +1228,7 @@ lp_build_cos(struct lp_build_context *bld, util_snprintf(intrinsic, sizeof intrinsic, "llvm.cos.v%uf%u", type.length, type.width); return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a); +#endif } @@ -1230,6 +1239,14 @@ LLVMValueRef lp_build_sin(struct lp_build_context *bld, LLVMValueRef a) { +#ifdef PIPE_OS_WINDOWS + /* + * FIXME: X86 backend translates llvm.sin.v4f32 to 4 calls to CRT's sinf() + * which is neither efficient nor does the CRT linkage work on Windows + * causing segmentation fault. So simply disable the code for now. + */ + return bld->zero; +#else const struct lp_type type = bld->type; LLVMTypeRef vec_type = lp_build_vec_type(type); char intrinsic[32]; @@ -1240,6 +1257,7 @@ lp_build_sin(struct lp_build_context *bld, util_snprintf(intrinsic, sizeof intrinsic, "llvm.sin.v%uf%u", type.length, type.width); return lp_build_intrinsic_unary(bld->builder, intrinsic, vec_type, a); +#endif } @@ -1502,8 +1520,10 @@ lp_build_log2_approx(struct lp_build_context *bld, res = LLVMBuildAdd(bld->builder, logmant, logexp, ""); } - if(p_exp) + if(p_exp) { + exp = LLVMBuildBitCast(bld->builder, exp, vec_type, ""); *p_exp = exp; + } if(p_floor_log2) *p_floor_log2 = logexp; @@ -1573,8 +1593,10 @@ lp_build_float_log2_approx(struct lp_build_context *bld, res = LLVMBuildAdd(bld->builder, logmant, logexp, ""); } - if(p_exp) + if(p_exp) { + exp = LLVMBuildBitCast(bld->builder, exp, float_type, ""); *p_exp = exp; + } if(p_floor_log2) *p_floor_log2 = logexp; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index e60ab4f6ba..8f15b1d287 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -792,3 +792,78 @@ lp_build_endif(struct lp_build_if_state *ctx) /* Resume building code at end of the ifthen->merge_block */ LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); } + + +/** + * Allocate a scalar (or vector) variable. + * + * Although not strictly part of control flow, control flow has deep impact in + * how variables should be allocated. + * + * The mem2reg optimization pass is the recommended way to dealing with mutable + * variables, and SSA. It looks for allocas and if it can handle them, it + * promotes them, but only looks for alloca instructions in the entry block of + * the function. Being in the entry block guarantees that the alloca is only + * executed once, which makes analysis simpler. + * + * See also: + * - http://www.llvm.org/docs/tutorial/OCamlLangImpl7.html#memory + */ +LLVMValueRef +lp_build_alloca(LLVMBuilderRef builder, + LLVMTypeRef type, + const char *name) +{ + LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(current_block); + LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function); + LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block); + LLVMBuilderRef first_builder = LLVMCreateBuilder(); + LLVMValueRef res; + + LLVMPositionBuilderAtEnd(first_builder, first_block); + LLVMPositionBuilderBefore(first_builder, first_instr); + + res = LLVMBuildAlloca(first_builder, type, name); + + LLVMDisposeBuilder(first_builder); + + return res; +} + + +/** + * Allocate an array of scalars/vectors. + * + * mem2reg pass is not capable of promoting structs or arrays to registers, but + * we still put it in the first block anyway as failure to put allocas in the + * first block may prevent the X86 backend from successfully align the stack as + * required. + * + * Also the scalarrepl pass is supossedly more powerful and can promote + * arrays in many cases. + * + * See also: + * - http://www.llvm.org/docs/tutorial/OCamlLangImpl7.html#memory + */ +LLVMValueRef +lp_build_array_alloca(LLVMBuilderRef builder, + LLVMTypeRef type, + LLVMValueRef count, + const char *name) +{ + LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder); + LLVMValueRef function = LLVMGetBasicBlockParent(current_block); + LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function); + LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block); + LLVMBuilderRef first_builder = LLVMCreateBuilder(); + LLVMValueRef res; + + LLVMPositionBuilderBefore(first_builder, first_instr); + + res = LLVMBuildArrayAlloca(first_builder, type, count, name); + + LLVMDisposeBuilder(first_builder); + + return res; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index 745838570c..fffb493a93 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -156,5 +156,15 @@ lp_build_endif(struct lp_build_if_state *ctx); LLVMBasicBlockRef lp_build_insert_new_block(LLVMBuilderRef builder, const char *name); +LLVMValueRef +lp_build_alloca(LLVMBuilderRef builder, + LLVMTypeRef type, + const char *name); + +LLVMValueRef +lp_build_array_alloca(LLVMBuilderRef builder, + LLVMTypeRef type, + LLVMValueRef count, + const char *name); #endif /* !LP_BLD_FLOW_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index a5a019fa92..6257e9a404 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -40,6 +40,7 @@ #include "lp_bld_init.h" #include "lp_bld_type.h" +#include "lp_bld_flow.h" #include "lp_bld_format.h" @@ -370,11 +371,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float); } - /* - * XXX: this should better go to the first block in the function - */ - - tmp = LLVMBuildAlloca(builder, LLVMVectorType(LLVMFloatType(), 4), ""); + tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), ""); /* * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index de07c222a3..5067d0a164 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -27,6 +27,7 @@ #include "pipe/p_compiler.h" +#include "util/u_cpu_detect.h" #include "util/u_debug.h" #include "lp_bld_init.h" @@ -62,6 +63,15 @@ lp_build_init(void) if (!lp_build_target) lp_build_target = LLVMGetExecutionEngineTargetData(lp_build_engine); + + util_cpu_detect(); + +#if 0 + /* For simulating less capable machines */ + util_cpu_caps.has_sse3 = 0; + util_cpu_caps.has_ssse3 = 0; + util_cpu_caps.has_sse4_1 = 0; +#endif } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index a3b6970116..d13fa1a5d0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -472,18 +472,6 @@ lp_build_select_aos(struct lp_build_context *bld, } } -LLVMValueRef -lp_build_alloca(struct lp_build_context *bld) -{ - const struct lp_type type = bld->type; - - if (type.length > 1) { /*vector*/ - return LLVMBuildAlloca(bld->builder, lp_build_vec_type(type), ""); - } else { /*scalar*/ - return LLVMBuildAlloca(bld->builder, lp_build_elem_type(type), ""); - } -} - /** Return (a & ~b) */ LLVMValueRef diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h index 00a8c75019..29f9fc3b20 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h @@ -76,9 +76,6 @@ lp_build_select_aos(struct lp_build_context *bld, LLVMValueRef b, const boolean cond[4]); -LLVMValueRef -lp_build_alloca(struct lp_build_context *bld); - LLVMValueRef lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index eb75b9b393..195a4953ab 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -77,6 +77,11 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, */ state->format = view->format; + state->swizzle_r = view->swizzle_r; + state->swizzle_g = view->swizzle_g; + state->swizzle_b = view->swizzle_b; + state->swizzle_a = view->swizzle_a; + state->target = texture->target; state->pot_width = util_is_pot(texture->width0); state->pot_height = util_is_pot(texture->height0); @@ -181,54 +186,16 @@ lp_build_sample_offset(struct lp_build_context *bld, LLVMValueRef offset; x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8); + offset = lp_build_mul(bld, x, x_stride); - if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - LLVMValueRef x_lo, x_hi; - LLVMValueRef y_lo, y_hi; - LLVMValueRef x_stride_lo, x_stride_hi; - LLVMValueRef y_stride_lo, y_stride_hi; - LLVMValueRef x_offset_lo, x_offset_hi; - LLVMValueRef y_offset_lo, y_offset_hi; - LLVMValueRef offset_lo, offset_hi; - - /* XXX 1D & 3D addressing not done yet */ - assert(!z); - assert(!z_stride); - - x_lo = LLVMBuildAnd(bld->builder, x, bld->one, ""); - y_lo = LLVMBuildAnd(bld->builder, y, bld->one, ""); - - x_hi = LLVMBuildLShr(bld->builder, x, bld->one, ""); - y_hi = LLVMBuildLShr(bld->builder, y, bld->one, ""); - - x_stride_lo = x_stride; - y_stride_lo = lp_build_const_vec(bld->type, 2*format_desc->block.bits/8); - - x_stride_hi = lp_build_const_vec(bld->type, 4*format_desc->block.bits/8); - y_stride_hi = LLVMBuildShl(bld->builder, y_stride, bld->one, ""); - - x_offset_lo = lp_build_mul(bld, x_lo, x_stride_lo); - y_offset_lo = lp_build_mul(bld, y_lo, y_stride_lo); - offset_lo = lp_build_add(bld, x_offset_lo, y_offset_lo); - - x_offset_hi = lp_build_mul(bld, x_hi, x_stride_hi); - y_offset_hi = lp_build_mul(bld, y_hi, y_stride_hi); - offset_hi = lp_build_add(bld, x_offset_hi, y_offset_hi); - - offset = lp_build_add(bld, offset_hi, offset_lo); + if (y && y_stride) { + LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride); + offset = lp_build_add(bld, offset, y_offset); } - else { - offset = lp_build_mul(bld, x, x_stride); - - if (y && y_stride) { - LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride); - offset = lp_build_add(bld, offset, y_offset); - } - - if (z && z_stride) { - LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride); - offset = lp_build_add(bld, offset, z_offset); - } + + if (z && z_stride) { + LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride); + offset = lp_build_add(bld, offset, z_offset); } return offset; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index e287376385..8ceb20473d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -54,8 +54,14 @@ struct lp_build_context; */ struct lp_sampler_static_state { - /* pipe_texture's state */ + /* pipe_sampler_view's state */ enum pipe_format format; + unsigned swizzle_r:3; + unsigned swizzle_g:3; + unsigned swizzle_b:3; + unsigned swizzle_a:3; + + /* pipe_texture's state */ unsigned target:3; unsigned pot_width:1; unsigned pot_height:1; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index c9b613e21c..54c0ad7ce4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -185,6 +185,53 @@ texture_dims(enum pipe_texture_target tex) } +static LLVMValueRef +lp_build_swizzle_chan_soa(struct lp_type type, + const LLVMValueRef *unswizzled, + enum util_format_swizzle swizzle) +{ + switch (swizzle) { + case PIPE_SWIZZLE_RED: + case PIPE_SWIZZLE_GREEN: + case PIPE_SWIZZLE_BLUE: + case PIPE_SWIZZLE_ALPHA: + return unswizzled[swizzle]; + case PIPE_SWIZZLE_ZERO: + return lp_build_zero(type); + case PIPE_SWIZZLE_ONE: + return lp_build_one(type); + default: + assert(0); + return lp_build_undef(type); + } +} + + +static void +lp_build_swizzle_soa(struct lp_build_sample_context *bld, + LLVMValueRef *texel) +{ + LLVMValueRef unswizzled[4]; + unsigned char swizzles[4]; + unsigned chan; + + for (chan = 0; chan < 4; ++chan) { + unswizzled[chan] = texel[chan]; + } + + swizzles[0] = bld->static_state->swizzle_r; + swizzles[1] = bld->static_state->swizzle_g; + swizzles[2] = bld->static_state->swizzle_b; + swizzles[3] = bld->static_state->swizzle_a; + + for (chan = 0; chan < 4; ++chan) { + unsigned swizzle = swizzles[chan]; + texel[chan] = lp_build_swizzle_chan_soa(bld->texel_type, + unswizzled, swizzle); + } +} + + /** * Generate code to fetch a texel from a texture at int coords (x, y, z). @@ -278,6 +325,18 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, bld->format_desc, x, y, z, y_stride, z_stride); + if (use_border) { + /* If we can sample the border color, it means that texcoords may + * lie outside the bounds of the texture image. We need to do + * something to prevent reading out of bounds and causing a segfault. + * + * Simply AND the texture coords with !use_border. This will cause + * coords which are out of bounds to become zero. Zero's guaranteed + * to be inside the texture image. + */ + offset = lp_build_andc(&bld->uint_coord_bld, offset, use_border); + } + lp_build_fetch_rgba_soa(bld->builder, bld->format_desc, bld->texel_type, @@ -285,6 +344,8 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, i, j, texel); + lp_build_swizzle_soa(bld, texel); + /* * Note: if we find an app which frequently samples the texture border * we might want to implement a true conditional here to avoid sampling @@ -842,6 +903,7 @@ lp_build_minify(struct lp_build_sample_context *bld, * \param s vector of texcoord s values * \param t vector of texcoord t values * \param r vector of texcoord r values + * \param shader_lod_bias vector float with the shader lod bias, * \param width scalar int texture width * \param height scalar int texture height * \param depth scalar int texture depth @@ -851,6 +913,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, + LLVMValueRef shader_lod_bias, LLVMValueRef width, LLVMValueRef height, LLVMValueRef depth) @@ -865,8 +928,8 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, else { const int dims = texture_dims(bld->static_state->target); struct lp_build_context *float_bld = &bld->float_bld; - LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), - bld->static_state->lod_bias); + LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(), + bld->static_state->lod_bias); LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod); LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), @@ -940,8 +1003,14 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, /* compute lod = log2(rho) */ lod = lp_build_log2(float_bld, rho); - /* add lod bias */ - lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias"); + /* add sampler lod bias */ + lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler LOD bias"); + + /* add shader lod bias */ + /* XXX for now we take only the first element since our lod is scalar */ + shader_lod_bias = LLVMBuildExtractElement(bld->builder, shader_lod_bias, + LLVMConstInt(LLVMInt32Type(), 0, 0), ""); + lod = LLVMBuildAdd(bld->builder, lod, shader_lod_bias, "shader LOD bias"); /* clamp lod */ lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); @@ -1527,6 +1596,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, + LLVMValueRef lodbias, LLVMValueRef width, LLVMValueRef height, LLVMValueRef depth, @@ -1564,7 +1634,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, /* Need to compute lod either to choose mipmap levels or to * distinguish between minification/magnification with one mipmap level. */ - lod = lp_build_lod_selector(bld, s, t, r, width, height, depth); + lod = lp_build_lod_selector(bld, s, t, r, lodbias, width, height, depth); } /* @@ -1772,6 +1842,11 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, LLVMValueRef unswizzled[4]; LLVMValueRef stride; + assert(bld->static_state->target == PIPE_TEXTURE_2D); + assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR); + assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR); + assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE); + lp_build_context_init(&i32, builder, lp_type_int_vec(32)); lp_build_context_init(&h16, builder, lp_type_ufixed(16)); lp_build_context_init(&u8n, builder, lp_type_unorm(8)); @@ -1945,6 +2020,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, lp_build_format_swizzle_soa(bld->format_desc, bld->texel_type, unswizzled, texel); + + lp_build_swizzle_soa(bld, texel); } @@ -1984,6 +2061,24 @@ lp_build_sample_compare(struct lp_build_sample_context *bld, /** + * Just set texels to white instead of actually sampling the texture. + * For debugging. + */ +static void +lp_build_sample_nop(struct lp_build_sample_context *bld, + LLVMValueRef *texel) +{ + struct lp_build_context *texel_bld = &bld->texel_bld; + unsigned chan; + + for (chan = 0; chan < 4; chan++) { + /*lp_bld_mov(texel_bld, texel, texel_bld->one);*/ + texel[chan] = texel_bld->one; + } +} + + +/** * Build texture sampling code. * 'texel' will return a vector of four LLVMValueRefs corresponding to * R, G, B, A. @@ -2048,19 +2143,23 @@ lp_build_sample_soa(LLVMBuilderRef builder, height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height); depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth); - if (util_format_is_rgba8_variant(bld.format_desc) && - static_state->target == PIPE_TEXTURE_2D && - static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR && - static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR && - static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && - is_simple_wrap_mode(static_state->wrap_s) && - is_simple_wrap_mode(static_state->wrap_t)) { + if (0) { + /* For debug: no-op texture sampling */ + lp_build_sample_nop(&bld, texel); + } + else if (util_format_is_rgba8_variant(bld.format_desc) && + static_state->target == PIPE_TEXTURE_2D && + static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR && + static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR && + static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && + is_simple_wrap_mode(static_state->wrap_s) && + is_simple_wrap_mode(static_state->wrap_t)) { /* special case */ lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec, row_stride_array, data_array, texel); } else { - lp_build_sample_general(&bld, unit, s, t, r, + lp_build_sample_general(&bld, unit, s, t, r, lodbias, width, height, depth, width_vec, height_vec, depth_vec, row_stride_array, img_stride_array, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 63b938bfa9..2eac5da6c6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -39,6 +39,7 @@ struct tgsi_token; +struct tgsi_shader_info; struct lp_type; struct lp_build_context; struct lp_build_mask_context; @@ -78,7 +79,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[4], LLVMValueRef (*outputs)[4], - struct lp_build_sampler_soa *sampler); + struct lp_build_sampler_soa *sampler, + struct tgsi_shader_info *info); #endif /* LP_BLD_TGSI_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 44f8aec1bf..d3c769e28b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -46,6 +46,7 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_scan.h" #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_arit.h" @@ -125,6 +126,12 @@ struct lp_build_tgsi_soa_context LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS]; LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS]; + LLVMValueRef addr[LP_MAX_TEMPS][NUM_CHANNELS]; + + /* we allocate an array of temps if we have indirect + * addressing and then the temps above is unused */ + LLVMValueRef temps_array; + boolean has_indirect_addressing; struct lp_build_mask_context *mask; struct lp_exec_mask exec_mask; @@ -169,8 +176,7 @@ static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context static void lp_exec_mask_update(struct lp_exec_mask *mask) { if (mask->loop_stack_size) { - /*for loops we need to update the entire mask at - * runtime */ + /*for loops we need to update the entire mask at runtime */ LLVMValueRef tmp; assert(mask->break_mask); tmp = LLVMBuildAnd(mask->bld->builder, @@ -232,6 +238,9 @@ static void lp_exec_bgnloop(struct lp_exec_mask *mask) mask->break_mask = LLVMConstAllOnes(mask->int_vec_type); if (mask->cond_stack_size == 0) mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type); + + mask->break_stack[mask->break_stack_size++] = mask->break_mask; + mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask; mask->loop_stack[mask->loop_stack_size++] = mask->loop_block; mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop"); LLVMBuildBr(mask->bld->builder, mask->loop_block); @@ -246,7 +255,10 @@ static void lp_exec_break(struct lp_exec_mask *mask) mask->exec_mask, "break"); - mask->break_stack[mask->break_stack_size++] = mask->break_mask; + /* mask->break_stack_size > 1 implies that we encountered a break + * statemant already and if that's the case we want to make sure + * our mask is a combination of the previous break and the current + * execution mask */ if (mask->break_stack_size > 1) { mask->break_mask = LLVMBuildAnd(mask->bld->builder, mask->break_mask, @@ -263,7 +275,6 @@ static void lp_exec_continue(struct lp_exec_mask *mask) mask->exec_mask, ""); - mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask; if (mask->cont_stack_size > 1) { mask->cont_mask = LLVMBuildAnd(mask->bld->builder, mask->cont_mask, @@ -299,17 +310,23 @@ static void lp_exec_endloop(struct lp_exec_mask *mask) LLVMPositionBuilderAtEnd(mask->bld->builder, endloop); mask->loop_block = mask->loop_stack[--mask->loop_stack_size]; - /* pop the break mask */ + /* pop the cont mask */ if (mask->cont_stack_size) { mask->cont_mask = mask->cont_stack[--mask->cont_stack_size]; } + /* pop the break mask */ if (mask->break_stack_size) { - mask->break_mask = mask->cont_stack[--mask->break_stack_size]; + mask->break_mask = mask->break_stack[--mask->break_stack_size]; } lp_exec_mask_update(mask); } +/* stores val into an address pointed to by dst. + * mask->exec_mask is used to figure out which bits of val + * should be stored into the address + * (0 means don't store this bit, 1 means do store). + */ static void lp_exec_mask_store(struct lp_exec_mask *mask, LLVMValueRef val, LLVMValueRef dst) @@ -347,6 +364,23 @@ emit_ddy(struct lp_build_tgsi_soa_context *bld, return lp_build_sub(&bld->base, src_top, src_bottom); } +static LLVMValueRef +get_temp_ptr(struct lp_build_tgsi_soa_context *bld, + unsigned index, + unsigned swizzle, + boolean is_indirect, + LLVMValueRef addr) +{ + if (!bld->has_indirect_addressing) { + return bld->temps[index][swizzle]; + } else { + LLVMValueRef lindex = + LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0); + if (is_indirect) + lindex = lp_build_add(&bld->base, lindex, addr); + return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, ""); + } +} /** * Register fetch. @@ -361,6 +395,7 @@ emit_fetch( const struct tgsi_full_src_register *reg = &inst->Src[index]; unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); LLVMValueRef res; + LLVMValueRef addr; switch (swizzle) { case TGSI_SWIZZLE_X: @@ -368,11 +403,34 @@ emit_fetch( case TGSI_SWIZZLE_Z: case TGSI_SWIZZLE_W: + if (reg->Register.Indirect) { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); + unsigned swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, chan_index ); + addr = LLVMBuildLoad(bld->base.builder, + bld->addr[reg->Indirect.Index][swizzle], + ""); + /* for indexing we want integers */ + addr = LLVMBuildFPToSI(bld->base.builder, addr, + int_vec_type, ""); + addr = LLVMBuildExtractElement(bld->base.builder, + addr, LLVMConstInt(LLVMInt32Type(), 0, 0), + ""); + addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0)); + } + switch (reg->Register.File) { case TGSI_FILE_CONSTANT: { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0); - LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); - LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); + LLVMValueRef scalar, scalar_ptr; + + if (reg->Register.Indirect) { + /*lp_build_printf(bld->base.builder, + "\taddr = %d\n", addr);*/ + index = lp_build_add(&bld->base, index, addr); + } + scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); + scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); + res = lp_build_broadcast_scalar(&bld->base, scalar); break; } @@ -387,11 +445,16 @@ emit_fetch( assert(res); break; - case TGSI_FILE_TEMPORARY: - res = LLVMBuildLoad(bld->base.builder, bld->temps[reg->Register.Index][swizzle], ""); + case TGSI_FILE_TEMPORARY: { + LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, + swizzle, + reg->Register.Indirect, + addr); + res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); if(!res) return bld->base.undef; break; + } default: assert( 0 ); @@ -469,6 +532,7 @@ emit_store( LLVMValueRef value) { const struct tgsi_full_dst_register *reg = &inst->Dst[index]; + LLVMValueRef addr; switch( inst->Instruction.Saturate ) { case TGSI_SAT_NONE: @@ -488,20 +552,39 @@ emit_store( assert(0); } + if (reg->Register.Indirect) { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); + unsigned swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, chan_index ); + addr = LLVMBuildLoad(bld->base.builder, + bld->addr[reg->Indirect.Index][swizzle], + ""); + /* for indexing we want integers */ + addr = LLVMBuildFPToSI(bld->base.builder, addr, + int_vec_type, ""); + addr = LLVMBuildExtractElement(bld->base.builder, + addr, LLVMConstInt(LLVMInt32Type(), 0, 0), + ""); + addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0)); + } + switch( reg->Register.File ) { case TGSI_FILE_OUTPUT: lp_exec_mask_store(&bld->exec_mask, value, bld->outputs[reg->Register.Index][chan_index]); break; - case TGSI_FILE_TEMPORARY: - lp_exec_mask_store(&bld->exec_mask, value, - bld->temps[reg->Register.Index][chan_index]); + case TGSI_FILE_TEMPORARY: { + LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, + chan_index, + reg->Register.Indirect, + addr); + lp_exec_mask_store(&bld->exec_mask, value, temp_ptr); break; + } case TGSI_FILE_ADDRESS: - /* FIXME */ - assert(0); + lp_exec_mask_store(&bld->exec_mask, value, + bld->addr[reg->Indirect.Index][chan_index]); break; case TGSI_FILE_PREDICATE: @@ -656,62 +739,42 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld, lp_build_mask_update(bld->mask, mask); } - -/** - * Check if inst src/dest regs use indirect addressing into temporary - * register file. - */ -static boolean -indirect_temp_reference(const struct tgsi_full_instruction *inst) -{ - uint i; - for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *reg = &inst->Src[i]; - if (reg->Register.File == TGSI_FILE_TEMPORARY && - reg->Register.Indirect) - return TRUE; - } - for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *reg = &inst->Dst[i]; - if (reg->Register.File == TGSI_FILE_TEMPORARY && - reg->Register.Indirect) - return TRUE; - } - return FALSE; -} - static int emit_declaration( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_declaration *decl) { + LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type); + unsigned first = decl->Range.First; unsigned last = decl->Range.Last; unsigned idx, i; - LLVMBasicBlockRef current_block = - LLVMGetInsertBlock(bld->base.builder); - LLVMBasicBlockRef first_block = - LLVMGetEntryBasicBlock( - LLVMGetBasicBlockParent(current_block)); - LLVMValueRef first_inst = - LLVMGetFirstInstruction(first_block); - - /* we want alloca's to be the first instruction - * in the function so we need to rewind the builder - * to the very beginning */ - LLVMPositionBuilderBefore(bld->base.builder, - first_inst); for (idx = first; idx <= last; ++idx) { switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: - for (i = 0; i < NUM_CHANNELS; i++) - bld->temps[idx][i] = lp_build_alloca(&bld->base); + if (bld->has_indirect_addressing) { + LLVMValueRef val = LLVMConstInt(LLVMInt32Type(), + last*4 + 4, 0); + bld->temps_array = lp_build_array_alloca(bld->base.builder, + vec_type, val, ""); + } else { + for (i = 0; i < NUM_CHANNELS; i++) + bld->temps[idx][i] = lp_build_alloca(bld->base.builder, + vec_type, ""); + } break; case TGSI_FILE_OUTPUT: for (i = 0; i < NUM_CHANNELS; i++) - bld->outputs[idx][i] = lp_build_alloca(&bld->base); + bld->outputs[idx][i] = lp_build_alloca(bld->base.builder, + vec_type, ""); + break; + + case TGSI_FILE_ADDRESS: + for (i = 0; i < NUM_CHANNELS; i++) + bld->addr[idx][i] = lp_build_alloca(bld->base.builder, + vec_type, ""); break; default: @@ -720,8 +783,6 @@ emit_declaration( } } - LLVMPositionBuilderAtEnd(bld->base.builder, - current_block); return TRUE; } @@ -747,10 +808,6 @@ emit_instruction( LLVMValueRef res; LLVMValueRef dst0[NUM_CHANNELS]; - /* we can't handle indirect addressing into temp register file yet */ - if (indirect_temp_reference(inst)) - return FALSE; - /* * Stores and write masks are handled in a general fashion after the long * instruction opcode switch statement. @@ -770,17 +827,13 @@ emit_instruction( } switch (inst->Instruction.Opcode) { -#if 0 case TGSI_OPCODE_ARL: - /* FIXME */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_flr(bld, 0, 0); - emit_f2it( bld, 0 ); + tmp0 = lp_build_floor(&bld->base, tmp0); dst0[chan_index] = tmp0; } break; -#endif case TGSI_OPCODE_MOV: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { @@ -1350,17 +1403,13 @@ emit_instruction( return FALSE; break; -#if 0 case TGSI_OPCODE_ARR: - /* FIXME */ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { tmp0 = emit_fetch( bld, inst, 0, chan_index ); - emit_rnd( bld, 0, 0 ); - emit_f2it( bld, 0 ); + tmp0 = lp_build_round(&bld->base, tmp0); dst0[chan_index] = tmp0; } break; -#endif case TGSI_OPCODE_BRA: /* deprecated */ @@ -1540,22 +1589,10 @@ emit_instruction( lp_exec_mask_cond_push(&bld->exec_mask, tmp0); break; - case TGSI_OPCODE_BGNFOR: - /* deprecated */ - assert(0); - return FALSE; - break; - case TGSI_OPCODE_BGNLOOP: lp_exec_bgnloop(&bld->exec_mask); break; - case TGSI_OPCODE_REP: - /* deprecated */ - assert(0); - return FALSE; - break; - case TGSI_OPCODE_ELSE: lp_exec_mask_cond_invert(&bld->exec_mask); break; @@ -1564,22 +1601,10 @@ emit_instruction( lp_exec_mask_cond_pop(&bld->exec_mask); break; - case TGSI_OPCODE_ENDFOR: - /* deprecated */ - assert(0); - return FALSE; - break; - case TGSI_OPCODE_ENDLOOP: lp_exec_endloop(&bld->exec_mask); break; - case TGSI_OPCODE_ENDREP: - /* deprecated */ - assert(0); - return FALSE; - break; - case TGSI_OPCODE_PUSHA: /* deprecated? */ assert(0); @@ -1710,7 +1735,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, const LLVMValueRef *pos, const LLVMValueRef (*inputs)[NUM_CHANNELS], LLVMValueRef (*outputs)[NUM_CHANNELS], - struct lp_build_sampler_soa *sampler) + struct lp_build_sampler_soa *sampler, + struct tgsi_shader_info *info) { struct lp_build_tgsi_soa_context bld; struct tgsi_parse_context parse; @@ -1726,6 +1752,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, bld.outputs = outputs; bld.consts_ptr = consts_ptr; bld.sampler = sampler; + bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 || + info->opcode_count[TGSI_OPCODE_ARL] > 0; lp_exec_mask_init(&bld.exec_mask, &bld.base); @@ -1746,10 +1774,10 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, case TGSI_TOKEN_TYPE_INSTRUCTION: { unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode; - const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode); - if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info )) + const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(opcode); + if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, opcode_info )) _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", - info ? info->mnemonic : "<invalid>"); + opcode_info->mnemonic); } break; diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h index 07a4268fc0..c09e8a7a76 100644 --- a/src/gallium/auxiliary/os/os_thread.h +++ b/src/gallium/auxiliary/os/os_thread.h @@ -302,6 +302,7 @@ static INLINE void pipe_barrier_wait(pipe_barrier *barrier) typedef struct { unsigned count; unsigned waiters; + uint64_t sequence; pipe_mutex mutex; pipe_condvar condvar; } pipe_barrier; @@ -310,6 +311,7 @@ static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count) { barrier->count = count; barrier->waiters = 0; + barrier->sequence = 0; pipe_mutex_init(barrier->mutex); pipe_condvar_init(barrier->condvar); } @@ -329,9 +331,14 @@ static INLINE void pipe_barrier_wait(pipe_barrier *barrier) barrier->waiters++; if (barrier->waiters < barrier->count) { - pipe_condvar_wait(barrier->condvar, barrier->mutex); + uint64_t sequence = barrier->sequence; + + do { + pipe_condvar_wait(barrier->condvar, barrier->mutex); + } while (sequence == barrier->sequence); } else { barrier->waiters = 0; + barrier->sequence++; pipe_condvar_broadcast(barrier->condvar); } diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt index 080fd4c731..5d9eed9258 100644 --- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt +++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt @@ -661,25 +661,6 @@ TGSI Instruction Specification TBD -1.9.8 BGNFOR - Begin a For-Loop - - dst.x = floor(src.x) - dst.y = floor(src.y) - dst.z = floor(src.z) - - if (dst.y <= 0) - pc = [matching ENDFOR] + 1 - endif - - Note: The destination must be a loop register. - The source must be a constant register. - - -1.9.9 REP - Repeat - - TBD - - 1.9.10 ELSE - Else TBD @@ -690,23 +671,6 @@ TGSI Instruction Specification TBD -1.9.12 ENDFOR - End a For-Loop - - dst.x = dst.x + dst.z - dst.y = dst.y - 1.0 - - if (dst.y > 0) - pc = [matching BGNFOR instruction] + 1 - endif - - Note: The destination must be a loop register. - - -1.9.13 ENDREP - End Repeat - - TBD - - 1.10 GL_NV_vertex_program3 --------------------------- diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 57031419f8..8300020018 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -586,7 +586,6 @@ iter_instruction( /* update indentation */ if (inst->Instruction.Opcode == TGSI_OPCODE_IF || inst->Instruction.Opcode == TGSI_OPCODE_ELSE || - inst->Instruction.Opcode == TGSI_OPCODE_BGNFOR || inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) { ctx->indentation += indent_spaces; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 11045e4ba2..82eac05dc4 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -3186,14 +3186,6 @@ exec_instruction( *pc = -1; break; - case TGSI_OPCODE_REP: - assert (0); - break; - - case TGSI_OPCODE_ENDREP: - assert (0); - break; - case TGSI_OPCODE_PUSHA: assert (0); break; @@ -3258,29 +3250,6 @@ exec_instruction( emit_primitive(mach); break; - case TGSI_OPCODE_BGNFOR: - assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - for (chan_index = 0; chan_index < 3; chan_index++) { - FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); - } - ++mach->LoopCounterStackTop; - STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X); - /* update LoopMask */ - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { - mach->LoopMask &= ~0x1; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { - mach->LoopMask &= ~0x2; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { - mach->LoopMask &= ~0x4; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { - mach->LoopMask &= ~0x8; - } - /* TODO: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); - /* fall-through (for now) */ case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); @@ -3295,56 +3264,6 @@ exec_instruction( mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; break; - case TGSI_OPCODE_ENDFOR: - assert(mach->LoopCounterStackTop > 0); - micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], - &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); - /* update LoopMask */ - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { - mach->LoopMask &= ~0x1; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { - mach->LoopMask &= ~0x2; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { - mach->LoopMask &= ~0x4; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { - mach->LoopMask &= ~0x8; - } - micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); - assert(mach->LoopLabelStackTop > 0); - inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; - STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X); - /* Restore ContMask, but don't pop */ - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; - UPDATE_EXEC_MASK(mach); - if (mach->ExecMask) { - /* repeat loop: jump to instruction just past BGNLOOP */ - assert(mach->LoopLabelStackTop > 0); - *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; - } - else { - /* exit loop: pop LoopMask */ - assert(mach->LoopStackTop > 0); - mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - /* pop ContMask */ - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[--mach->ContStackTop]; - assert(mach->LoopLabelStackTop > 0); - --mach->LoopLabelStackTop; - assert(mach->LoopCounterStackTop > 0); - --mach->LoopCounterStackTop; - - mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; - } - UPDATE_EXEC_MASK(mach); - break; - case TGSI_OPCODE_ENDLOOP: /* Restore ContMask, but don't pop */ assert(mach->ContStackTop > 0); diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index de0e09cdba..cfa2f631bd 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -106,12 +106,12 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 2, 1, 0, 0, 0, "TXL", TGSI_OPCODE_TXL }, { 0, 0, 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK }, { 0, 1, 0, 1, 0, 1, "IF", TGSI_OPCODE_IF }, - { 1, 1, 0, 0, 0, 1, "BGNFOR", TGSI_OPCODE_BGNFOR }, - { 0, 1, 0, 0, 0, 1, "REP", TGSI_OPCODE_REP }, + { 1, 1, 0, 0, 0, 1, "", 75 }, /* removed */ + { 0, 1, 0, 0, 0, 1, "", 76 }, /* removed */ { 0, 0, 0, 1, 1, 1, "ELSE", TGSI_OPCODE_ELSE }, { 0, 0, 0, 0, 1, 0, "ENDIF", TGSI_OPCODE_ENDIF }, - { 1, 0, 0, 0, 1, 0, "ENDFOR", TGSI_OPCODE_ENDFOR }, - { 0, 0, 0, 0, 1, 0, "ENDREP", TGSI_OPCODE_ENDREP }, + { 1, 0, 0, 0, 1, 0, "", 79 }, /* removed */ + { 0, 0, 0, 0, 1, 0, "", 80 }, /* removed */ { 0, 1, 0, 0, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA }, { 1, 0, 0, 0, 0, 0, "POPA", TGSI_OPCODE_POPA }, { 1, 1, 0, 0, 0, 0, "CEIL", TGSI_OPCODE_CEIL }, diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index e4af15c156..e472947507 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -111,12 +111,8 @@ OP12(DP2) OP12_TEX(TXL) OP00(BRK) OP01_LBL(IF) -OP11(BGNFOR) -OP01(REP) OP00_LBL(ELSE) OP00(ENDIF) -OP10(ENDFOR) -OP00(ENDREP) OP01(PUSHA) OP10(POPA) OP11(CEIL) diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 371f690b29..76b7564cc3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -346,25 +346,6 @@ iter_instruction( } } - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_BGNFOR: - case TGSI_OPCODE_ENDFOR: - if (inst->Dst[0].Register.File != TGSI_FILE_LOOP || - inst->Dst[0].Register.Index != 0) { - report_error(ctx, "Destination register must be LOOP[0]"); - } - break; - } - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_BGNFOR: - if (inst->Src[0].Register.File != TGSI_FILE_CONSTANT && - inst->Src[0].Register.File != TGSI_FILE_IMMEDIATE) { - report_error(ctx, "Source register file must be either CONST or IMM"); - } - break; - } - ctx->num_instructions++; return TRUE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index a85cc4659e..1071298b49 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -2533,14 +2533,6 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_BGNFOR: - return 0; - break; - - case TGSI_OPCODE_REP: - return 0; - break; - case TGSI_OPCODE_ELSE: return 0; break; @@ -2549,14 +2541,6 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_ENDFOR: - return 0; - break; - - case TGSI_OPCODE_ENDREP: - return 0; - break; - case TGSI_OPCODE_PUSHA: return 0; break; diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index 54ed2c1a4b..edd95e0788 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -76,7 +76,8 @@ struct translate { void (*set_buffer)( struct translate *, unsigned i, const void *ptr, - unsigned stride ); + unsigned stride, + unsigned max_index ); void (PIPE_CDECL *run_elts)( struct translate *, const unsigned *elts, diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index c3ec9ae3f4..a9272fbb49 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -31,6 +31,7 @@ */ #include "util/u_memory.h" +#include "util/u_math.h" #include "pipe/p_state.h" #include "translate.h" @@ -58,6 +59,7 @@ struct translate_generic { char *input_ptr; unsigned input_stride; + unsigned max_index; } attrib[PIPE_MAX_ATTRIBS]; @@ -588,19 +590,22 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, for (attr = 0; attr < nr_attrs; attr++) { float data[4]; const char *src; + unsigned index; char *dst = (vert + tg->attrib[attr].output_offset); if (tg->attrib[attr].instance_divisor) { - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * - (instance_id / tg->attrib[attr].instance_divisor); + index = instance_id / tg->attrib[attr].instance_divisor; } else { - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * elt; + index = elt; } + index = MIN2(index, tg->attrib[attr].max_index); + + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * index; + tg->attrib[attr].fetch( src, data ); if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n", @@ -670,7 +675,8 @@ static void PIPE_CDECL generic_run( struct translate *translate, static void generic_set_buffer( struct translate *translate, unsigned buf, const void *ptr, - unsigned stride ) + unsigned stride, + unsigned max_index ) { struct translate_generic *tg = translate_generic(translate); unsigned i; @@ -680,6 +686,7 @@ static void generic_set_buffer( struct translate *translate, tg->attrib[i].input_ptr = ((char *)ptr + tg->attrib[i].input_offset); tg->attrib[i].input_stride = stride; + tg->attrib[i].max_index = max_index; } } } diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index c13e742738..ef3aa674a3 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -61,6 +61,7 @@ typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate, struct translate_buffer { const void *base_ptr; unsigned stride; + unsigned max_index; }; struct translate_buffer_varient { @@ -423,6 +424,11 @@ static boolean init_inputs( struct translate_sse *p, } else { x86_mov(p->func, tmp_EAX, elt); } + + /* + * TODO: Respect translate_buffer::max_index. + */ + x86_imul(p->func, tmp_EAX, buf_stride); x86_add(p->func, tmp_EAX, buf_base_ptr); @@ -666,13 +672,15 @@ static boolean build_vertex_emit( struct translate_sse *p, static void translate_sse_set_buffer( struct translate *translate, unsigned buf, const void *ptr, - unsigned stride ) + unsigned stride, + unsigned max_index ) { struct translate_sse *p = (struct translate_sse *)translate; if (buf < p->nr_buffers) { p->buffer[buf].base_ptr = (char *)ptr; p->buffer[buf].stride = stride; + p->buffer[buf].max_index = max_index; } if (0) debug_printf("%s %d/%d: %p %d\n", diff --git a/src/gallium/auxiliary/util/u_caps.c b/src/gallium/auxiliary/util/u_caps.c new file mode 100644 index 0000000000..c7c1e830e0 --- /dev/null +++ b/src/gallium/auxiliary/util/u_caps.c @@ -0,0 +1,245 @@ +/************************************************************************** + * + * Copyright 2010 Vmware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_screen.h" +#include "util/u_format.h" +#include "util/u_debug.h" +#include "u_caps.h" + +/** + * Iterates over a list of caps checks as defined in u_caps.h. Should + * all checks pass returns TRUE and out is set to the last element of + * the list (TERMINATE). Should any check fail returns FALSE and set + * out to the index of the start of the first failing check. + */ +boolean +util_check_caps_out(struct pipe_screen *screen, const unsigned *list, int *out) +{ + int i, tmpi; + float tmpf; + + for (i = 0; list[i];) { + switch(list[i++]) { + case UTIL_CAPS_CHECK_CAP: + if (!screen->get_param(screen, list[i++])) { + *out = i - 2; + return FALSE; + } + break; + case UTIL_CAPS_CHECK_INT: + tmpi = screen->get_param(screen, list[i++]); + if (tmpi < (int)list[i++]) { + *out = i - 3; + return FALSE; + } + break; + case UTIL_CAPS_CHECK_FLOAT: + tmpf = screen->get_paramf(screen, list[i++]); + if (tmpf < (float)list[i++]) { + *out = i - 3; + return FALSE; + } + break; + case UTIL_CAPS_CHECK_FORMAT: + if (!screen->is_format_supported(screen, + list[i++], + PIPE_TEXTURE_2D, + PIPE_BIND_SAMPLER_VIEW, + 0)) { + *out = i - 2; + return FALSE; + } + break; + case UTIL_CAPS_CHECK_UNIMPLEMENTED: + *out = i - 1; + return FALSE; + default: + assert(!"Unsupported check"); + return FALSE; + } + } + + *out = i; + return TRUE; +} + +/** + * Iterates over a list of caps checks as defined in u_caps.h. + * Returns TRUE if all caps checks pass returns FALSE otherwise. + */ +boolean +util_check_caps(struct pipe_screen *screen, const unsigned *list) +{ + int out; + return util_check_caps_out(screen, list, &out); +} + + +/* + * Below follows some demo lists. + * + * None of these lists are exhausting lists of what is + * actually needed to support said API and more here for + * as example on how to uses the above functions. Especially + * for DX10 and DX11 where Gallium is missing features. + */ + +/* DX 9_1 */ +static unsigned caps_dx_9_1[] = { + UTIL_CHECK_INT(MAX_RENDER_TARGETS, 1), + UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 12), /* 2048 */ + UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */ + UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */ + UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 2), + UTIL_CHECK_TERMINATE +}; + +/* DX 9_2 */ +static unsigned caps_dx_9_2[] = { + UTIL_CHECK_CAP(OCCLUSION_QUERY), + UTIL_CHECK_CAP(BLEND_EQUATION_SEPARATE), + UTIL_CHECK_INT(MAX_RENDER_TARGETS, 1), + UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 12), /* 2048 */ + UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */ + UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */ + UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16), + UTIL_CHECK_TERMINATE +}; + +/* DX 9_3 */ +static unsigned caps_dx_9_3[] = { + UTIL_CHECK_CAP(SM3), + //UTIL_CHECK_CAP(INSTANCING), + UTIL_CHECK_CAP(OCCLUSION_QUERY), + UTIL_CHECK_INT(MAX_RENDER_TARGETS, 4), + UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 13), /* 4096 */ + UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 9), /* 256 */ + UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 10), /* 512 */ + UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16), + UTIL_CHECK_TERMINATE +}; + +/* DX 10 */ +static unsigned caps_dx_10[] = { + UTIL_CHECK_CAP(SM3), + //UTIL_CHECK_CAP(INSTANCING), + UTIL_CHECK_CAP(OCCLUSION_QUERY), + UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8), + UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 14), /* 8192 */ + UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 12), /* 2048 */ + UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 14), /* 8192 */ + UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16), + UTIL_CHECK_UNIMPLEMENTED, /* XXX Unimplemented features in Gallium */ + UTIL_CHECK_TERMINATE +}; + +/* DX11 */ +static unsigned caps_dx_11[] = { + UTIL_CHECK_CAP(SM3), + //UTIL_CHECK_CAP(INSTANCING), + UTIL_CHECK_CAP(OCCLUSION_QUERY), + UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8), + UTIL_CHECK_INT(MAX_TEXTURE_2D_LEVELS, 14), /* 16384 */ + UTIL_CHECK_INT(MAX_TEXTURE_3D_LEVELS, 12), /* 2048 */ + UTIL_CHECK_INT(MAX_TEXTURE_CUBE_LEVELS, 14), /* 16384 */ + UTIL_CHECK_FLOAT(MAX_TEXTURE_ANISOTROPY, 16), + UTIL_CHECK_FORMAT(B8G8R8A8_UNORM), + UTIL_CHECK_UNIMPLEMENTED, /* XXX Unimplemented features in Gallium */ + UTIL_CHECK_TERMINATE +}; + +/* OpenGL 2.1 */ +static unsigned caps_opengl_2_1[] = { + UTIL_CHECK_CAP(GLSL), + UTIL_CHECK_CAP(OCCLUSION_QUERY), + UTIL_CHECK_CAP(TWO_SIDED_STENCIL), + UTIL_CHECK_CAP(BLEND_EQUATION_SEPARATE), + UTIL_CHECK_INT(MAX_RENDER_TARGETS, 2), + UTIL_CHECK_TERMINATE +}; + +/* OpenGL 3.0 */ +/* UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8), */ + + +/** + * Demo function which checks against theoretical caps needed for different APIs. + */ +void util_caps_demo_print(struct pipe_screen *screen) +{ + struct { + char* name; + unsigned *list; + } list[] = { + {"DX 9.1", caps_dx_9_1}, + {"DX 9.2", caps_dx_9_2}, + {"DX 9.3", caps_dx_9_3}, + {"DX 10", caps_dx_10}, + {"DX 11", caps_dx_11}, + {"OpenGL 2.1", caps_opengl_2_1}, +/* {"OpenGL 3.0", caps_opengl_3_0},*/ + {NULL, NULL} + }; + int i, out = 0; + + for (i = 0; list[i].name; i++) { + if (util_check_caps_out(screen, list[i].list, &out)) { + debug_printf("%s: %s yes\n", __FUNCTION__, list[i].name); + continue; + } + switch (list[i].list[out]) { + case UTIL_CAPS_CHECK_CAP: + debug_printf("%s: %s no (cap %u not supported)\n", __FUNCTION__, + list[i].name, + list[i].list[out + 1]); + break; + case UTIL_CAPS_CHECK_INT: + debug_printf("%s: %s no (cap %u less then %u)\n", __FUNCTION__, + list[i].name, + list[i].list[out + 1], + list[i].list[out + 2]); + break; + case UTIL_CAPS_CHECK_FLOAT: + debug_printf("%s: %s no (cap %u less then %f)\n", __FUNCTION__, + list[i].name, + list[i].list[out + 1], + (double)(int)list[i].list[out + 2]); + break; + case UTIL_CAPS_CHECK_FORMAT: + debug_printf("%s: %s no (format %s not supported)\n", __FUNCTION__, + list[i].name, + util_format_name(list[i].list[out + 1]) + 12); + break; + case UTIL_CAPS_CHECK_UNIMPLEMENTED: + debug_printf("%s: %s no (not implemented in gallium or state tracker)\n", + __FUNCTION__, list[i].name); + break; + default: + assert(!"Unsupported check"); + } + } +} diff --git a/src/gallium/auxiliary/util/u_caps.h b/src/gallium/auxiliary/util/u_caps.h new file mode 100644 index 0000000000..b1074f9eb2 --- /dev/null +++ b/src/gallium/auxiliary/util/u_caps.h @@ -0,0 +1,67 @@ +/************************************************************************** + * + * Copyright 2010 Vmware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_CAPS_H +#define U_CAPS_H + +#include "pipe/p_compiler.h" + +struct pipe_screen; + +enum u_caps_check_enum { + UTIL_CAPS_CHECK_TERMINATE = 0, + UTIL_CAPS_CHECK_CAP, + UTIL_CAPS_CHECK_INT, + UTIL_CAPS_CHECK_FLOAT, + UTIL_CAPS_CHECK_FORMAT, + UTIL_CAPS_CHECK_UNIMPLEMENTED, +}; + +#define UTIL_CHECK_CAP(cap) \ + UTIL_CAPS_CHECK_CAP, PIPE_CAP_##cap + +#define UTIL_CHECK_INT(cap, higher) \ + UTIL_CAPS_CHECK_INT, PIPE_CAP_##cap, (unsigned)(higher) + +/* Floats currently lose precision */ +#define UTIL_CHECK_FLOAT(cap, higher) \ + UTIL_CAPS_CHECK_FLOAT, PIPE_CAP_##cap, (unsigned)(int)(higher) + +#define UTIL_CHECK_FORMAT(format) \ + UTIL_CAPS_CHECK_FORMAT, PIPE_FORMAT_##format + +#define UTIL_CHECK_UNIMPLEMENTED \ + UTIL_CAPS_CHECK_UNIMPLEMENTED + +#define UTIL_CHECK_TERMINATE \ + UTIL_CAPS_CHECK_TERMINATE + +boolean util_check_caps(struct pipe_screen *screen, const unsigned *list); +boolean util_check_caps_out(struct pipe_screen *screen, const unsigned *list, int *out); +void util_caps_demo_print(struct pipe_screen *screen); + +#endif diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index dd044973f9..0de38e791d 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -74,6 +74,24 @@ void debug_print_blob( const char *name, #endif +static boolean +debug_get_option_should_print(void) +{ + static boolean first = TRUE; + static boolean value = FALSE; + + if (!first) + return value; + + /* Oh hey this will call into this function, + * but its cool since we set first to false + */ + first = FALSE; + value = debug_get_bool_option("GALLIUM_PRINT_OPTIONS", TRUE); + /* XXX should we print this option? Currently it wont */ + return value; +} + const char * debug_get_option(const char *name, const char *dfault) { @@ -82,8 +100,9 @@ debug_get_option(const char *name, const char *dfault) result = os_get_option(name); if(!result) result = dfault; - - debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? result : "(null)"); + + if (debug_get_option_should_print()) + debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? result : "(null)"); return result; } @@ -109,7 +128,8 @@ debug_get_bool_option(const char *name, boolean dfault) else result = TRUE; - debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? "TRUE" : "FALSE"); + if (debug_get_option_should_print()) + debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? "TRUE" : "FALSE"); return result; } @@ -142,8 +162,9 @@ debug_get_num_option(const char *name, long dfault) } result *= sign; } - - debug_printf("%s: %s = %li\n", __FUNCTION__, name, result); + + if (debug_get_option_should_print()) + debug_printf("%s: %s = %li\n", __FUNCTION__, name, result); return result; } @@ -176,11 +197,12 @@ debug_get_flags_option(const char *name, } } - if (str) { - debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str); - } - else { - debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result); + if (debug_get_option_should_print()) { + if (str) { + debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str); + } else { + debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result); + } } return result; diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h index b6d0b508e3..e8ff2773e6 100644 --- a/src/gallium/auxiliary/util/u_debug.h +++ b/src/gallium/auxiliary/util/u_debug.h @@ -303,6 +303,45 @@ debug_get_flags_option(const char *name, const struct debug_named_value *flags, unsigned long dfault); +#define DEBUG_GET_ONCE_BOOL_OPTION(sufix, name, dfault) \ +static boolean \ +debug_get_option_ ## sufix (void) \ +{ \ + static boolean first = TRUE; \ + static boolean value; \ + if (first) { \ + first = FALSE; \ + value = debug_get_bool_option(name, dfault); \ + } \ + return value; \ +} + +#define DEBUG_GET_ONCE_NUM_OPTION(sufix, name, dfault) \ +static long \ +debug_get_option_ ## sufix (void) \ +{ \ + static boolean first = TRUE; \ + static long value; \ + if (first) { \ + first = FALSE; \ + value = debug_get_num_option(name, dfault); \ + } \ + return value; \ +} + +#define DEBUG_GET_ONCE_FLAGS_OPTION(sufix, name, flags, dfault) \ +static unsigned long \ +debug_get_option_ ## sufix (void) \ +{ \ + static boolean first = TRUE; \ + static unsigned long value; \ + if (first) { \ + first = FALSE; \ + value = debug_get_flags_option(name, flags, dfault); \ + } \ + return value; \ +} + unsigned long debug_memory_begin(void); diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c index c134f13e90..2ce643e90c 100644 --- a/src/gallium/auxiliary/util/u_dump_state.c +++ b/src/gallium/auxiliary/util/u_dump_state.c @@ -656,12 +656,12 @@ util_dump_transfer(struct os_stream *stream, const struct pipe_transfer *state) util_dump_struct_begin(stream, "pipe_transfer"); util_dump_member(stream, ptr, state, resource); -// util_dump_member(stream, uint, state, box); + /*util_dump_member(stream, uint, state, box);*/ util_dump_member(stream, uint, state, stride); util_dump_member(stream, uint, state, slice_stride); -// util_dump_member(stream, ptr, state, data); + /*util_dump_member(stream, ptr, state, data);*/ util_dump_struct_end(stream); } diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index 5e3dc694be..fb6ade5c06 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -192,6 +192,7 @@ struct util_format_description /** * Unpack pixel blocks to R8G8B8A8_UNORM. + * Note: strides are in bytes. * * Only defined for non-depth-stencil formats. */ @@ -202,6 +203,7 @@ struct util_format_description /** * Pack pixel blocks from R8G8B8A8_UNORM. + * Note: strides are in bytes. * * Only defined for non-depth-stencil formats. */ @@ -212,6 +214,7 @@ struct util_format_description /** * Unpack pixel blocks to R32G32B32A32_FLOAT. + * Note: strides are in bytes. * * Only defined for non-depth-stencil formats. */ @@ -222,6 +225,7 @@ struct util_format_description /** * Pack pixel blocks from R32G32B32A32_FLOAT. + * Note: strides are in bytes. * * Only defined for non-depth-stencil formats. */ @@ -242,6 +246,7 @@ struct util_format_description /** * Unpack pixels to Z32_UNORM. + * Note: strides are in bytes. * * Only defined for depth formats. */ @@ -252,6 +257,7 @@ struct util_format_description /** * Pack pixels from Z32_FLOAT. + * Note: strides are in bytes. * * Only defined for depth formats. */ @@ -262,6 +268,7 @@ struct util_format_description /** * Unpack pixels to Z32_FLOAT. + * Note: strides are in bytes. * * Only defined for depth formats. */ @@ -272,6 +279,7 @@ struct util_format_description /** * Pack pixels from Z32_FLOAT. + * Note: strides are in bytes. * * Only defined for depth formats. */ @@ -282,6 +290,7 @@ struct util_format_description /** * Unpack pixels to S8_USCALED. + * Note: strides are in bytes. * * Only defined for stencil formats. */ @@ -292,6 +301,7 @@ struct util_format_description /** * Pack pixels from S8_USCALED. + * Note: strides are in bytes. * * Only defined for stencil formats. */ @@ -322,7 +332,7 @@ util_format_name(enum pipe_format format) assert(desc); if (!desc) { - return "???"; + return "PIPE_FORMAT_???"; } return desc->name; diff --git a/src/gallium/auxiliary/util/u_format_s3tc.c b/src/gallium/auxiliary/util/u_format_s3tc.c index 79dee2b423..5b279b8fe2 100644 --- a/src/gallium/auxiliary/util/u_format_s3tc.c +++ b/src/gallium/auxiliary/util/u_format_s3tc.c @@ -233,108 +233,80 @@ util_format_dxt5_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned * Block decompression. */ -void -util_format_dxt1_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) -{ +static INLINE void +util_format_dxtn_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height, + util_format_dxtn_fetch_t fetch, + unsigned block_size) +{ + const unsigned bw = 4, bh = 4, comps = 4; unsigned x, y, i, j; - for(y = 0; y < height; y += 4) { + for(y = 0; y < height; y += bh) { const uint8_t *src = src_row; - for(x = 0; x < width; x += 4) { - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; - util_format_dxt1_rgb_fetch(0, src, i, j, dst); + for(x = 0; x < width; x += bw) { + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*comps; + fetch(0, src, i, j, dst); } } - src += 8; + src += block_size; } src_row += src_stride; } } void -util_format_dxt1_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt1_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) { - unsigned x, y, i, j; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; - for(x = 0; x < width; x += 4) { - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; - util_format_dxt1_rgba_fetch(0, src, i, j, dst); - } - } - src += 8; - } - src_row += src_stride; - } + util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt1_rgb_fetch, 8); } void -util_format_dxt3_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt1_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) { - unsigned x, y, i, j; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; - for(x = 0; x < width; x += 4) { - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; - util_format_dxt3_rgba_fetch(0, src, i, j, dst); - } - } - src += 16; - } - src_row += src_stride; - } + util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt1_rgba_fetch, 8); } void -util_format_dxt5_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt3_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) { - unsigned x, y, i, j; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; - for(x = 0; x < width; x += 4) { - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - uint8_t *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; - util_format_dxt5_rgba_fetch(0, src, i, j, dst); - } - } - src += 16; - } - src_row += src_stride; - } + util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt3_rgba_fetch, 16); } void -util_format_dxt1_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt5_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) { - unsigned x, y, i, j; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; - for(x = 0; x < width; x += 4) { - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; - uint8_t tmp[4]; - util_format_dxt1_rgb_fetch(0, src, i, j, tmp); - dst[0] = ubyte_to_float(tmp[0]); - dst[1] = ubyte_to_float(tmp[1]); - dst[2] = ubyte_to_float(tmp[2]); - dst[3] = 1.0; - } - } - src += 8; - } - src_row += src_stride; - } + util_format_dxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt5_rgba_fetch, 16); } -void -util_format_dxt1_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +static INLINE void +util_format_dxtn_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height, + util_format_dxtn_fetch_t fetch, + unsigned block_size) { unsigned x, y, i, j; for(y = 0; y < height; y += 4) { @@ -344,65 +316,61 @@ util_format_dxt1_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, con for(i = 0; i < 4; ++i) { float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; uint8_t tmp[4]; - util_format_dxt1_rgba_fetch(0, src, i, j, tmp); + fetch(0, src, i, j, tmp); dst[0] = ubyte_to_float(tmp[0]); dst[1] = ubyte_to_float(tmp[1]); dst[2] = ubyte_to_float(tmp[2]); dst[3] = ubyte_to_float(tmp[3]); } } - src += 8; + src += block_size; } src_row += src_stride; } } void -util_format_dxt3_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt1_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) { - unsigned x, y, i, j; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; - for(x = 0; x < width; x += 4) { - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; - uint8_t tmp[4]; - util_format_dxt3_rgba_fetch(0, src, i, j, tmp); - dst[0] = ubyte_to_float(tmp[0]); - dst[1] = ubyte_to_float(tmp[1]); - dst[2] = ubyte_to_float(tmp[2]); - dst[3] = ubyte_to_float(tmp[3]); - } - } - src += 16; - } - src_row += src_stride; - } + util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt1_rgb_fetch, 8); } void -util_format_dxt5_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt1_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) { - unsigned x, y, i, j; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; - for(x = 0; x < width; x += 4) { - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i)*4; - uint8_t tmp[4]; - util_format_dxt5_rgba_fetch(0, src, i, j, tmp); - dst[0] = ubyte_to_float(tmp[0]); - dst[1] = ubyte_to_float(tmp[1]); - dst[2] = ubyte_to_float(tmp[2]); - dst[3] = ubyte_to_float(tmp[3]); - } - } - src += 16; - } - src_row += src_stride; - } + util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt1_rgba_fetch, 8); +} + +void +util_format_dxt3_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt3_rgba_fetch, 16); +} + +void +util_format_dxt5_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_dxtn_rgb_unpack_rgba_float(dst_row, dst_stride, + src_row, src_stride, + width, height, + util_format_dxt5_rgba_fetch, 16); } @@ -411,201 +379,198 @@ util_format_dxt5_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, con */ void -util_format_dxt1_rgb_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt1_rgb_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height) { + const unsigned bw = 4, bh = 4, bytes_per_block = 8; unsigned x, y, i, j, k; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; + for(y = 0; y < height; y += bh) { uint8_t *dst = dst_row; - for(x = 0; x < width; x += 4) { - uint8_t tmp[4][4][3]; - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { + for(x = 0; x < width; x += bw) { + uint8_t tmp[4][4][3]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { for(k = 0; k < 3; ++k) { - tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k]; + tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*4 + k]; } } } - util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, dst_stride); - src += 4*4; - dst += 8; + util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, 0); + dst += bytes_per_block; } - src_row += src_stride; - dst_row += 4*dst_stride/sizeof(*dst_row); + dst_row += dst_stride / sizeof(*dst_row); } } void -util_format_dxt1_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt1_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height) { + const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 8; unsigned x, y, i, j, k; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; + for(y = 0; y < height; y += bh) { uint8_t *dst = dst_row; - for(x = 0; x < width; x += 4) { - uint8_t tmp[4][4][4]; - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - for(k = 0; k < 4; ++k) { - tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k]; + for(x = 0; x < width; x += bw) { + uint8_t tmp[4][4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + for(k = 0; k < comps; ++k) { + tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k]; } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, dst_stride); - src += 4*4; - dst += 8; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, 0); + dst += bytes_per_block; } - src_row += src_stride; - dst_row += 4*dst_stride/sizeof(*dst_row); + dst_row += dst_stride / sizeof(*dst_row); } } void -util_format_dxt3_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt3_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height) { + const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 16; unsigned x, y, i, j, k; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; + for(y = 0; y < height; y += bh) { uint8_t *dst = dst_row; - for(x = 0; x < width; x += 4) { - uint8_t tmp[4][4][4]; - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - for(k = 0; k < 4; ++k) { - tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k]; + for(x = 0; x < width; x += bw) { + uint8_t tmp[4][4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + for(k = 0; k < comps; ++k) { + tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k]; } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, dst_stride); - src += 4*4; - dst += 16; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, 0); + dst += bytes_per_block; } - src_row += src_stride; - dst_row += 4*dst_stride/sizeof(*dst_row); + dst_row += dst_stride / sizeof(*dst_row); } } void -util_format_dxt5_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt5_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height) { + const unsigned bw = 4, bh = 4, comps = 4, bytes_per_block = 16; unsigned x, y, i, j, k; - for(y = 0; y < height; y += 4) { - const uint8_t *src = src_row; + + for(y = 0; y < height; y += bh) { uint8_t *dst = dst_row; - for(x = 0; x < width; x += 4) { - uint8_t tmp[4][4][4]; - for(j = 0; j < 4; ++j) { - for(i = 0; i < 4; ++i) { - for(k = 0; k < 4; ++k) { - tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + i*4 + k]; + for(x = 0; x < width; x += bw) { + uint8_t tmp[4][4][4]; /* [bh][bw][comps] */ + for(j = 0; j < bh; ++j) { + for(i = 0; i < bw; ++i) { + for(k = 0; k < comps; ++k) { + tmp[j][i][k] = src[(y + j)*src_stride/sizeof(*src) + (x + i)*comps + k]; } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, dst_stride); - src += 4*4; - dst += 16; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, 0); + dst += bytes_per_block; } - src_row += src_stride; - dst_row += 4*dst_stride/sizeof(*dst_row); + dst_row += dst_stride / sizeof(*dst_row); } } void -util_format_dxt1_rgb_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt1_rgb_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src, unsigned src_stride, + unsigned width, unsigned height) { unsigned x, y, i, j, k; for(y = 0; y < height; y += 4) { - const float *src = src_row; uint8_t *dst = dst_row; for(x = 0; x < width; x += 4) { uint8_t tmp[4][4][3]; for(j = 0; j < 4; ++j) { for(i = 0; i < 4; ++i) { for(k = 0; k < 3; ++k) { - tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]); + tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]); } } } - util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, dst_stride); - src += 4*4; + util_format_dxtn_pack(3, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGB, dst, 0); dst += 8; } - src_row += src_stride; dst_row += 4*dst_stride/sizeof(*dst_row); } } void -util_format_dxt1_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt1_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src, unsigned src_stride, + unsigned width, unsigned height) { unsigned x, y, i, j, k; for(y = 0; y < height; y += 4) { - const float *src = src_row; uint8_t *dst = dst_row; for(x = 0; x < width; x += 4) { uint8_t tmp[4][4][4]; for(j = 0; j < 4; ++j) { for(i = 0; i < 4; ++i) { for(k = 0; k < 4; ++k) { - tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]); + tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]); } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, dst_stride); - src += 4*4; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT1_RGBA, dst, 0); dst += 8; } - src_row += src_stride; dst_row += 4*dst_stride/sizeof(*dst_row); } } void -util_format_dxt3_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt3_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src, unsigned src_stride, + unsigned width, unsigned height) { unsigned x, y, i, j, k; for(y = 0; y < height; y += 4) { - const float *src = src_row; uint8_t *dst = dst_row; for(x = 0; x < width; x += 4) { uint8_t tmp[4][4][4]; for(j = 0; j < 4; ++j) { for(i = 0; i < 4; ++i) { for(k = 0; k < 4; ++k) { - tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]); + tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]); } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, dst_stride); - src += 4*4; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT3_RGBA, dst, 0); dst += 16; } - src_row += src_stride; dst_row += 4*dst_stride/sizeof(*dst_row); } } void -util_format_dxt5_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height) +util_format_dxt5_rgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, + const float *src, unsigned src_stride, + unsigned width, unsigned height) { unsigned x, y, i, j, k; for(y = 0; y < height; y += 4) { - const float *src = src_row; uint8_t *dst = dst_row; for(x = 0; x < width; x += 4) { uint8_t tmp[4][4][4]; for(j = 0; j < 4; ++j) { for(i = 0; i < 4; ++i) { for(k = 0; k < 4; ++k) { - tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + i*4 + k]); + tmp[j][i][k] = float_to_ubyte(src[(y + j)*src_stride/sizeof(*src) + (x+i)*4 + k]); } } } - util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, dst_stride); - src += 4*4; + util_format_dxtn_pack(4, 4, 4, &tmp[0][0][0], UTIL_FORMAT_DXT5_RGBA, dst, 0); dst += 16; } - src_row += src_stride; dst_row += 4*dst_stride/sizeof(*dst_row); } } diff --git a/src/gallium/auxiliary/util/u_surfaces.c b/src/gallium/auxiliary/util/u_surfaces.c index 0be4609a20..668da8c5c2 100644 --- a/src/gallium/auxiliary/util/u_surfaces.c +++ b/src/gallium/auxiliary/util/u_surfaces.c @@ -9,13 +9,13 @@ static unsigned hash(void *key) { - return (unsigned)key; + return (unsigned)(uintptr_t)key; } static int compare(void *key1, void *key2) { - return (unsigned)key1 - (unsigned)key2; + return (unsigned)(uintptr_t)key1 - (unsigned)(uintptr_t)key2; } struct pipe_surface * @@ -67,7 +67,7 @@ util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps) struct pipe_resource *pt = ps->texture; if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) { /* or 2D array */ - void* key = (void*)(((ps->zslice + ps->face) << 8) | ps->level); + void* key = (void*)(uintptr_t)(((ps->zslice + ps->face) << 8) | ps->level); util_hash_table_remove(us->u.table, key); } else @@ -105,7 +105,7 @@ util_surfaces_destroy(struct util_surfaces *us, struct pipe_resource *pt, void ( if(ps) destroy_surface(ps); } - free(us->u.array); + FREE(us->u.array); us->u.array = NULL; } } diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index fe327c302b..f7aa1403d0 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -544,7 +544,7 @@ pipe_put_tile_z(struct pipe_context *pipe, case PIPE_FORMAT_Z24_UNORM_S8_USCALED: { uint *pDest = (uint *) (map + y * pt->stride + x*4); - //assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE); + /*assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);*/ for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 24-bit Z, preserve stencil */ @@ -571,7 +571,7 @@ pipe_put_tile_z(struct pipe_context *pipe, case PIPE_FORMAT_S8_USCALED_Z24_UNORM: { uint *pDest = (uint *) (map + y * pt->stride + x*4); - //assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE); + /*assert((pt->usage & PIPE_TRANSFER_READ_WRITE) == PIPE_TRANSFER_READ_WRITE);*/ for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 24-bit Z, preserve stencil */ |